diff --git a/common/meson.build b/common/meson.build
index dc2f62af..6348dce5 100644
--- a/common/meson.build
+++ b/common/meson.build
@@ -5,4 +5,7 @@ libui_sources += [
 	'common/controls.c',
 	'common/errors.c',
 	'common/main.c',
+	'common/utf8.c',
+
+	'common/third_party/utf.c',
 ]
diff --git a/zOLD_common/utf.c b/common/third_party/utf.c
similarity index 100%
rename from zOLD_common/utf.c
rename to common/third_party/utf.c
diff --git a/zOLD_common/utf.h b/common/third_party/utf.h
similarity index 100%
rename from zOLD_common/utf.h
rename to common/third_party/utf.h
diff --git a/common/uipriv.h b/common/uipriv.h
index cb661dc5..7cb3b5a1 100644
--- a/common/uipriv.h
+++ b/common/uipriv.h
@@ -75,6 +75,10 @@ extern void uiprivReportError(const char *prefix, const char *msg, const char *s
 extern bool uiprivOSVtableValid(const uiControlOSVtable *osVtable, const char *func);
 extern uiControlOSVtable *uiprivCloneOSVtable(const uiControlOSVtable *osVtable);
 
+// utf8.c
+extern char *uiprivSanitizeUTF8(const char *str);
+extern void uiprivFreeUTF8(char *sanitized);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/common/utf8.c b/common/utf8.c
new file mode 100644
index 00000000..ccc984c7
--- /dev/null
+++ b/common/utf8.c
@@ -0,0 +1,51 @@
+// 17 may 2020
+#include "uipriv.h"
+#include "third_party/utf.h"
+
+// TODO write separate tests for this file?
+// TODO ideally this functionality should really be part of utf itself, in some form or another (for instance, via utf8SanitizedLen() + the requisite loop)
+
+// Number of extra bytes added to the output buffer each time it runs out of room.
+// Must be at least 4 (the size of the encoded[] scratch buffer below) so a single growth step always makes room for the pending rune.
+#define nGrow 32
+
+// uiprivSanitizeUTF8() returns a newly allocated, NUL-terminated copy of str in which every rune has been decoded with uiprivUTF8DecodeRune() and re-encoded with uiprivUTF8EncodeRune().
+// str must be a non-NULL, NUL-terminated string; it is passed straight to strlen().
+// The result must be released with uiprivFreeUTF8().
+char *uiprivSanitizeUTF8(const char *str)
+{
+	size_t len;
+	char *out;
+	const char *s;
+	size_t i;
+	uint32_t rune;
+	char encoded[4];
+	size_t n;
+
+	// strlen() is fine for UTF-8: C defines '\0' as the byte with all bits zero, and UTF-8 only produces a zero byte when encoding U+0000 itself, so the first zero byte is always the terminator.
+	len = strlen(str);
+	// Invariant: out always holds len + 1 bytes (len bytes of payload plus the terminator).
+	out = (char *) uiprivAlloc((len + 1) * sizeof (char), "sanitized UTF-8 string");
+	s = str;
+	i = 0;
+	while (*s != '\0') {
+		s = uiprivUTF8DecodeRune(s, 0, &rune);
+		n = uiprivUTF8EncodeRune(rune, encoded);
+		// Grow only when the rune would not fit in the len payload bytes.
+		// NOTE(review): the output can presumably outgrow the input when the decoder substitutes a longer replacement rune for invalid bytes; confirm against utf.c.
+		if ((i + n) > len) {
+			out = (char *) uiprivRealloc(out, (len + 1) * sizeof (char), (len + nGrow + 1) * sizeof (char), "sanitized UTF-8 string");
+			len += nGrow;
+		}
+		memcpy(out + i, encoded, n);
+		i += n;
+	}
+	// i <= len here, so the terminator always fits; write it explicitly instead of relying on the allocator zero-filling the buffer.
+	out[i] = '\0';
+	return out;
+}
+
+// uiprivFreeUTF8() releases a string returned by uiprivSanitizeUTF8() by handing it to uiprivFree().
+void uiprivFreeUTF8(char *sanitized)
+{
+	uiprivFree(sanitized);
+}