uipriv-ized utf.c.

This commit is contained in:
Pietro Gagliardi 2018-04-15 23:08:57 -04:00
parent 8e2004cf67
commit c3992cc647
5 changed files with 55 additions and 48 deletions

View File

@ -93,11 +93,11 @@ static void u8u16len(const char *str, size_t *n8, size_t *n16)
*n8 = 0;
*n16 = 0;
while (*str) {
str = utf8DecodeRune(str, 0, &rune);
str = uiprivUTF8DecodeRune(str, 0, &rune);
// TODO document the use of the function vs a pointer subtract here
// TODO also we need to consider namespace collision with utf.h...
*n8 += utf8EncodeRune(rune, buf);
*n16 += utf16EncodeRune(rune, buf16);
*n8 += uiprivUTF8EncodeRune(rune, buf);
*n16 += uiprivUTF16EncodeRune(rune, buf16);
}
}
@ -179,9 +179,9 @@ void uiAttributedStringInsertAtUnattributed(uiAttributedString *s, const char *s
while (*str) {
size_t n;
str = utf8DecodeRune(str, 0, &rune);
n = utf8EncodeRune(rune, buf);
n16 = utf16EncodeRune(rune, buf16);
str = uiprivUTF8DecodeRune(str, 0, &rune);
n = uiprivUTF8EncodeRune(rune, buf);
n16 = uiprivUTF16EncodeRune(rune, buf16);
s->s[old] = buf[0];
s->u8tou16[old] = old16;
if (n > 1) {

View File

@ -1,5 +1,7 @@
// 24 april 2016
// TODO if I don't decide to remove these outright, should they be renamed uiprivTypeNameSignature? these aren't real symbols, so...
#define uiAreaSignature 0x41726561
#define uiBoxSignature 0x426F784C
#define uiButtonSignature 0x42746F6E

View File

@ -1,5 +1,6 @@
// utf by pietro gagliardi (andlabs) — https://github.com/andlabs/utf/
// 10 november 2016
// function names have been altered to avoid namespace collisions in libui static builds (see utf.h)
#include "utf.h"
// this code imitates Go's unicode/utf8 and unicode/utf16
@ -9,7 +10,7 @@
// encoded must be at most 4 bytes
// TODO clean this code up somehow
size_t utf8EncodeRune(uint32_t rune, char *encoded)
size_t uiprivUTF8EncodeRune(uint32_t rune, char *encoded)
{
uint8_t b, c, d, e;
size_t n;
@ -72,7 +73,7 @@ done:
return n;
}
const char *utf8DecodeRune(const char *s, size_t nElem, uint32_t *rune)
const char *uiprivUTF8DecodeRune(const char *s, size_t nElem, uint32_t *rune)
{
uint8_t b, c;
uint8_t lowestAllowed, highestAllowed;
@ -172,7 +173,7 @@ const char *utf8DecodeRune(const char *s, size_t nElem, uint32_t *rune)
}
// encoded must have at most 2 elements
size_t utf16EncodeRune(uint32_t rune, uint16_t *encoded)
size_t uiprivUTF16EncodeRune(uint32_t rune, uint16_t *encoded)
{
uint16_t low, high;
@ -198,7 +199,7 @@ size_t utf16EncodeRune(uint32_t rune, uint16_t *encoded)
}
// TODO see if this can be cleaned up somehow
const uint16_t *utf16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune)
const uint16_t *uiprivUTF16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune)
{
uint16_t high, low;
@ -240,7 +241,7 @@ const uint16_t *utf16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune)
// TODO find a way to reduce the code in all of these somehow
// TODO find a way to remove u as well
size_t utf8RuneCount(const char *s, size_t nElem)
size_t uiprivUTF8RuneCount(const char *s, size_t nElem)
{
size_t len;
uint32_t rune;
@ -251,7 +252,7 @@ size_t utf8RuneCount(const char *s, size_t nElem)
len = 0;
t = s;
while (nElem != 0) {
u = utf8DecodeRune(t, nElem, &rune);
u = uiprivUTF8DecodeRune(t, nElem, &rune);
len++;
nElem -= u - t;
t = u;
@ -260,13 +261,13 @@ size_t utf8RuneCount(const char *s, size_t nElem)
}
len = 0;
while (*s) {
s = utf8DecodeRune(s, nElem, &rune);
s = uiprivUTF8DecodeRune(s, nElem, &rune);
len++;
}
return len;
}
size_t utf8UTF16Count(const char *s, size_t nElem)
size_t uiprivUTF8UTF16Count(const char *s, size_t nElem)
{
size_t len;
uint32_t rune;
@ -278,8 +279,8 @@ size_t utf8UTF16Count(const char *s, size_t nElem)
len = 0;
t = s;
while (nElem != 0) {
u = utf8DecodeRune(t, nElem, &rune);
len += utf16EncodeRune(rune, encoded);
u = uiprivUTF8DecodeRune(t, nElem, &rune);
len += uiprivUTF16EncodeRune(rune, encoded);
nElem -= u - t;
t = u;
}
@ -287,13 +288,13 @@ size_t utf8UTF16Count(const char *s, size_t nElem)
}
len = 0;
while (*s) {
s = utf8DecodeRune(s, nElem, &rune);
len += utf16EncodeRune(rune, encoded);
s = uiprivUTF8DecodeRune(s, nElem, &rune);
len += uiprivUTF16EncodeRune(rune, encoded);
}
return len;
}
size_t utf16RuneCount(const uint16_t *s, size_t nElem)
size_t uiprivUTF16RuneCount(const uint16_t *s, size_t nElem)
{
size_t len;
uint32_t rune;
@ -304,7 +305,7 @@ size_t utf16RuneCount(const uint16_t *s, size_t nElem)
len = 0;
t = s;
while (nElem != 0) {
u = utf16DecodeRune(t, nElem, &rune);
u = uiprivUTF16DecodeRune(t, nElem, &rune);
len++;
nElem -= u - t;
t = u;
@ -313,13 +314,13 @@ size_t utf16RuneCount(const uint16_t *s, size_t nElem)
}
len = 0;
while (*s) {
s = utf16DecodeRune(s, nElem, &rune);
s = uiprivUTF16DecodeRune(s, nElem, &rune);
len++;
}
return len;
}
size_t utf16UTF8Count(const uint16_t *s, size_t nElem)
size_t uiprivUTF16UTF8Count(const uint16_t *s, size_t nElem)
{
size_t len;
uint32_t rune;
@ -331,8 +332,8 @@ size_t utf16UTF8Count(const uint16_t *s, size_t nElem)
len = 0;
t = s;
while (nElem != 0) {
u = utf16DecodeRune(t, nElem, &rune);
len += utf8EncodeRune(rune, encoded);
u = uiprivUTF16DecodeRune(t, nElem, &rune);
len += uiprivUTF8EncodeRune(rune, encoded);
nElem -= u - t;
t = u;
}
@ -340,8 +341,8 @@ size_t utf16UTF8Count(const uint16_t *s, size_t nElem)
}
len = 0;
while (*s) {
s = utf16DecodeRune(s, nElem, &rune);
len += utf8EncodeRune(rune, encoded);
s = uiprivUTF16DecodeRune(s, nElem, &rune);
len += uiprivUTF8EncodeRune(rune, encoded);
}
return len;
}

View File

@ -1,25 +1,29 @@
// utf by pietro gagliardi (andlabs) — https://github.com/andlabs/utf/
// 10 november 2016
// note the overridden names with uipriv at the beginning; this avoids potential symbol clashes when building libui as a static library
// LONGTERM find a way to encode the name overrides directly into the utf library
#ifdef __cplusplus
extern "C" {
#endif
// TODO (for utf itself as well) should this go outside the extern "C" block or not
#include <stddef.h>
#include <stdint.h>
// if nElem == 0, assume the buffer has no upper limit and is '\0' terminated
// otherwise, assume buffer is NOT '\0' terminated but is bounded by nElem *elements*
extern size_t utf8EncodeRune(uint32_t rune, char *encoded);
extern const char *utf8DecodeRune(const char *s, size_t nElem, uint32_t *rune);
extern size_t utf16EncodeRune(uint32_t rune, uint16_t *encoded);
extern const uint16_t *utf16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune);
extern size_t uiprivUTF8EncodeRune(uint32_t rune, char *encoded);
extern const char *uiprivUTF8DecodeRune(const char *s, size_t nElem, uint32_t *rune);
extern size_t uiprivUTF16EncodeRune(uint32_t rune, uint16_t *encoded);
extern const uint16_t *uiprivUTF16DecodeRune(const uint16_t *s, size_t nElem, uint32_t *rune);
extern size_t utf8RuneCount(const char *s, size_t nElem);
extern size_t utf8UTF16Count(const char *s, size_t nElem);
extern size_t utf16RuneCount(const uint16_t *s, size_t nElem);
extern size_t utf16UTF8Count(const uint16_t *s, size_t nElem);
extern size_t uiprivUTF8RuneCount(const char *s, size_t nElem);
extern size_t uiprivUTF8UTF16Count(const char *s, size_t nElem);
extern size_t uiprivUTF16RuneCount(const uint16_t *s, size_t nElem);
extern size_t uiprivUTF16UTF8Count(const uint16_t *s, size_t nElem);
#ifdef __cplusplus
}
@ -33,27 +37,27 @@ extern size_t utf16UTF8Count(const uint16_t *s, size_t nElem);
// TODO same for UniChar/unichar on Mac? if both are unsigned then we have nothing to worry about
#if defined(_MSC_VER)
inline size_t utf16EncodeRune(uint32_t rune, __wchar_t *encoded)
inline size_t uiprivUTF16EncodeRune(uint32_t rune, __wchar_t *encoded)
{
return utf16EncodeRune(rune, reinterpret_cast<uint16_t *>(encoded));
return uiprivUTF16EncodeRune(rune, reinterpret_cast<uint16_t *>(encoded));
}
inline const __wchar_t *utf16DecodeRune(const __wchar_t *s, size_t nElem, uint32_t *rune)
inline const __wchar_t *uiprivUTF16DecodeRune(const __wchar_t *s, size_t nElem, uint32_t *rune)
{
const uint16_t *ret;
ret = utf16DecodeRune(reinterpret_cast<const uint16_t *>(s), nElem, rune);
ret = uiprivUTF16DecodeRune(reinterpret_cast<const uint16_t *>(s), nElem, rune);
return reinterpret_cast<const __wchar_t *>(ret);
}
inline size_t utf16RuneCount(const __wchar_t *s, size_t nElem)
inline size_t uiprivUTF16RuneCount(const __wchar_t *s, size_t nElem)
{
return utf16RuneCount(reinterpret_cast<const uint16_t *>(s), nElem);
return uiprivUTF16RuneCount(reinterpret_cast<const uint16_t *>(s), nElem);
}
inline size_t utf16UTF8Count(const __wchar_t *s, size_t nElem)
inline size_t uiprivUTF16UTF8Count(const __wchar_t *s, size_t nElem)
{
return utf16UTF8Count(reinterpret_cast<const uint16_t *>(s), nElem);
return uiprivUTF16UTF8Count(reinterpret_cast<const uint16_t *>(s), nElem);
}
#endif

View File

@ -12,12 +12,12 @@ WCHAR *toUTF16(const char *str)
if (*str == '\0') // empty string
return emptyUTF16();
n = utf8UTF16Count(str, 0);
n = uiprivUTF8UTF16Count(str, 0);
wstr = (WCHAR *) uiprivAlloc((n + 1) * sizeof (WCHAR), "WCHAR[]");
wp = wstr;
while (*str) {
str = utf8DecodeRune(str, 0, &rune);
n = utf16EncodeRune(rune, wp);
str = uiprivUTF8DecodeRune(str, 0, &rune);
n = uiprivUTF16EncodeRune(rune, wp);
wp += n;
}
return wstr;
@ -32,12 +32,12 @@ char *toUTF8(const WCHAR *wstr)
if (*wstr == L'\0') // empty string
return emptyUTF8();
n = utf16RuneCount(wstr, 0);
n = uiprivUTF16RuneCount(wstr, 0);
str = (char *) uiprivAlloc((n + 1) * sizeof (char), "char[]");
sp = str;
while (*wstr) {
wstr = utf16DecodeRune(wstr, 0, &rune);
n = utf8EncodeRune(rune, sp);
wstr = uiprivUTF16DecodeRune(wstr, 0, &rune);
n = uiprivUTF8EncodeRune(rune, sp);
sp += n;
}
return str;