From b45e5f4de2fba1d0ed189529095bcd275da94738 Mon Sep 17 00:00:00 2001
From: Pietro Gagliardi
Date: Sun, 4 Dec 2016 16:02:56 -0500
Subject: [PATCH] Extended attrstr.c. Much more complete API now.

---
 common/attrstr.c | 150 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 138 insertions(+), 12 deletions(-)

diff --git a/common/attrstr.c b/common/attrstr.c
index 03a1dadc..98e8e90a 100644
--- a/common/attrstr.c
+++ b/common/attrstr.c
@@ -39,6 +39,7 @@ uiAttributedString *uiNewAttributedString(const char *initialString)
 	return s;
 }
 
+// TODO make sure that all implementations of graphemes() work fine with empty strings; in particular, the Windows one might not
 static void recomputeGraphemes(uiAttributedString *s)
 {
 	if (s->graphemes != NULL)
@@ -75,31 +76,95 @@ const char *uiAttributedStringString(uiAttributedString *s)
 	return s->s;
 }
 
+size_t uiAttributedStringLen(uiAttributedString *s)
+{
+	return s->len;
+}
+
+static void u8u16len(const char *str, size_t *n8, size_t *n16)
+{
+	uint32_t rune;
+	char buf[4];
+	uint16_t buf16[2];
+
+	*n8 = 0;
+	*n16 = 0;
+	while (*str) {
+		str = utf8DecodeRune(str, 0, &rune);
+		*n8 += utf8EncodeRune(rune, buf);
+		*n16 += utf16EncodeRune(rune, buf16);
+	}
+}
+
 void uiAttributedStringAppendUnattributed(uiAttributedString *s, const char *str)
 {
-	const char *t;
+	uiAttributedStringInsertAtUnattributed(s, str, s->len);
+}
+
+// this works (and returns true, which is what we want) at s->len too because s->s[s->len] is always going to be 0 due to us allocating s->len + 1 bytes and because uiRealloc() always zero-fills allocated memory
+static int onCodepointBoundary(uiAttributedString *s, size_t at)
+{
+	uint8_t c;
+
+	// for uiNewAttributedString()
+	if (s->s == NULL && at == 0)
+		return 1;
+	c = (uint8_t) (s->s[at]);
+	return c < 0x80 || c >= 0xC0;
+}
+
+// inserts str into s at byte offset at; TODO note that at must be on a codepoint boundary
+void uiAttributedStringInsertAtUnattributed(uiAttributedString *s, const char *str, size_t at)
+{
 	uint32_t rune;
 	char buf[4];
 	uint16_t u16buf[2];
 	size_t n, n16;
-	size_t old, old;
+	size_t old, old16;
+	size_t oldlen, old16len;
+	size_t at16;
+	size_t i;
+
+	if (!onCodepointBoundary(s, at)) {
+		// TODO
+	}
+
+	at16 = s->u8tou16[at];
+
+	// do this first to reclaim memory
+	invalidateGraphemes(s);
 
 	// first figure out how much we need to grow by
 	// this includes post-validated UTF-8
-	t = str;
-	n = 0;
-	n16 = 0;
-	while (*t) {
-		t = utf8DecodeRune(t, 0, &rune);
-		n += utf8EncodeRune(rune, buf);
-		n16 += utf16EncodeRune(rune, buf16);
-	}
+	u8u16len(str, &n, &n16);
 
 	// and resize
-	old = s->len;
-	old16 = s->len16;
+	old = at;
+	old16 = at16;
+	oldlen = s->len;
+	old16len = s->u16len;
 	resize(s, s->len + n, s->u16len + n16);
 
+	// move existing characters out of the way
+	// note the use of memmove(): https://twitter.com/rob_pike/status/737797688217894912
+	memmove(
+		s->s + at + n,
+		s->s + at,
+		(oldlen - at) * sizeof (char));
+	memmove(
+		s->u16 + at16 + n16,
+		s->u16 + at16,
+		(old16len - at16) * sizeof (uint16_t));
+	// note the + 1 for these; we want to copy the terminating null too
+	memmove(
+		s->u8tou16 + at + n,
+		s->u8tou16 + at,
+		(oldlen - at + 1) * sizeof (size_t));
+	memmove(
+		s->u16tou8 + at16 + n16,
+		s->u16tou8 + at16,
+		(old16len - at16 + 1) * sizeof (size_t));
+
 	// and copy
 	while (*str) {
 		str = utf8DecodeRune(str, 0, &rune);
@@ -129,10 +194,71 @@ void uiAttributedStringAppendUnattributed(uiAttributedString *s, const char *str
 		old16 += n16;
 	}
 	// and have an index for the end of the string
+	// TODO is this done by the below? NOTE(review): if old == at + n here, the loops below add n16/n to these two entries a second time - confirm
 	s->u8tou16[old] = old16;
 	s->u16tou8[old16] = old;
+
+	// and finally adjust the prior values in the conversion tables
+	// use <= so the terminating 0 gets updated too
+	for (i = 0; i <= oldlen - at; i++)
+		s->u8tou16[at + n + i] += n16;
+	for (i = 0; i <= old16len - at16; i++)
+		s->u16tou8[at16 + n16 + i] += n;
+}
+
+// deletes the bytes in [start, end) from s; end is the first index that will be maintained (TODO document this publicly)
+void uiAttributedStringDelete(uiAttributedString *s, size_t start, size_t end)
+{
+	size_t start16, end16;
+	size_t count, count16;
+	size_t i;
+
+	if (!onCodepointBoundary(s, start)) {
+		// TODO
+	}
+	if (!onCodepointBoundary(s, end)) {
+		// TODO
+	}
+
+	count = end - start;
+	start16 = s->u8tou16[start];
+	end16 = s->u8tou16[end];
+	count16 = end16 - start16;
+	invalidateGraphemes(s);
+
+	// overwrite old characters; s->len and s->u16len are read as the pre-delete lengths until resize() below
+	memmove(
+		s->s + start,
+		s->s + end,
+		(s->len - end) * sizeof (char));
+	memmove(
+		s->u16 + start16,
+		s->u16 + end16,
+		(s->u16len - end16) * sizeof (uint16_t));
+	// note the + 1 for these; we want to copy the terminating null too
+	memmove(
+		s->u8tou16 + start,
+		s->u8tou16 + end,
+		(s->len - end + 1) * sizeof (size_t));
+	memmove(
+		s->u16tou8 + start16,
+		s->u16tou8 + end16,
+		(s->u16len - end16 + 1) * sizeof (size_t));
+
+	// update the conversion tables: every entry that was just moved down still holds an index count16/count too large
+	// note the use of <= to include the null terminator
+	for (i = 0; i <= s->len - end; i++)
+		s->u8tou16[start + i] -= count16;
+	for (i = 0; i <= s->u16len - end16; i++)
+		s->u16tou8[start16 + i] -= count;
+
+	// null-terminate the string at its new, shorter length
+	s->s[s->len - count] = 0;
+	s->u16[s->u16len - count16] = 0;
+
+	// and finally resize
+	resize(s, s->len - count, s->u16len - count16);
 }
 
 // TODO figure out if we should count the grapheme past the end