Started writing the implementation of uiAttributedString. Updated windows/graphemes.cpp for this new implementation. Also fixed up a bunch of subtle errors and other issues with windows/graphemes.cpp.

This commit is contained in:
Pietro Gagliardi 2016-12-03 18:34:06 -05:00
parent 0d5ff432b3
commit 526173bf76
3 changed files with 223 additions and 29 deletions

160
common/attrstr.c Normal file
View File

@ -0,0 +1,160 @@
// 3 december 2016
#include "../ui.h"
#include "uipriv.h"
// uiAttributedString keeps its text in two encodings at once: UTF-8 (s) and UTF-16 (u16),
// together with tables that translate an offset in one encoding to the matching offset in the other.
struct uiAttributedString {
// the text as UTF-8; resize() allocates len + 1 chars for it
char *s;
// number of UTF-8 bytes of text in s
size_t len;
// TODO attributes
// indiscriminately keep a UTF-16 copy of the string on all platforms so we can hand this off to the grapheme calculator
// this ensures no one platform has a speed advantage (sorry GTK+)
// resize() allocates u16len + 1 elements for u16
uint16_t *u16;
// number of UTF-16 code units of text in u16
size_t u16len;
// u8tou16[i] is the offset in u16 of the character that byte i of s belongs to
// it has len + 1 entries; entry len maps the end of s to the end of u16
size_t *u8tou16;
// u16tou8[i] is the offset in s of the character that code unit i of u16 belongs to
// it has u16len + 1 entries; entry u16len maps the end of u16 to the end of s
size_t *u16tou8;
// this is lazily created to keep things from getting *too* slow
// NULL means "not computed yet"; see recomputeGraphemes() and invalidateGraphemes()
struct graphemes *graphemes;
};
// resize() sets the UTF-8 length of s to u8 bytes and the UTF-16 length to u16 code units,
// reallocating all four arrays to match.
// Each array is given one element more than the text length.
static void resize(uiAttributedString *s, size_t u8, size_t u16)
{
	size_t nBytes, nUnits;

	// one extra slot past the end of the text in every array
	nBytes = u8 + 1;
	nUnits = u16 + 1;

	// UTF-8 side
	s->len = u8;
	s->s = (char *) uiRealloc(s->s, nBytes * sizeof (char), "char[] (uiAttributedString)");
	s->u8tou16 = (size_t *) uiRealloc(s->u8tou16, nBytes * sizeof (size_t), "size_t[] (uiAttributedString)");

	// UTF-16 side
	s->u16len = u16;
	s->u16 = (uint16_t *) uiRealloc(s->u16, nUnits * sizeof (uint16_t), "uint16_t[] (uiAttributedString)");
	s->u16tou8 = (size_t *) uiRealloc(s->u16tou8, nUnits * sizeof (size_t), "size_t[] (uiAttributedString)");
}
// uiNewAttributedString() allocates a new uiAttributedString and fills it with initialString,
// which is appended without any attributes.
// The result is released with uiFreeAttributedString().
uiAttributedString *uiNewAttributedString(const char *initialString)
{
	uiAttributedString *str = uiNew(uiAttributedString);

	uiAttributedStringAppendUnattributed(str, initialString);
	return str;
}
// recomputeGraphemes() builds s->graphemes if it does not exist yet.
// The grapheme calculator is handed whichever encoding it asks for through graphemesTakesUTF16().
static void recomputeGraphemes(uiAttributedString *s)
{
	void *text;
	size_t n;

	// already computed and still valid; nothing to do
	if (s->graphemes != NULL)
		return;

	if (graphemesTakesUTF16()) {
		text = s->u16;
		n = s->u16len;
	} else {
		text = s->s;
		n = s->len;
	}
	s->graphemes = graphemes(text, n);
}
// invalidateGraphemes() throws away the cached grapheme data, if any,
// so that the next call to recomputeGraphemes() rebuilds it.
static void invalidateGraphemes(uiAttributedString *s)
{
	if (s->graphemes != NULL) {
		// the two tables are separate allocations owned by the struct
		uiFree(s->graphemes->pointsToGraphemes);
		uiFree(s->graphemes->graphemesToPoints);
		uiFree(s->graphemes);
		s->graphemes = NULL;
	}
}
// uiFreeAttributedString() releases s and everything it owns:
// the cached grapheme data, both copies of the text, and both index tables.
void uiFreeAttributedString(uiAttributedString *s)
{
	// cached grapheme tables first
	invalidateGraphemes(s);

	// then the text and the offset tables
	uiFree(s->s);
	uiFree(s->u16);
	uiFree(s->u8tou16);
	uiFree(s->u16tou8);

	uiFree(s);
}
// uiAttributedStringString() returns the UTF-8 text stored in s.
// The pointer refers to s's own buffer; it is not a copy.
const char *uiAttributedStringString(uiAttributedString *s)
{
	const char *text;

	text = s->s;
	return text;
}
// uiAttributedStringAppendUnattributed() appends str, a NUL-terminated UTF-8 string, to the end of s.
// Every rune of str is decoded and re-encoded, so the stored UTF-8 is whatever utf8EncodeRune()
// produces for the decoded runes rather than a byte-for-byte copy of the input.
// The UTF-16 copy and both offset tables are extended at the same time, and any cached
// grapheme data is discarded. A NULL str is treated as the empty string.
void uiAttributedStringAppendUnattributed(uiAttributedString *s, const char *str)
{
	const char *t;
	uint32_t rune;
	char buf[4];
	uint16_t buf16[2];
	size_t n, n16;
	size_t old, old16;
	size_t i;

	if (str == NULL)
		str = "";

	// first figure out how much we need to grow by
	// this includes post-validated UTF-8
	t = str;
	n = 0;
	n16 = 0;
	while (*t) {
		t = utf8DecodeRune(t, 0, &rune);
		n += utf8EncodeRune(rune, buf);
		n16 += utf16EncodeRune(rune, buf16);
	}

	// and resize
	// old and old16 are the write positions in s->s and s->u16; they start at the previous ends
	old = s->len;
	old16 = s->u16len;
	resize(s, old + n, old16 + n16);

	// and copy
	while (*str) {
		str = utf8DecodeRune(str, 0, &rune);
		n = utf8EncodeRune(rune, buf);
		n16 = utf16EncodeRune(rune, buf16);
		// every byte of this rune maps to the rune's first UTF-16 code unit...
		for (i = 0; i < n; i++) {
			s->s[old + i] = buf[i];
			s->u8tou16[old + i] = old16;
		}
		// ...and every code unit of this rune maps to the rune's first byte
		for (i = 0; i < n16; i++) {
			s->u16[old16 + i] = buf16[i];
			s->u16tou8[old16 + i] = old;
		}
		old += n;
		old16 += n16;
	}

	// terminate both copies explicitly instead of depending on what the allocator left in the extra slot
	s->s[old] = '\0';
	s->u16[old16] = 0;
	// and have an index for the end of the string
	s->u8tou16[old] = old16;
	s->u16tou8[old16] = old;

	invalidateGraphemes(s);
}
// TODO figure out if we should count the grapheme past the end
// uiAttributedStringNumGraphemes() returns the len field of s's grapheme data,
// computing that data first if it is not cached.
size_t uiAttributedStringNumGraphemes(uiAttributedString *s)
{
	size_t count;

	recomputeGraphemes(s);
	count = s->graphemes->len;
	return count;
}
// uiAttributedStringByteIndexToGrapheme() converts pos, a byte offset into the UTF-8 text of s,
// into a grapheme index.
// pos is not range-checked here.
size_t uiAttributedStringByteIndexToGrapheme(uiAttributedString *s, size_t pos)
{
	size_t point;

	recomputeGraphemes(s);
	point = pos;
	// the grapheme tables are indexed in UTF-16 code units when the calculator takes UTF-16
	if (graphemesTakesUTF16())
		point = s->u8tou16[pos];
	return s->graphemes->pointsToGraphemes[point];
}
// uiAttributedStringGraphemeToByteIndex() converts pos, a grapheme index,
// into a byte offset into the UTF-8 text of s.
// pos is not range-checked here.
size_t uiAttributedStringGraphemeToByteIndex(uiAttributedString *s, size_t pos)
{
	size_t point;

	recomputeGraphemes(s);
	point = s->graphemes->graphemesToPoints[pos];
	// a calculator that works on UTF-8 already gave us a byte offset
	if (!graphemesTakesUTF16())
		return point;
	// otherwise point is in UTF-16 code units; translate it back to bytes
	return s->u16tou8[point];
}

View File

@ -1,6 +1,4 @@
// 6 april 2015
// TODO can extern "C"s nest?
#include "utf.h"
#ifdef __cplusplus
extern "C" {
@ -8,6 +6,7 @@ extern "C" {
#include <stdarg.h>
#include "controlsigs.h"
#include "utf.h"
extern uiInitOptions options;
@ -56,6 +55,15 @@ extern void fallbackSkew(uiDrawMatrix *, double, double, double, double);
extern void scaleCenter(double, double, double *, double *);
extern void fallbackTransformSize(uiDrawMatrix *, double *, double *);
// for attrstr.c
// struct graphemes is the result of graphemes(): a pair of lookup tables between
// positions in the string that was passed in and grapheme indices.
// attrstr.c frees both tables and then the struct itself with uiFree().
struct graphemes {
// reported by attrstr.c as the number of graphemes
size_t len;
// indexed by position in the string given to graphemes(); yields a grapheme index
size_t *pointsToGraphemes;
// indexed by grapheme index; yields a position in the string given to graphemes()
size_t *graphemesToPoints;
};
// returns nonzero if graphemes() must be given UTF-16 text, zero if it must be given UTF-8 text
extern int graphemesTakesUTF16(void);
// s is the text in the encoding selected by graphemesTakesUTF16() and len is its length
// in that encoding's units (UTF-16 code units or UTF-8 bytes), not counting any terminator
extern struct graphemes *graphemes(void *s, size_t len);
#ifdef __cplusplus
}
#endif

View File

@ -3,9 +3,14 @@
// We could use CharNext() to generate grapheme cluster boundaries, but it doesn't handle surrogate pairs properly (see http://archives.miloush.net/michkap/archive/2008/12/16/9223301.html).
// So let's use Uniscribe (see http://archives.miloush.net/michkap/archive/2005/01/14/352802.html)
// See also http://www.catch22.net/tuts/uniscribe-mysteries and http://www.catch22.net/tuts/keyboard-navigation for more details.
// See also http://www.catch22.net/tuts/uniscribe-mysteries, http://www.catch22.net/tuts/keyboard-navigation, and https://maxradi.us/documents/uniscribe/ for more details.
static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
// Tells the caller which encoding to pass to graphemes().
// This implementation treats its input as a WCHAR string, so it always asks for UTF-16.
int graphemesTakesUTF16(void)
{
return 1;
}
static HRESULT itemize(WCHAR *s, size_t len, SCRIPT_ITEM **out, int *outn)
{
SCRIPT_CONTROL sc;
SCRIPT_STATE ss;
@ -20,8 +25,8 @@ static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
maxItems = len + 2;
for (;;) {
items = new SCRIPT_ITEM[maxItems];
hr = ScriptItemize(msg, len,
items = new SCRIPT_ITEM[maxItems + 1];
hr = ScriptItemize(s, len,
maxItems,
&sc, &ss,
items, &n);
@ -39,42 +44,63 @@ static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
return S_OK;
}
size_t *graphemes(WCHAR *msg)
struct graphemes *graphemes(void *s, size_t len)
{
size_t len;
struct graphemes *g;
WCHAR *str = (WCHAR *) s;
SCRIPT_ITEM *items;
int i, n;
size_t *out;
size_t *op;
SCRIPT_LOGATTR *logattr;
int j, nn;
int nItems;
int curItemIndex;
int nCharsInCurItem;
size_t *pPTG, *pGTP;
HRESULT hr;
len = wcslen(msg);
hr = itemize(msg, len, &items, &n);
g = uiNew(struct graphemes);
hr = itemize(str, len, &items, &n);
if (hr != S_OK)
logHRESULT(L"error itemizing string for finding grapheme cluster boundaries", hr);
g->len = nItems;
g->pointsToGraphemes = (size_t *) uiAlloc((len + 1) * sizeof (size_t), "size_t[] (graphemes)");
// note that there are actually nItems + 1 elements in items
// items[nItems] is the grapheme one past the end
g->graphemesToPoints = (size_t *) uiAlloc((g->len + 1) * sizeof (size_t), "size_t[] (graphemes)");
// should be enough; 2 more just to be safe
out = (size_t *) uiAlloc((len + 2) * sizeof (size_t), "size_t[]");
op = out;
pPTG = g->pointsToGraphemes;
pGTP = g->graphemesToPoints;
for (curItemIndex = 0; curItemIndex < nItems; curItemIndex++) {
SCRIPT_ITEM *curItem, *nextItem;
SCRIPT_LOGATTR *logattr;
size_t *curGTP;
// note that there are actually n + 1 elements in items
for (i = 0; i < n; i++) {
nn = items[i + 1].iCharPos - items[i].iCharPos;
logattr = new SCRIPT_LOGATTR[nn];
hr = ScriptBreak(msg + items[i].iCharPos, nn,
&(items[i].a), logattr);
curItem = items + curItemIndex;
nextItem = curItem + 1;
nCharsInCurItem = nextItem->iCharPos - curItem->iCharPos;
logattr = new SCRIPT_LOGATTR[nCharsInCurItem];
hr = ScriptBreak(str + curItem->iCharPos, nCharsInCurItem,
&(curItem->a), logattr);
if (hr != S_OK)
logHRESULT(L"error breaking string for finding grapheme cluster boundaries", hr);
for (j = 0; j < nn; j++)
if (logattr[j].fCharStop != 0)
*op++ = items[i].iCharPos + j;
// TODO can we merge these loops somehow?
curGTP = pGTP;
for (i = 0; i < nCharsInCurItem; i++)
if (logattr[i].fCharStop != 0)
*pGTP++ = curItem->iCharPos + i;
for (i = 0; i < nCharsInCurItem; i++) {
*pPTG++ = curGTP - g->graphemesToPoints;
if (logattr[i].fCharStop != 0)
curGTP++;
}
delete[] logattr;
}
// and handle the last item for the end of the string
*op++ = items[i].iCharPos;
*pGTP++ = items[nItems].iCharPos;
*pPTG++ = pGTP - g->graphemesToPoints;
delete[] items;
return out;
return g;
}