Started writing the implementation of uiAttributedString. Updated windows/graphemes.cpp for this new implementation. Also fixed up a bunch of subtle errors and other issues with windows/graphemes.cpp.
This commit is contained in:
parent
0d5ff432b3
commit
526173bf76
|
@ -0,0 +1,160 @@
|
|||
// 3 december 2016
|
||||
#include "../ui.h"
|
||||
#include "uipriv.h"
|
||||
|
||||
// uiAttributedString holds one piece of text in both UTF-8 and UTF-16,
// together with tables for translating code unit offsets between the two.
struct uiAttributedString {
	// the UTF-8 text; len is its length in bytes
	char *s;
	size_t len;

	// TODO attributes

	// A UTF-16 copy is kept unconditionally, on every platform, so that it can
	// be handed off to the grapheme calculator.
	// That way no single platform gets a speed advantage (sorry GTK+).
	uint16_t *u16;
	size_t u16len;

	// u8tou16[i] is the offset in u16 that corresponds to byte i of s;
	// u16tou8[j] is the byte offset in s that corresponds to unit j of u16
	size_t *u8tou16;
	size_t *u16tou8;

	// built lazily to keep things from getting *too* slow
	struct graphemes *graphemes;
};
|
||||
|
||||
// resize() records u8 as the new UTF-8 length and u16 as the new UTF-16 length
// (both in code units) and reallocates the text buffers and offset tables to match.
// Every buffer is given one element more than its length.
static void resize(uiAttributedString *s, size_t u8, size_t u16)
{
	size_t nBytes, nUnits;

	// UTF-8 side: the text and its byte -> UTF-16 offset table
	s->len = u8;
	nBytes = s->len + 1;
	s->s = (char *) uiRealloc(s->s,
		nBytes * sizeof (char),
		"char[] (uiAttributedString)");
	s->u8tou16 = (size_t *) uiRealloc(s->u8tou16,
		nBytes * sizeof (size_t),
		"size_t[] (uiAttributedString)");

	// UTF-16 side: the text and its UTF-16 -> byte offset table
	s->u16len = u16;
	nUnits = s->u16len + 1;
	s->u16 = (uint16_t *) uiRealloc(s->u16,
		nUnits * sizeof (uint16_t),
		"uint16_t[] (uiAttributedString)");
	s->u16tou8 = (size_t *) uiRealloc(s->u16tou8,
		nUnits * sizeof (size_t),
		"size_t[] (uiAttributedString)");
}
|
||||
|
||||
uiAttributedString *uiNewAttributedString(const char *initialString)
|
||||
{
|
||||
uiAttributedString *s;
|
||||
|
||||
s = uiNew(uiAttributedString);
|
||||
uiAttributedStringAppendUnattributed(s, initialString);
|
||||
return s;
|
||||
}
|
||||
|
||||
// recomputeGraphemes() makes sure s->graphemes is populated.
// It does nothing if grapheme data is already present; otherwise it feeds
// graphemes() whichever encoding graphemesTakesUTF16() asks for.
static void recomputeGraphemes(uiAttributedString *s)
{
	void *text;
	size_t count;

	if (s->graphemes != NULL)
		return;

	if (graphemesTakesUTF16()) {
		text = s->u16;
		count = s->u16len;
	} else {
		text = s->s;
		count = s->len;
	}
	s->graphemes = graphemes(text, count);
}
|
||||
|
||||
// invalidateGraphemes() throws away any grapheme data held by s: both lookup
// tables and the struct itself are freed and s->graphemes is reset to NULL,
// so the next recomputeGraphemes() call builds it again.
// It is a no-op when no grapheme data is present.
static void invalidateGraphemes(uiAttributedString *s)
{
	struct graphemes *g = s->graphemes;

	if (g != NULL) {
		uiFree(g->pointsToGraphemes);
		uiFree(g->graphemesToPoints);
		uiFree(g);
		s->graphemes = NULL;
	}
}
|
||||
|
||||
// uiFreeAttributedString() releases everything owned by s and then s itself.
// s must not be used after this call.
void uiFreeAttributedString(uiAttributedString *s)
{
	void *owned[5];
	size_t i;

	// the grapheme data has its own sub-allocations; drop those first
	invalidateGraphemes(s);

	// then the offset tables, the two text buffers, and finally s itself
	owned[0] = s->u16tou8;
	owned[1] = s->u8tou16;
	owned[2] = s->u16;
	owned[3] = s->s;
	owned[4] = s;
	for (i = 0; i < 5; i++)
		uiFree(owned[i]);
}
|
||||
|
||||
// uiAttributedStringString() returns the UTF-8 text of s.
// The pointer refers to the buffer stored inside s; it is not a copy.
const char *uiAttributedStringString(uiAttributedString *s)
{
	const char *text = s->s;

	return text;
}
|
||||
|
||||
// uiAttributedStringAppendUnattributed() appends the NUL-terminated UTF-8
// string str to the end of s.
// Every rune decoded from str is re-encoded before it is stored, so what is
// kept is the post-validation UTF-8 (and the matching UTF-16), not the raw
// input bytes.
// Both offset tables are extended to cover the new text, including an entry
// for the end of the string, and any cached grapheme data is discarded.
void uiAttributedStringAppendUnattributed(uiAttributedString *s, const char *str)
{
	const char *t;
	uint32_t rune;
	char buf[4];
	uint16_t buf16[2];
	size_t n, n16;
	size_t old, old16;
	size_t i;

	// first figure out how much we need to grow by
	// this includes post-validated UTF-8
	t = str;
	n = 0;
	n16 = 0;
	while (*t) {
		t = utf8DecodeRune(t, 0, &rune);
		n += utf8EncodeRune(rune, buf);
		n16 += utf16EncodeRune(rune, buf16);
	}

	// and resize
	// old and old16 are the write positions: where the existing text ends
	old = s->len;
	old16 = s->u16len;
	resize(s, s->len + n, s->u16len + n16);

	// and copy
	while (*str) {
		str = utf8DecodeRune(str, 0, &rune);
		n = utf8EncodeRune(rune, buf);
		n16 = utf16EncodeRune(rune, buf16);

		// every byte of this rune maps to the rune's first UTF-16 unit
		for (i = 0; i < n; i++) {
			s->s[old + i] = buf[i];
			s->u8tou16[old + i] = old16;
		}
		// and every UTF-16 unit of this rune maps to the rune's first byte
		for (i = 0; i < n16; i++) {
			s->u16[old16 + i] = buf16[i];
			s->u16tou8[old16 + i] = old;
		}

		old += n;
		old16 += n16;
	}

	// terminate both copies explicitly so the result does not depend on
	// whether uiRealloc() zero-fills the memory it adds; resize() reserved
	// one extra element in each buffer for this
	s->s[old] = '\0';
	s->u16[old16] = 0;

	// and have an index for the end of the string
	s->u8tou16[old] = old16;
	s->u16tou8[old16] = old;

	invalidateGraphemes(s);
}
|
||||
|
||||
// TODO figure out if we should count the grapheme past the end
|
||||
size_t uiAttributedStringNumGraphemes(uiAttributedString *s)
|
||||
{
|
||||
recomputeGraphemes(s);
|
||||
return s->graphemes->len;
|
||||
}
|
||||
|
||||
// uiAttributedStringByteIndexToGrapheme() converts pos, a byte offset into the
// UTF-8 text of s, into a grapheme index.
// pos is not range-checked here.
size_t uiAttributedStringByteIndexToGrapheme(uiAttributedString *s, size_t pos)
{
	size_t unit;

	recomputeGraphemes(s);
	// the grapheme tables are indexed in whichever encoding graphemes() was
	// given, so translate the byte offset first when that encoding is UTF-16
	unit = graphemesTakesUTF16() ? s->u8tou16[pos] : pos;
	return s->graphemes->pointsToGraphemes[unit];
}
|
||||
|
||||
// uiAttributedStringGraphemeToByteIndex() converts pos, a grapheme index, into
// a byte offset into the UTF-8 text of s.
// pos is not range-checked here.
size_t uiAttributedStringGraphemeToByteIndex(uiAttributedString *s, size_t pos)
{
	size_t unit;

	recomputeGraphemes(s);
	unit = s->graphemes->graphemesToPoints[pos];
	if (!graphemesTakesUTF16())
		return unit;		// the tables already hold byte offsets
	// the tables hold UTF-16 offsets; translate back to bytes
	return s->u16tou8[unit];
}
|
|
@ -1,6 +1,4 @@
|
|||
// 6 april 2015
|
||||
// TODO can extern "C"s nest?
|
||||
#include "utf.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -8,6 +6,7 @@ extern "C" {
|
|||
|
||||
#include <stdarg.h>
|
||||
#include "controlsigs.h"
|
||||
#include "utf.h"
|
||||
|
||||
extern uiInitOptions options;
|
||||
|
||||
|
@ -56,6 +55,15 @@ extern void fallbackSkew(uiDrawMatrix *, double, double, double, double);
|
|||
extern void scaleCenter(double, double, double *, double *);
|
||||
extern void fallbackTransformSize(uiDrawMatrix *, double *, double *);
|
||||
|
||||
// for attrstr.c
|
||||
// struct graphemes is what graphemes() returns: a pair of lookup tables between
// code unit offsets in the text that was analyzed and grapheme indices.
struct graphemes {
	// reported by attrstr.c as the number of graphemes
	size_t len;
	// indexed by code unit offset; yields a grapheme index
	size_t *pointsToGraphemes;
	// indexed by grapheme index; yields a code unit offset
	size_t *graphemesToPoints;
};
|
||||
// graphemesTakesUTF16() returns nonzero if graphemes() must be given UTF-16
// text; attrstr.c passes its UTF-8 text otherwise.
extern int graphemesTakesUTF16(void);

// graphemes() builds the grapheme lookup tables for the len code units of text
// at s; s is in the encoding selected by graphemesTakesUTF16().
extern struct graphemes *graphemes(void *s, size_t len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -3,9 +3,14 @@
|
|||
|
||||
// We could use CharNext() to generate grapheme cluster boundaries, but it doesn't handle surrogate pairs properly (see http://archives.miloush.net/michkap/archive/2008/12/16/9223301.html).
|
||||
// So let's use Uniscribe (see http://archives.miloush.net/michkap/archive/2005/01/14/352802.html)
|
||||
// See also http://www.catch22.net/tuts/uniscribe-mysteries and http://www.catch22.net/tuts/keyboard-navigation for more details.
|
||||
// See also http://www.catch22.net/tuts/uniscribe-mysteries, http://www.catch22.net/tuts/keyboard-navigation, and https://maxradi.us/documents/uniscribe/ for more details.
|
||||
|
||||
static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
|
||||
// graphemesTakesUTF16() tells the common code which encoding to hand to
// graphemes(). This implementation treats its input as WCHARs, so it always
// answers yes.
int graphemesTakesUTF16(void)
{
	const int takesUTF16 = 1;

	return takesUTF16;
}
|
||||
|
||||
static HRESULT itemize(WCHAR *s, size_t len, SCRIPT_ITEM **out, int *outn)
|
||||
{
|
||||
SCRIPT_CONTROL sc;
|
||||
SCRIPT_STATE ss;
|
||||
|
@ -20,8 +25,8 @@ static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
|
|||
|
||||
maxItems = len + 2;
|
||||
for (;;) {
|
||||
items = new SCRIPT_ITEM[maxItems];
|
||||
hr = ScriptItemize(msg, len,
|
||||
items = new SCRIPT_ITEM[maxItems + 1];
|
||||
hr = ScriptItemize(s, len,
|
||||
maxItems,
|
||||
&sc, &ss,
|
||||
items, &n);
|
||||
|
@ -39,42 +44,63 @@ static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
|
|||
return S_OK;
|
||||
}
|
||||
|
||||
size_t *graphemes(WCHAR *msg)
|
||||
struct graphemes *graphemes(void *s, size_t len)
|
||||
{
|
||||
size_t len;
|
||||
struct graphemes *g;
|
||||
WCHAR *str = (WCHAR *) s;
|
||||
SCRIPT_ITEM *items;
|
||||
int i, n;
|
||||
size_t *out;
|
||||
size_t *op;
|
||||
SCRIPT_LOGATTR *logattr;
|
||||
int j, nn;
|
||||
int nItems;
|
||||
int curItemIndex;
|
||||
int nCharsInCurItem;
|
||||
size_t *pPTG, *pGTP;
|
||||
HRESULT hr;
|
||||
|
||||
len = wcslen(msg);
|
||||
hr = itemize(msg, len, &items, &n);
|
||||
g = uiNew(struct graphemes);
|
||||
|
||||
hr = itemize(str, len, &items, &n);
|
||||
if (hr != S_OK)
|
||||
logHRESULT(L"error itemizing string for finding grapheme cluster boundaries", hr);
|
||||
g->len = nItems;
|
||||
g->pointsToGraphemes = (size_t *) uiAlloc((len + 1) * sizeof (size_t), "size_t[] (graphemes)");
|
||||
// note that there are actually nItems + 1 elements in items
|
||||
// items[nItems] is the grapheme one past the end
|
||||
g->graphemesToPoints = (size_t *) uiAlloc((g->len + 1) * sizeof (size_t), "size_t[] (graphemes)");
|
||||
|
||||
// should be enough; 2 more just to be safe
|
||||
out = (size_t *) uiAlloc((len + 2) * sizeof (size_t), "size_t[]");
|
||||
op = out;
|
||||
pPTG = g->pointsToGraphemes;
|
||||
pGTP = g->graphemesToPoints;
|
||||
for (curItemIndex = 0; curItemIndex < nItems; curItemIndex++) {
|
||||
SCRIPT_ITEM *curItem, *nextItem;
|
||||
SCRIPT_LOGATTR *logattr;
|
||||
size_t *curGTP;
|
||||
|
||||
// note that there are actually n + 1 elements in items
|
||||
for (i = 0; i < n; i++) {
|
||||
nn = items[i + 1].iCharPos - items[i].iCharPos;
|
||||
logattr = new SCRIPT_LOGATTR[nn];
|
||||
hr = ScriptBreak(msg + items[i].iCharPos, nn,
|
||||
&(items[i].a), logattr);
|
||||
curItem = items + curItemIndex;
|
||||
nextItem = curItem + 1;
|
||||
|
||||
nCharsInCurItem = nextItem->iCharPos - curItem->iCharPos;
|
||||
|
||||
logattr = new SCRIPT_LOGATTR[nCharsInCurItem];
|
||||
hr = ScriptBreak(str + curItem->iCharPos, nCharsInCurItem,
|
||||
&(curItem->a), logattr);
|
||||
if (hr != S_OK)
|
||||
logHRESULT(L"error breaking string for finding grapheme cluster boundaries", hr);
|
||||
for (j = 0; j < nn; j++)
|
||||
if (logattr[j].fCharStop != 0)
|
||||
*op++ = items[i].iCharPos + j;
|
||||
|
||||
// TODO can we merge these loops somehow?
|
||||
curGTP = pGTP;
|
||||
for (i = 0; i < nCharsInCurItem; i++)
|
||||
if (logattr[i].fCharStop != 0)
|
||||
*pGTP++ = curItem->iCharPos + i;
|
||||
for (i = 0; i < nCharsInCurItem; i++) {
|
||||
*pPTG++ = curGTP - g->graphemesToPoints;
|
||||
if (logattr[i].fCharStop != 0)
|
||||
curGTP++;
|
||||
}
|
||||
|
||||
delete[] logattr;
|
||||
}
|
||||
// and handle the last item for the end of the string
|
||||
*op++ = items[i].iCharPos;
|
||||
*pGTP++ = items[nItems].iCharPos;
|
||||
*pPTG++ = pGTP - g->graphemesToPoints;
|
||||
|
||||
delete[] items;
|
||||
return out;
|
||||
return g;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue