Started writing the implementation of uiAttributedString. Updated windows/graphemes.cpp for this new implementation. Also fixed up a bunch of subtle errors and other issues with windows/graphemes.cpp.
This commit is contained in:
parent
0d5ff432b3
commit
526173bf76
|
@ -0,0 +1,160 @@
|
||||||
|
// 3 december 2016
|
||||||
|
#include "../ui.h"
|
||||||
|
#include "uipriv.h"
|
||||||
|
|
||||||
|
// uiAttributedString holds one piece of text in both UTF-8 and UTF-16 form,
// plus tables for translating offsets between the two encodings.
struct uiAttributedString {
	// the UTF-8 text and its length in bytes; resize() allocates len + 1 bytes
	char *s;
	size_t len;

	// TODO attributes

	// a UTF-16 copy of the string is kept unconditionally on every platform so it can be handed to the grapheme calculator
	// that way no single platform gets a speed advantage (sorry GTK+)
	// u16len is counted in uint16_t units; resize() allocates u16len + 1 units
	uint16_t *u16;
	size_t u16len;

	// u8tou16[i] is the UTF-16 offset of the rune that UTF-8 byte i belongs to
	// u16tou8[j] is the UTF-8 offset of the rune that UTF-16 unit j belongs to
	// both tables carry one extra entry describing the end of the string
	size_t *u8tou16;
	size_t *u16tou8;

	// built on demand (and thrown away on every edit) to keep things from getting *too* slow
	struct graphemes *graphemes;
};
|
||||||
|
|
||||||
|
// resize() sets the recorded lengths of s to u8 UTF-8 bytes and u16 UTF-16 units
// and reallocates all four buffers to match.
// Every buffer gets one element more than its length; the callers use that
// extra slot for the end-of-string index.
static void resize(uiAttributedString *s, size_t u8, size_t u16)
{
	size_t nBytes = u8 + 1;
	size_t nUnits = u16 + 1;

	// UTF-8 side: the text and its byte -> UTF-16 offset table
	s->s = (char *) uiRealloc(s->s, nBytes * sizeof (char), "char[] (uiAttributedString)");
	s->u8tou16 = (size_t *) uiRealloc(s->u8tou16, nBytes * sizeof (size_t), "size_t[] (uiAttributedString)");
	s->len = u8;

	// UTF-16 side: the text and its unit -> UTF-8 offset table
	s->u16 = (uint16_t *) uiRealloc(s->u16, nUnits * sizeof (uint16_t), "uint16_t[] (uiAttributedString)");
	s->u16tou8 = (size_t *) uiRealloc(s->u16tou8, nUnits * sizeof (size_t), "size_t[] (uiAttributedString)");
	s->u16len = u16;
}
|
||||||
|
|
||||||
|
uiAttributedString *uiNewAttributedString(const char *initialString)
|
||||||
|
{
|
||||||
|
uiAttributedString *s;
|
||||||
|
|
||||||
|
s = uiNew(uiAttributedString);
|
||||||
|
uiAttributedStringAppendUnattributed(s, initialString);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
// recomputeGraphemes() builds s->graphemes if it does not exist yet.
// The platform's grapheme calculator is given the UTF-16 copy when
// graphemesTakesUTF16() says so, and the UTF-8 text otherwise.
static void recomputeGraphemes(uiAttributedString *s)
{
	void *text;
	size_t textLen;

	// already computed and not invalidated since; nothing to do
	if (s->graphemes != NULL)
		return;

	if (graphemesTakesUTF16()) {
		text = s->u16;
		textLen = s->u16len;
	} else {
		text = s->s;
		textLen = s->len;
	}
	s->graphemes = graphemes(text, textLen);
}
|
||||||
|
|
||||||
|
// invalidateGraphemes() throws away the cached grapheme tables of s, if any,
// so that the next recomputeGraphemes() call rebuilds them.
static void invalidateGraphemes(uiAttributedString *s)
{
	struct graphemes *g = s->graphemes;

	if (g != NULL) {
		// the two tables are separate allocations owned by the struct
		uiFree(g->pointsToGraphemes);
		uiFree(g->graphemesToPoints);
		uiFree(g);
		s->graphemes = NULL;
	}
}
|
||||||
|
|
||||||
|
// uiFreeAttributedString() releases s and everything it owns:
// the cached grapheme data, both text buffers, and both offset tables.
void uiFreeAttributedString(uiAttributedString *s)
{
	// the grapheme cache first; this is a no-op if it was never computed
	invalidateGraphemes(s);

	// UTF-8 text and its offset table
	uiFree(s->s);
	uiFree(s->u8tou16);

	// UTF-16 text and its offset table
	uiFree(s->u16);
	uiFree(s->u16tou8);

	uiFree(s);
}
|
||||||
|
|
||||||
|
// uiAttributedStringString() returns the UTF-8 text of s.
// The pointer refers to the buffer owned by s; it is not a copy.
const char *uiAttributedStringString(uiAttributedString *s)
{
	const char *text = s->s;

	return text;
}
|
||||||
|
|
||||||
|
// uiAttributedStringAppendUnattributed() appends the UTF-8 string str to the end of s.
// str is decoded rune by rune and re-encoded, so what is stored is the output of
// utf8EncodeRune()/utf16EncodeRune() rather than the raw input bytes.
// Both the UTF-8 and the UTF-16 copies are extended, the offset tables are
// filled in for the new text, and any cached grapheme data is discarded.
void uiAttributedStringAppendUnattributed(uiAttributedString *s, const char *str)
{
	const char *t;
	uint32_t rune;
	char buf[4];
	uint16_t buf16[2];
	size_t n, n16;
	size_t old, old16;
	size_t i;

	// first figure out how much we need to grow by
	// this includes post-validated UTF-8
	n = 0;
	n16 = 0;
	for (t = str; *t != '\0'; ) {
		t = utf8DecodeRune(t, 0, &rune);
		n += utf8EncodeRune(rune, buf);
		n16 += utf16EncodeRune(rune, buf16);
	}

	// and resize
	// old/old16 remember where the existing text ends; that is where writing starts
	old = s->len;
	old16 = s->u16len;
	resize(s, old + n, old16 + n16);

	// and copy
	while (*str != '\0') {
		str = utf8DecodeRune(str, 0, &rune);
		n = utf8EncodeRune(rune, buf);
		n16 = utf16EncodeRune(rune, buf16);

		// every byte of the rune maps to the first UTF-16 unit of the rune
		for (i = 0; i < n; i++) {
			s->s[old + i] = buf[i];
			s->u8tou16[old + i] = old16;
		}
		// and every UTF-16 unit of the rune maps to the first byte of the rune
		for (i = 0; i < n16; i++) {
			s->u16[old16 + i] = buf16[i];
			s->u16tou8[old16 + i] = old;
		}

		old += n;
		old16 += n16;
	}

	// and have an index for the end of the string
	s->u8tou16[old] = old16;
	s->u16tou8[old16] = old;
	// resize() reserved one extra element in each text buffer; use it to
	// terminate both copies explicitly instead of relying on the allocator
	s->s[old] = '\0';
	s->u16[old16] = 0;

	invalidateGraphemes(s);
}
|
||||||
|
|
||||||
|
// TODO figure out if we should count the grapheme past the end
|
||||||
|
// uiAttributedStringNumGraphemes() returns the len field of the grapheme
// data for s, computing that data first if it is not cached.
size_t uiAttributedStringNumGraphemes(uiAttributedString *s)
{
	size_t count;

	recomputeGraphemes(s);
	count = s->graphemes->len;
	return count;
}
|
||||||
|
|
||||||
|
// uiAttributedStringByteIndexToGrapheme() converts pos, a byte offset into
// the UTF-8 text of s, into a grapheme index.
// pos is not range-checked here.
size_t uiAttributedStringByteIndexToGrapheme(uiAttributedString *s, size_t pos)
{
	size_t point = pos;

	recomputeGraphemes(s);
	// the grapheme tables are indexed in whichever encoding the grapheme
	// calculator was given, so translate to a UTF-16 offset when needed
	if (graphemesTakesUTF16())
		point = s->u8tou16[point];
	return s->graphemes->pointsToGraphemes[point];
}
|
||||||
|
|
||||||
|
// uiAttributedStringGraphemeToByteIndex() converts pos, a grapheme index,
// into a byte offset into the UTF-8 text of s.
// pos is not range-checked here.
size_t uiAttributedStringGraphemeToByteIndex(uiAttributedString *s, size_t pos)
{
	size_t point;

	recomputeGraphemes(s);
	point = s->graphemes->graphemesToPoints[pos];
	// the table yields an offset in the encoding the grapheme calculator
	// was given; when that is UTF-16, translate it back to a UTF-8 offset
	if (graphemesTakesUTF16())
		return s->u16tou8[point];
	return point;
}
|
|
@ -1,6 +1,4 @@
|
||||||
// 6 april 2015
|
// 6 april 2015
|
||||||
// TODO can extern "C"s nest?
|
|
||||||
#include "utf.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -8,6 +6,7 @@ extern "C" {
|
||||||
|
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include "controlsigs.h"
|
#include "controlsigs.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
extern uiInitOptions options;
|
extern uiInitOptions options;
|
||||||
|
|
||||||
|
@ -56,6 +55,15 @@ extern void fallbackSkew(uiDrawMatrix *, double, double, double, double);
|
||||||
extern void scaleCenter(double, double, double *, double *);
|
extern void scaleCenter(double, double, double *, double *);
|
||||||
extern void fallbackTransformSize(uiDrawMatrix *, double *, double *);
|
extern void fallbackTransformSize(uiDrawMatrix *, double *, double *);
|
||||||
|
|
||||||
|
// for attrstr.c
|
||||||
|
// struct graphemes is the result of the platform's graphemes() function:
// a pair of lookup tables relating offsets in the text to grapheme indices.
struct graphemes {
	// the number of graphemes, as reported by graphemes()
	size_t len;
	// indexed by offset into the text that was passed to graphemes(); yields a grapheme index
	size_t *pointsToGraphemes;
	// indexed by grapheme index; yields an offset into the text that was passed to graphemes()
	size_t *graphemesToPoints;
};
|
||||||
|
extern int graphemesTakesUTF16(void);
|
||||||
|
extern struct graphemes *graphemes(void *s, size_t len);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -3,9 +3,14 @@
|
||||||
|
|
||||||
// We could use CharNext() to generate grapheme cluster boundaries, but it doesn't handle surrogate pairs properly (see http://archives.miloush.net/michkap/archive/2008/12/16/9223301.html).
|
// We could use CharNext() to generate grapheme cluster boundaries, but it doesn't handle surrogate pairs properly (see http://archives.miloush.net/michkap/archive/2008/12/16/9223301.html).
|
||||||
// So let's use Uniscribe (see http://archives.miloush.net/michkap/archive/2005/01/14/352802.html)
|
// So let's use Uniscribe (see http://archives.miloush.net/michkap/archive/2005/01/14/352802.html)
|
||||||
// See also http://www.catch22.net/tuts/uniscribe-mysteries and http://www.catch22.net/tuts/keyboard-navigation for more details.
|
// See also http://www.catch22.net/tuts/uniscribe-mysteries, http://www.catch22.net/tuts/keyboard-navigation, and https://maxradi.us/documents/uniscribe/ for more details.
|
||||||
|
|
||||||
static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
|
// graphemesTakesUTF16() tells the common code which encoding to pass to graphemes().
// This implementation treats its input as WCHARs, so the answer here is always 1 (UTF-16).
int graphemesTakesUTF16(void)
{
	return 1;
}
|
||||||
|
|
||||||
|
static HRESULT itemize(WCHAR *s, size_t len, SCRIPT_ITEM **out, int *outn)
|
||||||
{
|
{
|
||||||
SCRIPT_CONTROL sc;
|
SCRIPT_CONTROL sc;
|
||||||
SCRIPT_STATE ss;
|
SCRIPT_STATE ss;
|
||||||
|
@ -20,8 +25,8 @@ static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
|
||||||
|
|
||||||
maxItems = len + 2;
|
maxItems = len + 2;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
items = new SCRIPT_ITEM[maxItems];
|
items = new SCRIPT_ITEM[maxItems + 1];
|
||||||
hr = ScriptItemize(msg, len,
|
hr = ScriptItemize(s, len,
|
||||||
maxItems,
|
maxItems,
|
||||||
&sc, &ss,
|
&sc, &ss,
|
||||||
items, &n);
|
items, &n);
|
||||||
|
@ -39,42 +44,63 @@ static HRESULT itemize(WCHAR *msg, size_t len, SCRIPT_ITEM **out, int *outn)
|
||||||
return S_OK;
|
return S_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t *graphemes(WCHAR *msg)
|
struct graphemes *graphemes(void *s, size_t len)
|
||||||
{
|
{
|
||||||
size_t len;
|
struct graphemes *g;
|
||||||
|
WCHAR *str = (WCHAR *) s;
|
||||||
SCRIPT_ITEM *items;
|
SCRIPT_ITEM *items;
|
||||||
int i, n;
|
int nItems;
|
||||||
size_t *out;
|
int curItemIndex;
|
||||||
size_t *op;
|
int nCharsInCurItem;
|
||||||
SCRIPT_LOGATTR *logattr;
|
size_t *pPTG, *pGTP;
|
||||||
int j, nn;
|
|
||||||
HRESULT hr;
|
HRESULT hr;
|
||||||
|
|
||||||
len = wcslen(msg);
|
g = uiNew(struct graphemes);
|
||||||
hr = itemize(msg, len, &items, &n);
|
|
||||||
|
hr = itemize(str, len, &items, &n);
|
||||||
if (hr != S_OK)
|
if (hr != S_OK)
|
||||||
logHRESULT(L"error itemizing string for finding grapheme cluster boundaries", hr);
|
logHRESULT(L"error itemizing string for finding grapheme cluster boundaries", hr);
|
||||||
|
g->len = nItems;
|
||||||
|
g->pointsToGraphemes = (size_t *) uiAlloc((len + 1) * sizeof (size_t), "size_t[] (graphemes)");
|
||||||
|
// note that there are actually nItems + 1 elements in items
|
||||||
|
// items[nItems] is the grapheme one past the end
|
||||||
|
g->graphemesToPoints = (size_t *) uiAlloc((g->len + 1) * sizeof (size_t), "size_t[] (graphemes)");
|
||||||
|
|
||||||
// should be enough; 2 more just to be safe
|
pPTG = g->pointsToGraphemes;
|
||||||
out = (size_t *) uiAlloc((len + 2) * sizeof (size_t), "size_t[]");
|
pGTP = g->graphemesToPoints;
|
||||||
op = out;
|
for (curItemIndex = 0; curItemIndex < nItems; curItemIndex++) {
|
||||||
|
SCRIPT_ITEM *curItem, *nextItem;
|
||||||
|
SCRIPT_LOGATTR *logattr;
|
||||||
|
size_t *curGTP;
|
||||||
|
|
||||||
// note that there are actually n + 1 elements in items
|
curItem = items + curItemIndex;
|
||||||
for (i = 0; i < n; i++) {
|
nextItem = curItem + 1;
|
||||||
nn = items[i + 1].iCharPos - items[i].iCharPos;
|
|
||||||
logattr = new SCRIPT_LOGATTR[nn];
|
nCharsInCurItem = nextItem->iCharPos - curItem->iCharPos;
|
||||||
hr = ScriptBreak(msg + items[i].iCharPos, nn,
|
|
||||||
&(items[i].a), logattr);
|
logattr = new SCRIPT_LOGATTR[nCharsInCurItem];
|
||||||
|
hr = ScriptBreak(str + curItem->iCharPos, nCharsInCurItem,
|
||||||
|
&(curItem->a), logattr);
|
||||||
if (hr != S_OK)
|
if (hr != S_OK)
|
||||||
logHRESULT(L"error breaking string for finding grapheme cluster boundaries", hr);
|
logHRESULT(L"error breaking string for finding grapheme cluster boundaries", hr);
|
||||||
for (j = 0; j < nn; j++)
|
|
||||||
if (logattr[j].fCharStop != 0)
|
// TODO can we merge these loops somehow?
|
||||||
*op++ = items[i].iCharPos + j;
|
curGTP = pGTP;
|
||||||
|
for (i = 0; i < nCharsInCurItem; i++)
|
||||||
|
if (logattr[i].fCharStop != 0)
|
||||||
|
*pGTP++ = curItem->iCharPos + i;
|
||||||
|
for (i = 0; i < nCharsInCurItem; i++) {
|
||||||
|
*pPTG++ = curGTP - g->graphemesToPoints;
|
||||||
|
if (logattr[i].fCharStop != 0)
|
||||||
|
curGTP++;
|
||||||
|
}
|
||||||
|
|
||||||
delete[] logattr;
|
delete[] logattr;
|
||||||
}
|
}
|
||||||
// and handle the last item for the end of the string
|
// and handle the last item for the end of the string
|
||||||
*op++ = items[i].iCharPos;
|
*pGTP++ = items[nItems].iCharPos;
|
||||||
|
*pPTG++ = pGTP - g->graphemesToPoints;
|
||||||
|
|
||||||
delete[] items;
|
delete[] items;
|
||||||
return out;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue