107 lines
3.0 KiB
C++
107 lines
3.0 KiB
C++
// 25 may 2016
|
|
#include "uipriv_windows.hpp"
|
|
|
|
// We could use CharNext() to generate grapheme cluster boundaries, but it doesn't handle surrogate pairs properly (see http://archives.miloush.net/michkap/archive/2008/12/16/9223301.html).
|
|
// So let's use Uniscribe (see http://archives.miloush.net/michkap/archive/2005/01/14/352802.html)
|
|
// See also http://www.catch22.net/tuts/uniscribe-mysteries, http://www.catch22.net/tuts/keyboard-navigation, and https://maxradi.us/documents/uniscribe/ for more details.
|
|
|
|
// graphemesTakesUTF16() always returns 1 (true).
// The graphemes() function in this file reads its input as an array of WCHARs,
// so this presumably tells platform-independent callers to hand it UTF-16 text.
int graphemesTakesUTF16(void)
{
	const int takesUTF16 = 1;

	return takesUTF16;
}
|
|
|
|
// itemize() runs ScriptItemize() over the len WCHARs at s, growing the
// output buffer until the call succeeds.
//
// On success, S_OK is returned, *out receives a new[]-allocated array of
// SCRIPT_ITEMs (the caller must delete[] it), and *outn receives the item
// count reported by ScriptItemize().
// On any failure other than E_OUTOFMEMORY, the failing HRESULT is returned
// and neither *out nor *outn is written.
static HRESULT itemize(WCHAR *s, size_t len, SCRIPT_ITEM **out, int *outn)
{
	SCRIPT_CONTROL control;
	SCRIPT_STATE state;
	size_t capacity;

	// both structures must be all zeroes, otherwise the text gets mangled
	ZeroMemory(&control, sizeof (SCRIPT_CONTROL));
	ZeroMemory(&state, sizeof (SCRIPT_STATE));

	// start with room for len + 2 items and double on every "not enough room" result
	for (capacity = len + 2; ; capacity *= 2) {
		SCRIPT_ITEM *buf;
		int count;
		HRESULT hr;

		// one slot more than we tell ScriptItemize() about
		buf = new SCRIPT_ITEM[capacity + 1];
		hr = ScriptItemize(s, (int) len,
			(int) capacity,
			&control, &state,
			buf, &count);
		if (hr == S_OK) {
			*out = buf;
			*outn = count;
			return S_OK;
		}

		// either a real error or the buffer was too small; this buffer is useless in both cases
		delete[] buf;
		if (hr != E_OUTOFMEMORY)
			return hr;
	}
}
|
|
|
|
struct graphemes *graphemes(void *s, size_t len)
|
|
{
|
|
struct graphemes *g;
|
|
WCHAR *str = (WCHAR *) s;
|
|
SCRIPT_ITEM *items;
|
|
int nItems;
|
|
int curItemIndex;
|
|
int nCharsInCurItem;
|
|
size_t *pPTG, *pGTP;
|
|
HRESULT hr;
|
|
|
|
g = uiNew(struct graphemes);
|
|
|
|
hr = itemize(str, len, &items, &n);
|
|
if (hr != S_OK)
|
|
logHRESULT(L"error itemizing string for finding grapheme cluster boundaries", hr);
|
|
g->len = nItems;
|
|
g->pointsToGraphemes = (size_t *) uiAlloc((len + 1) * sizeof (size_t), "size_t[] (graphemes)");
|
|
// note that there are actually nItems + 1 elements in items
|
|
// items[nItems] is the grapheme one past the end
|
|
g->graphemesToPoints = (size_t *) uiAlloc((g->len + 1) * sizeof (size_t), "size_t[] (graphemes)");
|
|
|
|
pPTG = g->pointsToGraphemes;
|
|
pGTP = g->graphemesToPoints;
|
|
for (curItemIndex = 0; curItemIndex < nItems; curItemIndex++) {
|
|
SCRIPT_ITEM *curItem, *nextItem;
|
|
SCRIPT_LOGATTR *logattr;
|
|
size_t *curGTP;
|
|
|
|
curItem = items + curItemIndex;
|
|
nextItem = curItem + 1;
|
|
|
|
nCharsInCurItem = nextItem->iCharPos - curItem->iCharPos;
|
|
|
|
logattr = new SCRIPT_LOGATTR[nCharsInCurItem];
|
|
hr = ScriptBreak(str + curItem->iCharPos, nCharsInCurItem,
|
|
&(curItem->a), logattr);
|
|
if (hr != S_OK)
|
|
logHRESULT(L"error breaking string for finding grapheme cluster boundaries", hr);
|
|
|
|
// TODO can we merge these loops somehow?
|
|
curGTP = pGTP;
|
|
for (i = 0; i < nCharsInCurItem; i++)
|
|
if (logattr[i].fCharStop != 0)
|
|
*pGTP++ = curItem->iCharPos + i;
|
|
for (i = 0; i < nCharsInCurItem; i++) {
|
|
*pPTG++ = curGTP - g->graphemesToPoints;
|
|
if (logattr[i].fCharStop != 0)
|
|
curGTP++;
|
|
}
|
|
|
|
delete[] logattr;
|
|
}
|
|
// and handle the last item for the end of the string
|
|
*pGTP++ = items[nItems].iCharPos;
|
|
*pPTG++ = pGTP - g->graphemesToPoints;
|
|
|
|
delete[] items;
|
|
return g;
|
|
}
|