2016-05-25 00:08:55 -05:00
// 25 may 2016
# include "uipriv_windows.hpp"
// We could use CharNext() to generate grapheme cluster boundaries, but it doesn't handle surrogate pairs properly (see http://archives.miloush.net/michkap/archive/2008/12/16/9223301.html).
// So let's use Uniscribe (see http://archives.miloush.net/michkap/archive/2005/01/14/352802.html)
2016-12-03 17:34:06 -06:00
// See also http://www.catch22.net/tuts/uniscribe-mysteries, http://www.catch22.net/tuts/keyboard-navigation, and https://maxradi.us/documents/uniscribe/ for more details.
2016-05-25 00:08:55 -05:00
2016-12-03 17:34:06 -06:00
// Reports whether graphemes() expects its input as UTF-16.
// Always nonzero in this implementation: graphemes() reinterprets its buffer as WCHAR.
int graphemesTakesUTF16(void)
{
	enum { takesUTF16 = 1 };

	return takesUTF16;
}
// Runs ScriptItemize() over the len WCHARs at s, growing the item buffer until the call succeeds.
// On S_OK, *out receives a new[]-allocated SCRIPT_ITEM array (the caller must delete[] it) and *outn receives the item count reported by ScriptItemize().
// On any other HRESULT, that HRESULT is returned and *out and *outn are left untouched.
static HRESULT itemize(WCHAR *s, size_t len, SCRIPT_ITEM **out, int *outn)
{
	SCRIPT_CONTROL control;
	SCRIPT_STATE state;
	SCRIPT_ITEM *buf;
	size_t capacity;
	int count;
	HRESULT hr;

	// both structures must be all zeroes, otherwise the text gets mangled
	ZeroMemory(&control, sizeof (SCRIPT_CONTROL));
	ZeroMemory(&state, sizeof (SCRIPT_STATE));

	// start with room for len + 2 items and double the capacity on every retry
	for (capacity = len + 2; ; capacity *= 2) {
		// one extra element beyond what we tell ScriptItemize() it may use
		buf = new SCRIPT_ITEM[capacity + 1];
		hr = ScriptItemize(s, len,
			capacity,
			&control, &state,
			buf, &count);
		if (hr == S_OK) {
			*out = buf;
			*outn = count;
			return S_OK;
		}
		// this buffer is of no use anymore, whatever went wrong
		delete[] buf;
		// E_OUTOFMEMORY means the buffer was too small; everything else is a real error
		if (hr != E_OUTOFMEMORY)
			return hr;
	}
}
2016-12-03 17:34:06 -06:00
struct graphemes * graphemes ( void * s , size_t len )
2016-05-25 00:08:55 -05:00
{
2016-12-03 17:34:06 -06:00
struct graphemes * g ;
WCHAR * str = ( WCHAR * ) s ;
2016-05-25 00:08:55 -05:00
SCRIPT_ITEM * items ;
2016-12-03 17:34:06 -06:00
int nItems ;
int curItemIndex ;
int nCharsInCurItem ;
size_t * pPTG , * pGTP ;
2016-05-25 00:08:55 -05:00
HRESULT hr ;
2016-12-03 17:34:06 -06:00
g = uiNew ( struct graphemes ) ;
2017-01-20 02:24:06 -06:00
hr = itemize ( str , len , & items , & nItems ) ;
2016-05-25 00:08:55 -05:00
if ( hr ! = S_OK )
logHRESULT ( L " error itemizing string for finding grapheme cluster boundaries " , hr ) ;
2016-12-03 17:34:06 -06:00
g - > len = nItems ;
g - > pointsToGraphemes = ( size_t * ) uiAlloc ( ( len + 1 ) * sizeof ( size_t ) , " size_t[] (graphemes) " ) ;
// note that there are actually nItems + 1 elements in items
// items[nItems] is the grapheme one past the end
g - > graphemesToPoints = ( size_t * ) uiAlloc ( ( g - > len + 1 ) * sizeof ( size_t ) , " size_t[] (graphemes) " ) ;
2016-05-25 00:08:55 -05:00
2016-12-03 17:34:06 -06:00
pPTG = g - > pointsToGraphemes ;
pGTP = g - > graphemesToPoints ;
for ( curItemIndex = 0 ; curItemIndex < nItems ; curItemIndex + + ) {
SCRIPT_ITEM * curItem , * nextItem ;
SCRIPT_LOGATTR * logattr ;
size_t * curGTP ;
2017-01-20 02:24:06 -06:00
int i ;
2016-05-25 00:08:55 -05:00
2016-12-03 17:34:06 -06:00
curItem = items + curItemIndex ;
nextItem = curItem + 1 ;
nCharsInCurItem = nextItem - > iCharPos - curItem - > iCharPos ;
logattr = new SCRIPT_LOGATTR [ nCharsInCurItem ] ;
hr = ScriptBreak ( str + curItem - > iCharPos , nCharsInCurItem ,
& ( curItem - > a ) , logattr ) ;
2016-05-25 00:08:55 -05:00
if ( hr ! = S_OK )
logHRESULT ( L " error breaking string for finding grapheme cluster boundaries " , hr ) ;
2016-12-03 17:34:06 -06:00
// TODO can we merge these loops somehow?
curGTP = pGTP ;
for ( i = 0 ; i < nCharsInCurItem ; i + + )
if ( logattr [ i ] . fCharStop ! = 0 )
* pGTP + + = curItem - > iCharPos + i ;
for ( i = 0 ; i < nCharsInCurItem ; i + + ) {
* pPTG + + = curGTP - g - > graphemesToPoints ;
if ( logattr [ i ] . fCharStop ! = 0 )
curGTP + + ;
}
2016-05-25 00:08:55 -05:00
delete [ ] logattr ;
}
// and handle the last item for the end of the string
2016-12-03 17:34:06 -06:00
* pGTP + + = items [ nItems ] . iCharPos ;
* pPTG + + = pGTP - g - > graphemesToPoints ;
2016-05-25 00:08:55 -05:00
delete [ ] items ;
2016-12-03 17:34:06 -06:00
return g ;
2016-05-25 00:08:55 -05:00
}