2016-05-25 00:08:55 -05:00
// 25 may 2016
# include "uipriv_windows.hpp"
2018-03-11 20:17:39 -05:00
# include "attrstr.hpp"
2016-05-25 00:08:55 -05:00
2017-02-10 21:12:37 -06:00
// We could use CharNextW() to generate grapheme cluster boundaries, but it doesn't handle surrogate pairs properly (see http://archives.miloush.net/michkap/archive/2008/12/16/9223301.html).
// We could also use Uniscribe (see http://archives.miloush.net/michkap/archive/2005/01/14/352802.html, http://www.catch22.net/tuts/uniscribe-mysteries, http://www.catch22.net/tuts/keyboard-navigation, and https://maxradi.us/documents/uniscribe/), but its rules for buffer sizes are convoluted.
// Let's just deal with the CharNextW() bug.
2017-01-20 11:46:00 -06:00
2018-03-11 20:17:39 -05:00
// Reports whether the grapheme functions in this file expect UTF-16 input.
// Always returns 1 (true): uiprivNewGraphemes() reads its string as WCHAR data.
int uiprivGraphemesTakesUTF16(void)
{
	return 1;
}
2018-03-11 20:17:39 -05:00
struct graphemes * uiprivNewGraphemes ( void * s , size_t len )
2016-05-25 00:08:55 -05:00
{
2016-12-03 17:34:06 -06:00
struct graphemes * g ;
2017-02-10 21:12:37 -06:00
WCHAR * str ;
2016-12-03 17:34:06 -06:00
size_t * pPTG , * pGTP ;
2016-05-25 00:08:55 -05:00
2016-12-03 17:34:06 -06:00
g = uiNew ( struct graphemes ) ;
2017-02-10 21:12:37 -06:00
g - > len = 0 ;
str = ( WCHAR * ) s ;
while ( * str ! = L ' \0 ' ) {
g - > len + + ;
str = CharNextW ( str ) ;
// no need to worry about surrogates if we're just counting
}
2016-12-03 17:34:06 -06:00
g - > pointsToGraphemes = ( size_t * ) uiAlloc ( ( len + 1 ) * sizeof ( size_t ) , " size_t[] (graphemes) " ) ;
g - > graphemesToPoints = ( size_t * ) uiAlloc ( ( g - > len + 1 ) * sizeof ( size_t ) , " size_t[] (graphemes) " ) ;
2016-05-25 00:08:55 -05:00
2016-12-03 17:34:06 -06:00
pPTG = g - > pointsToGraphemes ;
pGTP = g - > graphemesToPoints ;
2017-02-10 21:12:37 -06:00
str = ( WCHAR * ) s ;
while ( * str ! = L ' \0 ' ) {
WCHAR * next , * p ;
ptrdiff_t nextoff ;
// as part of the bug, we need to make sure we only call CharNextW() on low halves, otherwise it'll return the same low half forever
nextoff = 0 ;
if ( IS_HIGH_SURROGATE ( * str ) )
nextoff = 1 ;
next = CharNextW ( str + nextoff ) ;
if ( IS_LOW_SURROGATE ( * next ) )
next - - ;
* pGTP = pPTG - g - > pointsToGraphemes ;
for ( p = str ; p < next ; p + + )
* pPTG + + = pGTP - g - > graphemesToPoints ;
pGTP + + ;
str = next ;
2016-05-25 00:08:55 -05:00
}
// and handle the last item for the end of the string
2017-02-10 21:12:37 -06:00
* pGTP = pPTG - g - > pointsToGraphemes ;
2017-02-10 19:37:05 -06:00
* pPTG = pGTP - g - > graphemesToPoints ;
2016-05-25 00:08:55 -05:00
2016-12-03 17:34:06 -06:00
return g ;
2016-05-25 00:08:55 -05:00
}