2016-12-03 17:34:06 -06:00
// 3 december 2016
# include "../ui.h"
# include "uipriv.h"
// uiAttributedString holds one piece of text in two encodings at once, together
// with the tables needed to translate offsets between them and the list of
// attributes applied to the text.
struct uiAttributedString {
	// the text as UTF-8; len is its size in bytes, and the buffer has room for one extra byte past that
	char *s;
	size_t len;

	// the attributes attached to the text
	struct attrlist *attrs;

	// a UTF-16 copy of the same text, maintained on every platform so it can be handed to the grapheme calculator
	// keeping it everywhere means no single platform gets a speed advantage (sorry GTK+)
	uint16_t *u16;
	size_t u16len;

	// offset conversion tables: u8tou16 is indexed by UTF-8 byte offset (len + 1 entries),
	// u16tou8 is indexed by UTF-16 unit offset (u16len + 1 entries)
	size_t *u8tou16;
	size_t *u16tou8;

	// grapheme boundary data; built on demand and thrown away on every edit so things don't get *too* slow
	struct graphemes *graphemes;
};
// resize() stores the new lengths (u8 bytes of UTF-8, u16 units of UTF-16) in s
// and reallocates all four parallel buffers to match.
// Each buffer is given one slot more than its length: the text buffers for the
// terminating 0, the conversion tables for the end-of-string entry.
static void resize(uiAttributedString *s, size_t u8, size_t u16)
{
	size_t slots8, slots16;

	s->len = u8;
	s->u16len = u16;
	slots8 = s->len + 1;
	slots16 = s->u16len + 1;

	// UTF-8 text and the table indexed by UTF-8 offset
	s->s = (char *) uiRealloc(s->s, slots8 * sizeof (char), " char[] (uiAttributedString) ");
	s->u8tou16 = (size_t *) uiRealloc(s->u8tou16, slots8 * sizeof (size_t), " size_t[] (uiAttributedString) ");

	// UTF-16 text and the table indexed by UTF-16 offset
	s->u16 = (uint16_t *) uiRealloc(s->u16, slots16 * sizeof (uint16_t), " uint16_t[] (uiAttributedString) ");
	s->u16tou8 = (size_t *) uiRealloc(s->u16tou8, slots16 * sizeof (size_t), " size_t[] (uiAttributedString) ");
}
uiAttributedString * uiNewAttributedString ( const char * initialString )
{
uiAttributedString * s ;
s = uiNew ( uiAttributedString ) ;
2016-12-22 13:33:30 -06:00
s - > attrs = attrlistNew ( ) ;
2016-12-03 17:34:06 -06:00
uiAttributedStringAppendUnattributed ( s , initialString ) ;
return s ;
}
2016-12-04 15:02:56 -06:00
// TODO make sure that all implementations of graphemes() work fine with empty strings; in particular, the Windows one might not
2016-12-03 17:34:06 -06:00
// recomputeGraphemes() builds s->graphemes if it is not currently present.
// graphemesTakesUTF16() selects which copy of the text is passed to graphemes():
// the UTF-16 one when it returns nonzero, the UTF-8 one otherwise.
static void recomputeGraphemes(uiAttributedString *s)
{
	if (s->graphemes == NULL) {
		if (graphemesTakesUTF16())
			s->graphemes = graphemes(s->u16, s->u16len);
		else
			s->graphemes = graphemes(s->s, s->len);
	}
}
// invalidateGraphemes() discards the cached grapheme data, if any, so the next
// recomputeGraphemes() builds it again. Does nothing when there is no cache.
static void invalidateGraphemes(uiAttributedString *s)
{
	struct graphemes *cached;

	cached = s->graphemes;
	if (cached != NULL) {
		// the two lookup tables first, then the structure that owns them
		uiFree(cached->pointsToGraphemes);
		uiFree(cached->graphemesToPoints);
		uiFree(cached);
		s->graphemes = NULL;
	}
}
// uiFreeAttributedString() releases everything s owns and then s itself.
void uiFreeAttributedString(uiAttributedString *s)
{
	// attributes and the grapheme cache
	attrlistFree(s->attrs);
	invalidateGraphemes(s);

	// conversion tables
	uiFree(s->u16tou8);
	uiFree(s->u8tou16);

	// text buffers
	uiFree(s->u16);
	uiFree(s->s);

	uiFree(s);
}
// uiAttributedStringString() returns the UTF-8 text of s.
// The pointer is s's own buffer, not a copy.
const char *uiAttributedStringString(uiAttributedString *s)
{
	const char *text;

	text = s->s;
	return text;
}
2016-12-04 15:02:56 -06:00
// uiAttributedStringLen() returns the length of s's UTF-8 text, in bytes.
size_t uiAttributedStringLen(uiAttributedString *s)
{
	size_t nbytes;

	nbytes = s->len;
	return nbytes;
}
// u8u16len() walks the null-terminated string str one rune at a time and
// reports, through n8 and n16, how many UTF-8 bytes and UTF-16 units the runes
// take up once they are encoded again.
static void u8u16len(const char *str, size_t *n8, size_t *n16)
{
	size_t total8, total16;
	uint32_t r;
	char scratch8[4];
	uint16_t scratch16[2];

	total8 = 0;
	total16 = 0;
	while (*str) {
		str = utf8DecodeRune(str, 0, &r);
		// TODO document the use of the function vs a pointer subtract here
		// (presumably the re-encoded length can differ from the number of bytes consumed when the input is not valid UTF-8; confirm)
		total8 += utf8EncodeRune(r, scratch8);
		total16 += utf16EncodeRune(r, scratch16);
	}
	*n8 = total8;
	*n16 = total16;
}
2016-12-03 17:34:06 -06:00
// uiAttributedStringAppendUnattributed() adds str, without attributes, to the
// end of s. It is an insertion at the current end of the UTF-8 text.
void uiAttributedStringAppendUnattributed(uiAttributedString *s, const char *str)
{
	size_t end;

	end = s->len;
	uiAttributedStringInsertAtUnattributed(s, str, end);
}
// this works (and returns true, which is what we want) at s->len too because s->s[s->len] is always going to be 0 due to us allocating s->len + 1 bytes and because uiRealloc() always zero-fills allocated memory
// onCodepointBoundary() returns 1 if the byte at offset at in s's UTF-8 text
// is not a continuation byte, and 0 if it is.
static int onCodepointBoundary(uiAttributedString *s, size_t at)
{
	uint8_t b;

	// for uiNewAttributedString(): there is no buffer to inspect yet
	if (at == 0 && s->s == NULL)
		return 1;

	b = (uint8_t) (s->s[at]);
	// continuation bytes are exactly the range 0x80..0xBF
	return !(b >= 0x80 && b < 0xC0);
}
// TODO note that at must be on a codepoint boundary
// uiAttributedStringInsertAtUnattributed() inserts str, without attributes,
// into s so that it starts at UTF-8 byte offset at.
// Both text buffers, both conversion tables, and the attribute list are updated;
// the cached grapheme data is discarded.
void uiAttributedStringInsertAtUnattributed(uiAttributedString *s, const char *str, size_t at)
{
	size_t add8, add16;		// size of the inserted text in UTF-8 bytes / UTF-16 units
	size_t prevLen, prevLen16;	// lengths before the insertion
	size_t tail8, tail16;		// amount of existing text at and after the insertion point
	size_t at16;			// the insertion point as a UTF-16 offset
	size_t pos8, pos16;		// write cursors used while copying
	size_t i;

	if (!onCodepointBoundary(s, at)) {
		// TODO
	}

	// the conversion table may not have been allocated yet; fall back to offset 0 in that case
	at16 = 0;
	if (s->u8tou16 != NULL)
		at16 = s->u8tou16[at];

	// do this first to reclaim memory
	invalidateGraphemes(s);

	// work out how much room the new text needs
	// the counts are taken after the runes are re-encoded, so this includes post-validated UTF-8
	u8u16len(str, &add8, &add16);

	// grow the buffers
	prevLen = s->len;
	prevLen16 = s->u16len;
	tail8 = prevLen - at;
	tail16 = prevLen16 - at16;
	resize(s, prevLen + add8, prevLen16 + add16);

	// open a gap at the insertion point
	// note the use of memmove(): https://twitter.com/rob_pike/status/737797688217894912
	memmove(s->s + at + add8, s->s + at, tail8 * sizeof (char));
	memmove(s->u16 + at16 + add16, s->u16 + at16, tail16 * sizeof (uint16_t));
	// the tables have one more entry than the text (the end-of-string entry), hence the + 1
	memmove(s->u8tou16 + at + add8, s->u8tou16 + at, (tail8 + 1) * sizeof (size_t));
	memmove(s->u16tou8 + at16 + add16, s->u16tou8 + at16, (tail16 + 1) * sizeof (size_t));

	// fill the gap one rune at a time, writing both encodings and both table directions
	pos8 = at;
	pos16 = at16;
	while (*str) {
		uint32_t r;
		char enc8[4];
		uint16_t enc16[2];
		size_t len8, len16;
		size_t k;

		str = utf8DecodeRune(str, 0, &r);
		len8 = utf8EncodeRune(r, enc8);
		len16 = utf16EncodeRune(r, enc16);

		// every byte of the rune maps to the rune's first UTF-16 unit
		s->s[pos8] = enc8[0];
		s->u8tou16[pos8] = pos16;
		for (k = 1; k < len8 && k < 4; k++) {
			s->s[pos8 + k] = enc8[k];
			s->u8tou16[pos8 + k] = pos16;
		}

		// every unit of the rune maps to the rune's first UTF-8 byte
		s->u16[pos16] = enc16[0];
		s->u16tou8[pos16] = pos8;
		if (len16 > 1) {
			s->u16[pos16 + 1] = enc16[1];
			s->u16tou8[pos16 + 1] = pos8;
		}

		pos8 += len8;
		pos16 += len16;
	}

	// and have an index for the end of the string
	// TODO is this done by the below?
//TODO	s->u8tou16[old] = old16;
//TODO	s->u16tou8[old16] = old;

	// the entries that were moved still hold offsets from before the insertion; shift them
	// use <= so the end-of-string entry gets updated too
	for (i = 0; i <= tail8; i++)
		s->u8tou16[at + add8 + i] += add16;
	for (i = 0; i <= tail16; i++)
		s->u16tou8[at16 + add16 + i] += add8;

	// and finally do the attributes
	attrlistInsertCharactersUnattributed(s->attrs, at, add8);
}
// TODO document that end is the first index that will be maintained
// uiAttributedStringDelete() removes the UTF-8 byte range [start, end) from s.
// end is exclusive: it is the first index that is kept.
// Both text buffers, both conversion tables, and the attribute list are updated;
// the cached grapheme data is discarded.
// TODO note that start and end must be on codepoint boundaries
void uiAttributedStringDelete(uiAttributedString *s, size_t start, size_t end)
{
	size_t start16, end16;
	size_t count, count16;		// size of the removed span in UTF-8 bytes / UTF-16 units
	size_t tail, tail16;		// size of the text that survives after the removed span
	size_t i;

	if (!onCodepointBoundary(s, start)) {
		// TODO
	}
	if (!onCodepointBoundary(s, end)) {
		// TODO
	}

	count = end - start;
	start16 = s->u8tou16[start];
	end16 = s->u8tou16[end];
	count16 = end16 - start16;
	tail = s->len - end;
	tail16 = s->u16len - end16;

	invalidateGraphemes(s);

	// slide the surviving tail down over the removed span
	memmove(
		s->s + start,
		s->s + end,
		tail * sizeof (char));
	memmove(
		s->u16 + start16,
		s->u16 + end16,
		tail16 * sizeof (uint16_t));
	// note the + 1 for these; we want to copy the end-of-string entry too
	memmove(
		s->u8tou16 + start,
		s->u8tou16 + end,
		(tail + 1) * sizeof (size_t));
	memmove(
		s->u16tou8 + start16,
		s->u16tou8 + end16,
		(tail16 + 1) * sizeof (size_t));

	// the moved table entries still hold offsets from before the deletion; shift them down
	// every moved entry needs this, so the loops run over the tail, not over the removed span
	// note the use of <= to include the end-of-string entry
	for (i = 0; i <= tail; i++)
		s->u8tou16[start + i] -= count16;
	for (i = 0; i <= tail16; i++)
		s->u16tou8[start16 + i] -= count;

	// null-terminate the strings at their new ends
	s->s[start + tail] = 0;
	s->u16[start16 + tail16] = 0;

	// fix up attributes
	attrlistRemoveCharacters(s->attrs, start, end);

	// and finally shrink to the new lengths
	resize(s, start + tail, start16 + tail16);
}
// TODO figure out if we should count the grapheme past the end
// uiAttributedStringNumGraphemes() returns the len field of s's grapheme data,
// building that data first if it is not cached.
size_t uiAttributedStringNumGraphemes(uiAttributedString *s)
{
	size_t count;

	recomputeGraphemes(s);
	count = s->graphemes->len;
	return count;
}
// uiAttributedStringByteIndexToGrapheme() converts the UTF-8 byte offset pos
// into a grapheme index.
size_t uiAttributedStringByteIndexToGrapheme(uiAttributedString *s, size_t pos)
{
	size_t point;

	recomputeGraphemes(s);
	point = pos;
	// when the grapheme data was built from the UTF-16 copy, its table is indexed by UTF-16 offset
	if (graphemesTakesUTF16())
		point = s->u8tou16[pos];
	return s->graphemes->pointsToGraphemes[point];
}
// uiAttributedStringGraphemeToByteIndex() converts the grapheme index pos into
// a UTF-8 byte offset.
size_t uiAttributedStringGraphemeToByteIndex(uiAttributedString *s, size_t pos)
{
	size_t point;

	recomputeGraphemes(s);
	point = s->graphemes->graphemesToPoints[pos];
	if (!graphemesTakesUTF16())
		return point;
	// the grapheme data was built from the UTF-16 copy, so point is a UTF-16 offset; translate it
	return s->u16tou8[point];
}
2016-12-23 13:01:09 -06:00
2017-02-12 13:11:25 -06:00
// uiAttributedStringSetAttribute() records spec for the range start..end by
// passing it to s's attribute list.
void uiAttributedStringSetAttribute(uiAttributedString *s, uiAttributeSpec *spec, size_t start, size_t end)
{
	struct attrlist *list;

	list = s->attrs;
	attrlistInsertAttribute(list, spec, start, end);
}
2016-12-23 13:01:09 -06:00
// TODO introduce an iterator?
// uiAttributedStringForEachAttribute() hands f and data, together with s
// itself, to attrlistForEach() on s's attribute list.
void uiAttributedStringForEachAttribute(uiAttributedString *s, uiAttributedStringForEachAttributeFunc f, void *data)
{
	struct attrlist *list;

	list = s->attrs;
	attrlistForEach(list, s, f, data);
}
2017-01-02 22:53:31 -06:00
// helpers for platform-specific code
// attrstrUTF16() returns the UTF-16 copy of s's text.
// The pointer is s's own buffer, not a copy.
const uint16_t *attrstrUTF16(uiAttributedString *s)
{
	const uint16_t *text16;

	text16 = s->u16;
	return text16;
}
2017-01-17 12:05:40 -06:00
// attrstrUTF16Len() returns the length of the UTF-16 copy of s's text, in 16-bit units.
size_t attrstrUTF16Len(uiAttributedString *s)
{
	size_t nunits;

	nunits = s->u16len;
	return nunits;
}
2017-01-07 19:09:44 -06:00
2017-02-05 19:42:52 -06:00
// TODO is this still needed given the below?
2017-01-07 19:09:44 -06:00
// attrstrUTF8ToUTF16() looks up entry n of s's UTF-8-to-UTF-16 conversion table.
// n is not range-checked.
size_t attrstrUTF8ToUTF16(uiAttributedString *s, size_t n)
{
	size_t off16;

	off16 = s->u8tou16[n];
	return off16;
}
2017-02-05 19:42:52 -06:00
// attrstrCopyUTF8ToUTF16() returns a newly allocated copy of s's
// UTF-8-to-UTF-16 conversion table and stores s->len in *n.
// The copy has s->len + 1 entries: one more than *n, for the end-of-string entry.
size_t *attrstrCopyUTF8ToUTF16(uiAttributedString *s, size_t *n)
{
	size_t tableBytes;
	size_t *copy;

	tableBytes = (s->len + 1) * sizeof (size_t);
	*n = s->len;
	copy = (size_t *) uiAlloc(tableBytes, " size_t[] (uiAttributedString) ");
	memmove(copy, s->u8tou16, tableBytes);
	return copy;
}
2017-01-07 19:09:44 -06:00
// attrstrCopyUTF16ToUTF8() returns a newly allocated copy of s's
// UTF-16-to-UTF-8 conversion table and stores s->u16len in *n.
// The copy has s->u16len + 1 entries: one more than *n, for the end-of-string entry.
size_t *attrstrCopyUTF16ToUTF8(uiAttributedString *s, size_t *n)
{
	size_t tableBytes;
	size_t *copy;

	tableBytes = (s->u16len + 1) * sizeof (size_t);
	*n = s->u16len;
	copy = (size_t *) uiAlloc(tableBytes, " size_t[] (uiAttributedString) ");
	memmove(copy, s->u16tou8, tableBytes);
	return copy;
}