@ -1553,66 +1553,59 @@ void* ImFileLoadToMemory(const char* filename, const char* mode, size_t* out_f
//-----------------------------------------------------------------------------
// Convert UTF-8 to 32-bit character, process single character input.
// Based on stb_from_utf8() from github.com/nothings/stb/
// Based on work of Christopher Wellons (https://github.com/skeeto/branchless-utf8)
// We handle UTF-8 decoding error by skipping forward.
int ImTextCharFromUtf8 ( unsigned int * out_char , const char * in_text , const char * in_text_end )
{
unsigned int c = ( unsigned int ) - 1 ;
const unsigned char * str = ( const unsigned char * ) in_text ;
if ( ! ( * str & 0x80 ) )
{
c = ( unsigned int ) ( * str + + ) ;
* out_char = c ;
return 1 ;
}
if ( ( * str & 0xe0 ) = = 0xc0 )
{
* out_char = IM_UNICODE_CODEPOINT_INVALID ; // will be invalid but not end of string
if ( in_text_end & & in_text_end - ( const char * ) str < 2 ) return 1 ;
if ( * str < 0xc2 ) return 2 ;
c = ( unsigned int ) ( ( * str + + & 0x1f ) < < 6 ) ;
if ( ( * str & 0xc0 ) ! = 0x80 ) return 2 ;
c + = ( * str + + & 0x3f ) ;
* out_char = c ;
return 2 ;
}
if ( ( * str & 0xf0 ) = = 0xe0 )
{
* out_char = IM_UNICODE_CODEPOINT_INVALID ; // will be invalid but not end of string
if ( in_text_end & & in_text_end - ( const char * ) str < 3 ) return 1 ;
if ( * str = = 0xe0 & & ( str [ 1 ] < 0xa0 | | str [ 1 ] > 0xbf ) ) return 3 ;
if ( * str = = 0xed & & str [ 1 ] > 0x9f ) return 3 ; // str[1] < 0x80 is checked below
c = ( unsigned int ) ( ( * str + + & 0x0f ) < < 12 ) ;
if ( ( * str & 0xc0 ) ! = 0x80 ) return 3 ;
c + = ( unsigned int ) ( ( * str + + & 0x3f ) < < 6 ) ;
if ( ( * str & 0xc0 ) ! = 0x80 ) return 3 ;
c + = ( * str + + & 0x3f ) ;
* out_char = c ;
return 3 ;
}
if ( ( * str & 0xf8 ) = = 0xf0 )
{
* out_char = IM_UNICODE_CODEPOINT_INVALID ; // will be invalid but not end of string
if ( in_text_end & & in_text_end - ( const char * ) str < 4 ) return 1 ;
if ( * str > 0xf4 ) return 4 ;
if ( * str = = 0xf0 & & ( str [ 1 ] < 0x90 | | str [ 1 ] > 0xbf ) ) return 4 ;
if ( * str = = 0xf4 & & str [ 1 ] > 0x8f ) return 4 ; // str[1] < 0x80 is checked below
c = ( unsigned int ) ( ( * str + + & 0x07 ) < < 18 ) ;
if ( ( * str & 0xc0 ) ! = 0x80 ) return 4 ;
c + = ( unsigned int ) ( ( * str + + & 0x3f ) < < 12 ) ;
if ( ( * str & 0xc0 ) ! = 0x80 ) return 4 ;
c + = ( unsigned int ) ( ( * str + + & 0x3f ) < < 6 ) ;
if ( ( * str & 0xc0 ) ! = 0x80 ) return 4 ;
c + = ( * str + + & 0x3f ) ;
// utf-8 encodings of values used in surrogate pairs are invalid
if ( ( c & 0xFFFFF800 ) = = 0xD800 ) return 4 ;
// If codepoint does not fit in ImWchar, use replacement character U+FFFD instead
if ( c > IM_UNICODE_CODEPOINT_MAX ) c = IM_UNICODE_CODEPOINT_INVALID ;
* out_char = c ;
return 4 ;
}
* out_char = 0 ;
return 0 ;
static const char lengths [ ] = { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 2 , 2 , 2 , 2 , 3 , 3 , 4 , 0 } ;
static const int masks [ ] = { 0x00 , 0x7f , 0x1f , 0x0f , 0x07 } ;
static const uint32_t mins [ ] = { 4194304 , 0 , 128 , 2048 , 65536 } ;
static const int shiftc [ ] = { 0 , 18 , 12 , 6 , 0 } ;
static const int shifte [ ] = { 0 , 6 , 4 , 2 , 0 } ;
unsigned char s [ 4 ] ;
int len = lengths [ * ( const unsigned char * ) in_text > > 3 ] ;
if ( in_text_end = = NULL )
in_text_end = in_text + len + ! len ; // Max length, nulls will be taken into account.
// Copy at most 'len' bytes, stop copying at 0 or past in_text_end.
s [ 0 ] = in_text + 0 < in_text_end ? in_text [ 0 ] : 0 ;
s [ 1 ] = s [ 0 ] & & in_text + 1 < in_text_end ? in_text [ 1 ] : 0 ;
s [ 2 ] = s [ 1 ] & & in_text + 2 < in_text_end ? in_text [ 2 ] : 0 ;
s [ 3 ] = s [ 2 ] & & in_text + 3 < in_text_end ? in_text [ 3 ] : 0 ;
// Compute the pointer to the next character early so that the next
// iteration can start working on the next character. Neither Clang
// nor GCC figure out this reordering on their own.
//const unsigned char *next = s + len + !len;
// No bytes are consumed when *in_text == 0 || in_text == in_text_end.
// One byte is consumed in case of invalid first byte of in_text.
// All available bytes (at most `len` bytes) are consumed on incomplete/invalid second to last bytes.
int consumed = ! ! s [ 0 ] + ! ! s [ 1 ] + ! ! s [ 2 ] + ! ! s [ 3 ] ;
// Assume a four-byte character and load four bytes. Unused bits are shifted out.
* out_char = ( uint32_t ) ( s [ 0 ] & masks [ len ] ) < < 18 ;
* out_char | = ( uint32_t ) ( s [ 1 ] & 0x3f ) < < 12 ;
* out_char | = ( uint32_t ) ( s [ 2 ] & 0x3f ) < < 6 ;
* out_char | = ( uint32_t ) ( s [ 3 ] & 0x3f ) < < 0 ;
* out_char > > = shiftc [ len ] ;
// Accumulate the various error conditions.
int e = 0 ;
e = ( * out_char < mins [ len ] ) < < 6 ; // non-canonical encoding
e | = ( ( * out_char > > 11 ) = = 0x1b ) < < 7 ; // surrogate half?
e | = ( * out_char > IM_UNICODE_CODEPOINT_MAX ) < < 8 ; // out of range?
e | = ( s [ 1 ] & 0xc0 ) > > 2 ;
e | = ( s [ 2 ] & 0xc0 ) > > 4 ;
e | = ( s [ 3 ] ) > > 6 ;
e ^ = 0x2a ; // top two bits of each tail byte correct?
e > > = shifte [ len ] ;
if ( e )
* out_char = IM_UNICODE_CODEPOINT_INVALID ;
return consumed ;
}
int ImTextStrFromUtf8 ( ImWchar * buf , int buf_size , const char * in_text , const char * in_text_end , const char * * in_text_remaining )