This file defines a useful set of functions for decoding Unicode strings from ASCII, UTF-8 and UTF-16 text.
Definition in file unicode.h.
Go to the source code of this file.
Defines | |
#define | UNI_MAX_HEADER_SIZE 3 |
#define | UNI_MAX_UTF8_SIZE 3 |
Defines an ID for each text format | |
Note: these numeric values are taken from the ID3 standard | |
#define | UNI_TYPE_ASCII 0x00 |
#define | UNI_TYPE_UNKNOWN 0xFF |
#define | UNI_TYPE_UTF16BE 0x02 |
#define | UNI_TYPE_UTF16LE 0x01 |
#define | UNI_TYPE_UTF8 0x03 |
Functions | |
U8 | unicode_header_get (U8 *header, U8 txt_format) |
Return the header corresponding to the text format identifier. | |
U8 | unicode_header_scan (U8 *header) |
Detect the text format via a header (3 bytes). | |
U8 | utf8_to_unicode (U8 *utf8, U16 *unicode) |
Convert UTF-8 to Unicode. |
#define UNI_MAX_HEADER_SIZE 3 |
#define UNI_MAX_UTF8_SIZE 3 |
#define UNI_TYPE_ASCII 0x00 |
#define UNI_TYPE_UTF16BE 0x02 |
Definition at line 57 of file unicode.h.
Referenced by pl_main_new(), reader_txt_get_line(), unicode_header_get(), and unicode_header_scan().
#define UNI_TYPE_UTF16LE 0x01 |
Definition at line 56 of file unicode.h.
Referenced by reader_txt_get_line(), unicode_header_get(), and unicode_header_scan().
#define UNI_TYPE_UTF8 0x03 |
Definition at line 58 of file unicode.h.
Referenced by reader_txt_get_line(), unicode_header_get(), and unicode_header_scan().
U8 unicode_header_get | ( | U8 * | header, | |
U8 | txt_format | |||
) |
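Return the header corresponding to the text format identifier. The return value is the number of header bytes written: 3 for UTF-8, 2 for UTF-16BE or UTF-16LE, and 0 for any other format (nothing is written).
txt_format | UNI_TYPE_UTF8, UNI_TYPE_UTF16BE, UNI_TYPE_UTF16LE, UNI_TYPE_ASCII | |
header | header to fill (array of 3 bytes) |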
Definition at line 88 of file unicode.c.
References UNI_TYPE_UTF16BE, UNI_TYPE_UTF16LE, and UNI_TYPE_UTF8.
Referenced by reader_txt_beg(), and reader_txt_new().
00089 { 00090 switch( txt_format ) 00091 { 00092 case UNI_TYPE_UTF8: 00093 header[0] = 0xEF; 00094 header[1] = 0xBB; 00095 header[2] = 0xBF; 00096 return 3; 00097 00098 case UNI_TYPE_UTF16BE: 00099 header[0] = 0xFE; 00100 header[1] = 0xFF; 00101 return 2; 00102 00103 case UNI_TYPE_UTF16LE: 00104 header[0] = 0xFF; 00105 header[1] = 0xFE; 00106 return 2; 00107 } 00108 return 0; 00109 }
U8 unicode_header_scan | ( | U8 * | header | ) |
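Detect the text format via a header (3 bytes). Returns UNI_TYPE_UTF8, UNI_TYPE_UTF16BE or UNI_TYPE_UTF16LE when the matching header is found, and UNI_TYPE_ASCII otherwise.
header | header to analyse (array of 3 bytes) |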
Definition at line 59 of file unicode.c.
References UNI_TYPE_ASCII, UNI_TYPE_UTF16BE, UNI_TYPE_UTF16LE, and UNI_TYPE_UTF8.
Referenced by reader_txt_beg().
00060 { 00061 if( (header[0] == 0xEF) 00062 && (header[1] == 0xBB) 00063 && (header[2] == 0xBF) ) 00064 { 00065 return UNI_TYPE_UTF8; 00066 } 00067 if( (header[0] == 0xFE) 00068 && (header[1] == 0xFF) ) 00069 { 00070 return UNI_TYPE_UTF16BE; 00071 } 00072 if( (header[0] == 0xFF) 00073 && (header[1] == 0xFE) ) 00074 { 00075 return UNI_TYPE_UTF16LE; 00076 } 00077 return UNI_TYPE_ASCII; 00078 }
U8 utf8_to_unicode | ( | U8 * | utf8, | |
U16 * | unicode | |||
) |
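Convert one UTF-8 sequence to a 16-bit Unicode character. The return value is the number of UTF-8 bytes consumed (1 to 3); an unrecognized lead byte is stored unchanged and counted as 1 byte.
utf8 | array of UTF-8 bytes (3 bytes are always read; at most 3 are decoded) | |
unicode | pointer used to store the decoded Unicode character |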
Definition at line 118 of file unicode.c.
Referenced by reader_txt_get_line().
00119 { 00120 U8 c0,c1,c2; 00121 00122 // Take 3 bytes 00123 c0 = utf8[0]; 00124 c1 = utf8[1]; 00125 c2 = utf8[2]; 00126 00127 if( 0x00 == (c0 & 0x80) ) 00128 { 00129 *unicode = c0; 00130 return 1; 00131 } 00132 if( 0xC0 == (c0 & 0xE0) ) 00133 { 00134 *unicode = ((U16)(c0 & 0x1F)<<6) | ((U16)(c1 & 0x3F)); 00135 return 2; 00136 } 00137 if( 0xE0 == (c0 & 0xF0) ) 00138 { 00139 *unicode = ((U16)(c0 & 0x0F)<<(6+6)) | ((U16)(c1 & 0x3F)<<(6)) | ((U16)(c2 & 0x3F)); 00140 return 3; 00141 } 00142 // Error ! 00143 *unicode = c0; 00144 return 1; 00145 }