unicode.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. #include "includes.h"
  2. #include "unicode.h"
  3. #include <stdint.h>
  4. #include <string.h>
  5. #include <stdlib.h>
  6. #include <stdio.h>
  /* Base offsets of the two conversion tables. In the visible code they are
     referenced only by the commented-out spiFlashBlockRead() calls; the live
     code reads the in-memory tables declared below instead. */
  7. #define GB2312_TO_UNICODE_BASE 0x134000 // start address? (original author's open question)
  8. #define UNICODE_TO_GB2312_BASE 0x138000
  /* GB2312 -> Unicode table (defined elsewhere); GB2312_to_Unicode indexes it
     by zone and position. */
  9. extern const uint16_t GB2312_UNICODE_MAP[];
  10. //extern unsigned short giGB2312[21243][2];
  11. //const unsigned short giGBCount=21243;
  /* One Unicode/GB2312 pair; Unicode_to_GB2312 looks entries up by `unicode`. */
  12. typedef struct {
  13. uint16_t unicode;
  14. uint16_t gb2312;
  15. } unicode_gb2312_map_t;
  /* Unicode -> GB2312 table (defined elsewhere). Unicode_to_GB2312 binary-searches
     it, so it is presumably sorted by ascending `unicode` -- TODO confirm. */
  16. extern const unicode_gb2312_map_t UNICODE_GB2312_MAP[];
  /* Valid byte ranges of a double-byte GB2312 code: the high byte is the zone,
     the low byte is the position inside the zone. The lowest code is 0xA1A1 and
     the highest is 0xF7FE, as noted by the original author below. */
  17. /*
  18. MIN_CODE = $A1A1;
  19. MAX_CODE = $F7FE;
  20. */
  21. #define GB2312_CODE_ZONE_MIN 0xA1
  22. #define GB2312_CODE_ZONE_MAX 0xF7
  23. #define GB2312_CODE_POS_MIN 0xA1
  24. #define GB2312_CODE_POS_MAX 0xFE
  25. uint16_t
  26. GB2312_to_Unicode(uint16_t c)
  27. {
  28. uint8_t zone, pos;
  29. uint32_t offset;
  30. uint16_t retval;
  31. // printf("%2x/r",c);
  32. zone = c >> 8;
  33. pos = c & 0xFF;
  34. // printf("%2x-/r",zone);
  35. // printf("%2x--/r",pos);
  36. if ((zone > GB2312_CODE_ZONE_MAX) ||
  37. (zone < GB2312_CODE_ZONE_MIN) ||
  38. (pos > GB2312_CODE_POS_MAX) ||
  39. (pos < GB2312_CODE_POS_MIN))
  40. {
  41. return c;
  42. }
  43. offset = (zone - GB2312_CODE_ZONE_MIN) * (GB2312_CODE_POS_MAX - GB2312_CODE_POS_MIN + 1) +
  44. (pos - GB2312_CODE_POS_MIN);
  45. //spiFlashBlockRead(GB2312_TO_UNICODE_BASE + offset * 2, (uint8_t *)&retval, 2);
  46. retval=GB2312_UNICODE_MAP[offset];//offset*2
  47. return retval;
  48. }
  49. uint16_t
  50. Unicode_to_GB2312(uint16_t c)
  51. {
  52. int offset;
  53. int low, high, mid;
  54. const unicode_gb2312_map_t *map;
  55. if (((c >> 8) == 0) && ((c & 0xFF) < 0x7F))
  56. {
  57. return c;
  58. }
  59. low = 0;
  60. high = (GB2312_CODE_ZONE_MAX - GB2312_CODE_ZONE_MIN + 1) *
  61. (GB2312_CODE_POS_MAX - GB2312_CODE_POS_MIN + 1);
  62. while (low <= high)
  63. {
  64. mid = (low + high) / 2;
  65. // spiFlashBlockRead(UNICODE_TO_GB2312_BASE + mid * sizeof(unicode_gb2312_map_t),
  66. // (uint8_t *)&map, sizeof(unicode_gb2312_map_t));
  67. map = & UNICODE_GB2312_MAP[mid];
  68. if (c > map->unicode)
  69. {
  70. low = mid + 1;
  71. }
  72. if (c < map->unicode)
  73. {
  74. high = mid - 1;
  75. }
  76. if (c == map->unicode)
  77. {
  78. return map->gb2312;
  79. }
  80. }
  81. return '?'; // 无法识别,替换为 ?
  82. }
  /*
   * Convert a GB2312/ASCII byte string into 16-bit Unicode values.
   *
   * dest:   output array of 16-bit values.
   * size:   maximum number of values to store in dest.
   * src:    input bytes.
   * length: number of input bytes.
   *
   * A byte above 0x7F that still has a following byte is combined with it
   * (lead byte in the high half) and passed through GB2312_to_Unicode; any
   * other byte is copied as-is. Conversion stops when dest is full or the
   * input is used up. No terminator is stored.
   *
   * Returns the number of values written to dest.
   */
  uint16_t
  Ansi_to_Unicode(uint16_t *dest, uint16_t size, const uint8_t *src, uint16_t length) //GBK_Unicode
  {
      uint16_t written = 0;
      uint16_t consumed = 0;

      for (; (written < size) && (consumed < length); ++written)
      {
          const uint8_t lead = src[consumed];
          const uint16_t remaining = (uint16_t)(length - consumed);

          if ((lead <= 0x7F) || (remaining < 2))
          {
              /* Single byte: plain ASCII, or a lead byte with nothing after it. */
              dest[written] = lead;
              consumed += 1;
              continue;
          }

          dest[written] = GB2312_to_Unicode(((uint16_t)lead << 8) | src[consumed + 1]);
          consumed += 2;
      }

      return written;
  }
  /*
   * Convert little-endian 16-bit Unicode data into a NUL-terminated
   * GB2312/ASCII byte string.
   *
   * dest:   output buffer.
   * size:   capacity of dest in bytes, including the terminating NUL.
   * src:    input bytes; each character is two bytes, low byte first.
   * length: number of input bytes. A trailing odd byte is ignored.
   *
   * Each character is translated with Unicode_to_GB2312 and stored as one
   * byte (result below 0x100) or two bytes (high byte first). Conversion
   * stops at the first character that would not leave room for the
   * terminator, so dest is never written beyond `size` bytes.
   *
   * Returns the number of bytes stored, not counting the terminator. When
   * size is 0 nothing is written and 0 is returned.
   */
  uint16_t
  Unicode_to_Ansi(uint8_t *dest, uint16_t size, const uint8_t *src, uint16_t length) //
  {
      uint16_t count = 0;

      if (size == 0)
      {
          /* No room even for the terminator. */
          return 0;
      }

      /* Require a complete 2-byte unit: with an odd length the old
         `length -= 2` wrapped around and the loop ran far past the input. */
      while (length >= 2)
      {
          const uint16_t gb = Unicode_to_GB2312((uint16_t)(((uint16_t)src[1] << 8) | src[0]));
          const uint16_t need = ((gb >> 8) == 0) ? 1 : 2;

          /* Keep one byte free for the terminator. The old check allowed a
             2-byte character with only 2 bytes left, pushing the NUL to
             dest[size]. */
          if ((uint32_t)count + need >= size)
          {
              break;
          }

          if (need == 2)
          {
              dest[count++] = (uint8_t)(gb >> 8);
          }
          dest[count++] = (uint8_t)gb;

          src += 2;
          length -= 2;
      }

      dest[count] = 0;
      return count;
  }
  /**************************************************************************************
  Convert Unicode written as a hexadecimal text string into a GB2312/ASCII string.

  src holds groups of four hex digits; each group is one 16-bit Unicode value
  with its low byte first, so "d89e" means 0x9ED8.
  Example: "d89ea48ba47fc47e0000" becomes "默认群组".

  dest: output buffer; it is zero-filled before conversion.
  size: capacity of dest in bytes, including the terminating NUL.
  src:  NUL-terminated hex string. Trailing characters that do not form a
        complete group of four are ignored.

  Each value is translated with Unicode_to_GB2312 and stored as one byte
  (result below 0x100) or two bytes (high byte first). Conversion stops at the
  first character that would not leave room for the terminator. A "0000" group
  is not treated as an end marker: it is stored as a zero byte and counted.

  Returns the number of bytes stored, not counting the final terminator.
  Returns 0 without touching dest when src is shorter than four characters.
  ***************************************************************************************/
  uint16_t StrUnicodeToAnsi(uint8_t *dest,uint16_t size,const char *src)
  {
      uint16_t count = 0;
      size_t remaining = strlen(src);

      if (remaining < 4)
      {
          return 0;
      }

      memset(dest, 0, size);
      if (size == 0)
      {
          /* No room even for the terminator. */
          return 0;
      }

      /* Only whole groups are consumed; a partial group used to be read past
         the end of src and drove the old signed length negative. */
      while (remaining >= 4)
      {
          char hex[5];
          uint16_t gb;
          uint16_t need;

          /* Swap the two byte pairs so strtol sees the high byte first. */
          hex[0] = src[2];
          hex[1] = src[3];
          hex[2] = src[0];
          hex[3] = src[1];
          hex[4] = '\0';

          gb = Unicode_to_GB2312((uint16_t)strtol(hex, NULL, 16));
          need = ((gb >> 8) == 0) ? 1 : 2;

          /* Keep one byte free for the terminator. The old check allowed a
             2-byte character with only 2 bytes left, pushing the NUL to
             dest[size]. */
          if ((uint32_t)count + need >= size)
          {
              break;
          }

          if (need == 2)
          {
              dest[count++] = (uint8_t)(gb >> 8);
          }
          dest[count++] = (uint8_t)gb;

          src += 4;
          remaining -= 4;
      }

      dest[count] = 0;
      return count;
  }
  168. /**************************************************************************************
  169. 将字符串转字符串形式的StrUnicode
  170. 比如:字符串"默认群组"(8字节) 转为 “d89ea48ba47fc47e”(16字节)默认群组的Unicode就是
  171. d89ea48ba47fc47e {0x6DF1, 0xC9EE},
  172. ***************************************************************************************/
  173. //uint16_t
  174. //GB_to_UN(uint16_t c)
  175. //{
  176. // uint8_t zone, pos;
  177. // int offset;
  178. // int low, high, mid;
  179. // const unicode_gb2312_map_t *map;
  180. // if (((c >> 8) == 0) && ((c & 0xFF) < 0x7F))
  181. // {
  182. // return c;
  183. // }
  184. // printf("%2x--/r",c);
  185. // low = 0;
  186. // high = (GB2312_CODE_ZONE_MAX - GB2312_CODE_ZONE_MIN + 1) * //8178
  187. // (GB2312_CODE_POS_MAX - GB2312_CODE_POS_MIN + 1);
  188. // while (low <= high)
  189. // {
  190. // mid = (low + high) / 2;
  191. // map = &UNICODE_GB2312_MAP[mid];
  192. //
  193. // if (c > map->gb2312)
  194. // {
  195. // low = mid + 1;
  196. // }
  197. // if (c < map->gb2312)
  198. // {
  199. // high = mid - 1;
  200. // }
  201. // if (c == map->gb2312)
  202. // {
  203. // return map->unicode;
  204. // }
  205. // }
  206. // return '?'; // 无法识别,替换为 ?
  207. //}
  208. ///**************************************************************************/
  209. //uint16_t AnsiToStrUnicode(uint16_t *dest,uint16_t size,const char *src)
  210. //{
  211. // uint16_t count = 0;
  212. // uint16_t t;
  213. // int length;
  214. // uint16_t v;
  215. // char temp[5];
  216. //
  217. //
  218. // length=strlen(src);
  219. // if(length<2) return 0; //因为后面只留2个0 c9eedbdab0eccac2b4a6 00
  220. // memset(dest,0,size);
  221. // printf("%2x/n-->",*src);
  222. //
  223. // while((count < size) && length>0)
  224. // {
  225. // temp[0]=src[0];temp[1]=src[1]; //{0x6DF1, 0xC9EE},
  226. // temp[2]=src[2];temp[3]=src[3];
  227. // temp[4]=0;
  228. // v=strtol(temp,NULL,16);
  229. // printf("%2x/n->",v);
  230. // if ((v > 0x7F)&& (length > 1)) //(*src > 0x7F) &&
  231. // {
  232. // dest[count] = GB_to_UN(v);
  233. // printf("%2x/n/r",dest[count]);
  234. // src += 4;
  235. // length -= 4;
  236. // }else{
  237. // dest[count] = *src;
  238. // src++;
  239. // length--;
  240. //
  241. // }
  242. // ++count;
  243. // }
  244. //
  245. // return count;
  246. //}
  247. /********************************************************************************/
  /* Value of one hexadecimal digit, or -1 when ch is not a hex digit. */
  static int hex_digit_value(char ch)
  {
      if (ch >= '0' && ch <= '9')
      {
          return ch - '0';
      }
      if (ch >= 'a' && ch <= 'f')
      {
          return ch - 'a' + 10;
      }
      if (ch >= 'A' && ch <= 'F')
      {
          return ch - 'A' + 10;
      }
      return -1;
  }

  /* Decode the two hex digits at p into *out; returns 1 on success, 0 otherwise. */
  static int hex_pair_to_byte(const char *p, uint8_t *out)
  {
      const int hi = hex_digit_value(p[0]);
      const int lo = hex_digit_value(p[1]);

      if (hi < 0 || lo < 0)
      {
          return 0;
      }
      *out = (uint8_t)((hi << 4) | lo);
      return 1;
  }

  /*
   * Convert a GB2312/ASCII string written as hexadecimal text into 16-bit
   * Unicode values.
   *
   * dest: output array of 16-bit values.
   * size: maximum number of values to store in dest.
   * src:  NUL-terminated hex string, two digits per byte, for example
   *       "c9eedbdab0eccac2b4a600". A "00" byte marks the end of the data.
   *
   * Two consecutive bytes that are both above 0x7F are combined (first byte
   * in the high half) and passed through GB2312_to_Unicode; any other byte is
   * stored as-is. Conversion stops at the "00" marker, at the end of src, at
   * a character that is not a hex digit, or when dest is full.
   *
   * Returns the number of values written. Returns 0 without touching dest
   * when src is shorter than two characters.
   */
  uint16_t AnsiToStrUnicode(uint16_t *dest,uint16_t size,const char *src)
  {
      uint16_t count = 0;
      size_t remaining = strlen(src);

      if (remaining < 2)
      {
          return 0;
      }

      /* NOTE(review): this clears `size` bytes, i.e. half of the `size`
         elements the loop may write. Left as in the original because the
         unit callers pass for `size` cannot be confirmed from this file. */
      memset(dest, 0, size);

      while ((count < size) && (remaining >= 2))
      {
          uint8_t first;
          uint8_t second;

          if (src[0] == '0' && src[1] == '0')
          {
              break; /* end-of-data marker */
          }
          if (!hex_pair_to_byte(src, &first))
          {
              break; /* malformed input: stop instead of guessing */
          }

          /* The second byte is examined only when it really exists; the old
             code always copied four characters and could read past the NUL. */
          if ((first > 0x7F) && (remaining >= 4) &&
              hex_pair_to_byte(src + 2, &second) && (second > 0x7F))
          {
              dest[count] = GB2312_to_Unicode((uint16_t)(((uint16_t)first << 8) | second));
              src += 4;
              remaining -= 4;
          }
          else
          {
              dest[count] = first;
              src += 2;
              remaining -= 2;
          }
          ++count;
      }

      /* The old code returned 0 here although it returned the count at the
         "00" marker; every exit now reports the number of values written. */
      return count;
  }