Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /branches/ttcomtester/teraterm/teraterm/unicode.cpp

Parent Directory | Revision Log


Revision 10521 - (hide annotations) (download) (as text)
Fri Jan 20 16:03:38 2023 UTC (14 months, 2 weeks ago) by zmatsuo
File MIME type: text/x-c++src
File size: 13242 byte(s)
add communication test tool
1 doda 8445 /*
2 nmaya 9048 * Copyright (C) 2019- TeraTerm Project
3 doda 8445 * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include <stdlib.h>
30     #include <stdio.h>
31 zmatsuo 9597 #include <assert.h>
32 doda 8445
33     #include "unicode.h"
34    
35     /**
36     * East_Asian_Width �Q�l���� ����
37     *
38     * @retval 'F' Fullwidth �S�p
39     * @retval 'H' Halfwidth ���p
40     * @retval 'W' Wide �L
41     * @retval 'n' Na,Narrow ��
42     * @retval 'A' Ambiguous �B��
43     * �������������������������������B
44     * ���A�W�A���g�����������O���g�����������o�����A
45     * ���A�W�A���]�������R�[�h�������������S�p�������������������������B
46     * �M���V�A�������L�������������B
47     * @retval 'N' Neutral ����
48     * ���A�W�A���g�����������o�������A�S�p�������p���������B�A���r�A���������B
49     */
50     char UnicodeGetWidthProperty(unsigned long u32)
51     {
52     typedef struct {
53     unsigned long code_from;
54     unsigned long code_to;
55     char property;
56     } east_asian_width_map_t;
57     // �e�[�u���������������������� H
58     const static east_asian_width_map_t east_asian_width_map[] = {
59     #include "unicode_asian_width.tbl"
60     };
61     const east_asian_width_map_t *table = east_asian_width_map;
62     const size_t table_size = _countof(east_asian_width_map);
63     char result;
64    
65     // �e�[�u���O�`�F�b�N
66     if (u32 < east_asian_width_map[0].code_from) {
67     return 'H';
68     }
69     if (east_asian_width_map[table_size-1].code_to < u32) {
70     return 'H';
71     }
72    
73     // �e�[�u������
74     result = 'H';
75     size_t low = 0;
76     size_t high = table_size - 1;
77     while (low < high) {
78     size_t mid = (low + high) / 2;
79     if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
80     result = table[mid].property;
81     break;
82     } else if (table[mid].code_to < u32) {
83     low = mid + 1;
84     } else {
85     high = mid;
86     }
87     }
88    
89     return result;
90     }
91    
/** Inclusive code point range [code_from, code_to]. */
typedef struct {
    unsigned long code_from;    // first code point of the range
    unsigned long code_to;      // last code point of the range (inclusive)
} UnicodeTable_t;

/**
 * Inclusive code point range with a combining category.
 * The category is the value UnicodeIsCombiningCharacter() returns.
 */
typedef struct {
    unsigned long code_from;    // first code point of the range
    unsigned long code_to;      // last code point of the range (inclusive)
    unsigned char category;     // combining category of the range
} UnicodeTableCombine_t;

/** Inclusive code point range of a named Unicode block. */
typedef struct {
    unsigned long code_from;    // first code point of the block
    unsigned long code_to;      // last code point of the block (inclusive)
    const char *block_name;     // name of the block
} UnicodeTableBlock_t;
108    
109 zmatsuo 10330 static const UnicodeTableBlock_t UnicodeBlockList[] = {
110 nmaya 10325 #include "unicode_block.tbl"
111     };
112    
113 doda 8445 /**
114     * u32���e�[�u�����f�[�^����������������������
115 zmatsuo 10310 *
116     * @retval �e�[�u����index
117     * @retval -1 �e�[�u��������������
118 doda 8445 */
119 zmatsuo 10310 static int SearchTableSimple(
120 doda 8445 const UnicodeTable_t *table, size_t table_size,
121     unsigned long u32)
122     {
123     if (u32 < table[0].code_from) {
124 zmatsuo 10310 return -1;
125 doda 8445 }
126     if (u32 > table[table_size-1].code_to) {
127 zmatsuo 10310 return -1;
128 doda 8445 }
129     size_t low = 0;
130     size_t high = table_size - 1;
131     while (low <= high) {
132     size_t mid = (low + high) / 2;
133     if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
134 zmatsuo 10310 return (int)mid;
135 doda 8445 } else if (table[mid].code_to < u32) {
136     low = mid + 1;
137     } else {
138     high = mid - 1;
139     }
140     }
141     // �e�[�u���������O
142 zmatsuo 10310 return -1;
143 doda 8445 }
144    
145 zmatsuo 10310 /**
146     * SearchTableSimple() ������
147     * �e�[�u�����^��������
148     *
149     * @retval �e�[�u����index
150     * @retval -1 �e�[�u��������������
151     */
152     static int SearchTableCombine(
153     const UnicodeTableCombine_t *table, size_t table_size,
154     unsigned long u32)
155     {
156     if (u32 < table[0].code_from) {
157     return -1;
158     }
159     if (u32 > table[table_size-1].code_to) {
160     return -1;
161     }
162     size_t low = 0;
163     size_t high = table_size - 1;
164     while (low <= high) {
165     size_t mid = (low + high) / 2;
166     if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
167     return (int)mid;
168     } else if (table[mid].code_to < u32) {
169     low = mid + 1;
170     } else {
171     high = mid - 1;
172     }
173     }
174     // �e�[�u���������O
175     return -1;
176     }
177    
178 nmaya 10325 /**
179     * SearchTableSimple() ������
180     * �e�[�u�����^��������
181     *
182     * @retval �e�[�u����index
183     * @retval -1 �e�[�u��������������
184     */
185     static int SearchTableBlock(
186     const UnicodeTableBlock_t *table, size_t table_size,
187     unsigned long u32)
188     {
189     if (u32 < table[0].code_from) {
190     return -1;
191     }
192     if (u32 > table[table_size-1].code_to) {
193     return -1;
194     }
195     size_t low = 0;
196     size_t high = table_size - 1;
197     while (low <= high) {
198     size_t mid = (low + high) / 2;
199     if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
200     return (int)mid;
201     } else if (table[mid].code_to < u32) {
202     low = mid + 1;
203     } else {
204     high = mid - 1;
205     }
206     }
207     // �e�[�u���������O
208     return -1;
209     }
210    
211 doda 8445 /*
212     * ������������������
213 zmatsuo 10310 * ����������������������������
214     * EMOJI MODIFIER
215     * = Nonspacing Mark
216     * VARIATION SELECTOR (�������Z���N�^)
217     * = Nonspacing Mark
218 doda 8445 *
219     * @retval 0 ����������������
220 zmatsuo 10310 * @retval 1 ��������,Nonspacing Mark, �J�[�\��������������
221     * @retval 2 ��������,Spacing Mark, �J�[�\���� +1 ��������
222 doda 8445 */
223     int UnicodeIsCombiningCharacter(unsigned long u32)
224     {
225 zmatsuo 10310 #define Mn 1 // Nonspacing_Mark a nonspacing combining mark (zero advance width)
226     #define Mc 2 // Spacing_Mark a spacing combining mark (positive advance width)
227     #define Me 1 // Enclosing_Mark an enclosing combining mark
228     #define Sk 1 // Modifier_Symbol a non-letterlike modifier symbol
229     const static UnicodeTableCombine_t CombiningCharacterList[] = {
230 doda 8445 #include "unicode_combine.tbl"
231     };
232 zmatsuo 10310 const int index = SearchTableCombine(CombiningCharacterList, _countof(CombiningCharacterList), u32);
233     if (index == -1) {
234     return 0;
235     }
236     return (int)CombiningCharacterList[index].category;
237 doda 8445 }
238    
239 zmatsuo 10310 /**
240     * �G����?
241     *
242     * @retval 0 �G������������
243     * @retval 1 �G����������
244     */
245 doda 8445 int UnicodeIsEmoji(unsigned long u32)
246     {
247     const static UnicodeTable_t EmojiList[] = {
248     #include "unicode_emoji.tbl"
249     };
250 zmatsuo 10310 const int index = SearchTableSimple(EmojiList, _countof(EmojiList), u32);
251     return index != -1 ? 1 : 0;
252 doda 8445 }
253    
/**
 * Checks whether a code point is a variation selector.
 *
 * Not used, because UnicodeIsCombiningCharacter() performs the same check.
 *
 * @retval 0 not a variation selector
 * @retval 1 variation selector
 */
262 zmatsuo 10310 #if 0
int UnicodeIsVariationSelector(unsigned long u32)
{
    // FVS (Mongolian Free Variation Selector)
    if (u32 >= 0x00180b && u32 <= 0x00180d) {
        return 1;
    }
    // SVS VS1..VS16
    if (u32 >= 0x00fe00 && u32 <= 0x00fe0f) {
        return 1;
    }
    // IVS VS17..VS256
    if (u32 >= 0x0e0100 && u32 <= 0x0e01ef) {
        return 1;
    }
    return 0;
}
273 zmatsuo 10310 #endif
274 doda 8445
275 zmatsuo 10313 /**
276     * ���B���[�}?
277     *
278     * @retval 0 ���B���[�}��������
279     * @retval 1 ���B���[�}������
280     */
281     int UnicodeIsVirama(unsigned long u32)
282     {
283     const static UnicodeTable_t ViramaList[] = {
284     #include "unicode_virama.tbl"
285     };
286     const int index = SearchTableSimple(ViramaList, _countof(ViramaList), u32);
287     return index != -1 ? 1 : 0;
288     }
289 doda 8445
290 nmaya 10325 /**
291     * Unicode block �� index ������
292     *
293     * @retval -1 block ��������������
294     * @retval block �� index
295     */
296     int UnicodeBlockIndex(unsigned long u32)
297     {
298     return SearchTableBlock(UnicodeBlockList, _countof(UnicodeBlockList), u32);
299     }
300 zmatsuo 10313
301 zmatsuo 10330 const char *UnicodeBlockName(int index)
302 nmaya 10325 {
303     if (index == -1) {
304     return "";
305     }
306     return UnicodeBlockList[index].block_name;
307     }
308    
#if 0
// Disabled manual check: prints the width property of a few code points.
int main(int, char *[])
{
    static const unsigned long codes[] = {
#if 0
        0, 1, 0x7f,
        0x80,
        0x0e00ff,
        0x0e0100,
        0x10fffd,
#endif
        0x10fffe,
    };

    const size_t code_count = _countof(codes);
    size_t pos = 0;
    while (pos < code_count) {
        printf("U+%06lx %c\n", codes[pos], UnicodeGetWidthProperty(codes[pos]));
        pos++;
    }
    return 0;
}
#endif
330 zmatsuo 8745
331     //
332     // Unicode Combining Character Support
333     //
334     #include "uni_combining.map"
335    
336 zmatsuo 9525 static unsigned short UnicodeGetPrecomposedChar(int start_index, unsigned short first_code, unsigned short code)
337 zmatsuo 8745 {
338     const combining_map_t *table = mapCombiningToPrecomposed;
339     int tmax = _countof(mapCombiningToPrecomposed);
340     unsigned short result = 0;
341     int i;
342    
343     for (i = start_index ; i < tmax ; i++) {
344     if (table[i].first_code != first_code) { // 1�������������������A���~���������������������B
345     break;
346     }
347    
348     if (table[i].second_code == code) {
349     result = table[i].precomposed;
350     break;
351     }
352     }
353    
354     return (result);
355     }
356    
357 zmatsuo 9525 static int UnicodeGetIndexOfCombiningFirstCode(unsigned short code)
358 zmatsuo 8745 {
359     const combining_map_t *table = mapCombiningToPrecomposed;
360     int tmax = _countof(mapCombiningToPrecomposed);
361     int low, mid, high;
362     int index = -1;
363    
364     low = 0;
365     high = tmax - 1;
366    
367     // binary search
368     while (low < high) {
369     mid = (low + high) / 2;
370     if (table[mid].first_code < code) {
371     low = mid + 1;
372     } else {
373     high = mid;
374     }
375     }
376    
377     if (table[low].first_code == code) {
378     while (low >= 0 && table[low].first_code == code) {
379     index = low;
380     low--;
381     }
382     }
383    
384     return (index);
385     }
386    
387     /**
388     * Unicode�������������s��
389     * @param[in] first_code
390     * @param[in] code
391     * @retval 0 ������������
392     * @retval ���O ��������Unicode
393 zmatsuo 9525 *
394     * ��
395     * first_code
396     * U+307B(��)
397     * code
398     * U+309A(�K)
399     * retval
400     * U+307D(��)
401 zmatsuo 8745 */
402     unsigned short UnicodeCombining(unsigned short first_code, unsigned short code)
403     {
404     int first_code_index = UnicodeGetIndexOfCombiningFirstCode(first_code);
405     if (first_code_index == -1) {
406     return 0;
407     }
408     unsigned short cset = UnicodeGetPrecomposedChar(first_code_index, first_code, code);
409     return cset;
410     }
411 zmatsuo 9597
/** One mapping between an ISO8859 byte and its Unicode code point. */
typedef struct {
    unsigned char code;         // ISO8859 byte value
    unsigned short unicode;     // corresponding Unicode code point
} ISO8859Table_t;
416    
417     /**
418     * ISO8859�e�[�u��
419     */
420     const ISO8859Table_t *GetISO8859Table(int iso8859_part)
421     {
422     static const ISO8859Table_t iso8859_2[] = {
423     #include "iso8859-2.tbl"
424     };
425     static const ISO8859Table_t iso8859_3[] = {
426     #include "iso8859-3.tbl"
427     };
428     static const ISO8859Table_t iso8859_4[] = {
429     #include "iso8859-4.tbl"
430     };
431     static const ISO8859Table_t iso8859_5[] = {
432     #include "iso8859-5.tbl"
433     };
434     static const ISO8859Table_t iso8859_6[] = {
435     #include "iso8859-6.tbl"
436     };
437     static const ISO8859Table_t iso8859_7[] = {
438     #include "iso8859-7.tbl"
439     };
440     static const ISO8859Table_t iso8859_8[] = {
441     #include "iso8859-8.tbl"
442     };
443     static const ISO8859Table_t iso8859_9[] = {
444     #include "iso8859-9.tbl"
445     };
446     static const ISO8859Table_t iso8859_10[] = {
447     #include "iso8859-10.tbl"
448     };
449     static const ISO8859Table_t iso8859_11[] = {
450     #include "iso8859-11.tbl"
451     };
452     static const ISO8859Table_t iso8859_13[] = {
453     #include "iso8859-13.tbl"
454     };
455     static const ISO8859Table_t iso8859_14[] = {
456     #include "iso8859-14.tbl"
457     };
458     static const ISO8859Table_t iso8859_15[] = {
459     #include "iso8859-15.tbl"
460     };
461     static const ISO8859Table_t iso8859_16[] = {
462     #include "iso8859-16.tbl"
463     };
464    
465     static const ISO8859Table_t *tables[] = {
466     NULL, // 0
467     NULL, // ISO8859-1
468     iso8859_2,
469     iso8859_3,
470     iso8859_4,
471     iso8859_5,
472     iso8859_6,
473     iso8859_7,
474     iso8859_8,
475     iso8859_9,
476     iso8859_10,
477     iso8859_11,
478     NULL,
479     iso8859_13,
480     iso8859_14,
481     iso8859_15,
482     iso8859_16,
483     };
484     if (iso8859_part >= _countof(tables)) {
485     assert(0);
486     return NULL;
487     }
488     assert(tables[iso8859_part] != NULL);
489     return tables[iso8859_part];
490     }
491    
492     /**
493     * ISO8859����Unicode������
494     */
495     int UnicodeFromISO8859(int part, unsigned char b, unsigned short *u16)
496     {
497     if (part == 1) {
498     // ISO8859-1 �� unicode ������
499     *u16 = b;
500     return 1;
501     }
502     const ISO8859Table_t *table = GetISO8859Table(part);
503     if (table == NULL) {
504     // ����������������
505     *u16 = 0;
506     return 0;
507     }
508     for (int i = 0; i < 0xff; i++ ){
509     if (table[i].code == b) {
510     *u16 = table[i].unicode;
511     return 1;
512     }
513     }
514     *u16 = 0;
515     return 0;
516     }
517    
518     /**
519     * Unicode����ISO8859������
520 zmatsuo 9988 *
521     * @param[in] part IS8859���� 1...11,13...16
522     * @param[in] u32 Unicode
523     * @param[out] *b ISO8859 char
524     * @retval 0 ������������
525     * @retval 1 ����������
526 zmatsuo 9597 */
527     int UnicodeToISO8859(int part, unsigned long u32, unsigned char *b)
528     {
529     if (part == 1) {
530     // ISO8859-1 �� unicode ������
531     *b = (unsigned char)u32;
532     return 1;
533     }
534     if (u32 >= 0x10000) {
535     // �������������������R�[�h
536     *b = 0;
537     return 0;
538     }
539     const unsigned short u16 = (unsigned short)u32;
540     const ISO8859Table_t *table = GetISO8859Table(part);
541     if (table == NULL) {
542     // ����������������
543     *b = 0;
544     return 0;
545     }
546     for (int i = 0; i < 0xff; i++ ){
547     if (table[i].unicode == u16) {
548     *b = table[i].code;
549     return 1;
550     }
551     }
552     *b = 0;
553     return 0;
554     }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26