Develop and Download Open Source Software

Browse Subversion Repository

Contents of /trunk/teraterm/teraterm/unicode.cpp

Parent Directory | Revision Log


Revision 10330 - (show annotations) (download) (as text)
Wed Oct 19 14:27:02 2022 UTC (19 months, 1 week ago) by zmatsuo
File MIME type: text/x-c++src
File size: 13242 byte(s)
コンパイラの警告対策

- unicode.cpp
  - 警告: ISO C++ forbids converting a string constant to ‘char*’
    - 書き換えできない文字列を書き換え可能文字列へ変換していたので修正
  - extern しない変数に static を追加
    - UnicodeBlockList[]
- buffer.c
  - 警告: 使用されない変数 ‘b’ です
1 /*
2 * Copyright (C) 2019- TeraTerm Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <assert.h>
32
33 #include "unicode.h"
34
35 /**
36 * East_Asian_Width �Q�l���� ����
37 *
38 * @retval 'F' Fullwidth �S�p
39 * @retval 'H' Halfwidth ���p
40 * @retval 'W' Wide �L
41 * @retval 'n' Na,Narrow ��
42 * @retval 'A' Ambiguous �B��
43 * �������������������������������B
44 * ���A�W�A���g�����������O���g�����������o�����A
45 * ���A�W�A���]�������R�[�h�������������S�p�������������������������B
46 * �M���V�A�������L�������������B
47 * @retval 'N' Neutral ����
48 * ���A�W�A���g�����������o�������A�S�p�������p���������B�A���r�A���������B
49 */
50 char UnicodeGetWidthProperty(unsigned long u32)
51 {
52 typedef struct {
53 unsigned long code_from;
54 unsigned long code_to;
55 char property;
56 } east_asian_width_map_t;
57 // �e�[�u���������������������� H
58 const static east_asian_width_map_t east_asian_width_map[] = {
59 #include "unicode_asian_width.tbl"
60 };
61 const east_asian_width_map_t *table = east_asian_width_map;
62 const size_t table_size = _countof(east_asian_width_map);
63 char result;
64
65 // �e�[�u���O�`�F�b�N
66 if (u32 < east_asian_width_map[0].code_from) {
67 return 'H';
68 }
69 if (east_asian_width_map[table_size-1].code_to < u32) {
70 return 'H';
71 }
72
73 // �e�[�u������
74 result = 'H';
75 size_t low = 0;
76 size_t high = table_size - 1;
77 while (low < high) {
78 size_t mid = (low + high) / 2;
79 if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
80 result = table[mid].property;
81 break;
82 } else if (table[mid].code_to < u32) {
83 low = mid + 1;
84 } else {
85 high = mid;
86 }
87 }
88
89 return result;
90 }
91
// One inclusive range of code points; element type of the tables searched by SearchTableSimple().
92 typedef struct {
93 unsigned long code_from; // first code point of the range
94 unsigned long code_to; // last code point of the range (the searches compare with <=, so it is included)
95 } UnicodeTable_t;
96
// One inclusive range of code points plus a category; element type of the table searched by SearchTableCombine().
97 typedef struct {
98 unsigned long code_from; // first code point of the range
99 unsigned long code_to; // last code point of the range (included)
100 unsigned char category; // value handed back (as int) by UnicodeIsCombiningCharacter()
101 } UnicodeTableCombine_t;
102
// One inclusive range of code points plus a name; element type of the table searched by SearchTableBlock().
103 typedef struct {
104 unsigned long code_from; // first code point of the range
105 unsigned long code_to; // last code point of the range (included)
106 const char *block_name; // string returned by UnicodeBlockName()
107 } UnicodeTableBlock_t;
108
// File-local block table whose entries come from unicode_block.tbl.
// It is searched by UnicodeBlockIndex() and indexed by UnicodeBlockName().
109 static const UnicodeTableBlock_t UnicodeBlockList[] = {
110 #include "unicode_block.tbl"
111 };
112
113 /**
114 * u32���e�[�u�����f�[�^����������������������
115 *
116 * @retval �e�[�u����index
117 * @retval -1 �e�[�u��������������
118 */
119 static int SearchTableSimple(
120 const UnicodeTable_t *table, size_t table_size,
121 unsigned long u32)
122 {
123 if (u32 < table[0].code_from) {
124 return -1;
125 }
126 if (u32 > table[table_size-1].code_to) {
127 return -1;
128 }
129 size_t low = 0;
130 size_t high = table_size - 1;
131 while (low <= high) {
132 size_t mid = (low + high) / 2;
133 if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
134 return (int)mid;
135 } else if (table[mid].code_to < u32) {
136 low = mid + 1;
137 } else {
138 high = mid - 1;
139 }
140 }
141 // �e�[�u���������O
142 return -1;
143 }
144
145 /**
146 * SearchTableSimple() ������
147 * �e�[�u�����^��������
148 *
149 * @retval �e�[�u����index
150 * @retval -1 �e�[�u��������������
151 */
152 static int SearchTableCombine(
153 const UnicodeTableCombine_t *table, size_t table_size,
154 unsigned long u32)
155 {
156 if (u32 < table[0].code_from) {
157 return -1;
158 }
159 if (u32 > table[table_size-1].code_to) {
160 return -1;
161 }
162 size_t low = 0;
163 size_t high = table_size - 1;
164 while (low <= high) {
165 size_t mid = (low + high) / 2;
166 if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
167 return (int)mid;
168 } else if (table[mid].code_to < u32) {
169 low = mid + 1;
170 } else {
171 high = mid - 1;
172 }
173 }
174 // �e�[�u���������O
175 return -1;
176 }
177
178 /**
179 * SearchTableSimple() ������
180 * �e�[�u�����^��������
181 *
182 * @retval �e�[�u����index
183 * @retval -1 �e�[�u��������������
184 */
185 static int SearchTableBlock(
186 const UnicodeTableBlock_t *table, size_t table_size,
187 unsigned long u32)
188 {
189 if (u32 < table[0].code_from) {
190 return -1;
191 }
192 if (u32 > table[table_size-1].code_to) {
193 return -1;
194 }
195 size_t low = 0;
196 size_t high = table_size - 1;
197 while (low <= high) {
198 size_t mid = (low + high) / 2;
199 if (table[mid].code_from <= u32 && u32 <= table[mid].code_to) {
200 return (int)mid;
201 } else if (table[mid].code_to < u32) {
202 low = mid + 1;
203 } else {
204 high = mid - 1;
205 }
206 }
207 // �e�[�u���������O
208 return -1;
209 }
210
211 /*
212 * ������������������
213 * ����������������������������
214 * EMOJI MODIFIER
215 * = Nonspacing Mark
216 * VARIATION SELECTOR (�������Z���N�^)
217 * = Nonspacing Mark
218 *
219 * @retval 0 ����������������
220 * @retval 1 ��������,Nonspacing Mark, �J�[�\��������������
221 * @retval 2 ��������,Spacing Mark, �J�[�\���� +1 ��������
222 */
223 int UnicodeIsCombiningCharacter(unsigned long u32)
224 {
225 #define Mn 1 // Nonspacing_Mark a nonspacing combining mark (zero advance width)
226 #define Mc 2 // Spacing_Mark a spacing combining mark (positive advance width)
227 #define Me 1 // Enclosing_Mark an enclosing combining mark
228 #define Sk 1 // Modifier_Symbol a non-letterlike modifier symbol
229 const static UnicodeTableCombine_t CombiningCharacterList[] = {
230 #include "unicode_combine.tbl"
231 };
232 const int index = SearchTableCombine(CombiningCharacterList, _countof(CombiningCharacterList), u32);
233 if (index == -1) {
234 return 0;
235 }
236 return (int)CombiningCharacterList[index].category;
237 }
238
239 /**
240 * �G����?
241 *
242 * @retval 0 �G������������
243 * @retval 1 �G����������
244 */
245 int UnicodeIsEmoji(unsigned long u32)
246 {
247 const static UnicodeTable_t EmojiList[] = {
248 #include "unicode_emoji.tbl"
249 };
250 const int index = SearchTableSimple(EmojiList, _countof(EmojiList), u32);
251 return index != -1 ? 1 : 0;
252 }
253
/**
 * Check whether a code point is a variation selector.
 *
 * No longer used, because UnicodeIsCombiningCharacter() can perform this check at the same time.
 *
 * @retval 0 not a variation selector
 * @retval 1 variation selector
 */
262 #if 0
// Returns 1 when u32 lies in one of the three variation selector ranges below, otherwise 0.
int UnicodeIsVariationSelector(unsigned long u32)
{
	// FVS (Mongolian Free Variation Selector)
	if (u32 >= 0x00180b && u32 <= 0x00180d) {
		return 1;
	}
	// SVS VS1 to VS16
	if (u32 >= 0x00fe00 && u32 <= 0x00fe0f) {
		return 1;
	}
	// IVS VS17 to VS256
	if (u32 >= 0x0e0100 && u32 <= 0x0e01ef) {
		return 1;
	}
	return 0;
}
273 #endif
274
275 /**
276 * ���B���[�}?
277 *
278 * @retval 0 ���B���[�}��������
279 * @retval 1 ���B���[�}������
280 */
281 int UnicodeIsVirama(unsigned long u32)
282 {
283 const static UnicodeTable_t ViramaList[] = {
284 #include "unicode_virama.tbl"
285 };
286 const int index = SearchTableSimple(ViramaList, _countof(ViramaList), u32);
287 return index != -1 ? 1 : 0;
288 }
289
290 /**
291 * Unicode block �� index ������
292 *
293 * @retval -1 block ��������������
294 * @retval block �� index
295 */
296 int UnicodeBlockIndex(unsigned long u32)
297 {
298 return SearchTableBlock(UnicodeBlockList, _countof(UnicodeBlockList), u32);
299 }
300
301 const char *UnicodeBlockName(int index)
302 {
303 if (index == -1) {
304 return "";
305 }
306 return UnicodeBlockList[index].block_name;
307 }
308
309 #if 0
310 int main(int, char *[])
311 {
312 static const unsigned long codes[] = {
313 #if 0
314 0, 1, 0x7f,
315 0x80,
316 0x0e00ff,
317 0x0e0100,
318 0x10fffd,
319 #endif
320 0x10fffe,
321 };
322
323 for (size_t i = 0; i < _countof(codes); i++) {
324 unsigned long code = codes[i];
325 printf("U+%06lx %c\n", code, UnicodeGetWidthProperty(code));
326 }
327 return 0;
328 }
329 #endif
330
331 //
332 // Unicode Combining Character Support
333 //
334 #include "uni_combining.map"
335
336 static unsigned short UnicodeGetPrecomposedChar(int start_index, unsigned short first_code, unsigned short code)
337 {
338 const combining_map_t *table = mapCombiningToPrecomposed;
339 int tmax = _countof(mapCombiningToPrecomposed);
340 unsigned short result = 0;
341 int i;
342
343 for (i = start_index ; i < tmax ; i++) {
344 if (table[i].first_code != first_code) { // 1�������������������A���~���������������������B
345 break;
346 }
347
348 if (table[i].second_code == code) {
349 result = table[i].precomposed;
350 break;
351 }
352 }
353
354 return (result);
355 }
356
357 static int UnicodeGetIndexOfCombiningFirstCode(unsigned short code)
358 {
359 const combining_map_t *table = mapCombiningToPrecomposed;
360 int tmax = _countof(mapCombiningToPrecomposed);
361 int low, mid, high;
362 int index = -1;
363
364 low = 0;
365 high = tmax - 1;
366
367 // binary search
368 while (low < high) {
369 mid = (low + high) / 2;
370 if (table[mid].first_code < code) {
371 low = mid + 1;
372 } else {
373 high = mid;
374 }
375 }
376
377 if (table[low].first_code == code) {
378 while (low >= 0 && table[low].first_code == code) {
379 index = low;
380 low--;
381 }
382 }
383
384 return (index);
385 }
386
387 /**
388 * Unicode�������������s��
389 * @param[in] first_code
390 * @param[in] code
391 * @retval 0 ������������
392 * @retval ���O ��������Unicode
393 *
394 * ��
395 * first_code
396 * U+307B(��)
397 * code
398 * U+309A(�K)
399 * retval
400 * U+307D(��)
401 */
402 unsigned short UnicodeCombining(unsigned short first_code, unsigned short code)
403 {
404 int first_code_index = UnicodeGetIndexOfCombiningFirstCode(first_code);
405 if (first_code_index == -1) {
406 return 0;
407 }
408 unsigned short cset = UnicodeGetPrecomposedChar(first_code_index, first_code, code);
409 return cset;
410 }
411
// One entry of an ISO8859 conversion table: pairs a byte value with a 16-bit Unicode value.
412 typedef struct {
413 unsigned char code; // ISO8859 byte; compared with the input byte in UnicodeFromISO8859()
414 unsigned short unicode; // Unicode value; compared with the input code point in UnicodeToISO8859()
415 } ISO8859Table_t;
416
417 /**
418 * ISO8859�e�[�u��
419 */
420 const ISO8859Table_t *GetISO8859Table(int iso8859_part)
421 {
422 static const ISO8859Table_t iso8859_2[] = {
423 #include "iso8859-2.tbl"
424 };
425 static const ISO8859Table_t iso8859_3[] = {
426 #include "iso8859-3.tbl"
427 };
428 static const ISO8859Table_t iso8859_4[] = {
429 #include "iso8859-4.tbl"
430 };
431 static const ISO8859Table_t iso8859_5[] = {
432 #include "iso8859-5.tbl"
433 };
434 static const ISO8859Table_t iso8859_6[] = {
435 #include "iso8859-6.tbl"
436 };
437 static const ISO8859Table_t iso8859_7[] = {
438 #include "iso8859-7.tbl"
439 };
440 static const ISO8859Table_t iso8859_8[] = {
441 #include "iso8859-8.tbl"
442 };
443 static const ISO8859Table_t iso8859_9[] = {
444 #include "iso8859-9.tbl"
445 };
446 static const ISO8859Table_t iso8859_10[] = {
447 #include "iso8859-10.tbl"
448 };
449 static const ISO8859Table_t iso8859_11[] = {
450 #include "iso8859-11.tbl"
451 };
452 static const ISO8859Table_t iso8859_13[] = {
453 #include "iso8859-13.tbl"
454 };
455 static const ISO8859Table_t iso8859_14[] = {
456 #include "iso8859-14.tbl"
457 };
458 static const ISO8859Table_t iso8859_15[] = {
459 #include "iso8859-15.tbl"
460 };
461 static const ISO8859Table_t iso8859_16[] = {
462 #include "iso8859-16.tbl"
463 };
464
465 static const ISO8859Table_t *tables[] = {
466 NULL, // 0
467 NULL, // ISO8859-1
468 iso8859_2,
469 iso8859_3,
470 iso8859_4,
471 iso8859_5,
472 iso8859_6,
473 iso8859_7,
474 iso8859_8,
475 iso8859_9,
476 iso8859_10,
477 iso8859_11,
478 NULL,
479 iso8859_13,
480 iso8859_14,
481 iso8859_15,
482 iso8859_16,
483 };
484 if (iso8859_part >= _countof(tables)) {
485 assert(0);
486 return NULL;
487 }
488 assert(tables[iso8859_part] != NULL);
489 return tables[iso8859_part];
490 }
491
492 /**
493 * ISO8859����Unicode������
494 */
495 int UnicodeFromISO8859(int part, unsigned char b, unsigned short *u16)
496 {
497 if (part == 1) {
498 // ISO8859-1 �� unicode ������
499 *u16 = b;
500 return 1;
501 }
502 const ISO8859Table_t *table = GetISO8859Table(part);
503 if (table == NULL) {
504 // ����������������
505 *u16 = 0;
506 return 0;
507 }
508 for (int i = 0; i < 0xff; i++ ){
509 if (table[i].code == b) {
510 *u16 = table[i].unicode;
511 return 1;
512 }
513 }
514 *u16 = 0;
515 return 0;
516 }
517
518 /**
519 * Unicode����ISO8859������
520 *
521 * @param[in] part IS8859���� 1...11,13...16
522 * @param[in] u32 Unicode
523 * @param[out] *b ISO8859 char
524 * @retval 0 ������������
525 * @retval 1 ����������
526 */
527 int UnicodeToISO8859(int part, unsigned long u32, unsigned char *b)
528 {
529 if (part == 1) {
530 // ISO8859-1 �� unicode ������
531 *b = (unsigned char)u32;
532 return 1;
533 }
534 if (u32 >= 0x10000) {
535 // �������������������R�[�h
536 *b = 0;
537 return 0;
538 }
539 const unsigned short u16 = (unsigned short)u32;
540 const ISO8859Table_t *table = GetISO8859Table(part);
541 if (table == NULL) {
542 // ����������������
543 *b = 0;
544 return 0;
545 }
546 for (int i = 0; i < 0xff; i++ ){
547 if (table[i].unicode == u16) {
548 *b = table[i].code;
549 return 1;
550 }
551 }
552 *b = 0;
553 return 0;
554 }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26