Develop and Download Open Source Software

Browse Subversion Repository

Contents of /trunk/teraterm/teraterm/charset.cpp

Parent Directory | Revision Log


Revision 10889 - (show annotations) (download) (as text)
Tue Sep 5 15:35:48 2023 UTC (7 months ago) by zmatsuo
File MIME type: text/x-c++src
File size: 23829 byte(s)
DEC Special Graphics の誤り修正、置き換え文字の追加

- DEC Special Graphics の範囲 0x5f - 0x7e
  - 修正前 0x60 - 0x7f
  - 0x5f, 0x7f は表示しないため問題なかったと思われる
    - 0x5f は NBSP(スペース)
    - 0x7f は DEL
- Unicode -> DEC Special Graphics 対応文字を追加した
  - unisym2decsp.map にデータを追加
- Unicode -> DEC Special Graphics テスト用スクリプト追加
  - tests/various_code_texts/dec_special_unicode.pl
1 /*
2 * (C) 2023- TeraTerm Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "teraterm.h"
30 #include "tttypes.h"
31 #include <stdio.h>
32 #include <string.h>
33 #if !defined(_CRTDBG_MAP_ALLOC)
34 #define _CRTDBG_MAP_ALLOC
35 #endif
36 #include <stdlib.h>
37 #include <crtdbg.h>
38 #include <assert.h>
39
40 #include "ttwinman.h"
41 #include "codeconv.h"
42 #include "unicode.h"
43 #include "language.h" // for JIS2SJIS()
44 #include "ttcstd.h"
45 #include "vtterm.h"
46
47 #include "charset.h"
48
49 // Character displayed in place of a byte that is not valid UTF-8
50 #define REPLACEMENT_CHARACTER 0xfffd // REPLACEMENT CHARACTER
51 //#define REPLACEMENT_CHARACTER 0x2e2e // Reversed Question Mark (VT382)
52
53 typedef struct CharSetDataTag {
54 /* GL, GR code group */
55 int Glr[2];
56 /* G0, G1, G2, G3 code group */
57 int Gn[4];
58 /* GL for single shift 2/3 */
59 int GLtmp;
60 /* single shift 2/3 flag */
61 BOOL SSflag;
62 //
63 char32_t replacement_char;
64 // UTF-8 work
65 BYTE buf[4];
66 int count;
67 BOOL Fallbacked;
68
69 // MBCS
70 BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
71 WORD Kanji;
72
73 // EUC
74 BOOL EUCkanaIn;
75 BOOL EUCsupIn;
76 int EUCcount;
77
78 /* JIS -> SJIS conversion flag */
79 BOOL ConvJIS;
80 BYTE DebugFlag;
81
82 // Operations
83 CharSetOp Op;
84 void *ClientData;
85 } CharSetData;
86
87 static BOOL IsC0(char32_t b)
88 {
89 return (b <= US);
90 }
91
92 static BOOL IsC1(char32_t b)
93 {
94 return ((b>=0x80) && (b<=0x9F));
95 }
96
97 /**
98 * ISO2022�p���[�N������������
99 */
100 static void CharSetInit2(CharSetData *w)
101 {
102 if (ts.Language==IdJapanese) {
103 w->Gn[0] = IdASCII;
104 w->Gn[1] = IdKatakana;
105 w->Gn[2] = IdKatakana;
106 w->Gn[3] = IdKanji;
107 w->Glr[0] = 0;
108 if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
109 w->Glr[1] = 2; // 8-bit katakana
110 else
111 w->Glr[1] = 3;
112 }
113 else {
114 w->Gn[0] = IdASCII;
115 w->Gn[1] = IdSpecial;
116 w->Gn[2] = IdASCII;
117 w->Gn[3] = IdASCII;
118 w->Glr[0] = 0;
119 w->Glr[1] = 0;
120 }
121 }
122
123 /**
124 * �������A���[�N������������
125 */
126 CharSetData *CharSetInit(const CharSetOp *op, void *client_data)
127 {
128 CharSetData *w = (CharSetData *)calloc(sizeof(*w), 1);
129 if (w == NULL) {
130 return NULL;
131 }
132
133 w->Op = *op;
134 w->ClientData = client_data;
135
136 CharSetInit2(w);
137 w->GLtmp = 0;
138 w->SSflag = FALSE;
139
140 w->DebugFlag = DEBUG_FLAG_NONE;
141
142 w->replacement_char = REPLACEMENT_CHARACTER;
143 w->SSflag = FALSE;
144
145 w->KanjiIn = FALSE;
146 w->EUCkanaIn = FALSE;
147 w->EUCsupIn = FALSE;
148 w->ConvJIS = FALSE;
149 w->Fallbacked = FALSE;
150
151 return w;
152 }
153
154 void CharSetFinish(CharSetData *w)
155 {
156 assert(w != NULL);
157 free(w);
158 }
159
160 /**
161 * 1byte���`�F�b�N
162 */
163 static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
164 {
165 switch (lang) {
166 case IdKorean:
167 return __ismbblead(b, 949);
168 case IdChinese:
169 if (kanji_code == IdCnGB2312) {
170 return __ismbblead(b, 936);
171 }
172 else if (ts.KanjiCode == IdCnBig5) {
173 return __ismbblead(b, 950);
174 }
175 break;
176 default:
177 assert(FALSE);
178 break;
179 }
180 assert(FALSE);
181 return FALSE;
182 }
183
184 /**
185 * Double-byte Character Sets
186 * SJIS��1byte��?
187 *
188 * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
189 * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
190 */
191 static BOOL ismbbleadSJIS(BYTE b)
192 {
193 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
194 return TRUE;
195 }
196 return FALSE;
197 }
198
199 /**
200 * ts.Language == IdJapanese ��
201 * 1byte���`�F�b�N
202 */
203 static BOOL CheckKanji(CharSetData *w, BYTE b)
204 {
205 BOOL Check;
206
207 if (ts.Language!=IdJapanese)
208 return FALSE;
209
210 w->ConvJIS = FALSE;
211
212 if (ts.KanjiCode==IdSJIS ||
213 (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) {
214 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
215 w->Fallbacked = TRUE;
216 return TRUE; // SJIS kanji
217 }
218 if ((0xa1<=b) && (b<=0xdf)) {
219 return FALSE; // SJIS katakana
220 }
221 }
222
223 if ((b>=0x21) && (b<=0x7e)) {
224 Check = (w->Gn[w->Glr[0]] == IdKanji);
225 w->ConvJIS = Check;
226 }
227 else if ((b>=0xA1) && (b<=0xFE)) {
228 Check = (w->Gn[w->Glr[1]] == IdKanji);
229 if (ts.KanjiCode==IdEUC) {
230 Check = TRUE;
231 }
232 else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
233 Check = FALSE; // 8-bit katakana
234 }
235 w->ConvJIS = Check;
236 }
237 else {
238 Check = FALSE;
239 }
240
241 return Check;
242 }
243
244 static BOOL ParseFirstJP(CharSetData *w, BYTE b)
245 // returns TRUE if b is processed
246 // (actually allways returns TRUE)
247 {
248 if (w->KanjiIn) {
249 if (((! w->ConvJIS) && (0x3F<b) && (b<0xFD)) ||
250 (w->ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
251 ((0xa0<b) && (b<0xff)) )) )
252 {
253 unsigned long u32;
254 w->Kanji = w->Kanji + b;
255 if (w->ConvJIS) {
256 // JIS -> Shift_JIS(CP932)
257 w->Kanji = JIS2SJIS((WORD)(w->Kanji & 0x7f7f));
258 }
259 u32 = CP932ToUTF32(w->Kanji);
260 w->Op.PutU32(u32, w->ClientData);
261 w->KanjiIn = FALSE;
262 return TRUE;
263 }
264 else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
265 w->KanjiIn = FALSE;
266 }
267 }
268
269 if (w->SSflag) {
270 if (w->Gn[w->GLtmp] == IdKanji) {
271 w->Kanji = b << 8;
272 w->KanjiIn = TRUE;
273 w->SSflag = FALSE;
274 return TRUE;
275 }
276 else if (w->Gn[w->GLtmp] == IdKatakana) {
277 b = b | 0x80;
278 }
279
280 w->Op.PutU32(b, w->ClientData);
281 w->SSflag = FALSE;
282 return TRUE;
283 }
284
285 if ((!w->EUCsupIn) && (!w->EUCkanaIn) && (!w->KanjiIn) && CheckKanji(w, b)) {
286 w->Kanji = b << 8;
287 w->KanjiIn = TRUE;
288 return TRUE;
289 }
290
291 if (b<=US) {
292 w->Op.ParseControl(b, w->ClientData);
293 }
294 else if (b==0x20) {
295 w->Op.PutU32(b, w->ClientData);
296 }
297 else if ((b>=0x21) && (b<=0x7E)) {
298 if (w->EUCsupIn) {
299 w->EUCcount--;
300 w->EUCsupIn = (w->EUCcount==0);
301 return TRUE;
302 }
303
304 if ((w->Gn[w->Glr[0]] == IdKatakana) || w->EUCkanaIn) {
305 b = b | 0x80;
306 w->EUCkanaIn = FALSE;
307 {
308 // b��sjis�����p�J�^�J�i
309 unsigned long u32 = CP932ToUTF32(b);
310 w->Op.PutU32(u32, w->ClientData);
311 }
312 return TRUE;
313 }
314 w->Op.PutU32(b, w->ClientData);
315 }
316 else if (b==0x7f) {
317 return TRUE;
318 }
319 else if ((b>=0x80) && (b<=0x8D)) {
320 w->Op.ParseControl(b, w->ClientData);
321 }
322 else if (b==0x8E) { // SS2
323 switch (ts.KanjiCode) {
324 case IdEUC:
325 if (ts.ISO2022Flag & ISO2022_SS2) {
326 w->EUCkanaIn = TRUE;
327 }
328 break;
329 case IdUTF8:
330 w->Op.PutU32(w->replacement_char, w->ClientData);
331 break;
332 default:
333 w->Op.ParseControl(b, w->ClientData);
334 }
335 }
336 else if (b==0x8F) { // SS3
337 switch (ts.KanjiCode) {
338 case IdEUC:
339 if (ts.ISO2022Flag & ISO2022_SS3) {
340 w->EUCcount = 2;
341 w->EUCsupIn = TRUE;
342 }
343 break;
344 case IdUTF8:
345 w->Op.PutU32(w->replacement_char, w->ClientData);
346 break;
347 default:
348 w->Op.ParseControl(b, w->ClientData);
349 }
350 }
351 else if ((b>=0x90) && (b<=0x9F)) {
352 w->Op.ParseControl(b, w->ClientData);
353 }
354 else if (b==0xA0) {
355 w->Op.PutU32(0x20, w->ClientData);
356 }
357 else if ((b>=0xA1) && (b<=0xFE)) {
358 if (w->EUCsupIn) {
359 w->EUCcount--;
360 w->EUCsupIn = (w->EUCcount==0);
361 return TRUE;
362 }
363
364 if ((w->Gn[w->Glr[1]] != IdASCII) ||
365 ((ts.KanjiCode==IdEUC) && w->EUCkanaIn) ||
366 (ts.KanjiCode==IdSJIS) ||
367 ((ts.KanjiCode==IdJIS) &&
368 (ts.JIS7Katakana==0) &&
369 ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
370 // b��sjis�����p�J�^�J�i
371 unsigned long u32 = CP932ToUTF32(b);
372 w->Op.PutU32(u32, w->ClientData);
373 } else {
374 if (w->Gn[w->Glr[1]] == IdASCII) {
375 b = b & 0x7f;
376 }
377 w->Op.PutU32(b, w->ClientData);
378 }
379 w->EUCkanaIn = FALSE;
380 }
381 else {
382 w->Op.PutU32(b, w->ClientData);
383 }
384
385 return TRUE;
386 }
387
388 static BOOL ParseFirstKR(CharSetData *w, BYTE b)
389 // returns TRUE if b is processed
390 // (actually allways returns TRUE)
391 {
392 if (w->KanjiIn) {
393 if (((0x41<=b) && (b<=0x5A)) ||
394 ((0x61<=b) && (b<=0x7A)) ||
395 ((0x81<=b) && (b<=0xFE)))
396 {
397 unsigned long u32 = 0;
398 if (ts.KanjiCode == IdKoreanCP949) {
399 // CP949
400 w->Kanji = w->Kanji + b;
401 u32 = MBCP_UTF32(w->Kanji, 949);
402 }
403 else {
404 assert(FALSE);
405 }
406 w->Op.PutU32(u32, w->ClientData);
407 w->KanjiIn = FALSE;
408 return TRUE;
409 }
410 else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
411 w->KanjiIn = FALSE;
412 }
413 }
414
415 if ((!w->KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
416 w->Kanji = b << 8;
417 w->KanjiIn = TRUE;
418 return TRUE;
419 }
420
421 if (b<=US) {
422 w->Op.ParseControl(b, w->ClientData);
423 }
424 else if (b==0x20) {
425 w->Op.PutU32(b, w->ClientData);
426 }
427 else if ((b>=0x21) && (b<=0x7E)) {
428 // if (Gn[Glr[0]] == IdKatakana) {
429 // b = b | 0x80;
430 // }
431 w->Op.PutU32(b, w->ClientData);
432 }
433 else if (b==0x7f) {
434 return TRUE;
435 }
436 else if ((0x80<=b) && (b<=0x9F)) {
437 w->Op.ParseControl(b, w->ClientData);
438 }
439 else if (b==0xA0) {
440 w->Op.PutU32(0x20, w->ClientData);
441 }
442 else if ((b>=0xA1) && (b<=0xFE)) {
443 if (w->Gn[w->Glr[1]] == IdASCII) {
444 b = b & 0x7f;
445 }
446 w->Op.PutU32(b, w->ClientData);
447 }
448 else {
449 w->Op.PutU32(b, w->ClientData);
450 }
451
452 return TRUE;
453 }
454
455 static BOOL ParseFirstCn(CharSetData *w, BYTE b)
456 // returns TRUE if b is processed
457 // (actually allways returns TRUE)
458 {
459 if (w->KanjiIn) {
460 // TODO
461 if (((0x40<=b) && (b<=0x7e)) ||
462 ((0xa1<=b) && (b<=0xFE)))
463 {
464 unsigned long u32 = 0;
465 w->Kanji = w->Kanji + b;
466 if (ts.KanjiCode == IdCnGB2312) {
467 // CP936 GB2312
468 u32 = MBCP_UTF32(w->Kanji, 936);
469 }
470 else if (ts.KanjiCode == IdCnBig5) {
471 // CP950 Big5
472 u32 = MBCP_UTF32(w->Kanji, 950);
473 }
474 else {
475 assert(FALSE);
476 }
477 w->Op.PutU32(u32, w->ClientData);
478 w->KanjiIn = FALSE;
479 return TRUE;
480 }
481 else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
482 w->KanjiIn = FALSE;
483 }
484 }
485
486 if ((!w->KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
487 w->Kanji = b << 8;
488 w->KanjiIn = TRUE;
489 return TRUE;
490 }
491
492 if (b<=US) {
493 w->Op.ParseControl(b, w->ClientData);
494 }
495 else if (b==0x20) {
496 w->Op.PutU32(b, w->ClientData);
497 }
498 else if ((b>=0x21) && (b<=0x7E)) {
499 // if (Gn[Glr[0]] == IdKatakana) {
500 // b = b | 0x80;
501 // }
502 w->Op.PutU32(b, w->ClientData);
503 }
504 else if (b==0x7f) {
505 return TRUE;
506 }
507 else if ((0x80<=b) && (b<=0x9F)) {
508 w->Op.ParseControl(b, w->ClientData);
509 }
510 else if (b==0xA0) {
511 w->Op.PutU32(0x20, w->ClientData);
512 }
513 else if ((b>=0xA1) && (b<=0xFE)) {
514 if (w->Gn[w->Glr[1]] == IdASCII) {
515 b = b & 0x7f;
516 }
517 w->Op.PutU32(b, w->ClientData);
518 }
519 else {
520 w->Op.PutU32(b, w->ClientData);
521 }
522
523 return TRUE;
524 }
525
526 static void ParseASCII(CharSetData *w, BYTE b)
527 {
528 if (w->SSflag) {
529 w->Op.PutU32(b, w->ClientData);
530 w->SSflag = FALSE;
531 return;
532 }
533
534 if (b<=US) {
535 w->Op.ParseControl(b, w->ClientData);
536 } else if ((b>=0x20) && (b<=0x7E)) {
537 w->Op.PutU32(b, w->ClientData);
538 } else if ((b==0x8E) || (b==0x8F)) {
539 w->Op.PutU32(w->replacement_char, w->ClientData);
540 } else if ((b>=0x80) && (b<=0x9F)) {
541 w->Op.ParseControl(b, w->ClientData);
542 } else if (b>=0xA0) {
543 w->Op.PutU32(b, w->ClientData);
544 }
545 }
546
547 /**
548 * REPLACEMENT_CHARACTER ���\��
549 * UTF-8 �f�R�[�h�����g�p
550 */
551 static void PutReplacementChr(CharSetData *w, const BYTE *ptr, size_t len, BOOL fallback)
552 {
553 const char32_t replacement_char = w->replacement_char;
554 int i;
555 for (i = 0; i < len; i++) {
556 BYTE c = *ptr++;
557 assert(!IsC0(c));
558 if (fallback) {
559 // fallback ISO8859-1
560 w->Op.PutU32(c, w->ClientData);
561 }
562 else {
563 // fallback������
564 if (c < 0x80) {
565 // �s����UTF-8��������������0x80�������������A
566 // 1������UTF-8�������������������\������
567 w->Op.PutU32(c, w->ClientData);
568 }
569 else {
570 w->Op.PutU32(replacement_char, w->ClientData);
571 }
572 }
573 }
574 }
575
576 /**
577 * UTF-8�����M�f�[�^����������
578 *
579 * returns TRUE if b is processed
580 */
581 static BOOL ParseFirstUTF8(CharSetData *w, BYTE b)
582 {
583 char32_t code;
584
585 if (w->Fallbacked) {
586 BOOL r = ParseFirstJP(w, b);
587 w->Fallbacked = FALSE;
588 return r;
589 }
590
591 // UTF-8�G���R�[�h
592 // The Unicode Standard Chapter 3
593 // Table 3-7. Well-Formed UTF-8 Byte Sequences
594 // | Code Points | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
595 // | U+0000..U+007F | 00..7F | | | |
596 // | U+0080..U+07FF | C2..DF | 80..BF | | |
597 // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
598 // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
599 // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
600 // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
601 // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
602 // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
603 // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
604 // - 1byte��
605 // - 0x00 - 0x7f ok
606 // - 0x80 - 0xc1 ng
607 // - 0xc2 - 0xf4 ok
608 // - 0xf5 - 0xff ng
609 // - 2byte�����~
610 // - 0x00 - 0x7f ng
611 // - 0x80 - 0xbf ok
612 // - 0xc0 - 0xff ng
613 // - 2byte�����O
614 // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
615 // - 1byte == 0xed ������ 0x80 - 0x9f����ok
616 // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
617 // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
618 recheck:
619 // 1byte(7bit)
620 if (w->count == 0) {
621 if (IsC0(b)) {
622 // U+0000 .. U+001f
623 // C0��������, C0 Coontrols
624 w->Op.ParseControl(b, w->ClientData);
625 return TRUE;
626 }
627 else if (b <= 0x7f) {
628 // 0x7f����, �������A���������o��
629 w->Op.PutU32(b, w->ClientData);
630 return TRUE;
631 }
632 else if (0xc2 <= b && b <= 0xf4) {
633 // 1byte������
634 w->buf[w->count++] = b;
635 return TRUE;
636 }
637
638 // 0x80 - 0xc1, 0xf5 - 0xff
639 // UTF-8��1byte���o���������R�[�h������
640 if (ts.FallbackToCP932) {
641 // fallback��������
642 if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
643 // ���{�������� && Shift_JIS 1byte��
644 // Shift_JIS �� fallback
645 w->Fallbacked = TRUE;
646 w->ConvJIS = FALSE;
647 w->Kanji = b << 8;
648 w->KanjiIn = TRUE;
649 return TRUE;
650 }
651 }
652 // fallback������, �s������������
653 w->buf[0] = b;
654 PutReplacementChr(w, w->buf, 1, FALSE);
655 return TRUE;
656 }
657
658 // 2byte���~����?
659 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
660 // �s��������, (����2bit�� 0b10xx_xxxx ��������)
661 PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932);
662 w->count = 0;
663 goto recheck;
664 }
665
666 // 2byte�����~����
667 w->buf[w->count++] = b;
668
669 // 2byte(11bit)
670 if (w->count == 2) {
671 if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf)
672 // 5bit + 6bit
673 code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f);
674 if (IsC1(code)) {
675 // U+0080 .. u+009f
676 // C1��������, C1 Controls
677 w->Op.ParseControl((BYTE)code, w->ClientData);
678 }
679 else {
680 w->Op.PutU32(code, w->ClientData);
681 }
682 w->count = 0;
683 return TRUE;
684 }
685 return TRUE;
686 }
687
688 // 3byte(16bit)
689 if (w->count == 3) {
690 if ((w->buf[0] & 0xf0) == 0xe0) {
691 if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) ||
692 (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) {
693 // �s���� UTF-8
694 PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932);
695 w->count = 0;
696 goto recheck;
697 }
698 // 4bit + 6bit + 6bit
699 code = ((w->buf[0] & 0xf) << 12);
700 code |= ((w->buf[1] & 0x3f) << 6);
701 code |= ((w->buf[2] & 0x3f));
702 w->Op.PutU32(code, w->ClientData);
703 w->count = 0;
704 return TRUE;
705 }
706 return TRUE;
707 }
708
709 // 4byte(21bit)
710 assert(w->count == 4);
711 assert((w->buf[0] & 0xf8) == 0xf0);
712 if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) ||
713 (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) {
714 // �s���� UTF-8
715 PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932);
716 w->count = 0;
717 goto recheck;
718 }
719 // 3bit + 6bit + 6bit + 6bit
720 code = ((w->buf[0] & 0x07) << 18);
721 code |= ((w->buf[1] & 0x3f) << 12);
722 code |= ((w->buf[2] & 0x3f) << 6);
723 code |= (w->buf[3] & 0x3f);
724 w->Op.PutU32(code, w->ClientData);
725 w->count = 0;
726 return TRUE;
727 }
728
729 static BOOL ParseFirstRus(CharSetData *w, BYTE b)
730 // returns if b is processed
731 {
732 if (IsC0(b)) {
733 w->Op.ParseControl(b, w->ClientData);
734 return TRUE;
735 }
736 // CP1251������
737 BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
738 // CP1251->Unicode
739 unsigned long u32 = MBCP_UTF32(c, 1251);
740 w->Op.PutU32(u32, w->ClientData);
741 return TRUE;
742 }
743
744 static BOOL ParseEnglish(CharSetData *w, BYTE b)
745 {
746 unsigned short u16 = 0;
747 int part = KanjiCodeToISO8859Part(ts.KanjiCode);
748 int r = UnicodeFromISO8859(part, b, &u16);
749 if (r == 0) {
750 return FALSE;
751 }
752 if (u16 < 0x100) {
753 ParseASCII(w, (BYTE)u16);
754 }
755 else {
756 w->Op.PutU32(u16, w->ClientData);
757 }
758 return TRUE;
759 }
760
761 static void PutDebugChar(CharSetData *w, BYTE b)
762 {
763 int i;
764 BOOL svInsertMode, svAutoWrapMode;
765 TCharAttr svCharAttr;
766 TCharAttr char_attr;
767
768 svInsertMode = TermGetInsertMode();
769 TermSetInsertMode(FALSE);
770 svAutoWrapMode = TermGetAutoWrapMode();
771 TermSetAutoWrapMode(TRUE);
772
773 TermGetAttr(&svCharAttr);
774 char_attr = svCharAttr;
775 char_attr.Attr = AttrDefault;
776 TermSetAttr(&char_attr);
777
778 if (w->DebugFlag==DEBUG_FLAG_HEXD) {
779 char buff[3];
780 _snprintf(buff, 3, "%02X", (unsigned int) b);
781
782 for (i=0; i<2; i++)
783 w->Op.PutU32(buff[i], w->ClientData);
784 w->Op.PutU32(' ', w->ClientData);
785 }
786 else if (w->DebugFlag==DEBUG_FLAG_NORM) {
787
788 if ((b & 0x80) == 0x80) {
789 //UpdateStr();
790 char_attr.Attr = AttrReverse;
791 TermSetAttr(&char_attr);
792 b = b & 0x7f;
793 }
794
795 if (b<=US) {
796 w->Op.PutU32('^', w->ClientData);
797 w->Op.PutU32((char)(b + 0x40), w->ClientData);
798 }
799 else if (b==DEL) {
800 w->Op.PutU32('<', w->ClientData);
801 w->Op.PutU32('D', w->ClientData);
802 w->Op.PutU32('E', w->ClientData);
803 w->Op.PutU32('L', w->ClientData);
804 w->Op.PutU32('>', w->ClientData);
805 }
806 else
807 w->Op.PutU32(b, w->ClientData);
808 }
809
810 TermSetAttr(&char_attr);
811 TermSetInsertMode(svInsertMode);
812 TermSetAutoWrapMode(svAutoWrapMode);
813 }
814
815 void ParseFirst(CharSetData *w, BYTE b)
816 {
817 WORD language = ts.Language;
818 if (w->DebugFlag != DEBUG_FLAG_NONE) {
819 language = IdDebug;
820 }
821
822 switch (language) {
823 default:
824 assert(FALSE);
825 language = IdUtf8;
826 // FALLTHROUGH
827 case IdUtf8:
828 ParseFirstUTF8(w, b);
829 return;
830
831 case IdJapanese:
832 switch (ts.KanjiCode) {
833 case IdUTF8:
834 if (ParseFirstUTF8(w, b)) {
835 return;
836 }
837 break;
838 default:
839 if (ParseFirstJP(w, b)) {
840 return;
841 }
842 }
843 break;
844
845 case IdKorean:
846 switch (ts.KanjiCode) {
847 case IdUTF8:
848 if (ParseFirstUTF8(w, b)) {
849 return;
850 }
851 break;
852 default:
853 if (ParseFirstKR(w, b)) {
854 return;
855 }
856 }
857 break;
858
859 case IdRussian:
860 if (ParseFirstRus(w, b)) {
861 return;
862 }
863 break;
864
865 case IdChinese:
866 switch (ts.KanjiCode) {
867 case IdUTF8:
868 if (ParseFirstUTF8(w, b)) {
869 return;
870 }
871 break;
872 default:
873 if (ParseFirstCn(w, b)) {
874 return;
875 }
876 }
877 break;
878 case IdEnglish: {
879 if (ParseEnglish(w, b)) {
880 return;
881 }
882 break;
883 }
884 case IdDebug: {
885 PutDebugChar(w, b);
886 return;
887 }
888 }
889
890 if (w->SSflag) {
891 w->Op.PutU32(b, w->ClientData);
892 w->SSflag = FALSE;
893 return;
894 }
895
896 if (b<=US)
897 w->Op.ParseControl(b, w->ClientData);
898 else if ((b>=0x20) && (b<=0x7E))
899 w->Op.PutU32(b, w->ClientData);
900 else if ((b>=0x80) && (b<=0x9F))
901 w->Op.ParseControl(b, w->ClientData);
902 else if (b>=0xA0)
903 w->Op.PutU32(b, w->ClientData);
904 }
905
906 /**
907 * �w��(Designate)
908 *
909 * @param Gn 0/1/2/3 = G0/G1/G2/G3
910 * @param codeset IdASCII 0
911 * IdKatakana 1
912 * IdKanji 2
913 * IdSpecial 3
914 */
915 void CharSet2022Designate(CharSetData *w, int gn, int cs)
916 {
917 w->Gn[gn] = cs;
918 }
919
920 /**
921 * �����o��(Invoke)
922 * @param shift
923 */
924 void CharSet2022Invoke(CharSetData *w, CharSet2022Shift shift)
925 {
926 switch (shift) {
927 case CHARSET_LS0:
928 // Locking Shift 0 (G0->GL)
929 w->Glr[0] = 0;
930 break;
931 case CHARSET_LS1:
932 // Locking Shift 1 (G1->GL)
933 w->Glr[0] = 1;
934 break;
935 case CHARSET_LS2:
936 // Locking Shift 2 (G2->GL)
937 w->Glr[0] = 2;
938 break;
939 case CHARSET_LS3:
940 // Locking Shift 3 (G3->GL)
941 w->Glr[0] = 3;
942 break;
943 case CHARSET_LS1R:
944 // Locking Shift 1 (G1->GR)
945 w->Glr[1] = 1;
946 break;
947 case CHARSET_LS2R:
948 // Locking Shift 2 (G2->GR)
949 w->Glr[1] = 2;
950 break;
951 case CHARSET_LS3R:
952 // Locking Shift 3 (G3->GR)
953 w->Glr[1] = 3;
954 break;
955 case CHARSET_SS2:
956 // Single Shift 2
957 w->GLtmp = 2;
958 w->SSflag = TRUE;
959 break;
960 case CHARSET_SS3:
961 // Single Shift 3
962 w->GLtmp = 3;
963 w->SSflag = TRUE;
964 break;
965 default:
966 assert(FALSE);
967 break;
968 }
969 }
970
971 /**
972 * DEC�����t�H���g(DEC Special Graphics, DSG)
973 * 0137(0x5f) ... 0176(0x7e) ���r�����A�T�C������������
974 * (0xdf) ... (0xfe) ��?
975 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
976 * about/emulations.html
977 *
978 * @param b �R�[�h
979 * @retval TRUE IdSpecial
980 * @retval FALSE IdSpecial��������
981 */
982 BOOL CharSetIsSpecial(CharSetData *w, BYTE b)
983 {
984 BOOL SpecialNew = FALSE;
985
986 if ((b >= 0x5F) && (b < 0x7f)) {
987 if (w->SSflag) {
988 SpecialNew = (w->Gn[w->GLtmp] == IdSpecial);
989 }
990 else {
991 SpecialNew = (w->Gn[w->Glr[0]] == IdSpecial);
992 }
993 }
994 else if ((b >= 0xDF) && (b < 0xff)) {
995 if (w->SSflag) {
996 SpecialNew = (w->Gn[w->GLtmp] == IdSpecial);
997 }
998 else {
999 SpecialNew = (w->Gn[w->Glr[1]] == IdSpecial);
1000 }
1001 }
1002
1003 return SpecialNew;
1004 }
1005
1006 static void CharSetSaveStateLow(CharSetState *state, const CharSetData *w)
1007 {
1008 int i;
1009 state->infos[0] = w->Glr[0];
1010 state->infos[1] = w->Glr[1];
1011 for (i=0 ; i<=3; i++) {
1012 state->infos[2 + i] = w->Gn[i];
1013 }
1014 }
1015
1016 /**
1017 * ��������������
1018 */
1019 void CharSetSaveState(CharSetData *w, CharSetState *state)
1020 {
1021 CharSetSaveStateLow(state, w);
1022 }
1023
1024 /**
1025 * ���������A����
1026 */
1027 void CharSetLoadState(CharSetData *w, const CharSetState *state)
1028 {
1029 int i;
1030 w->Glr[0] = state->infos[0];
1031 w->Glr[1] = state->infos[1];
1032 for (i=0 ; i<=3; i++) {
1033 w->Gn[i] = state->infos[2 + i];
1034 }
1035 }
1036
1037 /**
1038 * �t�H�[���o�b�N���I��
1039 * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
1040 *
1041 */
1042 void CharSetFallbackFinish(CharSetData *w)
1043 {
1044 w->Fallbacked = FALSE;
1045 }
1046
1047 /**
1048 * �f�o�O�o�����������[�h�����X����
1049 */
1050 void CharSetSetNextDebugMode(CharSetData *w)
1051 {
1052 // ts.DebugModes ���� tttypes.h �� DBGF_* �� OR ����������
1053 do {
1054 w->DebugFlag = (w->DebugFlag + 1) % DEBUG_FLAG_MAXD;
1055 } while (w->DebugFlag != DEBUG_FLAG_NONE && !((ts.DebugModes >> (w->DebugFlag - 1)) & 1));
1056 }
1057
1058 BYTE CharSetGetDebugMode(CharSetData *w)
1059 {
1060 return w->DebugFlag;
1061 }
1062
1063 void CharSetSetDebugMode(CharSetData *w, BYTE mode)
1064 {
1065 w->DebugFlag = mode % DEBUG_FLAG_MAXD;
1066 }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26