Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /trunk/teraterm/teraterm/charset.cpp

Parent Directory | Revision Log


Revision 10771 - (hide annotations) (download) (as text)
Sat Jun 17 14:47:20 2023 UTC (9 months, 3 weeks ago) by zmatsuo
Original Path: trunk/teraterm/teraterm/charset.c
File MIME type: text/x-csrc
File size: 21591 byte(s)
デバグ用文字出力を charset.c へ移動

- PutDebugChar() を vtterm.c から移動
- 受信文字を表示用文字に変換する箇所が charset.c にまとまった
1 zmatsuo 10755 /*
2     * (C) 2023- TeraTerm Project
3     * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include "teraterm.h"
30     #include "tttypes.h"
31     #include <stdio.h>
32     #include <string.h>
33     #if !defined(_CRTDBG_MAP_ALLOC)
34     #define _CRTDBG_MAP_ALLOC
35     #endif
36     #include <stdlib.h>
37     #include <crtdbg.h>
38     #include <assert.h>
39    
40     #include "buffer.h" // for Wrap
41     #include "ttwinman.h"
42     #include "codeconv.h"
43     #include "unicode.h"
44     #include "language.h" // for JIS2SJIS()
45 zmatsuo 10763 #include "ttcstd.h"
46 zmatsuo 10771 #include "keyboard.h" // for DebugFlag
47 zmatsuo 10755
48     #include "charset.h"
49    
// Character output in place of a byte that cannot be decoded as UTF-8.
// Other candidates that were tried: 0x2592, 0x20, 0xfffd.
#define REPLACEMENT_CHARACTER	'?'
55    
56 zmatsuo 10755 static BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
57     static BOOL EUCkanaIn, EUCsupIn;
58     static int EUCcount;
59     #if 0
60     static BOOL Special;
61     #endif
62    
63     /* GL for single shift 2/3 */
64     static int GLtmp;
65     /* single shift 2/3 flag */
66     static BOOL SSflag;
67     /* JIS -> SJIS conversion flag */
68     static BOOL ConvJIS;
69     static WORD Kanji;
70 zmatsuo 10763 static BOOL Fallbacked;
71 zmatsuo 10755
72     typedef struct {
73     /* GL, GR code group */
74     int Glr[2];
75     /* G0, G1, G2, G3 code group */
76     int Gn[4];
77 zmatsuo 10763 //
78     char32_t replacement_char;
79 zmatsuo 10767 // UTF-8 work
80     BYTE buf[4];
81     int count;
82 zmatsuo 10755 } VttermKanjiWork;
83    
84     static VttermKanjiWork KanjiWork;
85    
86 zmatsuo 10770 static BOOL IsC0(char32_t b)
87     {
88     return (b <= US);
89     }
90    
91     static BOOL IsC1(char32_t b)
92     {
93     return ((b>=0x80) && (b<=0x9F));
94     }
95    
96     /**
97     * PutU32() wrapper
98     * Unicode�x�[�X����������
99     */
100 zmatsuo 10760 static void PutChar(BYTE b)
101     {
102     PutU32(b);
103     }
104 zmatsuo 10755
105     /**
106     * ISO2022�p���[�N������������
107     */
108     static void CharSetInit2(VttermKanjiWork *w)
109     {
110     if (ts.Language==IdJapanese) {
111     w->Gn[0] = IdASCII;
112     w->Gn[1] = IdKatakana;
113     w->Gn[2] = IdKatakana;
114     w->Gn[3] = IdKanji;
115     w->Glr[0] = 0;
116     if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
117     w->Glr[1] = 2; // 8-bit katakana
118     else
119     w->Glr[1] = 3;
120     }
121     else {
122     w->Gn[0] = IdASCII;
123     w->Gn[1] = IdSpecial;
124     w->Gn[2] = IdASCII;
125     w->Gn[3] = IdASCII;
126     w->Glr[0] = 0;
127     w->Glr[1] = 0;
128     }
129     }
130    
131     /**
132     * �������A���[�N������������
133     */
134     void CharSetInit(void)
135     {
136 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
137    
138     CharSetInit2(w);
139    
140     w->replacement_char = REPLACEMENT_CHARACTER;
141 zmatsuo 10755 SSflag = FALSE;
142    
143     KanjiIn = FALSE;
144     EUCkanaIn = FALSE;
145     EUCsupIn = FALSE;
146     ConvJIS = FALSE;
147     Fallbacked = FALSE;
148     }
149    
150     /**
151     * 1byte���`�F�b�N
152     */
153     static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
154     {
155     switch (lang) {
156     case IdKorean:
157     return __ismbblead(b, 51949);
158     case IdChinese:
159     if (kanji_code == IdCnGB2312) {
160     return __ismbblead(b, 936);
161     }
162     else if (ts.KanjiCode == IdCnBig5) {
163     return __ismbblead(b, 950);
164     }
165     break;
166     default:
167     assert(FALSE);
168     break;
169     }
170     assert(FALSE);
171     return FALSE;
172     }
173 zmatsuo 10763
174 zmatsuo 10755 /**
175 zmatsuo 10763 * Double-byte Character Sets
176     * SJIS��1byte��?
177     *
178     * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
179     * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
180     */
181     static BOOL ismbbleadSJIS(BYTE b)
182     {
183     if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
184     return TRUE;
185     }
186     return FALSE;
187     }
188    
189     /**
190 zmatsuo 10755 * ts.Language == IdJapanese ��
191     * 1byte���`�F�b�N
192     */
193     static BOOL CheckKanji(BYTE b)
194     {
195     VttermKanjiWork *w = &KanjiWork;
196     BOOL Check;
197    
198     if (ts.Language!=IdJapanese)
199     return FALSE;
200    
201     ConvJIS = FALSE;
202    
203     if (ts.KanjiCode==IdSJIS ||
204     (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) {
205 zmatsuo 10759 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
206 zmatsuo 10755 Fallbacked = TRUE;
207     return TRUE; // SJIS kanji
208     }
209     if ((0xa1<=b) && (b<=0xdf)) {
210     return FALSE; // SJIS katakana
211     }
212     }
213    
214     if ((b>=0x21) && (b<=0x7e)) {
215     Check = (w->Gn[w->Glr[0]] == IdKanji);
216     ConvJIS = Check;
217     }
218     else if ((b>=0xA1) && (b<=0xFE)) {
219     Check = (w->Gn[w->Glr[1]] == IdKanji);
220     if (ts.KanjiCode==IdEUC) {
221     Check = TRUE;
222     }
223     else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
224     Check = FALSE; // 8-bit katakana
225     }
226     ConvJIS = Check;
227     }
228     else {
229     Check = FALSE;
230     }
231    
232     return Check;
233     }
234    
235     static BOOL ParseFirstJP(BYTE b)
236     // returns TRUE if b is processed
237     // (actually allways returns TRUE)
238     {
239     VttermKanjiWork *w = &KanjiWork;
240     if (KanjiIn) {
241 zmatsuo 10759 if (((! ConvJIS) && (0x3F<b) && (b<0xFD)) ||
242     (ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
243     ((0xa0<b) && (b<0xff)) )) )
244 zmatsuo 10755 {
245 zmatsuo 10758 unsigned long u32;
246     Kanji = Kanji + b;
247     if (ConvJIS) {
248     // JIS -> Shift_JIS(CP932)
249     Kanji = JIS2SJIS((WORD)(Kanji & 0x7f7f));
250     }
251     u32 = CP932ToUTF32(Kanji);
252     PutU32(u32);
253 zmatsuo 10755 KanjiIn = FALSE;
254     return TRUE;
255     }
256     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
257     KanjiIn = FALSE;
258     }
259     else if ((b==CR) && Wrap) {
260     CarriageReturn(FALSE);
261     LineFeed(LF,FALSE);
262     Wrap = FALSE;
263     }
264     }
265    
266     if (SSflag) {
267     if (w->Gn[GLtmp] == IdKanji) {
268     Kanji = b << 8;
269     KanjiIn = TRUE;
270     SSflag = FALSE;
271     return TRUE;
272     }
273     else if (w->Gn[GLtmp] == IdKatakana) {
274     b = b | 0x80;
275     }
276    
277     PutChar(b);
278     SSflag = FALSE;
279     return TRUE;
280     }
281    
282     if ((!EUCsupIn) && (!EUCkanaIn) && (!KanjiIn) && CheckKanji(b)) {
283     Kanji = b << 8;
284     KanjiIn = TRUE;
285     return TRUE;
286     }
287    
288     if (b<=US) {
289     ParseControl(b);
290     }
291     else if (b==0x20) {
292     PutChar(b);
293     }
294     else if ((b>=0x21) && (b<=0x7E)) {
295     if (EUCsupIn) {
296     EUCcount--;
297     EUCsupIn = (EUCcount==0);
298     return TRUE;
299     }
300    
301     if ((w->Gn[w->Glr[0]] == IdKatakana) || EUCkanaIn) {
302     b = b | 0x80;
303     EUCkanaIn = FALSE;
304     {
305     // b��sjis�����p�J�^�J�i
306     unsigned long u32 = CP932ToUTF32(b);
307     PutU32(u32);
308     }
309     return TRUE;
310     }
311     PutChar(b);
312     }
313     else if (b==0x7f) {
314     return TRUE;
315     }
316     else if ((b>=0x80) && (b<=0x8D)) {
317     ParseControl(b);
318     }
319     else if (b==0x8E) { // SS2
320     switch (ts.KanjiCode) {
321     case IdEUC:
322     if (ts.ISO2022Flag & ISO2022_SS2) {
323     EUCkanaIn = TRUE;
324     }
325     break;
326     case IdUTF8:
327 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
328 zmatsuo 10755 break;
329     default:
330     ParseControl(b);
331     }
332     }
333     else if (b==0x8F) { // SS3
334     switch (ts.KanjiCode) {
335     case IdEUC:
336     if (ts.ISO2022Flag & ISO2022_SS3) {
337     EUCcount = 2;
338     EUCsupIn = TRUE;
339     }
340     break;
341     case IdUTF8:
342 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
343 zmatsuo 10755 break;
344     default:
345     ParseControl(b);
346     }
347     }
348     else if ((b>=0x90) && (b<=0x9F)) {
349     ParseControl(b);
350     }
351     else if (b==0xA0) {
352     PutChar(0x20);
353     }
354     else if ((b>=0xA1) && (b<=0xFE)) {
355     if (EUCsupIn) {
356     EUCcount--;
357     EUCsupIn = (EUCcount==0);
358     return TRUE;
359     }
360    
361     if ((w->Gn[w->Glr[1]] != IdASCII) ||
362 zmatsuo 10759 ((ts.KanjiCode==IdEUC) && EUCkanaIn) ||
363 zmatsuo 10755 (ts.KanjiCode==IdSJIS) ||
364 zmatsuo 10759 ((ts.KanjiCode==IdJIS) &&
365     (ts.JIS7Katakana==0) &&
366     ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
367 zmatsuo 10755 // b��sjis�����p�J�^�J�i
368     unsigned long u32 = CP932ToUTF32(b);
369     PutU32(u32);
370     } else {
371     if (w->Gn[w->Glr[1]] == IdASCII) {
372     b = b & 0x7f;
373     }
374     PutChar(b);
375     }
376     EUCkanaIn = FALSE;
377     }
378     else {
379     PutChar(b);
380     }
381    
382     return TRUE;
383     }
384    
385     static BOOL ParseFirstKR(BYTE b)
386     // returns TRUE if b is processed
387     // (actually allways returns TRUE)
388     {
389     VttermKanjiWork *w = &KanjiWork;
390     if (KanjiIn) {
391 zmatsuo 10759 if (((0x41<=b) && (b<=0x5A)) ||
392     ((0x61<=b) && (b<=0x7A)) ||
393     ((0x81<=b) && (b<=0xFE)))
394 zmatsuo 10755 {
395 zmatsuo 10758 unsigned long u32 = 0;
396 zmatsuo 10768 if (ts.KanjiCode == IdKoreanCP949) {
397 zmatsuo 10758 // CP51949
398     Kanji = Kanji + b;
399     u32 = MBCP_UTF32(Kanji, 51949);
400     }
401     else {
402     assert(FALSE);
403     }
404     PutU32(u32);
405 zmatsuo 10755 KanjiIn = FALSE;
406     return TRUE;
407     }
408     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
409     KanjiIn = FALSE;
410     }
411     else if ((b==CR) && Wrap) {
412     CarriageReturn(FALSE);
413     LineFeed(LF,FALSE);
414     Wrap = FALSE;
415     }
416     }
417    
418     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
419     Kanji = b << 8;
420     KanjiIn = TRUE;
421     return TRUE;
422     }
423    
424     if (b<=US) {
425     ParseControl(b);
426     }
427     else if (b==0x20) {
428     PutChar(b);
429     }
430     else if ((b>=0x21) && (b<=0x7E)) {
431     // if (Gn[Glr[0]] == IdKatakana) {
432     // b = b | 0x80;
433     // }
434     PutChar(b);
435     }
436     else if (b==0x7f) {
437     return TRUE;
438     }
439     else if ((0x80<=b) && (b<=0x9F)) {
440     ParseControl(b);
441     }
442     else if (b==0xA0) {
443     PutChar(0x20);
444     }
445     else if ((b>=0xA1) && (b<=0xFE)) {
446     if (w->Gn[w->Glr[1]] == IdASCII) {
447     b = b & 0x7f;
448     }
449     PutChar(b);
450     }
451     else {
452     PutChar(b);
453     }
454    
455     return TRUE;
456     }
457    
458     static BOOL ParseFirstCn(BYTE b)
459     // returns TRUE if b is processed
460     // (actually allways returns TRUE)
461     {
462     VttermKanjiWork *w = &KanjiWork;
463     if (KanjiIn) {
464     // TODO
465 zmatsuo 10759 if (((0x40<=b) && (b<=0x7e)) ||
466     ((0xa1<=b) && (b<=0xFE)))
467 zmatsuo 10755 {
468 zmatsuo 10758 unsigned long u32 = 0;
469     Kanji = Kanji + b;
470     if (ts.KanjiCode == IdCnGB2312) {
471     // CP936 GB2312
472     u32 = MBCP_UTF32(Kanji, 936);
473     }
474     else if (ts.KanjiCode == IdCnBig5) {
475     // CP950 Big5
476     u32 = MBCP_UTF32(Kanji, 950);
477     }
478     else {
479     assert(FALSE);
480     }
481     PutU32(u32);
482 zmatsuo 10755 KanjiIn = FALSE;
483     return TRUE;
484     }
485     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
486     KanjiIn = FALSE;
487     }
488     else if ((b==CR) && Wrap) {
489     CarriageReturn(FALSE);
490     LineFeed(LF,FALSE);
491     Wrap = FALSE;
492     }
493     }
494    
495     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
496     Kanji = b << 8;
497     KanjiIn = TRUE;
498     return TRUE;
499     }
500    
501     if (b<=US) {
502     ParseControl(b);
503     }
504     else if (b==0x20) {
505     PutChar(b);
506     }
507     else if ((b>=0x21) && (b<=0x7E)) {
508     // if (Gn[Glr[0]] == IdKatakana) {
509     // b = b | 0x80;
510     // }
511     PutChar(b);
512     }
513     else if (b==0x7f) {
514     return TRUE;
515     }
516     else if ((0x80<=b) && (b<=0x9F)) {
517     ParseControl(b);
518     }
519     else if (b==0xA0) {
520     PutChar(0x20);
521     }
522     else if ((b>=0xA1) && (b<=0xFE)) {
523     if (w->Gn[w->Glr[1]] == IdASCII) {
524     b = b & 0x7f;
525     }
526     PutChar(b);
527     }
528     else {
529     PutChar(b);
530     }
531    
532     return TRUE;
533     }
534    
535     static void ParseASCII(BYTE b)
536     {
537     if (SSflag) {
538     PutChar(b);
539     SSflag = FALSE;
540     return;
541     }
542    
543     if (b<=US) {
544     ParseControl(b);
545     } else if ((b>=0x20) && (b<=0x7E)) {
546 zmatsuo 10760 PutU32(b);
547 zmatsuo 10755 } else if ((b==0x8E) || (b==0x8F)) {
548 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
549 zmatsuo 10755 } else if ((b>=0x80) && (b<=0x9F)) {
550     ParseControl(b);
551     } else if (b>=0xA0) {
552 zmatsuo 10760 PutU32(b);
553 zmatsuo 10755 }
554     }
555    
556 zmatsuo 10770 /**
557     * REPLACEMENT_CHARACTER ���\��
558     * UTF-8 �f�R�[�h�����g�p
559     */
560 zmatsuo 10764 static void PutReplacementChr(VttermKanjiWork *w, const BYTE *ptr, size_t len, BOOL fallback)
561 zmatsuo 10763 {
562     const char32_t replacement_char = w->replacement_char;
563     int i;
564     for (i = 0; i < len; i++) {
565     BYTE c = *ptr++;
566 zmatsuo 10770 assert(IsC0(c));
567 zmatsuo 10764 if (fallback) {
568     // fallback ISO8859-1
569     PutU32(c);
570 zmatsuo 10763 }
571     else {
572 zmatsuo 10764 // fallback������
573     if (c < 0x80) {
574     // �s����UTF-8��������������0x80�������������A
575     // 1������UTF-8�������������������\������
576 zmatsuo 10770 PutU32(c);
577 zmatsuo 10764 }
578     else {
579     PutU32(replacement_char);
580     }
581 zmatsuo 10763 }
582     }
583     }
584    
585 zmatsuo 10770 /**
586     * UTF-8�����M�f�[�^����������
587     *
588     * returns TRUE if b is processed
589     */
590 zmatsuo 10755 static BOOL ParseFirstUTF8(BYTE b)
591     {
592 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
593 zmatsuo 10766 char32_t code;
594 zmatsuo 10755
595 zmatsuo 10763 if (Fallbacked) {
596     BOOL r = ParseFirstJP(b);
597     Fallbacked = FALSE;
598     return r;
599 zmatsuo 10755 }
600    
601     // UTF-8�G���R�[�h
602 zmatsuo 10766 // The Unicode Standard Chapter 3
603     // Table 3-7. Well-Formed UTF-8 Byte Sequences
604     // | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
605     // | U+0000..U+007F | 00..7F | | | |
606     // | U+0080..U+07FF | C2..DF | 80..BF | | |
607     // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
608     // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
609     // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
610     // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
611     // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
612     // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
613     // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
614 zmatsuo 10755 // UTF-8���f�R�[�h������������
615     // - 1byte��
616 zmatsuo 10766 // - 0x00 - 0x7f ok
617     // - 0x80 - 0xc1 ng
618     // - 0xc2 - 0xf4 ok
619     // - 0xf5 - 0xff ng
620 zmatsuo 10755 // - 2byte�����~
621 zmatsuo 10766 // - 0x00 - 0x7f ng
622     // - 0x80 - 0xbf ok
623     // - 0xc0 - 0xff ng
624     // - 2byte�����O
625     // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
626     // - 1byte == 0xed ������ 0x80 - 0x9f����ok
627     // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
628     // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
629 zmatsuo 10763 recheck:
630 zmatsuo 10755 // 1byte(7bit)
631 zmatsuo 10767 if (w->count == 0) {
632 zmatsuo 10770 if (IsC0(b)) {
633     // U+0000 .. U+001f
634     // C0��������, C0 Coontrols
635     ParseControl(b);
636 zmatsuo 10755 return TRUE;
637     }
638 zmatsuo 10770 else if (b <= 0x7f) {
639     // 0x7f����, �������A���������o��
640     PutU32(b);
641     return TRUE;
642     }
643     else if (0xc2 <= b && b <= 0xf4) {
644 zmatsuo 10766 // 1byte������
645 zmatsuo 10767 w->buf[w->count++] = b;
646 zmatsuo 10755 return TRUE;
647     }
648    
649 zmatsuo 10770 // 0x80 - 0xc1, 0xf5 - 0xff
650 zmatsuo 10766 // UTF-8��1byte���o���������R�[�h������
651     if (ts.FallbackToCP932) {
652     // fallback��������
653     if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
654     // ���{�������� && Shift_JIS 1byte��
655     // Shift_JIS �� fallback
656     Fallbacked = TRUE;
657     ConvJIS = FALSE;
658     Kanji = b << 8;
659     KanjiIn = TRUE;
660     return TRUE;
661 zmatsuo 10755 }
662 zmatsuo 10766 // fallback ISO8859-1
663     PutU32(b);
664     return TRUE;
665 zmatsuo 10755 }
666     else {
667 zmatsuo 10766 // fallback������, �s������������
668 zmatsuo 10767 w->buf[0] = b;
669     PutReplacementChr(w, w->buf, 1, FALSE);
670 zmatsuo 10755 }
671 zmatsuo 10766 return TRUE;
672 zmatsuo 10755 }
673    
674 zmatsuo 10764 // 2byte���~����?
675 zmatsuo 10766 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
676     // �s��������, (����2bit�� 0b10xx_xxxx ��������)
677 zmatsuo 10767 PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932);
678     w->count = 0;
679 zmatsuo 10764 goto recheck;
680     }
681    
682 zmatsuo 10755 // 2byte�����~����
683 zmatsuo 10767 w->buf[w->count++] = b;
684 zmatsuo 10755
685 zmatsuo 10766 // 2byte(11bit)
686 zmatsuo 10767 if (w->count == 2) {
687     if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf)
688 zmatsuo 10766 // 5bit + 6bit
689 zmatsuo 10767 code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f);
690 zmatsuo 10770 if (IsC1(code)) {
691     // U+0080 .. u+009f
692     // C1��������, C1 Controls
693     ParseControl((BYTE)code);
694     }
695     else {
696     PutU32(code);
697     }
698 zmatsuo 10767 w->count = 0;
699 zmatsuo 10755 return TRUE;
700     }
701 zmatsuo 10766 return TRUE;
702     }
703    
704     // 3byte(16bit)
705 zmatsuo 10767 if (w->count == 3) {
706     if ((w->buf[0] & 0xf0) == 0xe0) {
707     if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) ||
708     (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) {
709 zmatsuo 10766 // �s���� UTF-8
710 zmatsuo 10767 PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932);
711     w->count = 0;
712 zmatsuo 10766 goto recheck;
713     }
714 zmatsuo 10755 // 4bit + 6bit + 6bit
715 zmatsuo 10767 code = ((w->buf[0] & 0xf) << 12);
716     code |= ((w->buf[1] & 0x3f) << 6);
717     code |= ((w->buf[2] & 0x3f));
718 zmatsuo 10755 PutU32(code);
719 zmatsuo 10767 w->count = 0;
720 zmatsuo 10755 return TRUE;
721     }
722 zmatsuo 10766 return TRUE;
723 zmatsuo 10755 }
724    
725     // 4byte(21bit)
726 zmatsuo 10767 assert(w->count == 4);
727     assert((w->buf[0] & 0xf8) == 0xf0);
728     if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) ||
729     (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) {
730 zmatsuo 10766 // �s���� UTF-8
731 zmatsuo 10767 PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932);
732     w->count = 0;
733 zmatsuo 10766 goto recheck;
734 zmatsuo 10755 }
735 zmatsuo 10766 // 3bit + 6bit + 6bit + 6bit
736 zmatsuo 10767 code = ((w->buf[0] & 0x07) << 18);
737     code |= ((w->buf[1] & 0x3f) << 12);
738     code |= ((w->buf[2] & 0x3f) << 6);
739     code |= (w->buf[3] & 0x3f);
740 zmatsuo 10766 PutU32(code);
741 zmatsuo 10767 w->count = 0;
742 zmatsuo 10755 return TRUE;
743     }
744    
745     static BOOL ParseFirstRus(BYTE b)
746     // returns if b is processed
747     {
748 zmatsuo 10770 if (IsC0(b)) {
749     ParseControl(b);
750     return TRUE;
751     }
752 zmatsuo 10756 // CP1251������
753     BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
754     // CP1251->Unicode
755     unsigned long u32 = MBCP_UTF32(c, 1251);
756     PutU32(u32);
757     return TRUE;
758 zmatsuo 10755 }
759    
760     static BOOL ParseEnglish(BYTE b)
761     {
762     unsigned short u16 = 0;
763     int part = KanjiCodeToISO8859Part(ts.KanjiCode);
764     int r = UnicodeFromISO8859(part, b, &u16);
765     if (r == 0) {
766     return FALSE;
767     }
768     if (u16 < 0x100) {
769     ParseASCII((BYTE)u16);
770     }
771     else {
772     PutU32(u16);
773     }
774     return TRUE;
775     }
776    
777 zmatsuo 10771 static void PutDebugChar(BYTE b)
778     {
779     int i;
780     BOOL svInsertMode, svAutoWrapMode;
781     TCharAttr svCharAttr;
782     TCharAttr char_attr;
783    
784     svInsertMode = TermGetInsertMode();
785     TermSetInsertMode(FALSE);
786     svAutoWrapMode = TermGetAutoWrapMode();
787     TermSetAutoWrapMode(TRUE);
788    
789     TermGetAttr(&svCharAttr);
790     char_attr = svCharAttr;
791     char_attr.Attr = AttrDefault;
792     TermSetAttr(&char_attr);
793    
794     if (DebugFlag==DEBUG_FLAG_HEXD) {
795     char buff[3];
796     _snprintf(buff, 3, "%02X", (unsigned int) b);
797    
798     for (i=0; i<2; i++)
799     PutChar(buff[i]);
800     PutChar(' ');
801     }
802     else if (DebugFlag==DEBUG_FLAG_NORM) {
803    
804     if ((b & 0x80) == 0x80) {
805     //UpdateStr();
806     char_attr.Attr = AttrReverse;
807     TermSetAttr(&char_attr);
808     b = b & 0x7f;
809     }
810    
811     if (b<=US) {
812     PutChar('^');
813     PutChar((char)(b+0x40));
814     }
815     else if (b==DEL) {
816     PutChar('<');
817     PutChar('D');
818     PutChar('E');
819     PutChar('L');
820     PutChar('>');
821     }
822     else
823     PutChar(b);
824     }
825    
826     TermSetAttr(&char_attr);
827     TermSetInsertMode(svInsertMode);
828     TermSetAutoWrapMode(svAutoWrapMode);
829     }
830    
831     void ParseFirst(BYTE b)
832     {
833     WORD language = ts.Language;
834     if (DebugFlag != DEBUG_FLAG_NONE) {
835     language = IdDebug;
836     }
837    
838     switch (language) {
839     case IdUtf8:
840     ParseFirstUTF8(b);
841 zmatsuo 10755 return;
842    
843 zmatsuo 10771 case IdJapanese:
844 zmatsuo 10755 switch (ts.KanjiCode) {
845 zmatsuo 10771 case IdUTF8:
846     if (ParseFirstUTF8(b)) {
847 zmatsuo 10755 return;
848     }
849     break;
850 zmatsuo 10771 default:
851 zmatsuo 10755 if (ParseFirstJP(b)) {
852     return;
853     }
854     }
855     break;
856    
857 zmatsuo 10771 case IdKorean:
858 zmatsuo 10755 switch (ts.KanjiCode) {
859 zmatsuo 10771 case IdUTF8:
860 zmatsuo 10755 if (ParseFirstUTF8(b)) {
861     return;
862     }
863     break;
864 zmatsuo 10771 default:
865 zmatsuo 10755 if (ParseFirstKR(b)) {
866     return;
867     }
868     }
869     break;
870    
871 zmatsuo 10771 case IdRussian:
872 zmatsuo 10755 if (ParseFirstRus(b)) {
873     return;
874     }
875     break;
876    
877     case IdChinese:
878     switch (ts.KanjiCode) {
879     case IdUTF8:
880     if (ParseFirstUTF8(b)) {
881     return;
882     }
883     break;
884     default:
885     if (ParseFirstCn(b)) {
886     return;
887     }
888     }
889     break;
890     case IdEnglish: {
891     if (ParseEnglish(b)) {
892     return;
893     }
894     break;
895     }
896 zmatsuo 10771 case IdDebug: {
897     PutDebugChar(b);
898     return;
899 zmatsuo 10755 }
900 zmatsuo 10771 }
901 zmatsuo 10755
902     if (SSflag) {
903     PutChar(b);
904     SSflag = FALSE;
905     return;
906     }
907    
908     if (b<=US)
909     ParseControl(b);
910     else if ((b>=0x20) && (b<=0x7E))
911     PutChar(b);
912     else if ((b>=0x80) && (b<=0x9F))
913     ParseControl(b);
914     else if (b>=0xA0)
915     PutChar(b);
916     }
917    
918     /**
919     * �w��(Designate)
920     *
921     * @param Gn 0/1/2/3 = G0/G1/G2/G3
922     * @param codeset IdASCII 0
923     * IdKatakana 1
924     * IdKanji 2
925     * IdSpecial 3
926     */
927     void CharSet2022Designate(int gn, int cs)
928     {
929     VttermKanjiWork *w = &KanjiWork;
930     w->Gn[gn] = cs;
931     }
932    
933     /**
934     * �����o��(Invoke)
935     * @param glr 0/1 = GL/GR (Locking shift�������L��)
936     * @param gn 0/1/2/3 = G0/G1/G2/G3
937     * @param single_shift FALSE Locking shift
938     * TRUE Single shift
939     */
940     void CharSet2022Invoke(int glr, int gn, BOOL single_shift)
941     {
942     VttermKanjiWork *w = &KanjiWork;
943     if (single_shift == FALSE) {
944     // Locking shift
945     w->Glr[glr] = gn;
946     }
947     else {
948     // Single shift
949     GLtmp = gn;
950     SSflag = TRUE;
951     }
952     }
953    
954     /**
955     * DEC�����t�H���g(Tera Special font)
956     * 0140(0x60) ... 0176(0x7f) ���r�����A�T�C������������
957 zmatsuo 10760 * (0xe0) ... (0xff) ��?
958 zmatsuo 10755 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
959     * about/emulations.html
960     *
961     * @param b �R�[�h
962 zmatsuo 10760 * @retval TRUE IdSpecial
963     * @retval FALSE IdSpecial��������
964 zmatsuo 10755 */
965     BOOL CharSetIsSpecial(BYTE b)
966     {
967     VttermKanjiWork *w = &KanjiWork;
968     BOOL SpecialNew = FALSE;
969    
970     if ((b>0x5F) && (b<0x80)) {
971     if (SSflag)
972     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
973     else
974     SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial);
975     }
976     else if (b>0xDF) {
977     if (SSflag)
978     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
979     else
980     SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial);
981     }
982    
983     return SpecialNew;
984     }
985    
986     static void CharSetSaveStateLow(CharSetState *state, const VttermKanjiWork *w)
987     {
988     int i;
989     state->infos[0] = w->Glr[0];
990     state->infos[1] = w->Glr[1];
991     for (i=0 ; i<=3; i++) {
992     state->infos[2 + i] = w->Gn[i];
993     }
994     }
995    
996     /**
997     * ��������������
998     */
999     void CharSetSaveState(CharSetState *state)
1000     {
1001     VttermKanjiWork *w = &KanjiWork;
1002     CharSetSaveStateLow(state, w);
1003     }
1004    
1005     /**
1006     * ���������A����
1007     */
1008     void CharSetLoadState(const CharSetState *state)
1009     {
1010     VttermKanjiWork *w = &KanjiWork;
1011     int i;
1012     w->Glr[0] = state->infos[0];
1013     w->Glr[1] = state->infos[1];
1014     for (i=0 ; i<=3; i++) {
1015     w->Gn[i] = state->infos[2 + i];
1016     }
1017     }
1018 zmatsuo 10763
1019     /**
1020     * �t�H�[���o�b�N���I��
1021     * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
1022     *
1023     */
1024     void CharSetFallbackFinish(void)
1025     {
1026     Fallbacked = FALSE;
1027     }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26