Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /trunk/teraterm/teraterm/charset.cpp

Parent Directory | Revision Log


Revision 10776 - (hide annotations) (download) (as text)
Wed Jun 21 15:08:55 2023 UTC (9 months, 3 weeks ago) by zmatsuo
File MIME type: text/x-c++src
File size: 22481 byte(s)
CharSet2022Invoke() の引数を変更
1 zmatsuo 10755 /*
2     * (C) 2023- TeraTerm Project
3     * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include "teraterm.h"
30     #include "tttypes.h"
31     #include <stdio.h>
32     #include <string.h>
33     #if !defined(_CRTDBG_MAP_ALLOC)
34     #define _CRTDBG_MAP_ALLOC
35     #endif
36     #include <stdlib.h>
37     #include <crtdbg.h>
38     #include <assert.h>
39    
40     #include "buffer.h" // for Wrap
41     #include "ttwinman.h"
42     #include "codeconv.h"
43     #include "unicode.h"
44     #include "language.h" // for JIS2SJIS()
45 zmatsuo 10763 #include "ttcstd.h"
46 zmatsuo 10773 #include "vtterm.h"
47 zmatsuo 10755
48     #include "charset.h"
49    
// Character shown in place of a byte that is not valid UTF-8.
// Other candidate values, kept for reference: 0x2592, 0x20, 0xfffd.
#define REPLACEMENT_CHARACTER	'?'
55    
56 zmatsuo 10755 static BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
57     static BOOL EUCkanaIn, EUCsupIn;
58     static int EUCcount;
59    
60     /* GL for single shift 2/3 */
61     static int GLtmp;
62     /* single shift 2/3 flag */
63     static BOOL SSflag;
64     /* JIS -> SJIS conversion flag */
65     static BOOL ConvJIS;
66     static WORD Kanji;
67 zmatsuo 10763 static BOOL Fallbacked;
68 zmatsuo 10755
69 zmatsuo 10773 static BYTE DebugFlag = DEBUG_FLAG_NONE;
70    
71 zmatsuo 10755 typedef struct {
72     /* GL, GR code group */
73     int Glr[2];
74     /* G0, G1, G2, G3 code group */
75     int Gn[4];
76 zmatsuo 10763 //
77     char32_t replacement_char;
78 zmatsuo 10767 // UTF-8 work
79     BYTE buf[4];
80     int count;
81 zmatsuo 10755 } VttermKanjiWork;
82    
83     static VttermKanjiWork KanjiWork;
84    
85 zmatsuo 10770 static BOOL IsC0(char32_t b)
86     {
87     return (b <= US);
88     }
89    
90     static BOOL IsC1(char32_t b)
91     {
92     return ((b>=0x80) && (b<=0x9F));
93     }
94    
95     /**
96     * PutU32() wrapper
97     * Unicode�x�[�X����������
98     */
99 zmatsuo 10760 static void PutChar(BYTE b)
100     {
101     PutU32(b);
102     }
103 zmatsuo 10755
104     /**
105     * ISO2022�p���[�N������������
106     */
107     static void CharSetInit2(VttermKanjiWork *w)
108     {
109     if (ts.Language==IdJapanese) {
110     w->Gn[0] = IdASCII;
111     w->Gn[1] = IdKatakana;
112     w->Gn[2] = IdKatakana;
113     w->Gn[3] = IdKanji;
114     w->Glr[0] = 0;
115     if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
116     w->Glr[1] = 2; // 8-bit katakana
117     else
118     w->Glr[1] = 3;
119     }
120     else {
121     w->Gn[0] = IdASCII;
122     w->Gn[1] = IdSpecial;
123     w->Gn[2] = IdASCII;
124     w->Gn[3] = IdASCII;
125     w->Glr[0] = 0;
126     w->Glr[1] = 0;
127     }
128     }
129    
130     /**
131     * �������A���[�N������������
132     */
133     void CharSetInit(void)
134     {
135 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
136    
137     CharSetInit2(w);
138    
139     w->replacement_char = REPLACEMENT_CHARACTER;
140 zmatsuo 10755 SSflag = FALSE;
141    
142     KanjiIn = FALSE;
143     EUCkanaIn = FALSE;
144     EUCsupIn = FALSE;
145     ConvJIS = FALSE;
146     Fallbacked = FALSE;
147     }
148    
149     /**
150     * 1byte���`�F�b�N
151     */
152     static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
153     {
154     switch (lang) {
155     case IdKorean:
156     return __ismbblead(b, 51949);
157     case IdChinese:
158     if (kanji_code == IdCnGB2312) {
159     return __ismbblead(b, 936);
160     }
161     else if (ts.KanjiCode == IdCnBig5) {
162     return __ismbblead(b, 950);
163     }
164     break;
165     default:
166     assert(FALSE);
167     break;
168     }
169     assert(FALSE);
170     return FALSE;
171     }
172 zmatsuo 10763
173 zmatsuo 10755 /**
174 zmatsuo 10763 * Double-byte Character Sets
175     * SJIS��1byte��?
176     *
177     * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
178     * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
179     */
180     static BOOL ismbbleadSJIS(BYTE b)
181     {
182     if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
183     return TRUE;
184     }
185     return FALSE;
186     }
187    
188     /**
189 zmatsuo 10755 * ts.Language == IdJapanese ��
190     * 1byte���`�F�b�N
191     */
192     static BOOL CheckKanji(BYTE b)
193     {
194     VttermKanjiWork *w = &KanjiWork;
195     BOOL Check;
196    
197     if (ts.Language!=IdJapanese)
198     return FALSE;
199    
200     ConvJIS = FALSE;
201    
202     if (ts.KanjiCode==IdSJIS ||
203     (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) {
204 zmatsuo 10759 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
205 zmatsuo 10755 Fallbacked = TRUE;
206     return TRUE; // SJIS kanji
207     }
208     if ((0xa1<=b) && (b<=0xdf)) {
209     return FALSE; // SJIS katakana
210     }
211     }
212    
213     if ((b>=0x21) && (b<=0x7e)) {
214     Check = (w->Gn[w->Glr[0]] == IdKanji);
215     ConvJIS = Check;
216     }
217     else if ((b>=0xA1) && (b<=0xFE)) {
218     Check = (w->Gn[w->Glr[1]] == IdKanji);
219     if (ts.KanjiCode==IdEUC) {
220     Check = TRUE;
221     }
222     else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
223     Check = FALSE; // 8-bit katakana
224     }
225     ConvJIS = Check;
226     }
227     else {
228     Check = FALSE;
229     }
230    
231     return Check;
232     }
233    
234     static BOOL ParseFirstJP(BYTE b)
235     // returns TRUE if b is processed
236     // (actually allways returns TRUE)
237     {
238     VttermKanjiWork *w = &KanjiWork;
239     if (KanjiIn) {
240 zmatsuo 10759 if (((! ConvJIS) && (0x3F<b) && (b<0xFD)) ||
241     (ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
242     ((0xa0<b) && (b<0xff)) )) )
243 zmatsuo 10755 {
244 zmatsuo 10758 unsigned long u32;
245     Kanji = Kanji + b;
246     if (ConvJIS) {
247     // JIS -> Shift_JIS(CP932)
248     Kanji = JIS2SJIS((WORD)(Kanji & 0x7f7f));
249     }
250     u32 = CP932ToUTF32(Kanji);
251     PutU32(u32);
252 zmatsuo 10755 KanjiIn = FALSE;
253     return TRUE;
254     }
255     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
256     KanjiIn = FALSE;
257     }
258     else if ((b==CR) && Wrap) {
259     CarriageReturn(FALSE);
260     LineFeed(LF,FALSE);
261     Wrap = FALSE;
262     }
263     }
264    
265     if (SSflag) {
266     if (w->Gn[GLtmp] == IdKanji) {
267     Kanji = b << 8;
268     KanjiIn = TRUE;
269     SSflag = FALSE;
270     return TRUE;
271     }
272     else if (w->Gn[GLtmp] == IdKatakana) {
273     b = b | 0x80;
274     }
275    
276     PutChar(b);
277     SSflag = FALSE;
278     return TRUE;
279     }
280    
281     if ((!EUCsupIn) && (!EUCkanaIn) && (!KanjiIn) && CheckKanji(b)) {
282     Kanji = b << 8;
283     KanjiIn = TRUE;
284     return TRUE;
285     }
286    
287     if (b<=US) {
288     ParseControl(b);
289     }
290     else if (b==0x20) {
291     PutChar(b);
292     }
293     else if ((b>=0x21) && (b<=0x7E)) {
294     if (EUCsupIn) {
295     EUCcount--;
296     EUCsupIn = (EUCcount==0);
297     return TRUE;
298     }
299    
300     if ((w->Gn[w->Glr[0]] == IdKatakana) || EUCkanaIn) {
301     b = b | 0x80;
302     EUCkanaIn = FALSE;
303     {
304     // b��sjis�����p�J�^�J�i
305     unsigned long u32 = CP932ToUTF32(b);
306     PutU32(u32);
307     }
308     return TRUE;
309     }
310     PutChar(b);
311     }
312     else if (b==0x7f) {
313     return TRUE;
314     }
315     else if ((b>=0x80) && (b<=0x8D)) {
316     ParseControl(b);
317     }
318     else if (b==0x8E) { // SS2
319     switch (ts.KanjiCode) {
320     case IdEUC:
321     if (ts.ISO2022Flag & ISO2022_SS2) {
322     EUCkanaIn = TRUE;
323     }
324     break;
325     case IdUTF8:
326 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
327 zmatsuo 10755 break;
328     default:
329     ParseControl(b);
330     }
331     }
332     else if (b==0x8F) { // SS3
333     switch (ts.KanjiCode) {
334     case IdEUC:
335     if (ts.ISO2022Flag & ISO2022_SS3) {
336     EUCcount = 2;
337     EUCsupIn = TRUE;
338     }
339     break;
340     case IdUTF8:
341 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
342 zmatsuo 10755 break;
343     default:
344     ParseControl(b);
345     }
346     }
347     else if ((b>=0x90) && (b<=0x9F)) {
348     ParseControl(b);
349     }
350     else if (b==0xA0) {
351     PutChar(0x20);
352     }
353     else if ((b>=0xA1) && (b<=0xFE)) {
354     if (EUCsupIn) {
355     EUCcount--;
356     EUCsupIn = (EUCcount==0);
357     return TRUE;
358     }
359    
360     if ((w->Gn[w->Glr[1]] != IdASCII) ||
361 zmatsuo 10759 ((ts.KanjiCode==IdEUC) && EUCkanaIn) ||
362 zmatsuo 10755 (ts.KanjiCode==IdSJIS) ||
363 zmatsuo 10759 ((ts.KanjiCode==IdJIS) &&
364     (ts.JIS7Katakana==0) &&
365     ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
366 zmatsuo 10755 // b��sjis�����p�J�^�J�i
367     unsigned long u32 = CP932ToUTF32(b);
368     PutU32(u32);
369     } else {
370     if (w->Gn[w->Glr[1]] == IdASCII) {
371     b = b & 0x7f;
372     }
373     PutChar(b);
374     }
375     EUCkanaIn = FALSE;
376     }
377     else {
378     PutChar(b);
379     }
380    
381     return TRUE;
382     }
383    
384     static BOOL ParseFirstKR(BYTE b)
385     // returns TRUE if b is processed
386     // (actually allways returns TRUE)
387     {
388     VttermKanjiWork *w = &KanjiWork;
389     if (KanjiIn) {
390 zmatsuo 10759 if (((0x41<=b) && (b<=0x5A)) ||
391     ((0x61<=b) && (b<=0x7A)) ||
392     ((0x81<=b) && (b<=0xFE)))
393 zmatsuo 10755 {
394 zmatsuo 10758 unsigned long u32 = 0;
395 zmatsuo 10768 if (ts.KanjiCode == IdKoreanCP949) {
396 zmatsuo 10758 // CP51949
397     Kanji = Kanji + b;
398     u32 = MBCP_UTF32(Kanji, 51949);
399     }
400     else {
401     assert(FALSE);
402     }
403     PutU32(u32);
404 zmatsuo 10755 KanjiIn = FALSE;
405     return TRUE;
406     }
407     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
408     KanjiIn = FALSE;
409     }
410     else if ((b==CR) && Wrap) {
411     CarriageReturn(FALSE);
412     LineFeed(LF,FALSE);
413     Wrap = FALSE;
414     }
415     }
416    
417     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
418     Kanji = b << 8;
419     KanjiIn = TRUE;
420     return TRUE;
421     }
422    
423     if (b<=US) {
424     ParseControl(b);
425     }
426     else if (b==0x20) {
427     PutChar(b);
428     }
429     else if ((b>=0x21) && (b<=0x7E)) {
430     // if (Gn[Glr[0]] == IdKatakana) {
431     // b = b | 0x80;
432     // }
433     PutChar(b);
434     }
435     else if (b==0x7f) {
436     return TRUE;
437     }
438     else if ((0x80<=b) && (b<=0x9F)) {
439     ParseControl(b);
440     }
441     else if (b==0xA0) {
442     PutChar(0x20);
443     }
444     else if ((b>=0xA1) && (b<=0xFE)) {
445     if (w->Gn[w->Glr[1]] == IdASCII) {
446     b = b & 0x7f;
447     }
448     PutChar(b);
449     }
450     else {
451     PutChar(b);
452     }
453    
454     return TRUE;
455     }
456    
457     static BOOL ParseFirstCn(BYTE b)
458     // returns TRUE if b is processed
459     // (actually allways returns TRUE)
460     {
461     VttermKanjiWork *w = &KanjiWork;
462     if (KanjiIn) {
463     // TODO
464 zmatsuo 10759 if (((0x40<=b) && (b<=0x7e)) ||
465     ((0xa1<=b) && (b<=0xFE)))
466 zmatsuo 10755 {
467 zmatsuo 10758 unsigned long u32 = 0;
468     Kanji = Kanji + b;
469     if (ts.KanjiCode == IdCnGB2312) {
470     // CP936 GB2312
471     u32 = MBCP_UTF32(Kanji, 936);
472     }
473     else if (ts.KanjiCode == IdCnBig5) {
474     // CP950 Big5
475     u32 = MBCP_UTF32(Kanji, 950);
476     }
477     else {
478     assert(FALSE);
479     }
480     PutU32(u32);
481 zmatsuo 10755 KanjiIn = FALSE;
482     return TRUE;
483     }
484     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
485     KanjiIn = FALSE;
486     }
487     else if ((b==CR) && Wrap) {
488     CarriageReturn(FALSE);
489     LineFeed(LF,FALSE);
490     Wrap = FALSE;
491     }
492     }
493    
494     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
495     Kanji = b << 8;
496     KanjiIn = TRUE;
497     return TRUE;
498     }
499    
500     if (b<=US) {
501     ParseControl(b);
502     }
503     else if (b==0x20) {
504     PutChar(b);
505     }
506     else if ((b>=0x21) && (b<=0x7E)) {
507     // if (Gn[Glr[0]] == IdKatakana) {
508     // b = b | 0x80;
509     // }
510     PutChar(b);
511     }
512     else if (b==0x7f) {
513     return TRUE;
514     }
515     else if ((0x80<=b) && (b<=0x9F)) {
516     ParseControl(b);
517     }
518     else if (b==0xA0) {
519     PutChar(0x20);
520     }
521     else if ((b>=0xA1) && (b<=0xFE)) {
522     if (w->Gn[w->Glr[1]] == IdASCII) {
523     b = b & 0x7f;
524     }
525     PutChar(b);
526     }
527     else {
528     PutChar(b);
529     }
530    
531     return TRUE;
532     }
533    
534     static void ParseASCII(BYTE b)
535     {
536     if (SSflag) {
537     PutChar(b);
538     SSflag = FALSE;
539     return;
540     }
541    
542     if (b<=US) {
543     ParseControl(b);
544     } else if ((b>=0x20) && (b<=0x7E)) {
545 zmatsuo 10760 PutU32(b);
546 zmatsuo 10755 } else if ((b==0x8E) || (b==0x8F)) {
547 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
548 zmatsuo 10755 } else if ((b>=0x80) && (b<=0x9F)) {
549     ParseControl(b);
550     } else if (b>=0xA0) {
551 zmatsuo 10760 PutU32(b);
552 zmatsuo 10755 }
553     }
554    
555 zmatsuo 10770 /**
556     * REPLACEMENT_CHARACTER ���\��
557     * UTF-8 �f�R�[�h�����g�p
558     */
559 zmatsuo 10764 static void PutReplacementChr(VttermKanjiWork *w, const BYTE *ptr, size_t len, BOOL fallback)
560 zmatsuo 10763 {
561     const char32_t replacement_char = w->replacement_char;
562     int i;
563     for (i = 0; i < len; i++) {
564     BYTE c = *ptr++;
565 zmatsuo 10770 assert(IsC0(c));
566 zmatsuo 10764 if (fallback) {
567     // fallback ISO8859-1
568     PutU32(c);
569 zmatsuo 10763 }
570     else {
571 zmatsuo 10764 // fallback������
572     if (c < 0x80) {
573     // �s����UTF-8��������������0x80�������������A
574     // 1������UTF-8�������������������\������
575 zmatsuo 10770 PutU32(c);
576 zmatsuo 10764 }
577     else {
578     PutU32(replacement_char);
579     }
580 zmatsuo 10763 }
581     }
582     }
583    
584 zmatsuo 10770 /**
585     * UTF-8�����M�f�[�^����������
586     *
587     * returns TRUE if b is processed
588     */
589 zmatsuo 10755 static BOOL ParseFirstUTF8(BYTE b)
590     {
591 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
592 zmatsuo 10766 char32_t code;
593 zmatsuo 10755
594 zmatsuo 10763 if (Fallbacked) {
595     BOOL r = ParseFirstJP(b);
596     Fallbacked = FALSE;
597     return r;
598 zmatsuo 10755 }
599    
600     // UTF-8�G���R�[�h
601 zmatsuo 10766 // The Unicode Standard Chapter 3
602     // Table 3-7. Well-Formed UTF-8 Byte Sequences
603     // | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
604     // | U+0000..U+007F | 00..7F | | | |
605     // | U+0080..U+07FF | C2..DF | 80..BF | | |
606     // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
607     // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
608     // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
609     // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
610     // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
611     // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
612     // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
613 zmatsuo 10755 // UTF-8���f�R�[�h������������
614     // - 1byte��
615 zmatsuo 10766 // - 0x00 - 0x7f ok
616     // - 0x80 - 0xc1 ng
617     // - 0xc2 - 0xf4 ok
618     // - 0xf5 - 0xff ng
619 zmatsuo 10755 // - 2byte�����~
620 zmatsuo 10766 // - 0x00 - 0x7f ng
621     // - 0x80 - 0xbf ok
622     // - 0xc0 - 0xff ng
623     // - 2byte�����O
624     // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
625     // - 1byte == 0xed ������ 0x80 - 0x9f����ok
626     // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
627     // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
628 zmatsuo 10763 recheck:
629 zmatsuo 10755 // 1byte(7bit)
630 zmatsuo 10767 if (w->count == 0) {
631 zmatsuo 10770 if (IsC0(b)) {
632     // U+0000 .. U+001f
633     // C0��������, C0 Coontrols
634     ParseControl(b);
635 zmatsuo 10755 return TRUE;
636     }
637 zmatsuo 10770 else if (b <= 0x7f) {
638     // 0x7f����, �������A���������o��
639     PutU32(b);
640     return TRUE;
641     }
642     else if (0xc2 <= b && b <= 0xf4) {
643 zmatsuo 10766 // 1byte������
644 zmatsuo 10767 w->buf[w->count++] = b;
645 zmatsuo 10755 return TRUE;
646     }
647    
648 zmatsuo 10770 // 0x80 - 0xc1, 0xf5 - 0xff
649 zmatsuo 10766 // UTF-8��1byte���o���������R�[�h������
650     if (ts.FallbackToCP932) {
651     // fallback��������
652     if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
653     // ���{�������� && Shift_JIS 1byte��
654     // Shift_JIS �� fallback
655     Fallbacked = TRUE;
656     ConvJIS = FALSE;
657     Kanji = b << 8;
658     KanjiIn = TRUE;
659     return TRUE;
660 zmatsuo 10755 }
661 zmatsuo 10766 // fallback ISO8859-1
662     PutU32(b);
663     return TRUE;
664 zmatsuo 10755 }
665     else {
666 zmatsuo 10766 // fallback������, �s������������
667 zmatsuo 10767 w->buf[0] = b;
668     PutReplacementChr(w, w->buf, 1, FALSE);
669 zmatsuo 10755 }
670 zmatsuo 10766 return TRUE;
671 zmatsuo 10755 }
672    
673 zmatsuo 10764 // 2byte���~����?
674 zmatsuo 10766 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
675     // �s��������, (����2bit�� 0b10xx_xxxx ��������)
676 zmatsuo 10767 PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932);
677     w->count = 0;
678 zmatsuo 10764 goto recheck;
679     }
680    
681 zmatsuo 10755 // 2byte�����~����
682 zmatsuo 10767 w->buf[w->count++] = b;
683 zmatsuo 10755
684 zmatsuo 10766 // 2byte(11bit)
685 zmatsuo 10767 if (w->count == 2) {
686     if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf)
687 zmatsuo 10766 // 5bit + 6bit
688 zmatsuo 10767 code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f);
689 zmatsuo 10770 if (IsC1(code)) {
690     // U+0080 .. u+009f
691     // C1��������, C1 Controls
692     ParseControl((BYTE)code);
693     }
694     else {
695     PutU32(code);
696     }
697 zmatsuo 10767 w->count = 0;
698 zmatsuo 10755 return TRUE;
699     }
700 zmatsuo 10766 return TRUE;
701     }
702    
703     // 3byte(16bit)
704 zmatsuo 10767 if (w->count == 3) {
705     if ((w->buf[0] & 0xf0) == 0xe0) {
706     if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) ||
707     (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) {
708 zmatsuo 10766 // �s���� UTF-8
709 zmatsuo 10767 PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932);
710     w->count = 0;
711 zmatsuo 10766 goto recheck;
712     }
713 zmatsuo 10755 // 4bit + 6bit + 6bit
714 zmatsuo 10767 code = ((w->buf[0] & 0xf) << 12);
715     code |= ((w->buf[1] & 0x3f) << 6);
716     code |= ((w->buf[2] & 0x3f));
717 zmatsuo 10755 PutU32(code);
718 zmatsuo 10767 w->count = 0;
719 zmatsuo 10755 return TRUE;
720     }
721 zmatsuo 10766 return TRUE;
722 zmatsuo 10755 }
723    
724     // 4byte(21bit)
725 zmatsuo 10767 assert(w->count == 4);
726     assert((w->buf[0] & 0xf8) == 0xf0);
727     if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) ||
728     (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) {
729 zmatsuo 10766 // �s���� UTF-8
730 zmatsuo 10767 PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932);
731     w->count = 0;
732 zmatsuo 10766 goto recheck;
733 zmatsuo 10755 }
734 zmatsuo 10766 // 3bit + 6bit + 6bit + 6bit
735 zmatsuo 10767 code = ((w->buf[0] & 0x07) << 18);
736     code |= ((w->buf[1] & 0x3f) << 12);
737     code |= ((w->buf[2] & 0x3f) << 6);
738     code |= (w->buf[3] & 0x3f);
739 zmatsuo 10766 PutU32(code);
740 zmatsuo 10767 w->count = 0;
741 zmatsuo 10755 return TRUE;
742     }
743    
744     static BOOL ParseFirstRus(BYTE b)
745     // returns if b is processed
746     {
747 zmatsuo 10770 if (IsC0(b)) {
748     ParseControl(b);
749     return TRUE;
750     }
751 zmatsuo 10756 // CP1251������
752     BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
753     // CP1251->Unicode
754     unsigned long u32 = MBCP_UTF32(c, 1251);
755     PutU32(u32);
756     return TRUE;
757 zmatsuo 10755 }
758    
759     static BOOL ParseEnglish(BYTE b)
760     {
761     unsigned short u16 = 0;
762     int part = KanjiCodeToISO8859Part(ts.KanjiCode);
763     int r = UnicodeFromISO8859(part, b, &u16);
764     if (r == 0) {
765     return FALSE;
766     }
767     if (u16 < 0x100) {
768     ParseASCII((BYTE)u16);
769     }
770     else {
771     PutU32(u16);
772     }
773     return TRUE;
774     }
775    
776 zmatsuo 10771 static void PutDebugChar(BYTE b)
777     {
778     int i;
779     BOOL svInsertMode, svAutoWrapMode;
780     TCharAttr svCharAttr;
781     TCharAttr char_attr;
782    
783     svInsertMode = TermGetInsertMode();
784     TermSetInsertMode(FALSE);
785     svAutoWrapMode = TermGetAutoWrapMode();
786     TermSetAutoWrapMode(TRUE);
787    
788     TermGetAttr(&svCharAttr);
789     char_attr = svCharAttr;
790     char_attr.Attr = AttrDefault;
791     TermSetAttr(&char_attr);
792    
793     if (DebugFlag==DEBUG_FLAG_HEXD) {
794     char buff[3];
795     _snprintf(buff, 3, "%02X", (unsigned int) b);
796    
797     for (i=0; i<2; i++)
798     PutChar(buff[i]);
799     PutChar(' ');
800     }
801     else if (DebugFlag==DEBUG_FLAG_NORM) {
802    
803     if ((b & 0x80) == 0x80) {
804     //UpdateStr();
805     char_attr.Attr = AttrReverse;
806     TermSetAttr(&char_attr);
807     b = b & 0x7f;
808     }
809    
810     if (b<=US) {
811     PutChar('^');
812     PutChar((char)(b+0x40));
813     }
814     else if (b==DEL) {
815     PutChar('<');
816     PutChar('D');
817     PutChar('E');
818     PutChar('L');
819     PutChar('>');
820     }
821     else
822     PutChar(b);
823     }
824    
825     TermSetAttr(&char_attr);
826     TermSetInsertMode(svInsertMode);
827     TermSetAutoWrapMode(svAutoWrapMode);
828     }
829    
830     void ParseFirst(BYTE b)
831     {
832     WORD language = ts.Language;
833     if (DebugFlag != DEBUG_FLAG_NONE) {
834     language = IdDebug;
835     }
836    
837     switch (language) {
838     case IdUtf8:
839     ParseFirstUTF8(b);
840 zmatsuo 10755 return;
841    
842 zmatsuo 10771 case IdJapanese:
843 zmatsuo 10755 switch (ts.KanjiCode) {
844 zmatsuo 10771 case IdUTF8:
845     if (ParseFirstUTF8(b)) {
846 zmatsuo 10755 return;
847     }
848     break;
849 zmatsuo 10771 default:
850 zmatsuo 10755 if (ParseFirstJP(b)) {
851     return;
852     }
853     }
854     break;
855    
856 zmatsuo 10771 case IdKorean:
857 zmatsuo 10755 switch (ts.KanjiCode) {
858 zmatsuo 10771 case IdUTF8:
859 zmatsuo 10755 if (ParseFirstUTF8(b)) {
860     return;
861     }
862     break;
863 zmatsuo 10771 default:
864 zmatsuo 10755 if (ParseFirstKR(b)) {
865     return;
866     }
867     }
868     break;
869    
870 zmatsuo 10771 case IdRussian:
871 zmatsuo 10755 if (ParseFirstRus(b)) {
872     return;
873     }
874     break;
875    
876     case IdChinese:
877     switch (ts.KanjiCode) {
878     case IdUTF8:
879     if (ParseFirstUTF8(b)) {
880     return;
881     }
882     break;
883     default:
884     if (ParseFirstCn(b)) {
885     return;
886     }
887     }
888     break;
889     case IdEnglish: {
890     if (ParseEnglish(b)) {
891     return;
892     }
893     break;
894     }
895 zmatsuo 10771 case IdDebug: {
896     PutDebugChar(b);
897     return;
898 zmatsuo 10755 }
899 zmatsuo 10771 }
900 zmatsuo 10755
901     if (SSflag) {
902     PutChar(b);
903     SSflag = FALSE;
904     return;
905     }
906    
907     if (b<=US)
908     ParseControl(b);
909     else if ((b>=0x20) && (b<=0x7E))
910     PutChar(b);
911     else if ((b>=0x80) && (b<=0x9F))
912     ParseControl(b);
913     else if (b>=0xA0)
914     PutChar(b);
915     }
916    
917     /**
918     * �w��(Designate)
919     *
920     * @param Gn 0/1/2/3 = G0/G1/G2/G3
921     * @param codeset IdASCII 0
922     * IdKatakana 1
923     * IdKanji 2
924     * IdSpecial 3
925     */
926     void CharSet2022Designate(int gn, int cs)
927     {
928     VttermKanjiWork *w = &KanjiWork;
929     w->Gn[gn] = cs;
930     }
931    
932     /**
933     * �����o��(Invoke)
934 zmatsuo 10776 * @param shift
935 zmatsuo 10755 */
936 zmatsuo 10776 void CharSet2022Invoke(CharSet2022Shift shift)
937 zmatsuo 10755 {
938     VttermKanjiWork *w = &KanjiWork;
939 zmatsuo 10776 switch (shift) {
940     case CHARSET_LS0:
941     // Locking Shift 0 (G0->GL)
942     w->Glr[0] = 0;
943     break;
944     case CHARSET_LS1:
945     // Locking Shift 1 (G1->GL)
946     w->Glr[0] = 1;
947     break;
948     case CHARSET_LS2:
949     // Locking Shift 2 (G2->GL)
950     w->Glr[0] = 2;
951     break;
952     case CHARSET_LS3:
953     // Locking Shift 3 (G3->GL)
954     w->Glr[0] = 3;
955     break;
956     case CHARSET_LS1R:
957     // Locking Shift 1 (G1->GR)
958     w->Glr[1] = 1;
959     break;
960     case CHARSET_LS2R:
961     // Locking Shift 2 (G2->GR)
962     w->Glr[1] = 2;
963     break;
964     case CHARSET_LS3R:
965     // Locking Shift 3 (G3->GR)
966     w->Glr[1] = 3;
967     break;
968     case CHARSET_SS2:
969     // Single Shift 2
970     GLtmp = 2;
971 zmatsuo 10755 SSflag = TRUE;
972 zmatsuo 10776 break;
973     case CHARSET_SS3:
974     // Single Shift 3
975     GLtmp = 3;
976     SSflag = TRUE;
977     break;
978     default:
979     assert(FALSE);
980     break;
981 zmatsuo 10755 }
982     }
983    
984     /**
985     * DEC�����t�H���g(Tera Special font)
986     * 0140(0x60) ... 0176(0x7f) ���r�����A�T�C������������
987 zmatsuo 10760 * (0xe0) ... (0xff) ��?
988 zmatsuo 10755 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
989     * about/emulations.html
990     *
991     * @param b �R�[�h
992 zmatsuo 10760 * @retval TRUE IdSpecial
993     * @retval FALSE IdSpecial��������
994 zmatsuo 10755 */
995     BOOL CharSetIsSpecial(BYTE b)
996     {
997     VttermKanjiWork *w = &KanjiWork;
998     BOOL SpecialNew = FALSE;
999    
1000     if ((b>0x5F) && (b<0x80)) {
1001     if (SSflag)
1002     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
1003     else
1004     SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial);
1005     }
1006     else if (b>0xDF) {
1007     if (SSflag)
1008     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
1009     else
1010     SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial);
1011     }
1012    
1013     return SpecialNew;
1014     }
1015    
1016     static void CharSetSaveStateLow(CharSetState *state, const VttermKanjiWork *w)
1017     {
1018     int i;
1019     state->infos[0] = w->Glr[0];
1020     state->infos[1] = w->Glr[1];
1021     for (i=0 ; i<=3; i++) {
1022     state->infos[2 + i] = w->Gn[i];
1023     }
1024     }
1025    
1026     /**
1027     * ��������������
1028     */
1029     void CharSetSaveState(CharSetState *state)
1030     {
1031     VttermKanjiWork *w = &KanjiWork;
1032     CharSetSaveStateLow(state, w);
1033     }
1034    
1035     /**
1036     * ���������A����
1037     */
1038     void CharSetLoadState(const CharSetState *state)
1039     {
1040     VttermKanjiWork *w = &KanjiWork;
1041     int i;
1042     w->Glr[0] = state->infos[0];
1043     w->Glr[1] = state->infos[1];
1044     for (i=0 ; i<=3; i++) {
1045     w->Gn[i] = state->infos[2 + i];
1046     }
1047     }
1048 zmatsuo 10763
1049     /**
1050     * �t�H�[���o�b�N���I��
1051     * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
1052     *
1053     */
1054     void CharSetFallbackFinish(void)
1055     {
1056     Fallbacked = FALSE;
1057     }
1058 zmatsuo 10773
1059     /**
1060     * �f�o�O�o�����������[�h�����X����
1061     */
1062     void CharSetSetNextDebugMode(void)
1063     {
1064     // ts.DebugModes ���� tttypes.h �� DBGF_* �� OR ����������
1065     do {
1066     DebugFlag = (DebugFlag + 1) % DEBUG_FLAG_MAXD;
1067     } while (DebugFlag != DEBUG_FLAG_NONE && !((ts.DebugModes >> (DebugFlag - 1)) & 1));
1068     }
1069    
1070     BYTE CharSetGetDebugMode(void)
1071     {
1072     return DebugFlag;
1073     }
1074    
1075     void CharSetSetDebugMode(BYTE mode)
1076     {
1077     DebugFlag = mode;
1078     }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26