Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /trunk/teraterm/teraterm/charset.cpp

Parent Directory | Revision Log


Revision 10781 - (hide annotations) (download) (as text)
Tue Jun 27 13:28:58 2023 UTC (9 months, 2 weeks ago) by zmatsuo
File MIME type: text/x-c++src
File size: 21994 byte(s)
改行を一つ多く出力してしまうことがあるので修正

- 次の時改行を一つ多く出力してしまうことがある
  - ShiftJISなどの2byte文字受信時(UTF-8ではない)
  - 行末まで文字が出力されていて、カーソルが行末にある状態の時
    - Wrap状態
  - 2バイト文字の1バイト目を受信した後
  - CRを受信したとき
- 改行を出力しないよう修正

ticket #48285
1 zmatsuo 10755 /*
2     * (C) 2023- TeraTerm Project
3     * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include "teraterm.h"
30     #include "tttypes.h"
31     #include <stdio.h>
32     #include <string.h>
33     #if !defined(_CRTDBG_MAP_ALLOC)
34     #define _CRTDBG_MAP_ALLOC
35     #endif
36     #include <stdlib.h>
37     #include <crtdbg.h>
38     #include <assert.h>
39    
40     #include "ttwinman.h"
41     #include "codeconv.h"
42     #include "unicode.h"
43     #include "language.h" // for JIS2SJIS()
44 zmatsuo 10763 #include "ttcstd.h"
45 zmatsuo 10773 #include "vtterm.h"
46 zmatsuo 10755
47     #include "charset.h"
48    
49 zmatsuo 10763 // UTF-8���s�����l�����������\����������
50     #define REPLACEMENT_CHARACTER '?'
51     //#define REPLACEMENT_CHARACTER 0x2592
52     //#define REPLACEMENT_CHARACTER 0x20
53     //#define REPLACEMENT_CHARACTER 0xfffd
54    
55 zmatsuo 10755 static BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
56     static BOOL EUCkanaIn, EUCsupIn;
57     static int EUCcount;
58    
59     /* GL for single shift 2/3 */
60     static int GLtmp;
61     /* single shift 2/3 flag */
62     static BOOL SSflag;
63     /* JIS -> SJIS conversion flag */
64     static BOOL ConvJIS;
65     static WORD Kanji;
66 zmatsuo 10763 static BOOL Fallbacked;
67 zmatsuo 10755
68 zmatsuo 10773 static BYTE DebugFlag = DEBUG_FLAG_NONE;
69    
70 zmatsuo 10755 typedef struct {
71     /* GL, GR code group */
72     int Glr[2];
73     /* G0, G1, G2, G3 code group */
74     int Gn[4];
75 zmatsuo 10763 //
76     char32_t replacement_char;
77 zmatsuo 10767 // UTF-8 work
78     BYTE buf[4];
79     int count;
80 zmatsuo 10755 } VttermKanjiWork;
81    
82     static VttermKanjiWork KanjiWork;
83    
84 zmatsuo 10770 static BOOL IsC0(char32_t b)
85     {
86     return (b <= US);
87     }
88    
89     static BOOL IsC1(char32_t b)
90     {
91     return ((b>=0x80) && (b<=0x9F));
92     }
93    
94     /**
95     * PutU32() wrapper
96     * Unicode�x�[�X����������
97     */
98 zmatsuo 10760 static void PutChar(BYTE b)
99     {
100     PutU32(b);
101     }
102 zmatsuo 10755
103     /**
104     * ISO2022�p���[�N������������
105     */
106     static void CharSetInit2(VttermKanjiWork *w)
107     {
108     if (ts.Language==IdJapanese) {
109     w->Gn[0] = IdASCII;
110     w->Gn[1] = IdKatakana;
111     w->Gn[2] = IdKatakana;
112     w->Gn[3] = IdKanji;
113     w->Glr[0] = 0;
114     if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
115     w->Glr[1] = 2; // 8-bit katakana
116     else
117     w->Glr[1] = 3;
118     }
119     else {
120     w->Gn[0] = IdASCII;
121     w->Gn[1] = IdSpecial;
122     w->Gn[2] = IdASCII;
123     w->Gn[3] = IdASCII;
124     w->Glr[0] = 0;
125     w->Glr[1] = 0;
126     }
127     }
128    
129     /**
130     * �������A���[�N������������
131     */
132     void CharSetInit(void)
133     {
134 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
135    
136     CharSetInit2(w);
137    
138     w->replacement_char = REPLACEMENT_CHARACTER;
139 zmatsuo 10755 SSflag = FALSE;
140    
141     KanjiIn = FALSE;
142     EUCkanaIn = FALSE;
143     EUCsupIn = FALSE;
144     ConvJIS = FALSE;
145     Fallbacked = FALSE;
146     }
147    
148     /**
149     * 1byte���`�F�b�N
150     */
151     static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
152     {
153     switch (lang) {
154     case IdKorean:
155 zmatsuo 10779 return __ismbblead(b, 949);
156 zmatsuo 10755 case IdChinese:
157     if (kanji_code == IdCnGB2312) {
158     return __ismbblead(b, 936);
159     }
160     else if (ts.KanjiCode == IdCnBig5) {
161     return __ismbblead(b, 950);
162     }
163     break;
164     default:
165     assert(FALSE);
166     break;
167     }
168     assert(FALSE);
169     return FALSE;
170     }
171 zmatsuo 10763
172 zmatsuo 10755 /**
173 zmatsuo 10763 * Double-byte Character Sets
174     * SJIS��1byte��?
175     *
176     * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
177     * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
178     */
179     static BOOL ismbbleadSJIS(BYTE b)
180     {
181     if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
182     return TRUE;
183     }
184     return FALSE;
185     }
186    
187     /**
188 zmatsuo 10755 * ts.Language == IdJapanese ��
189     * 1byte���`�F�b�N
190     */
191     static BOOL CheckKanji(BYTE b)
192     {
193     VttermKanjiWork *w = &KanjiWork;
194     BOOL Check;
195    
196     if (ts.Language!=IdJapanese)
197     return FALSE;
198    
199     ConvJIS = FALSE;
200    
201     if (ts.KanjiCode==IdSJIS ||
202     (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) {
203 zmatsuo 10759 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
204 zmatsuo 10755 Fallbacked = TRUE;
205     return TRUE; // SJIS kanji
206     }
207     if ((0xa1<=b) && (b<=0xdf)) {
208     return FALSE; // SJIS katakana
209     }
210     }
211    
212     if ((b>=0x21) && (b<=0x7e)) {
213     Check = (w->Gn[w->Glr[0]] == IdKanji);
214     ConvJIS = Check;
215     }
216     else if ((b>=0xA1) && (b<=0xFE)) {
217     Check = (w->Gn[w->Glr[1]] == IdKanji);
218     if (ts.KanjiCode==IdEUC) {
219     Check = TRUE;
220     }
221     else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
222     Check = FALSE; // 8-bit katakana
223     }
224     ConvJIS = Check;
225     }
226     else {
227     Check = FALSE;
228     }
229    
230     return Check;
231     }
232    
233     static BOOL ParseFirstJP(BYTE b)
234     // returns TRUE if b is processed
235     // (actually allways returns TRUE)
236     {
237     VttermKanjiWork *w = &KanjiWork;
238     if (KanjiIn) {
239 zmatsuo 10759 if (((! ConvJIS) && (0x3F<b) && (b<0xFD)) ||
240     (ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
241     ((0xa0<b) && (b<0xff)) )) )
242 zmatsuo 10755 {
243 zmatsuo 10758 unsigned long u32;
244     Kanji = Kanji + b;
245     if (ConvJIS) {
246     // JIS -> Shift_JIS(CP932)
247     Kanji = JIS2SJIS((WORD)(Kanji & 0x7f7f));
248     }
249     u32 = CP932ToUTF32(Kanji);
250     PutU32(u32);
251 zmatsuo 10755 KanjiIn = FALSE;
252     return TRUE;
253     }
254     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
255     KanjiIn = FALSE;
256     }
257     }
258    
259     if (SSflag) {
260     if (w->Gn[GLtmp] == IdKanji) {
261     Kanji = b << 8;
262     KanjiIn = TRUE;
263     SSflag = FALSE;
264     return TRUE;
265     }
266     else if (w->Gn[GLtmp] == IdKatakana) {
267     b = b | 0x80;
268     }
269    
270     PutChar(b);
271     SSflag = FALSE;
272     return TRUE;
273     }
274    
275     if ((!EUCsupIn) && (!EUCkanaIn) && (!KanjiIn) && CheckKanji(b)) {
276     Kanji = b << 8;
277     KanjiIn = TRUE;
278     return TRUE;
279     }
280    
281     if (b<=US) {
282     ParseControl(b);
283     }
284     else if (b==0x20) {
285     PutChar(b);
286     }
287     else if ((b>=0x21) && (b<=0x7E)) {
288     if (EUCsupIn) {
289     EUCcount--;
290     EUCsupIn = (EUCcount==0);
291     return TRUE;
292     }
293    
294     if ((w->Gn[w->Glr[0]] == IdKatakana) || EUCkanaIn) {
295     b = b | 0x80;
296     EUCkanaIn = FALSE;
297     {
298     // b��sjis�����p�J�^�J�i
299     unsigned long u32 = CP932ToUTF32(b);
300     PutU32(u32);
301     }
302     return TRUE;
303     }
304     PutChar(b);
305     }
306     else if (b==0x7f) {
307     return TRUE;
308     }
309     else if ((b>=0x80) && (b<=0x8D)) {
310     ParseControl(b);
311     }
312     else if (b==0x8E) { // SS2
313     switch (ts.KanjiCode) {
314     case IdEUC:
315     if (ts.ISO2022Flag & ISO2022_SS2) {
316     EUCkanaIn = TRUE;
317     }
318     break;
319     case IdUTF8:
320 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
321 zmatsuo 10755 break;
322     default:
323     ParseControl(b);
324     }
325     }
326     else if (b==0x8F) { // SS3
327     switch (ts.KanjiCode) {
328     case IdEUC:
329     if (ts.ISO2022Flag & ISO2022_SS3) {
330     EUCcount = 2;
331     EUCsupIn = TRUE;
332     }
333     break;
334     case IdUTF8:
335 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
336 zmatsuo 10755 break;
337     default:
338     ParseControl(b);
339     }
340     }
341     else if ((b>=0x90) && (b<=0x9F)) {
342     ParseControl(b);
343     }
344     else if (b==0xA0) {
345     PutChar(0x20);
346     }
347     else if ((b>=0xA1) && (b<=0xFE)) {
348     if (EUCsupIn) {
349     EUCcount--;
350     EUCsupIn = (EUCcount==0);
351     return TRUE;
352     }
353    
354     if ((w->Gn[w->Glr[1]] != IdASCII) ||
355 zmatsuo 10759 ((ts.KanjiCode==IdEUC) && EUCkanaIn) ||
356 zmatsuo 10755 (ts.KanjiCode==IdSJIS) ||
357 zmatsuo 10759 ((ts.KanjiCode==IdJIS) &&
358     (ts.JIS7Katakana==0) &&
359     ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
360 zmatsuo 10755 // b��sjis�����p�J�^�J�i
361     unsigned long u32 = CP932ToUTF32(b);
362     PutU32(u32);
363     } else {
364     if (w->Gn[w->Glr[1]] == IdASCII) {
365     b = b & 0x7f;
366     }
367     PutChar(b);
368     }
369     EUCkanaIn = FALSE;
370     }
371     else {
372     PutChar(b);
373     }
374    
375     return TRUE;
376     }
377    
378     static BOOL ParseFirstKR(BYTE b)
379     // returns TRUE if b is processed
380     // (actually allways returns TRUE)
381     {
382     VttermKanjiWork *w = &KanjiWork;
383     if (KanjiIn) {
384 zmatsuo 10759 if (((0x41<=b) && (b<=0x5A)) ||
385     ((0x61<=b) && (b<=0x7A)) ||
386     ((0x81<=b) && (b<=0xFE)))
387 zmatsuo 10755 {
388 zmatsuo 10758 unsigned long u32 = 0;
389 zmatsuo 10768 if (ts.KanjiCode == IdKoreanCP949) {
390 zmatsuo 10779 // CP949
391 zmatsuo 10758 Kanji = Kanji + b;
392 zmatsuo 10779 u32 = MBCP_UTF32(Kanji, 949);
393 zmatsuo 10758 }
394     else {
395     assert(FALSE);
396     }
397     PutU32(u32);
398 zmatsuo 10755 KanjiIn = FALSE;
399     return TRUE;
400     }
401     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
402     KanjiIn = FALSE;
403     }
404     }
405    
406     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
407     Kanji = b << 8;
408     KanjiIn = TRUE;
409     return TRUE;
410     }
411    
412     if (b<=US) {
413     ParseControl(b);
414     }
415     else if (b==0x20) {
416     PutChar(b);
417     }
418     else if ((b>=0x21) && (b<=0x7E)) {
419     // if (Gn[Glr[0]] == IdKatakana) {
420     // b = b | 0x80;
421     // }
422     PutChar(b);
423     }
424     else if (b==0x7f) {
425     return TRUE;
426     }
427     else if ((0x80<=b) && (b<=0x9F)) {
428     ParseControl(b);
429     }
430     else if (b==0xA0) {
431     PutChar(0x20);
432     }
433     else if ((b>=0xA1) && (b<=0xFE)) {
434     if (w->Gn[w->Glr[1]] == IdASCII) {
435     b = b & 0x7f;
436     }
437     PutChar(b);
438     }
439     else {
440     PutChar(b);
441     }
442    
443     return TRUE;
444     }
445    
446     static BOOL ParseFirstCn(BYTE b)
447     // returns TRUE if b is processed
448     // (actually allways returns TRUE)
449     {
450     VttermKanjiWork *w = &KanjiWork;
451     if (KanjiIn) {
452     // TODO
453 zmatsuo 10759 if (((0x40<=b) && (b<=0x7e)) ||
454     ((0xa1<=b) && (b<=0xFE)))
455 zmatsuo 10755 {
456 zmatsuo 10758 unsigned long u32 = 0;
457     Kanji = Kanji + b;
458     if (ts.KanjiCode == IdCnGB2312) {
459     // CP936 GB2312
460     u32 = MBCP_UTF32(Kanji, 936);
461     }
462     else if (ts.KanjiCode == IdCnBig5) {
463     // CP950 Big5
464     u32 = MBCP_UTF32(Kanji, 950);
465     }
466     else {
467     assert(FALSE);
468     }
469     PutU32(u32);
470 zmatsuo 10755 KanjiIn = FALSE;
471     return TRUE;
472     }
473     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
474     KanjiIn = FALSE;
475     }
476     }
477    
478     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
479     Kanji = b << 8;
480     KanjiIn = TRUE;
481     return TRUE;
482     }
483    
484     if (b<=US) {
485     ParseControl(b);
486     }
487     else if (b==0x20) {
488     PutChar(b);
489     }
490     else if ((b>=0x21) && (b<=0x7E)) {
491     // if (Gn[Glr[0]] == IdKatakana) {
492     // b = b | 0x80;
493     // }
494     PutChar(b);
495     }
496     else if (b==0x7f) {
497     return TRUE;
498     }
499     else if ((0x80<=b) && (b<=0x9F)) {
500     ParseControl(b);
501     }
502     else if (b==0xA0) {
503     PutChar(0x20);
504     }
505     else if ((b>=0xA1) && (b<=0xFE)) {
506     if (w->Gn[w->Glr[1]] == IdASCII) {
507     b = b & 0x7f;
508     }
509     PutChar(b);
510     }
511     else {
512     PutChar(b);
513     }
514    
515     return TRUE;
516     }
517    
518     static void ParseASCII(BYTE b)
519     {
520     if (SSflag) {
521     PutChar(b);
522     SSflag = FALSE;
523     return;
524     }
525    
526     if (b<=US) {
527     ParseControl(b);
528     } else if ((b>=0x20) && (b<=0x7E)) {
529 zmatsuo 10760 PutU32(b);
530 zmatsuo 10755 } else if ((b==0x8E) || (b==0x8F)) {
531 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
532 zmatsuo 10755 } else if ((b>=0x80) && (b<=0x9F)) {
533     ParseControl(b);
534     } else if (b>=0xA0) {
535 zmatsuo 10760 PutU32(b);
536 zmatsuo 10755 }
537     }
538    
539 zmatsuo 10770 /**
540     * REPLACEMENT_CHARACTER ���\��
541     * UTF-8 �f�R�[�h�����g�p
542     */
543 zmatsuo 10764 static void PutReplacementChr(VttermKanjiWork *w, const BYTE *ptr, size_t len, BOOL fallback)
544 zmatsuo 10763 {
545     const char32_t replacement_char = w->replacement_char;
546     int i;
547     for (i = 0; i < len; i++) {
548     BYTE c = *ptr++;
549 zmatsuo 10770 assert(IsC0(c));
550 zmatsuo 10764 if (fallback) {
551     // fallback ISO8859-1
552     PutU32(c);
553 zmatsuo 10763 }
554     else {
555 zmatsuo 10764 // fallback������
556     if (c < 0x80) {
557     // �s����UTF-8��������������0x80�������������A
558     // 1������UTF-8�������������������\������
559 zmatsuo 10770 PutU32(c);
560 zmatsuo 10764 }
561     else {
562     PutU32(replacement_char);
563     }
564 zmatsuo 10763 }
565     }
566     }
567    
568 zmatsuo 10770 /**
569     * UTF-8�����M�f�[�^����������
570     *
571     * returns TRUE if b is processed
572     */
573 zmatsuo 10755 static BOOL ParseFirstUTF8(BYTE b)
574     {
575 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
576 zmatsuo 10766 char32_t code;
577 zmatsuo 10755
578 zmatsuo 10763 if (Fallbacked) {
579     BOOL r = ParseFirstJP(b);
580     Fallbacked = FALSE;
581     return r;
582 zmatsuo 10755 }
583    
584     // UTF-8�G���R�[�h
585 zmatsuo 10766 // The Unicode Standard Chapter 3
586     // Table 3-7. Well-Formed UTF-8 Byte Sequences
587 zmatsuo 10777 // | Code Points | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
588     // | U+0000..U+007F | 00..7F | | | |
589     // | U+0080..U+07FF | C2..DF | 80..BF | | |
590     // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
591     // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
592     // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
593     // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
594     // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
595     // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
596     // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
597 zmatsuo 10755 // - 1byte��
598 zmatsuo 10766 // - 0x00 - 0x7f ok
599     // - 0x80 - 0xc1 ng
600     // - 0xc2 - 0xf4 ok
601     // - 0xf5 - 0xff ng
602 zmatsuo 10755 // - 2byte�����~
603 zmatsuo 10766 // - 0x00 - 0x7f ng
604     // - 0x80 - 0xbf ok
605     // - 0xc0 - 0xff ng
606     // - 2byte�����O
607     // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
608     // - 1byte == 0xed ������ 0x80 - 0x9f����ok
609     // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
610     // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
611 zmatsuo 10763 recheck:
612 zmatsuo 10755 // 1byte(7bit)
613 zmatsuo 10767 if (w->count == 0) {
614 zmatsuo 10770 if (IsC0(b)) {
615     // U+0000 .. U+001f
616     // C0��������, C0 Coontrols
617     ParseControl(b);
618 zmatsuo 10755 return TRUE;
619     }
620 zmatsuo 10770 else if (b <= 0x7f) {
621     // 0x7f����, �������A���������o��
622     PutU32(b);
623     return TRUE;
624     }
625     else if (0xc2 <= b && b <= 0xf4) {
626 zmatsuo 10766 // 1byte������
627 zmatsuo 10767 w->buf[w->count++] = b;
628 zmatsuo 10755 return TRUE;
629     }
630    
631 zmatsuo 10770 // 0x80 - 0xc1, 0xf5 - 0xff
632 zmatsuo 10766 // UTF-8��1byte���o���������R�[�h������
633     if (ts.FallbackToCP932) {
634     // fallback��������
635     if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
636     // ���{�������� && Shift_JIS 1byte��
637     // Shift_JIS �� fallback
638     Fallbacked = TRUE;
639     ConvJIS = FALSE;
640     Kanji = b << 8;
641     KanjiIn = TRUE;
642     return TRUE;
643 zmatsuo 10755 }
644 zmatsuo 10766 // fallback ISO8859-1
645     PutU32(b);
646     return TRUE;
647 zmatsuo 10755 }
648     else {
649 zmatsuo 10766 // fallback������, �s������������
650 zmatsuo 10767 w->buf[0] = b;
651     PutReplacementChr(w, w->buf, 1, FALSE);
652 zmatsuo 10755 }
653 zmatsuo 10766 return TRUE;
654 zmatsuo 10755 }
655    
656 zmatsuo 10764 // 2byte���~����?
657 zmatsuo 10766 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
658     // �s��������, (����2bit�� 0b10xx_xxxx ��������)
659 zmatsuo 10767 PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932);
660     w->count = 0;
661 zmatsuo 10764 goto recheck;
662     }
663    
664 zmatsuo 10755 // 2byte�����~����
665 zmatsuo 10767 w->buf[w->count++] = b;
666 zmatsuo 10755
667 zmatsuo 10766 // 2byte(11bit)
668 zmatsuo 10767 if (w->count == 2) {
669     if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf)
670 zmatsuo 10766 // 5bit + 6bit
671 zmatsuo 10767 code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f);
672 zmatsuo 10770 if (IsC1(code)) {
673     // U+0080 .. u+009f
674     // C1��������, C1 Controls
675     ParseControl((BYTE)code);
676     }
677     else {
678     PutU32(code);
679     }
680 zmatsuo 10767 w->count = 0;
681 zmatsuo 10755 return TRUE;
682     }
683 zmatsuo 10766 return TRUE;
684     }
685    
686     // 3byte(16bit)
687 zmatsuo 10767 if (w->count == 3) {
688     if ((w->buf[0] & 0xf0) == 0xe0) {
689     if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) ||
690     (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) {
691 zmatsuo 10766 // �s���� UTF-8
692 zmatsuo 10767 PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932);
693     w->count = 0;
694 zmatsuo 10766 goto recheck;
695     }
696 zmatsuo 10755 // 4bit + 6bit + 6bit
697 zmatsuo 10767 code = ((w->buf[0] & 0xf) << 12);
698     code |= ((w->buf[1] & 0x3f) << 6);
699     code |= ((w->buf[2] & 0x3f));
700 zmatsuo 10755 PutU32(code);
701 zmatsuo 10767 w->count = 0;
702 zmatsuo 10755 return TRUE;
703     }
704 zmatsuo 10766 return TRUE;
705 zmatsuo 10755 }
706    
707     // 4byte(21bit)
708 zmatsuo 10767 assert(w->count == 4);
709     assert((w->buf[0] & 0xf8) == 0xf0);
710     if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) ||
711     (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) {
712 zmatsuo 10766 // �s���� UTF-8
713 zmatsuo 10767 PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932);
714     w->count = 0;
715 zmatsuo 10766 goto recheck;
716 zmatsuo 10755 }
717 zmatsuo 10766 // 3bit + 6bit + 6bit + 6bit
718 zmatsuo 10767 code = ((w->buf[0] & 0x07) << 18);
719     code |= ((w->buf[1] & 0x3f) << 12);
720     code |= ((w->buf[2] & 0x3f) << 6);
721     code |= (w->buf[3] & 0x3f);
722 zmatsuo 10766 PutU32(code);
723 zmatsuo 10767 w->count = 0;
724 zmatsuo 10755 return TRUE;
725     }
726    
727     static BOOL ParseFirstRus(BYTE b)
728     // returns if b is processed
729     {
730 zmatsuo 10770 if (IsC0(b)) {
731     ParseControl(b);
732     return TRUE;
733     }
734 zmatsuo 10756 // CP1251������
735     BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
736     // CP1251->Unicode
737     unsigned long u32 = MBCP_UTF32(c, 1251);
738     PutU32(u32);
739     return TRUE;
740 zmatsuo 10755 }
741    
742     static BOOL ParseEnglish(BYTE b)
743     {
744     unsigned short u16 = 0;
745     int part = KanjiCodeToISO8859Part(ts.KanjiCode);
746     int r = UnicodeFromISO8859(part, b, &u16);
747     if (r == 0) {
748     return FALSE;
749     }
750     if (u16 < 0x100) {
751     ParseASCII((BYTE)u16);
752     }
753     else {
754     PutU32(u16);
755     }
756     return TRUE;
757     }
758    
759 zmatsuo 10771 static void PutDebugChar(BYTE b)
760     {
761     int i;
762     BOOL svInsertMode, svAutoWrapMode;
763     TCharAttr svCharAttr;
764     TCharAttr char_attr;
765    
766     svInsertMode = TermGetInsertMode();
767     TermSetInsertMode(FALSE);
768     svAutoWrapMode = TermGetAutoWrapMode();
769     TermSetAutoWrapMode(TRUE);
770    
771     TermGetAttr(&svCharAttr);
772     char_attr = svCharAttr;
773     char_attr.Attr = AttrDefault;
774     TermSetAttr(&char_attr);
775    
776     if (DebugFlag==DEBUG_FLAG_HEXD) {
777     char buff[3];
778     _snprintf(buff, 3, "%02X", (unsigned int) b);
779    
780     for (i=0; i<2; i++)
781     PutChar(buff[i]);
782     PutChar(' ');
783     }
784     else if (DebugFlag==DEBUG_FLAG_NORM) {
785    
786     if ((b & 0x80) == 0x80) {
787     //UpdateStr();
788     char_attr.Attr = AttrReverse;
789     TermSetAttr(&char_attr);
790     b = b & 0x7f;
791     }
792    
793     if (b<=US) {
794     PutChar('^');
795     PutChar((char)(b+0x40));
796     }
797     else if (b==DEL) {
798     PutChar('<');
799     PutChar('D');
800     PutChar('E');
801     PutChar('L');
802     PutChar('>');
803     }
804     else
805     PutChar(b);
806     }
807    
808     TermSetAttr(&char_attr);
809     TermSetInsertMode(svInsertMode);
810     TermSetAutoWrapMode(svAutoWrapMode);
811     }
812    
813     void ParseFirst(BYTE b)
814     {
815     WORD language = ts.Language;
816     if (DebugFlag != DEBUG_FLAG_NONE) {
817     language = IdDebug;
818     }
819    
820     switch (language) {
821     case IdUtf8:
822     ParseFirstUTF8(b);
823 zmatsuo 10755 return;
824    
825 zmatsuo 10771 case IdJapanese:
826 zmatsuo 10755 switch (ts.KanjiCode) {
827 zmatsuo 10771 case IdUTF8:
828     if (ParseFirstUTF8(b)) {
829 zmatsuo 10755 return;
830     }
831     break;
832 zmatsuo 10771 default:
833 zmatsuo 10755 if (ParseFirstJP(b)) {
834     return;
835     }
836     }
837     break;
838    
839 zmatsuo 10771 case IdKorean:
840 zmatsuo 10755 switch (ts.KanjiCode) {
841 zmatsuo 10771 case IdUTF8:
842 zmatsuo 10755 if (ParseFirstUTF8(b)) {
843     return;
844     }
845     break;
846 zmatsuo 10771 default:
847 zmatsuo 10755 if (ParseFirstKR(b)) {
848     return;
849     }
850     }
851     break;
852    
853 zmatsuo 10771 case IdRussian:
854 zmatsuo 10755 if (ParseFirstRus(b)) {
855     return;
856     }
857     break;
858    
859     case IdChinese:
860     switch (ts.KanjiCode) {
861     case IdUTF8:
862     if (ParseFirstUTF8(b)) {
863     return;
864     }
865     break;
866     default:
867     if (ParseFirstCn(b)) {
868     return;
869     }
870     }
871     break;
872     case IdEnglish: {
873     if (ParseEnglish(b)) {
874     return;
875     }
876     break;
877     }
878 zmatsuo 10771 case IdDebug: {
879     PutDebugChar(b);
880     return;
881 zmatsuo 10755 }
882 zmatsuo 10771 }
883 zmatsuo 10755
884     if (SSflag) {
885     PutChar(b);
886     SSflag = FALSE;
887     return;
888     }
889    
890     if (b<=US)
891     ParseControl(b);
892     else if ((b>=0x20) && (b<=0x7E))
893     PutChar(b);
894     else if ((b>=0x80) && (b<=0x9F))
895     ParseControl(b);
896     else if (b>=0xA0)
897     PutChar(b);
898     }
899    
900     /**
901     * �w��(Designate)
902     *
903     * @param Gn 0/1/2/3 = G0/G1/G2/G3
904     * @param codeset IdASCII 0
905     * IdKatakana 1
906     * IdKanji 2
907     * IdSpecial 3
908     */
909     void CharSet2022Designate(int gn, int cs)
910     {
911     VttermKanjiWork *w = &KanjiWork;
912     w->Gn[gn] = cs;
913     }
914    
915     /**
916     * �����o��(Invoke)
917 zmatsuo 10776 * @param shift
918 zmatsuo 10755 */
919 zmatsuo 10776 void CharSet2022Invoke(CharSet2022Shift shift)
920 zmatsuo 10755 {
921     VttermKanjiWork *w = &KanjiWork;
922 zmatsuo 10776 switch (shift) {
923     case CHARSET_LS0:
924     // Locking Shift 0 (G0->GL)
925     w->Glr[0] = 0;
926     break;
927     case CHARSET_LS1:
928     // Locking Shift 1 (G1->GL)
929     w->Glr[0] = 1;
930     break;
931     case CHARSET_LS2:
932     // Locking Shift 2 (G2->GL)
933     w->Glr[0] = 2;
934     break;
935     case CHARSET_LS3:
936     // Locking Shift 3 (G3->GL)
937     w->Glr[0] = 3;
938     break;
939     case CHARSET_LS1R:
940     // Locking Shift 1 (G1->GR)
941     w->Glr[1] = 1;
942     break;
943     case CHARSET_LS2R:
944     // Locking Shift 2 (G2->GR)
945     w->Glr[1] = 2;
946     break;
947     case CHARSET_LS3R:
948     // Locking Shift 3 (G3->GR)
949     w->Glr[1] = 3;
950     break;
951     case CHARSET_SS2:
952     // Single Shift 2
953     GLtmp = 2;
954 zmatsuo 10755 SSflag = TRUE;
955 zmatsuo 10776 break;
956     case CHARSET_SS3:
957     // Single Shift 3
958     GLtmp = 3;
959     SSflag = TRUE;
960     break;
961     default:
962     assert(FALSE);
963     break;
964 zmatsuo 10755 }
965     }
966    
967     /**
968     * DEC�����t�H���g(Tera Special font)
969     * 0140(0x60) ... 0176(0x7f) ���r�����A�T�C������������
970 zmatsuo 10760 * (0xe0) ... (0xff) ��?
971 zmatsuo 10755 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
972     * about/emulations.html
973     *
974     * @param b �R�[�h
975 zmatsuo 10760 * @retval TRUE IdSpecial
976     * @retval FALSE IdSpecial��������
977 zmatsuo 10755 */
978     BOOL CharSetIsSpecial(BYTE b)
979     {
980     VttermKanjiWork *w = &KanjiWork;
981     BOOL SpecialNew = FALSE;
982    
983     if ((b>0x5F) && (b<0x80)) {
984     if (SSflag)
985     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
986     else
987     SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial);
988     }
989     else if (b>0xDF) {
990     if (SSflag)
991     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
992     else
993     SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial);
994     }
995    
996     return SpecialNew;
997     }
998    
999     static void CharSetSaveStateLow(CharSetState *state, const VttermKanjiWork *w)
1000     {
1001     int i;
1002     state->infos[0] = w->Glr[0];
1003     state->infos[1] = w->Glr[1];
1004     for (i=0 ; i<=3; i++) {
1005     state->infos[2 + i] = w->Gn[i];
1006     }
1007     }
1008    
1009     /**
1010     * ��������������
1011     */
1012     void CharSetSaveState(CharSetState *state)
1013     {
1014     VttermKanjiWork *w = &KanjiWork;
1015     CharSetSaveStateLow(state, w);
1016     }
1017    
1018     /**
1019     * ���������A����
1020     */
1021     void CharSetLoadState(const CharSetState *state)
1022     {
1023     VttermKanjiWork *w = &KanjiWork;
1024     int i;
1025     w->Glr[0] = state->infos[0];
1026     w->Glr[1] = state->infos[1];
1027     for (i=0 ; i<=3; i++) {
1028     w->Gn[i] = state->infos[2 + i];
1029     }
1030     }
1031 zmatsuo 10763
1032     /**
1033     * �t�H�[���o�b�N���I��
1034     * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
1035     *
1036     */
1037     void CharSetFallbackFinish(void)
1038     {
1039     Fallbacked = FALSE;
1040     }
1041 zmatsuo 10773
1042     /**
1043     * �f�o�O�o�����������[�h�����X����
1044     */
1045     void CharSetSetNextDebugMode(void)
1046     {
1047     // ts.DebugModes ���� tttypes.h �� DBGF_* �� OR ����������
1048     do {
1049     DebugFlag = (DebugFlag + 1) % DEBUG_FLAG_MAXD;
1050     } while (DebugFlag != DEBUG_FLAG_NONE && !((ts.DebugModes >> (DebugFlag - 1)) & 1));
1051     }
1052    
1053     BYTE CharSetGetDebugMode(void)
1054     {
1055     return DebugFlag;
1056     }
1057    
1058     void CharSetSetDebugMode(BYTE mode)
1059     {
1060     DebugFlag = mode;
1061     }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26