Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /trunk/teraterm/teraterm/charset.cpp

Parent Directory | Revision Log


Revision 10766 - (hide annotations) (download) (as text)
Fri Jun 16 13:44:22 2023 UTC (9 months, 3 weeks ago) by zmatsuo
Original Path: trunk/teraterm/teraterm/charset.c
File MIME type: text/x-csrc
File size: 19688 byte(s)
UTF-8 のデコードを厳密にした

- サロゲートペアなども考慮
- Unicode仕様書を参考にした
  - https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
  - Table 3-7. Well-Formed UTF-8 Byte Sequences
- 実装を見直した

ticket #48226
1 zmatsuo 10755 /*
2     * (C) 2023- TeraTerm Project
3     * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include "teraterm.h"
30     #include "tttypes.h"
31     #include <stdio.h>
32     #include <string.h>
33     #if !defined(_CRTDBG_MAP_ALLOC)
34     #define _CRTDBG_MAP_ALLOC
35     #endif
36     #include <stdlib.h>
37     #include <crtdbg.h>
38     #include <assert.h>
39    
40     #include "buffer.h" // for Wrap
41     #include "ttwinman.h"
42     #include "codeconv.h"
43     #include "unicode.h"
44     #include "language.h" // for JIS2SJIS()
45 zmatsuo 10763 #include "ttcstd.h"
46 zmatsuo 10755
47     #include "charset.h"
48    
49 zmatsuo 10763 // UTF-8���s�����l�����������\����������
50     #define REPLACEMENT_CHARACTER '?'
51     //#define REPLACEMENT_CHARACTER 0x2592
52     //#define REPLACEMENT_CHARACTER 0x20
53     //#define REPLACEMENT_CHARACTER 0xfffd
54    
55 zmatsuo 10755 static BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
56     static BOOL EUCkanaIn, EUCsupIn;
57     static int EUCcount;
58     #if 0
59     static BOOL Special;
60     #endif
61    
62     /* GL for single shift 2/3 */
63     static int GLtmp;
64     /* single shift 2/3 flag */
65     static BOOL SSflag;
66     /* JIS -> SJIS conversion flag */
67     static BOOL ConvJIS;
68     static WORD Kanji;
69 zmatsuo 10763 static BOOL Fallbacked;
70 zmatsuo 10755
/**
 *	Work area for character-set switching.
 */
typedef struct {
	/* Glr[0] / Glr[1]: index into Gn[] currently invoked into GL / GR */
	int Glr[2];
	/* Gn[0..3]: character set designated to G0, G1, G2, G3 */
	int Gn[4];
	/* character emitted by PutReplacementChr() for undecodable bytes */
	char32_t replacement_char;
} VttermKanjiWork;

/* Work area shared by the functions in this file */
static VttermKanjiWork KanjiWork;
81    
82 zmatsuo 10760 // Unicode�x�[�X����������
83     static void PutChar(BYTE b)
84     {
85     PutU32(b);
86     }
87 zmatsuo 10755
88     /**
89     * ISO2022�p���[�N������������
90     */
91     static void CharSetInit2(VttermKanjiWork *w)
92     {
93     if (ts.Language==IdJapanese) {
94     w->Gn[0] = IdASCII;
95     w->Gn[1] = IdKatakana;
96     w->Gn[2] = IdKatakana;
97     w->Gn[3] = IdKanji;
98     w->Glr[0] = 0;
99     if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
100     w->Glr[1] = 2; // 8-bit katakana
101     else
102     w->Glr[1] = 3;
103     }
104     else {
105     w->Gn[0] = IdASCII;
106     w->Gn[1] = IdSpecial;
107     w->Gn[2] = IdASCII;
108     w->Gn[3] = IdASCII;
109     w->Glr[0] = 0;
110     w->Glr[1] = 0;
111     }
112     }
113    
114     /**
115     * �������A���[�N������������
116     */
117     void CharSetInit(void)
118     {
119 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
120    
121     CharSetInit2(w);
122    
123     w->replacement_char = REPLACEMENT_CHARACTER;
124 zmatsuo 10755 SSflag = FALSE;
125    
126     KanjiIn = FALSE;
127     EUCkanaIn = FALSE;
128     EUCsupIn = FALSE;
129     ConvJIS = FALSE;
130     Fallbacked = FALSE;
131     }
132    
133     /**
134     * 1byte���`�F�b�N
135     */
136     static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
137     {
138     switch (lang) {
139     case IdKorean:
140     return __ismbblead(b, 51949);
141     case IdChinese:
142     if (kanji_code == IdCnGB2312) {
143     return __ismbblead(b, 936);
144     }
145     else if (ts.KanjiCode == IdCnBig5) {
146     return __ismbblead(b, 950);
147     }
148     break;
149     default:
150     assert(FALSE);
151     break;
152     }
153     assert(FALSE);
154     return FALSE;
155     }
156 zmatsuo 10763
157 zmatsuo 10755 /**
158 zmatsuo 10763 * Double-byte Character Sets
159     * SJIS��1byte��?
160     *
161     * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
162     * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
163     */
164     static BOOL ismbbleadSJIS(BYTE b)
165     {
166     if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
167     return TRUE;
168     }
169     return FALSE;
170     }
171    
172     /**
173 zmatsuo 10755 * ts.Language == IdJapanese ��
174     * 1byte���`�F�b�N
175     */
176     static BOOL CheckKanji(BYTE b)
177     {
178     VttermKanjiWork *w = &KanjiWork;
179     BOOL Check;
180    
181     if (ts.Language!=IdJapanese)
182     return FALSE;
183    
184     ConvJIS = FALSE;
185    
186     if (ts.KanjiCode==IdSJIS ||
187     (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) {
188 zmatsuo 10759 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
189 zmatsuo 10755 Fallbacked = TRUE;
190     return TRUE; // SJIS kanji
191     }
192     if ((0xa1<=b) && (b<=0xdf)) {
193     return FALSE; // SJIS katakana
194     }
195     }
196    
197     if ((b>=0x21) && (b<=0x7e)) {
198     Check = (w->Gn[w->Glr[0]] == IdKanji);
199     ConvJIS = Check;
200     }
201     else if ((b>=0xA1) && (b<=0xFE)) {
202     Check = (w->Gn[w->Glr[1]] == IdKanji);
203     if (ts.KanjiCode==IdEUC) {
204     Check = TRUE;
205     }
206     else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
207     Check = FALSE; // 8-bit katakana
208     }
209     ConvJIS = Check;
210     }
211     else {
212     Check = FALSE;
213     }
214    
215     return Check;
216     }
217    
218     static BOOL ParseFirstJP(BYTE b)
219     // returns TRUE if b is processed
220     // (actually allways returns TRUE)
221     {
222     VttermKanjiWork *w = &KanjiWork;
223     if (KanjiIn) {
224 zmatsuo 10759 if (((! ConvJIS) && (0x3F<b) && (b<0xFD)) ||
225     (ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
226     ((0xa0<b) && (b<0xff)) )) )
227 zmatsuo 10755 {
228 zmatsuo 10758 unsigned long u32;
229     Kanji = Kanji + b;
230     if (ConvJIS) {
231     // JIS -> Shift_JIS(CP932)
232     Kanji = JIS2SJIS((WORD)(Kanji & 0x7f7f));
233     }
234     u32 = CP932ToUTF32(Kanji);
235     PutU32(u32);
236 zmatsuo 10755 KanjiIn = FALSE;
237     return TRUE;
238     }
239     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
240     KanjiIn = FALSE;
241     }
242     else if ((b==CR) && Wrap) {
243     CarriageReturn(FALSE);
244     LineFeed(LF,FALSE);
245     Wrap = FALSE;
246     }
247     }
248    
249     if (SSflag) {
250     if (w->Gn[GLtmp] == IdKanji) {
251     Kanji = b << 8;
252     KanjiIn = TRUE;
253     SSflag = FALSE;
254     return TRUE;
255     }
256     else if (w->Gn[GLtmp] == IdKatakana) {
257     b = b | 0x80;
258     }
259    
260     PutChar(b);
261     SSflag = FALSE;
262     return TRUE;
263     }
264    
265     if ((!EUCsupIn) && (!EUCkanaIn) && (!KanjiIn) && CheckKanji(b)) {
266     Kanji = b << 8;
267     KanjiIn = TRUE;
268     return TRUE;
269     }
270    
271     if (b<=US) {
272     ParseControl(b);
273     }
274     else if (b==0x20) {
275     PutChar(b);
276     }
277     else if ((b>=0x21) && (b<=0x7E)) {
278     if (EUCsupIn) {
279     EUCcount--;
280     EUCsupIn = (EUCcount==0);
281     return TRUE;
282     }
283    
284     if ((w->Gn[w->Glr[0]] == IdKatakana) || EUCkanaIn) {
285     b = b | 0x80;
286     EUCkanaIn = FALSE;
287     {
288     // b��sjis�����p�J�^�J�i
289     unsigned long u32 = CP932ToUTF32(b);
290     PutU32(u32);
291     }
292     return TRUE;
293     }
294     PutChar(b);
295     }
296     else if (b==0x7f) {
297     return TRUE;
298     }
299     else if ((b>=0x80) && (b<=0x8D)) {
300     ParseControl(b);
301     }
302     else if (b==0x8E) { // SS2
303     switch (ts.KanjiCode) {
304     case IdEUC:
305     if (ts.ISO2022Flag & ISO2022_SS2) {
306     EUCkanaIn = TRUE;
307     }
308     break;
309     case IdUTF8:
310 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
311 zmatsuo 10755 break;
312     default:
313     ParseControl(b);
314     }
315     }
316     else if (b==0x8F) { // SS3
317     switch (ts.KanjiCode) {
318     case IdEUC:
319     if (ts.ISO2022Flag & ISO2022_SS3) {
320     EUCcount = 2;
321     EUCsupIn = TRUE;
322     }
323     break;
324     case IdUTF8:
325 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
326 zmatsuo 10755 break;
327     default:
328     ParseControl(b);
329     }
330     }
331     else if ((b>=0x90) && (b<=0x9F)) {
332     ParseControl(b);
333     }
334     else if (b==0xA0) {
335     PutChar(0x20);
336     }
337     else if ((b>=0xA1) && (b<=0xFE)) {
338     if (EUCsupIn) {
339     EUCcount--;
340     EUCsupIn = (EUCcount==0);
341     return TRUE;
342     }
343    
344     if ((w->Gn[w->Glr[1]] != IdASCII) ||
345 zmatsuo 10759 ((ts.KanjiCode==IdEUC) && EUCkanaIn) ||
346 zmatsuo 10755 (ts.KanjiCode==IdSJIS) ||
347 zmatsuo 10759 ((ts.KanjiCode==IdJIS) &&
348     (ts.JIS7Katakana==0) &&
349     ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
350 zmatsuo 10755 // b��sjis�����p�J�^�J�i
351     unsigned long u32 = CP932ToUTF32(b);
352     PutU32(u32);
353     } else {
354     if (w->Gn[w->Glr[1]] == IdASCII) {
355     b = b & 0x7f;
356     }
357     PutChar(b);
358     }
359     EUCkanaIn = FALSE;
360     }
361     else {
362     PutChar(b);
363     }
364    
365     return TRUE;
366     }
367    
368     static BOOL ParseFirstKR(BYTE b)
369     // returns TRUE if b is processed
370     // (actually allways returns TRUE)
371     {
372     VttermKanjiWork *w = &KanjiWork;
373     if (KanjiIn) {
374 zmatsuo 10759 if (((0x41<=b) && (b<=0x5A)) ||
375     ((0x61<=b) && (b<=0x7A)) ||
376     ((0x81<=b) && (b<=0xFE)))
377 zmatsuo 10755 {
378 zmatsuo 10758 unsigned long u32 = 0;
379     if (ts.KanjiCode == IdKoreanCP51949) {
380     // CP51949
381     Kanji = Kanji + b;
382     u32 = MBCP_UTF32(Kanji, 51949);
383     }
384     else {
385     assert(FALSE);
386     }
387     PutU32(u32);
388 zmatsuo 10755 KanjiIn = FALSE;
389     return TRUE;
390     }
391     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
392     KanjiIn = FALSE;
393     }
394     else if ((b==CR) && Wrap) {
395     CarriageReturn(FALSE);
396     LineFeed(LF,FALSE);
397     Wrap = FALSE;
398     }
399     }
400    
401     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
402     Kanji = b << 8;
403     KanjiIn = TRUE;
404     return TRUE;
405     }
406    
407     if (b<=US) {
408     ParseControl(b);
409     }
410     else if (b==0x20) {
411     PutChar(b);
412     }
413     else if ((b>=0x21) && (b<=0x7E)) {
414     // if (Gn[Glr[0]] == IdKatakana) {
415     // b = b | 0x80;
416     // }
417     PutChar(b);
418     }
419     else if (b==0x7f) {
420     return TRUE;
421     }
422     else if ((0x80<=b) && (b<=0x9F)) {
423     ParseControl(b);
424     }
425     else if (b==0xA0) {
426     PutChar(0x20);
427     }
428     else if ((b>=0xA1) && (b<=0xFE)) {
429     if (w->Gn[w->Glr[1]] == IdASCII) {
430     b = b & 0x7f;
431     }
432     PutChar(b);
433     }
434     else {
435     PutChar(b);
436     }
437    
438     return TRUE;
439     }
440    
441     static BOOL ParseFirstCn(BYTE b)
442     // returns TRUE if b is processed
443     // (actually allways returns TRUE)
444     {
445     VttermKanjiWork *w = &KanjiWork;
446     if (KanjiIn) {
447     // TODO
448 zmatsuo 10759 if (((0x40<=b) && (b<=0x7e)) ||
449     ((0xa1<=b) && (b<=0xFE)))
450 zmatsuo 10755 {
451 zmatsuo 10758 unsigned long u32 = 0;
452     Kanji = Kanji + b;
453     if (ts.KanjiCode == IdCnGB2312) {
454     // CP936 GB2312
455     u32 = MBCP_UTF32(Kanji, 936);
456     }
457     else if (ts.KanjiCode == IdCnBig5) {
458     // CP950 Big5
459     u32 = MBCP_UTF32(Kanji, 950);
460     }
461     else {
462     assert(FALSE);
463     }
464     PutU32(u32);
465 zmatsuo 10755 KanjiIn = FALSE;
466     return TRUE;
467     }
468     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
469     KanjiIn = FALSE;
470     }
471     else if ((b==CR) && Wrap) {
472     CarriageReturn(FALSE);
473     LineFeed(LF,FALSE);
474     Wrap = FALSE;
475     }
476     }
477    
478     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
479     Kanji = b << 8;
480     KanjiIn = TRUE;
481     return TRUE;
482     }
483    
484     if (b<=US) {
485     ParseControl(b);
486     }
487     else if (b==0x20) {
488     PutChar(b);
489     }
490     else if ((b>=0x21) && (b<=0x7E)) {
491     // if (Gn[Glr[0]] == IdKatakana) {
492     // b = b | 0x80;
493     // }
494     PutChar(b);
495     }
496     else if (b==0x7f) {
497     return TRUE;
498     }
499     else if ((0x80<=b) && (b<=0x9F)) {
500     ParseControl(b);
501     }
502     else if (b==0xA0) {
503     PutChar(0x20);
504     }
505     else if ((b>=0xA1) && (b<=0xFE)) {
506     if (w->Gn[w->Glr[1]] == IdASCII) {
507     b = b & 0x7f;
508     }
509     PutChar(b);
510     }
511     else {
512     PutChar(b);
513     }
514    
515     return TRUE;
516     }
517    
518     static void ParseASCII(BYTE b)
519     {
520     if (SSflag) {
521     PutChar(b);
522     SSflag = FALSE;
523     return;
524     }
525    
526     if (b<=US) {
527     ParseControl(b);
528     } else if ((b>=0x20) && (b<=0x7E)) {
529 zmatsuo 10760 PutU32(b);
530 zmatsuo 10755 } else if ((b==0x8E) || (b==0x8F)) {
531 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
532 zmatsuo 10755 } else if ((b>=0x80) && (b<=0x9F)) {
533     ParseControl(b);
534     } else if (b>=0xA0) {
535 zmatsuo 10760 PutU32(b);
536 zmatsuo 10755 }
537     }
538    
539 zmatsuo 10764 static void PutReplacementChr(VttermKanjiWork *w, const BYTE *ptr, size_t len, BOOL fallback)
540 zmatsuo 10763 {
541     const char32_t replacement_char = w->replacement_char;
542     int i;
543     for (i = 0; i < len; i++) {
544     BYTE c = *ptr++;
545 zmatsuo 10764 if (fallback) {
546     // fallback ISO8859-1
547     PutU32(c);
548 zmatsuo 10763 }
549     else {
550 zmatsuo 10764 // fallback������
551     if (c < 0x80) {
552     // �s����UTF-8��������������0x80�������������A
553     // 1������UTF-8�������������������\������
554     ParseASCII(c);
555     }
556     else {
557     PutU32(replacement_char);
558     }
559 zmatsuo 10763 }
560     }
561     }
562    
563 zmatsuo 10755 // UTF-8�����M�f�[�^����������
564     // returns TRUE if b is processed
565     // (actually allways returns TRUE)
566     static BOOL ParseFirstUTF8(BYTE b)
567     {
568 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
569 zmatsuo 10755 static BYTE buf[4];
570     static int count = 0;
571 zmatsuo 10766 char32_t code;
572 zmatsuo 10755
573 zmatsuo 10763 if (Fallbacked) {
574     BOOL r = ParseFirstJP(b);
575     Fallbacked = FALSE;
576     return r;
577 zmatsuo 10755 }
578    
579     // UTF-8�G���R�[�h
580 zmatsuo 10766 // The Unicode Standard Chapter 3
581     // Table 3-7. Well-Formed UTF-8 Byte Sequences
582     // | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
583     // | U+0000..U+007F | 00..7F | | | |
584     // | U+0080..U+07FF | C2..DF | 80..BF | | |
585     // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
586     // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
587     // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
588     // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
589     // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
590     // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
591     // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
592 zmatsuo 10755 // UTF-8���f�R�[�h������������
593     // - 1byte��
594 zmatsuo 10766 // - 0x00 - 0x7f ok
595     // - 0x80 - 0xc1 ng
596     // - 0xc2 - 0xf4 ok
597     // - 0xf5 - 0xff ng
598 zmatsuo 10755 // - 2byte�����~
599 zmatsuo 10766 // - 0x00 - 0x7f ng
600     // - 0x80 - 0xbf ok
601     // - 0xc0 - 0xff ng
602     // - 2byte�����O
603     // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
604     // - 1byte == 0xed ������ 0x80 - 0x9f����ok
605     // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
606     // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
607 zmatsuo 10763 recheck:
608 zmatsuo 10755 // 1byte(7bit)
609     if (count == 0) {
610 zmatsuo 10766 if (b <= 0x7f) {
611 zmatsuo 10755 // 1byte(7bit)
612     // 0x7f����, �������A���������o��
613     ParseASCII(b);
614     return TRUE;
615     }
616 zmatsuo 10766 if (0xc2 <= b && b <= 0xf4) {
617     // 1byte������
618     buf[count++] = b;
619 zmatsuo 10755 return TRUE;
620     }
621    
622 zmatsuo 10766 // UTF-8��1byte���o���������R�[�h������
623     if (ts.FallbackToCP932) {
624     // fallback��������
625     if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
626     // ���{�������� && Shift_JIS 1byte��
627     // Shift_JIS �� fallback
628     Fallbacked = TRUE;
629     ConvJIS = FALSE;
630     Kanji = b << 8;
631     KanjiIn = TRUE;
632     return TRUE;
633 zmatsuo 10755 }
634 zmatsuo 10766 // fallback ISO8859-1
635     PutU32(b);
636     return TRUE;
637 zmatsuo 10755 }
638     else {
639 zmatsuo 10766 // fallback������, �s������������
640     buf[0] = b;
641     PutReplacementChr(w, buf, 1, FALSE);
642 zmatsuo 10755 }
643 zmatsuo 10766 return TRUE;
644 zmatsuo 10755 }
645    
646 zmatsuo 10764 // 2byte���~����?
647 zmatsuo 10766 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
648     // �s��������, (����2bit�� 0b10xx_xxxx ��������)
649 zmatsuo 10764 PutReplacementChr(w, buf, count, ts.FallbackToCP932);
650     count = 0;
651     goto recheck;
652     }
653    
654 zmatsuo 10755 // 2byte�����~����
655     buf[count++] = b;
656    
657 zmatsuo 10766 // 2byte(11bit)
658     if (count == 2) {
659     if ((buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= buf[0] && buf[0] <= 0xdf)
660     // 5bit + 6bit
661     code = ((buf[0] & 0x1f) << 6) | (b & 0x3f);
662     PutU32(code);
663     count = 0;
664 zmatsuo 10755 return TRUE;
665     }
666 zmatsuo 10766 return TRUE;
667     }
668    
669     // 3byte(16bit)
670     if (count == 3) {
671     if ((buf[0] & 0xf0) == 0xe0) {
672     if ((buf[0] == 0xe0 && (buf[1] < 0xa0 || 0xbf < buf[1])) ||
673     (buf[0] == 0xed && ( 0x9f < buf[1]))) {
674     // �s���� UTF-8
675     PutReplacementChr(w, buf, 2, ts.FallbackToCP932);
676     count = 0;
677     goto recheck;
678     }
679 zmatsuo 10755 // 4bit + 6bit + 6bit
680     code = ((buf[0] & 0xf) << 12);
681     code |= ((buf[1] & 0x3f) << 6);
682     code |= ((buf[2] & 0x3f));
683     PutU32(code);
684     count = 0;
685     return TRUE;
686     }
687 zmatsuo 10766 return TRUE;
688 zmatsuo 10755 }
689    
690     // 4byte(21bit)
691 zmatsuo 10766 assert(count == 4);
692     assert((buf[0] & 0xf8) == 0xf0);
693     if ((buf[0] == 0xf0 && (buf[1] < 0x90 || 0x9f < buf[1])) ||
694     (buf[0] == 0xf4 && (buf[1] < 0x80 || 0x8f < buf[1]))) {
695     // �s���� UTF-8
696     PutReplacementChr(w, buf, 3, ts.FallbackToCP932);
697     count = 0;
698     goto recheck;
699 zmatsuo 10755 }
700 zmatsuo 10766 // 3bit + 6bit + 6bit + 6bit
701     code = ((buf[0] & 0x07) << 18);
702     code |= ((buf[1] & 0x3f) << 12);
703     code |= ((buf[2] & 0x3f) << 6);
704     code |= (buf[3] & 0x3f);
705     PutU32(code);
706 zmatsuo 10755 count = 0;
707     return TRUE;
708     }
709    
710     static BOOL ParseFirstRus(BYTE b)
711     // returns if b is processed
712     {
713 zmatsuo 10756 // CP1251������
714     BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
715     // CP1251->Unicode
716     unsigned long u32 = MBCP_UTF32(c, 1251);
717     PutU32(u32);
718     return TRUE;
719 zmatsuo 10755 }
720    
721     static BOOL ParseEnglish(BYTE b)
722     {
723     unsigned short u16 = 0;
724     int part = KanjiCodeToISO8859Part(ts.KanjiCode);
725     int r = UnicodeFromISO8859(part, b, &u16);
726     if (r == 0) {
727     return FALSE;
728     }
729     if (u16 < 0x100) {
730     ParseASCII((BYTE)u16);
731     }
732     else {
733     PutU32(u16);
734     }
735     return TRUE;
736     }
737    
738     void ParseFirst(BYTE b) {
739     switch (ts.Language) {
740     case IdUtf8:
741     ParseFirstUTF8(b);
742     return;
743    
744     case IdJapanese:
745     switch (ts.KanjiCode) {
746     case IdUTF8:
747     if (ParseFirstUTF8(b)) {
748     return;
749     }
750     break;
751     default:
752     if (ParseFirstJP(b)) {
753     return;
754     }
755     }
756     break;
757    
758     case IdKorean:
759     switch (ts.KanjiCode) {
760     case IdUTF8:
761     if (ParseFirstUTF8(b)) {
762     return;
763     }
764     break;
765     default:
766     if (ParseFirstKR(b)) {
767     return;
768     }
769     }
770     break;
771    
772     case IdRussian:
773     if (ParseFirstRus(b)) {
774     return;
775     }
776     break;
777    
778     case IdChinese:
779     switch (ts.KanjiCode) {
780     case IdUTF8:
781     if (ParseFirstUTF8(b)) {
782     return;
783     }
784     break;
785     default:
786     if (ParseFirstCn(b)) {
787     return;
788     }
789     }
790     break;
791     case IdEnglish: {
792     if (ParseEnglish(b)) {
793     return;
794     }
795     break;
796     }
797     }
798    
799     if (SSflag) {
800     PutChar(b);
801     SSflag = FALSE;
802     return;
803     }
804    
805     if (b<=US)
806     ParseControl(b);
807     else if ((b>=0x20) && (b<=0x7E))
808     PutChar(b);
809     else if ((b>=0x80) && (b<=0x9F))
810     ParseControl(b);
811     else if (b>=0xA0)
812     PutChar(b);
813     }
814    
815     /**
816     * �w��(Designate)
817     *
818     * @param Gn 0/1/2/3 = G0/G1/G2/G3
819     * @param codeset IdASCII 0
820     * IdKatakana 1
821     * IdKanji 2
822     * IdSpecial 3
823     */
824     void CharSet2022Designate(int gn, int cs)
825     {
826     VttermKanjiWork *w = &KanjiWork;
827     w->Gn[gn] = cs;
828     }
829    
830     /**
831     * �����o��(Invoke)
832     * @param glr 0/1 = GL/GR (Locking shift�������L��)
833     * @param gn 0/1/2/3 = G0/G1/G2/G3
834     * @param single_shift FALSE Locking shift
835     * TRUE Single shift
836     */
837     void CharSet2022Invoke(int glr, int gn, BOOL single_shift)
838     {
839     VttermKanjiWork *w = &KanjiWork;
840     if (single_shift == FALSE) {
841     // Locking shift
842     w->Glr[glr] = gn;
843     }
844     else {
845     // Single shift
846     GLtmp = gn;
847     SSflag = TRUE;
848     }
849     }
850    
851     /**
852     * DEC�����t�H���g(Tera Special font)
853     * 0140(0x60) ... 0176(0x7f) ���r�����A�T�C������������
854 zmatsuo 10760 * (0xe0) ... (0xff) ��?
855 zmatsuo 10755 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
856     * about/emulations.html
857     *
858     * @param b �R�[�h
859 zmatsuo 10760 * @retval TRUE IdSpecial
860     * @retval FALSE IdSpecial��������
861 zmatsuo 10755 */
862     BOOL CharSetIsSpecial(BYTE b)
863     {
864     VttermKanjiWork *w = &KanjiWork;
865     BOOL SpecialNew = FALSE;
866    
867     if ((b>0x5F) && (b<0x80)) {
868     if (SSflag)
869     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
870     else
871     SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial);
872     }
873     else if (b>0xDF) {
874     if (SSflag)
875     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
876     else
877     SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial);
878     }
879    
880     return SpecialNew;
881     }
882    
883     static void CharSetSaveStateLow(CharSetState *state, const VttermKanjiWork *w)
884     {
885     int i;
886     state->infos[0] = w->Glr[0];
887     state->infos[1] = w->Glr[1];
888     for (i=0 ; i<=3; i++) {
889     state->infos[2 + i] = w->Gn[i];
890     }
891     }
892    
893     /**
894     * ��������������
895     */
896     void CharSetSaveState(CharSetState *state)
897     {
898     VttermKanjiWork *w = &KanjiWork;
899     CharSetSaveStateLow(state, w);
900     }
901    
902     /**
903     * ���������A����
904     */
905     void CharSetLoadState(const CharSetState *state)
906     {
907     VttermKanjiWork *w = &KanjiWork;
908     int i;
909     w->Glr[0] = state->infos[0];
910     w->Glr[1] = state->infos[1];
911     for (i=0 ; i<=3; i++) {
912     w->Gn[i] = state->infos[2 + i];
913     }
914     }
915 zmatsuo 10763
916     /**
917     * �t�H�[���o�b�N���I��
918     * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
919     *
920     */
921     void CharSetFallbackFinish(void)
922     {
923     Fallbacked = FALSE;
924     }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26