Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /trunk/teraterm/teraterm/charset.cpp

Parent Directory | Revision Log


Revision 10768 - (hide annotations) (download) (as text)
Fri Jun 16 14:18:03 2023 UTC (9 months, 3 weeks ago) by zmatsuo
Original Path: trunk/teraterm/teraterm/charset.c
File MIME type: text/x-csrc
File size: 19816 byte(s)
CP949 を誤って CP51949 としていたので修正

- r8766 で CP51949 としていた
1 zmatsuo 10755 /*
2     * (C) 2023- TeraTerm Project
3     * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include "teraterm.h"
30     #include "tttypes.h"
31     #include <stdio.h>
32     #include <string.h>
33     #if !defined(_CRTDBG_MAP_ALLOC)
34     #define _CRTDBG_MAP_ALLOC
35     #endif
36     #include <stdlib.h>
37     #include <crtdbg.h>
38     #include <assert.h>
39    
40     #include "buffer.h" // for Wrap
41     #include "ttwinman.h"
42     #include "codeconv.h"
43     #include "unicode.h"
44     #include "language.h" // for JIS2SJIS()
45 zmatsuo 10763 #include "ttcstd.h"
46 zmatsuo 10755
47     #include "charset.h"
48    
49 zmatsuo 10763 // UTF-8���s�����l�����������\����������
50     #define REPLACEMENT_CHARACTER '?'
51     //#define REPLACEMENT_CHARACTER 0x2592
52     //#define REPLACEMENT_CHARACTER 0x20
53     //#define REPLACEMENT_CHARACTER 0xfffd
54    
55 zmatsuo 10755 static BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
56     static BOOL EUCkanaIn, EUCsupIn;
57     static int EUCcount;
58     #if 0
59     static BOOL Special;
60     #endif
61    
62     /* GL for single shift 2/3 */
63     static int GLtmp;
64     /* single shift 2/3 flag */
65     static BOOL SSflag;
66     /* JIS -> SJIS conversion flag */
67     static BOOL ConvJIS;
68     static WORD Kanji;
69 zmatsuo 10763 static BOOL Fallbacked;
70 zmatsuo 10755
71     typedef struct {
72     /* GL, GR code group */
73     int Glr[2];
74     /* G0, G1, G2, G3 code group */
75     int Gn[4];
76 zmatsuo 10763 //
77     char32_t replacement_char;
78 zmatsuo 10767 // UTF-8 work
79     BYTE buf[4];
80     int count;
81 zmatsuo 10755 } VttermKanjiWork;
82    
83     static VttermKanjiWork KanjiWork;
84    
85 zmatsuo 10760 // Unicode�x�[�X����������
86     static void PutChar(BYTE b)
87     {
88     PutU32(b);
89     }
90 zmatsuo 10755
91     /**
92     * ISO2022�p���[�N������������
93     */
94     static void CharSetInit2(VttermKanjiWork *w)
95     {
96     if (ts.Language==IdJapanese) {
97     w->Gn[0] = IdASCII;
98     w->Gn[1] = IdKatakana;
99     w->Gn[2] = IdKatakana;
100     w->Gn[3] = IdKanji;
101     w->Glr[0] = 0;
102     if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
103     w->Glr[1] = 2; // 8-bit katakana
104     else
105     w->Glr[1] = 3;
106     }
107     else {
108     w->Gn[0] = IdASCII;
109     w->Gn[1] = IdSpecial;
110     w->Gn[2] = IdASCII;
111     w->Gn[3] = IdASCII;
112     w->Glr[0] = 0;
113     w->Glr[1] = 0;
114     }
115     }
116    
117     /**
118     * �������A���[�N������������
119     */
120     void CharSetInit(void)
121     {
122 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
123    
124     CharSetInit2(w);
125    
126     w->replacement_char = REPLACEMENT_CHARACTER;
127 zmatsuo 10755 SSflag = FALSE;
128    
129     KanjiIn = FALSE;
130     EUCkanaIn = FALSE;
131     EUCsupIn = FALSE;
132     ConvJIS = FALSE;
133     Fallbacked = FALSE;
134     }
135    
136     /**
137     * 1byte���`�F�b�N
138     */
139     static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
140     {
141     switch (lang) {
142     case IdKorean:
143     return __ismbblead(b, 51949);
144     case IdChinese:
145     if (kanji_code == IdCnGB2312) {
146     return __ismbblead(b, 936);
147     }
148     else if (ts.KanjiCode == IdCnBig5) {
149     return __ismbblead(b, 950);
150     }
151     break;
152     default:
153     assert(FALSE);
154     break;
155     }
156     assert(FALSE);
157     return FALSE;
158     }
159 zmatsuo 10763
160 zmatsuo 10755 /**
161 zmatsuo 10763 * Double-byte Character Sets
162     * SJIS��1byte��?
163     *
164     * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
165     * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
166     */
167     static BOOL ismbbleadSJIS(BYTE b)
168     {
169     if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
170     return TRUE;
171     }
172     return FALSE;
173     }
174    
175     /**
176 zmatsuo 10755 * ts.Language == IdJapanese ��
177     * 1byte���`�F�b�N
178     */
179     static BOOL CheckKanji(BYTE b)
180     {
181     VttermKanjiWork *w = &KanjiWork;
182     BOOL Check;
183    
184     if (ts.Language!=IdJapanese)
185     return FALSE;
186    
187     ConvJIS = FALSE;
188    
189     if (ts.KanjiCode==IdSJIS ||
190     (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) {
191 zmatsuo 10759 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
192 zmatsuo 10755 Fallbacked = TRUE;
193     return TRUE; // SJIS kanji
194     }
195     if ((0xa1<=b) && (b<=0xdf)) {
196     return FALSE; // SJIS katakana
197     }
198     }
199    
200     if ((b>=0x21) && (b<=0x7e)) {
201     Check = (w->Gn[w->Glr[0]] == IdKanji);
202     ConvJIS = Check;
203     }
204     else if ((b>=0xA1) && (b<=0xFE)) {
205     Check = (w->Gn[w->Glr[1]] == IdKanji);
206     if (ts.KanjiCode==IdEUC) {
207     Check = TRUE;
208     }
209     else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
210     Check = FALSE; // 8-bit katakana
211     }
212     ConvJIS = Check;
213     }
214     else {
215     Check = FALSE;
216     }
217    
218     return Check;
219     }
220    
221     static BOOL ParseFirstJP(BYTE b)
222     // returns TRUE if b is processed
223     // (actually allways returns TRUE)
224     {
225     VttermKanjiWork *w = &KanjiWork;
226     if (KanjiIn) {
227 zmatsuo 10759 if (((! ConvJIS) && (0x3F<b) && (b<0xFD)) ||
228     (ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
229     ((0xa0<b) && (b<0xff)) )) )
230 zmatsuo 10755 {
231 zmatsuo 10758 unsigned long u32;
232     Kanji = Kanji + b;
233     if (ConvJIS) {
234     // JIS -> Shift_JIS(CP932)
235     Kanji = JIS2SJIS((WORD)(Kanji & 0x7f7f));
236     }
237     u32 = CP932ToUTF32(Kanji);
238     PutU32(u32);
239 zmatsuo 10755 KanjiIn = FALSE;
240     return TRUE;
241     }
242     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
243     KanjiIn = FALSE;
244     }
245     else if ((b==CR) && Wrap) {
246     CarriageReturn(FALSE);
247     LineFeed(LF,FALSE);
248     Wrap = FALSE;
249     }
250     }
251    
252     if (SSflag) {
253     if (w->Gn[GLtmp] == IdKanji) {
254     Kanji = b << 8;
255     KanjiIn = TRUE;
256     SSflag = FALSE;
257     return TRUE;
258     }
259     else if (w->Gn[GLtmp] == IdKatakana) {
260     b = b | 0x80;
261     }
262    
263     PutChar(b);
264     SSflag = FALSE;
265     return TRUE;
266     }
267    
268     if ((!EUCsupIn) && (!EUCkanaIn) && (!KanjiIn) && CheckKanji(b)) {
269     Kanji = b << 8;
270     KanjiIn = TRUE;
271     return TRUE;
272     }
273    
274     if (b<=US) {
275     ParseControl(b);
276     }
277     else if (b==0x20) {
278     PutChar(b);
279     }
280     else if ((b>=0x21) && (b<=0x7E)) {
281     if (EUCsupIn) {
282     EUCcount--;
283     EUCsupIn = (EUCcount==0);
284     return TRUE;
285     }
286    
287     if ((w->Gn[w->Glr[0]] == IdKatakana) || EUCkanaIn) {
288     b = b | 0x80;
289     EUCkanaIn = FALSE;
290     {
291     // b��sjis�����p�J�^�J�i
292     unsigned long u32 = CP932ToUTF32(b);
293     PutU32(u32);
294     }
295     return TRUE;
296     }
297     PutChar(b);
298     }
299     else if (b==0x7f) {
300     return TRUE;
301     }
302     else if ((b>=0x80) && (b<=0x8D)) {
303     ParseControl(b);
304     }
305     else if (b==0x8E) { // SS2
306     switch (ts.KanjiCode) {
307     case IdEUC:
308     if (ts.ISO2022Flag & ISO2022_SS2) {
309     EUCkanaIn = TRUE;
310     }
311     break;
312     case IdUTF8:
313 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
314 zmatsuo 10755 break;
315     default:
316     ParseControl(b);
317     }
318     }
319     else if (b==0x8F) { // SS3
320     switch (ts.KanjiCode) {
321     case IdEUC:
322     if (ts.ISO2022Flag & ISO2022_SS3) {
323     EUCcount = 2;
324     EUCsupIn = TRUE;
325     }
326     break;
327     case IdUTF8:
328 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
329 zmatsuo 10755 break;
330     default:
331     ParseControl(b);
332     }
333     }
334     else if ((b>=0x90) && (b<=0x9F)) {
335     ParseControl(b);
336     }
337     else if (b==0xA0) {
338     PutChar(0x20);
339     }
340     else if ((b>=0xA1) && (b<=0xFE)) {
341     if (EUCsupIn) {
342     EUCcount--;
343     EUCsupIn = (EUCcount==0);
344     return TRUE;
345     }
346    
347     if ((w->Gn[w->Glr[1]] != IdASCII) ||
348 zmatsuo 10759 ((ts.KanjiCode==IdEUC) && EUCkanaIn) ||
349 zmatsuo 10755 (ts.KanjiCode==IdSJIS) ||
350 zmatsuo 10759 ((ts.KanjiCode==IdJIS) &&
351     (ts.JIS7Katakana==0) &&
352     ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
353 zmatsuo 10755 // b��sjis�����p�J�^�J�i
354     unsigned long u32 = CP932ToUTF32(b);
355     PutU32(u32);
356     } else {
357     if (w->Gn[w->Glr[1]] == IdASCII) {
358     b = b & 0x7f;
359     }
360     PutChar(b);
361     }
362     EUCkanaIn = FALSE;
363     }
364     else {
365     PutChar(b);
366     }
367    
368     return TRUE;
369     }
370    
371     static BOOL ParseFirstKR(BYTE b)
372     // returns TRUE if b is processed
373     // (actually allways returns TRUE)
374     {
375     VttermKanjiWork *w = &KanjiWork;
376     if (KanjiIn) {
377 zmatsuo 10759 if (((0x41<=b) && (b<=0x5A)) ||
378     ((0x61<=b) && (b<=0x7A)) ||
379     ((0x81<=b) && (b<=0xFE)))
380 zmatsuo 10755 {
381 zmatsuo 10758 unsigned long u32 = 0;
382 zmatsuo 10768 if (ts.KanjiCode == IdKoreanCP949) {
383 zmatsuo 10758 // CP51949
384     Kanji = Kanji + b;
385     u32 = MBCP_UTF32(Kanji, 51949);
386     }
387     else {
388     assert(FALSE);
389     }
390     PutU32(u32);
391 zmatsuo 10755 KanjiIn = FALSE;
392     return TRUE;
393     }
394     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
395     KanjiIn = FALSE;
396     }
397     else if ((b==CR) && Wrap) {
398     CarriageReturn(FALSE);
399     LineFeed(LF,FALSE);
400     Wrap = FALSE;
401     }
402     }
403    
404     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
405     Kanji = b << 8;
406     KanjiIn = TRUE;
407     return TRUE;
408     }
409    
410     if (b<=US) {
411     ParseControl(b);
412     }
413     else if (b==0x20) {
414     PutChar(b);
415     }
416     else if ((b>=0x21) && (b<=0x7E)) {
417     // if (Gn[Glr[0]] == IdKatakana) {
418     // b = b | 0x80;
419     // }
420     PutChar(b);
421     }
422     else if (b==0x7f) {
423     return TRUE;
424     }
425     else if ((0x80<=b) && (b<=0x9F)) {
426     ParseControl(b);
427     }
428     else if (b==0xA0) {
429     PutChar(0x20);
430     }
431     else if ((b>=0xA1) && (b<=0xFE)) {
432     if (w->Gn[w->Glr[1]] == IdASCII) {
433     b = b & 0x7f;
434     }
435     PutChar(b);
436     }
437     else {
438     PutChar(b);
439     }
440    
441     return TRUE;
442     }
443    
444     static BOOL ParseFirstCn(BYTE b)
445     // returns TRUE if b is processed
446     // (actually allways returns TRUE)
447     {
448     VttermKanjiWork *w = &KanjiWork;
449     if (KanjiIn) {
450     // TODO
451 zmatsuo 10759 if (((0x40<=b) && (b<=0x7e)) ||
452     ((0xa1<=b) && (b<=0xFE)))
453 zmatsuo 10755 {
454 zmatsuo 10758 unsigned long u32 = 0;
455     Kanji = Kanji + b;
456     if (ts.KanjiCode == IdCnGB2312) {
457     // CP936 GB2312
458     u32 = MBCP_UTF32(Kanji, 936);
459     }
460     else if (ts.KanjiCode == IdCnBig5) {
461     // CP950 Big5
462     u32 = MBCP_UTF32(Kanji, 950);
463     }
464     else {
465     assert(FALSE);
466     }
467     PutU32(u32);
468 zmatsuo 10755 KanjiIn = FALSE;
469     return TRUE;
470     }
471     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
472     KanjiIn = FALSE;
473     }
474     else if ((b==CR) && Wrap) {
475     CarriageReturn(FALSE);
476     LineFeed(LF,FALSE);
477     Wrap = FALSE;
478     }
479     }
480    
481     if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
482     Kanji = b << 8;
483     KanjiIn = TRUE;
484     return TRUE;
485     }
486    
487     if (b<=US) {
488     ParseControl(b);
489     }
490     else if (b==0x20) {
491     PutChar(b);
492     }
493     else if ((b>=0x21) && (b<=0x7E)) {
494     // if (Gn[Glr[0]] == IdKatakana) {
495     // b = b | 0x80;
496     // }
497     PutChar(b);
498     }
499     else if (b==0x7f) {
500     return TRUE;
501     }
502     else if ((0x80<=b) && (b<=0x9F)) {
503     ParseControl(b);
504     }
505     else if (b==0xA0) {
506     PutChar(0x20);
507     }
508     else if ((b>=0xA1) && (b<=0xFE)) {
509     if (w->Gn[w->Glr[1]] == IdASCII) {
510     b = b & 0x7f;
511     }
512     PutChar(b);
513     }
514     else {
515     PutChar(b);
516     }
517    
518     return TRUE;
519     }
520    
521     static void ParseASCII(BYTE b)
522     {
523     if (SSflag) {
524     PutChar(b);
525     SSflag = FALSE;
526     return;
527     }
528    
529     if (b<=US) {
530     ParseControl(b);
531     } else if ((b>=0x20) && (b<=0x7E)) {
532 zmatsuo 10760 PutU32(b);
533 zmatsuo 10755 } else if ((b==0x8E) || (b==0x8F)) {
534 zmatsuo 10763 PutU32(REPLACEMENT_CHARACTER);
535 zmatsuo 10755 } else if ((b>=0x80) && (b<=0x9F)) {
536     ParseControl(b);
537     } else if (b>=0xA0) {
538 zmatsuo 10760 PutU32(b);
539 zmatsuo 10755 }
540     }
541    
542 zmatsuo 10764 static void PutReplacementChr(VttermKanjiWork *w, const BYTE *ptr, size_t len, BOOL fallback)
543 zmatsuo 10763 {
544     const char32_t replacement_char = w->replacement_char;
545     int i;
546     for (i = 0; i < len; i++) {
547     BYTE c = *ptr++;
548 zmatsuo 10764 if (fallback) {
549     // fallback ISO8859-1
550     PutU32(c);
551 zmatsuo 10763 }
552     else {
553 zmatsuo 10764 // fallback������
554     if (c < 0x80) {
555     // �s����UTF-8��������������0x80�������������A
556     // 1������UTF-8�������������������\������
557     ParseASCII(c);
558     }
559     else {
560     PutU32(replacement_char);
561     }
562 zmatsuo 10763 }
563     }
564     }
565    
566 zmatsuo 10755 // UTF-8�����M�f�[�^����������
567     // returns TRUE if b is processed
568     // (actually allways returns TRUE)
569     static BOOL ParseFirstUTF8(BYTE b)
570     {
571 zmatsuo 10763 VttermKanjiWork *w = &KanjiWork;
572 zmatsuo 10766 char32_t code;
573 zmatsuo 10755
574 zmatsuo 10763 if (Fallbacked) {
575     BOOL r = ParseFirstJP(b);
576     Fallbacked = FALSE;
577     return r;
578 zmatsuo 10755 }
579    
580     // UTF-8�G���R�[�h
581 zmatsuo 10766 // The Unicode Standard Chapter 3
582     // Table 3-7. Well-Formed UTF-8 Byte Sequences
583     // | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
584     // | U+0000..U+007F | 00..7F | | | |
585     // | U+0080..U+07FF | C2..DF | 80..BF | | |
586     // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
587     // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
588     // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
589     // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
590     // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
591     // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
592     // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
593 zmatsuo 10755 // UTF-8���f�R�[�h������������
594     // - 1byte��
595 zmatsuo 10766 // - 0x00 - 0x7f ok
596     // - 0x80 - 0xc1 ng
597     // - 0xc2 - 0xf4 ok
598     // - 0xf5 - 0xff ng
599 zmatsuo 10755 // - 2byte�����~
600 zmatsuo 10766 // - 0x00 - 0x7f ng
601     // - 0x80 - 0xbf ok
602     // - 0xc0 - 0xff ng
603     // - 2byte�����O
604     // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
605     // - 1byte == 0xed ������ 0x80 - 0x9f����ok
606     // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
607     // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
608 zmatsuo 10763 recheck:
609 zmatsuo 10755 // 1byte(7bit)
610 zmatsuo 10767 if (w->count == 0) {
611 zmatsuo 10766 if (b <= 0x7f) {
612 zmatsuo 10755 // 1byte(7bit)
613     // 0x7f����, �������A���������o��
614     ParseASCII(b);
615     return TRUE;
616     }
617 zmatsuo 10766 if (0xc2 <= b && b <= 0xf4) {
618     // 1byte������
619 zmatsuo 10767 w->buf[w->count++] = b;
620 zmatsuo 10755 return TRUE;
621     }
622    
623 zmatsuo 10766 // UTF-8��1byte���o���������R�[�h������
624     if (ts.FallbackToCP932) {
625     // fallback��������
626     if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
627     // ���{�������� && Shift_JIS 1byte��
628     // Shift_JIS �� fallback
629     Fallbacked = TRUE;
630     ConvJIS = FALSE;
631     Kanji = b << 8;
632     KanjiIn = TRUE;
633     return TRUE;
634 zmatsuo 10755 }
635 zmatsuo 10766 // fallback ISO8859-1
636     PutU32(b);
637     return TRUE;
638 zmatsuo 10755 }
639     else {
640 zmatsuo 10766 // fallback������, �s������������
641 zmatsuo 10767 w->buf[0] = b;
642     PutReplacementChr(w, w->buf, 1, FALSE);
643 zmatsuo 10755 }
644 zmatsuo 10766 return TRUE;
645 zmatsuo 10755 }
646    
647 zmatsuo 10764 // 2byte���~����?
648 zmatsuo 10766 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
649     // �s��������, (����2bit�� 0b10xx_xxxx ��������)
650 zmatsuo 10767 PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932);
651     w->count = 0;
652 zmatsuo 10764 goto recheck;
653     }
654    
655 zmatsuo 10755 // 2byte�����~����
656 zmatsuo 10767 w->buf[w->count++] = b;
657 zmatsuo 10755
658 zmatsuo 10766 // 2byte(11bit)
659 zmatsuo 10767 if (w->count == 2) {
660     if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf)
661 zmatsuo 10766 // 5bit + 6bit
662 zmatsuo 10767 code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f);
663 zmatsuo 10766 PutU32(code);
664 zmatsuo 10767 w->count = 0;
665 zmatsuo 10755 return TRUE;
666     }
667 zmatsuo 10766 return TRUE;
668     }
669    
670     // 3byte(16bit)
671 zmatsuo 10767 if (w->count == 3) {
672     if ((w->buf[0] & 0xf0) == 0xe0) {
673     if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) ||
674     (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) {
675 zmatsuo 10766 // �s���� UTF-8
676 zmatsuo 10767 PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932);
677     w->count = 0;
678 zmatsuo 10766 goto recheck;
679     }
680 zmatsuo 10755 // 4bit + 6bit + 6bit
681 zmatsuo 10767 code = ((w->buf[0] & 0xf) << 12);
682     code |= ((w->buf[1] & 0x3f) << 6);
683     code |= ((w->buf[2] & 0x3f));
684 zmatsuo 10755 PutU32(code);
685 zmatsuo 10767 w->count = 0;
686 zmatsuo 10755 return TRUE;
687     }
688 zmatsuo 10766 return TRUE;
689 zmatsuo 10755 }
690    
691     // 4byte(21bit)
692 zmatsuo 10767 assert(w->count == 4);
693     assert((w->buf[0] & 0xf8) == 0xf0);
694     if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) ||
695     (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) {
696 zmatsuo 10766 // �s���� UTF-8
697 zmatsuo 10767 PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932);
698     w->count = 0;
699 zmatsuo 10766 goto recheck;
700 zmatsuo 10755 }
701 zmatsuo 10766 // 3bit + 6bit + 6bit + 6bit
702 zmatsuo 10767 code = ((w->buf[0] & 0x07) << 18);
703     code |= ((w->buf[1] & 0x3f) << 12);
704     code |= ((w->buf[2] & 0x3f) << 6);
705     code |= (w->buf[3] & 0x3f);
706 zmatsuo 10766 PutU32(code);
707 zmatsuo 10767 w->count = 0;
708 zmatsuo 10755 return TRUE;
709     }
710    
711     static BOOL ParseFirstRus(BYTE b)
712     // returns if b is processed
713     {
714 zmatsuo 10756 // CP1251������
715     BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
716     // CP1251->Unicode
717     unsigned long u32 = MBCP_UTF32(c, 1251);
718     PutU32(u32);
719     return TRUE;
720 zmatsuo 10755 }
721    
722     static BOOL ParseEnglish(BYTE b)
723     {
724     unsigned short u16 = 0;
725     int part = KanjiCodeToISO8859Part(ts.KanjiCode);
726     int r = UnicodeFromISO8859(part, b, &u16);
727     if (r == 0) {
728     return FALSE;
729     }
730     if (u16 < 0x100) {
731     ParseASCII((BYTE)u16);
732     }
733     else {
734     PutU32(u16);
735     }
736     return TRUE;
737     }
738    
739     void ParseFirst(BYTE b) {
740     switch (ts.Language) {
741     case IdUtf8:
742     ParseFirstUTF8(b);
743     return;
744    
745     case IdJapanese:
746     switch (ts.KanjiCode) {
747     case IdUTF8:
748     if (ParseFirstUTF8(b)) {
749     return;
750     }
751     break;
752     default:
753     if (ParseFirstJP(b)) {
754     return;
755     }
756     }
757     break;
758    
759     case IdKorean:
760     switch (ts.KanjiCode) {
761     case IdUTF8:
762     if (ParseFirstUTF8(b)) {
763     return;
764     }
765     break;
766     default:
767     if (ParseFirstKR(b)) {
768     return;
769     }
770     }
771     break;
772    
773     case IdRussian:
774     if (ParseFirstRus(b)) {
775     return;
776     }
777     break;
778    
779     case IdChinese:
780     switch (ts.KanjiCode) {
781     case IdUTF8:
782     if (ParseFirstUTF8(b)) {
783     return;
784     }
785     break;
786     default:
787     if (ParseFirstCn(b)) {
788     return;
789     }
790     }
791     break;
792     case IdEnglish: {
793     if (ParseEnglish(b)) {
794     return;
795     }
796     break;
797     }
798     }
799    
800     if (SSflag) {
801     PutChar(b);
802     SSflag = FALSE;
803     return;
804     }
805    
806     if (b<=US)
807     ParseControl(b);
808     else if ((b>=0x20) && (b<=0x7E))
809     PutChar(b);
810     else if ((b>=0x80) && (b<=0x9F))
811     ParseControl(b);
812     else if (b>=0xA0)
813     PutChar(b);
814     }
815    
816     /**
817     * �w��(Designate)
818     *
819     * @param Gn 0/1/2/3 = G0/G1/G2/G3
820     * @param codeset IdASCII 0
821     * IdKatakana 1
822     * IdKanji 2
823     * IdSpecial 3
824     */
825     void CharSet2022Designate(int gn, int cs)
826     {
827     VttermKanjiWork *w = &KanjiWork;
828     w->Gn[gn] = cs;
829     }
830    
831     /**
832     * �����o��(Invoke)
833     * @param glr 0/1 = GL/GR (Locking shift�������L��)
834     * @param gn 0/1/2/3 = G0/G1/G2/G3
835     * @param single_shift FALSE Locking shift
836     * TRUE Single shift
837     */
838     void CharSet2022Invoke(int glr, int gn, BOOL single_shift)
839     {
840     VttermKanjiWork *w = &KanjiWork;
841     if (single_shift == FALSE) {
842     // Locking shift
843     w->Glr[glr] = gn;
844     }
845     else {
846     // Single shift
847     GLtmp = gn;
848     SSflag = TRUE;
849     }
850     }
851    
852     /**
853     * DEC�����t�H���g(Tera Special font)
854     * 0140(0x60) ... 0176(0x7f) ���r�����A�T�C������������
855 zmatsuo 10760 * (0xe0) ... (0xff) ��?
856 zmatsuo 10755 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
857     * about/emulations.html
858     *
859     * @param b �R�[�h
860 zmatsuo 10760 * @retval TRUE IdSpecial
861     * @retval FALSE IdSpecial��������
862 zmatsuo 10755 */
863     BOOL CharSetIsSpecial(BYTE b)
864     {
865     VttermKanjiWork *w = &KanjiWork;
866     BOOL SpecialNew = FALSE;
867    
868     if ((b>0x5F) && (b<0x80)) {
869     if (SSflag)
870     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
871     else
872     SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial);
873     }
874     else if (b>0xDF) {
875     if (SSflag)
876     SpecialNew = (w->Gn[GLtmp]==IdSpecial);
877     else
878     SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial);
879     }
880    
881     return SpecialNew;
882     }
883    
884     static void CharSetSaveStateLow(CharSetState *state, const VttermKanjiWork *w)
885     {
886     int i;
887     state->infos[0] = w->Glr[0];
888     state->infos[1] = w->Glr[1];
889     for (i=0 ; i<=3; i++) {
890     state->infos[2 + i] = w->Gn[i];
891     }
892     }
893    
894     /**
895     * ��������������
896     */
897     void CharSetSaveState(CharSetState *state)
898     {
899     VttermKanjiWork *w = &KanjiWork;
900     CharSetSaveStateLow(state, w);
901     }
902    
903     /**
904     * ���������A����
905     */
906     void CharSetLoadState(const CharSetState *state)
907     {
908     VttermKanjiWork *w = &KanjiWork;
909     int i;
910     w->Glr[0] = state->infos[0];
911     w->Glr[1] = state->infos[1];
912     for (i=0 ; i<=3; i++) {
913     w->Gn[i] = state->infos[2 + i];
914     }
915     }
916 zmatsuo 10763
917     /**
918     * �t�H�[���o�b�N���I��
919     * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
920     *
921     */
922     void CharSetFallbackFinish(void)
923     {
924     Fallbacked = FALSE;
925     }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26