Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /trunk/teraterm/teraterm/charset.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10801 - (hide annotations) (download) (as text)
Fri Jul 7 16:10:06 2023 UTC (9 months ago) by zmatsuo
File MIME type: text/x-c++src
File size: 23761 byte(s)
UTF8Fallback を FallbackToCP932 に変更、TERATERM.INI に保存するようにした

- 日本語UTF-8以外ではfallbackしないように修正
  - 修正前はISO8859-1にfallbackしていた

ticket #48226
1 zmatsuo 10755 /*
2     * (C) 2023- TeraTerm Project
3     * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include "teraterm.h"
30     #include "tttypes.h"
31     #include <stdio.h>
32     #include <string.h>
33     #if !defined(_CRTDBG_MAP_ALLOC)
34     #define _CRTDBG_MAP_ALLOC
35     #endif
36     #include <stdlib.h>
37     #include <crtdbg.h>
38     #include <assert.h>
39    
40     #include "ttwinman.h"
41     #include "codeconv.h"
42     #include "unicode.h"
43     #include "language.h" // for JIS2SJIS()
44 zmatsuo 10763 #include "ttcstd.h"
45 zmatsuo 10773 #include "vtterm.h"
46 zmatsuo 10755
47     #include "charset.h"
48    
49 zmatsuo 10763 // Character displayed in place of an invalid UTF-8 byte sequence
50 zmatsuo 10800 #define REPLACEMENT_CHARACTER 0xfffd // REPLACEMENT CHARACTER
51     //#define REPLACEMENT_CHARACTER 0x2e2e // Reversed Question Mark (VT382)
52 zmatsuo 10763
53 zmatsuo 10782 typedef struct CharSetDataTag {
54 zmatsuo 10755 /* GL, GR code group */
55     int Glr[2];
56     /* G0, G1, G2, G3 code group */
57     int Gn[4];
58 zmatsuo 10782 /* GL for single shift 2/3 */
59     int GLtmp;
60     /* single shift 2/3 flag */
61     BOOL SSflag;
62 zmatsuo 10763 //
63     char32_t replacement_char;
64 zmatsuo 10767 // UTF-8 work
65     BYTE buf[4];
66     int count;
67 zmatsuo 10782 BOOL Fallbacked;
68 zmatsuo 10755
69 zmatsuo 10782 // MBCS
70     BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
71     WORD Kanji;
72 zmatsuo 10755
73 zmatsuo 10782 // EUC
74     BOOL EUCkanaIn;
75     BOOL EUCsupIn;
76     int EUCcount;
77    
78     /* JIS -> SJIS conversion flag */
79     BOOL ConvJIS;
80     BYTE DebugFlag;
81    
82     // Operations
83     CharSetOp Op;
84     void *ClientData;
85     } CharSetData;
86    
87 zmatsuo 10770 static BOOL IsC0(char32_t b)
88     {
89     return (b <= US);
90     }
91    
92     static BOOL IsC1(char32_t b)
93     {
94     return ((b>=0x80) && (b<=0x9F));
95     }
96    
97     /**
98 zmatsuo 10755 * ISO2022�p���[�N������������
99     */
100 zmatsuo 10782 static void CharSetInit2(CharSetData *w)
101 zmatsuo 10755 {
102     if (ts.Language==IdJapanese) {
103     w->Gn[0] = IdASCII;
104     w->Gn[1] = IdKatakana;
105     w->Gn[2] = IdKatakana;
106     w->Gn[3] = IdKanji;
107     w->Glr[0] = 0;
108     if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
109     w->Glr[1] = 2; // 8-bit katakana
110     else
111     w->Glr[1] = 3;
112     }
113     else {
114     w->Gn[0] = IdASCII;
115     w->Gn[1] = IdSpecial;
116     w->Gn[2] = IdASCII;
117     w->Gn[3] = IdASCII;
118     w->Glr[0] = 0;
119     w->Glr[1] = 0;
120     }
121     }
122    
123     /**
124     * �������A���[�N������������
125     */
126 zmatsuo 10782 CharSetData *CharSetInit(const CharSetOp *op, void *client_data)
127 zmatsuo 10755 {
128 zmatsuo 10782 CharSetData *w = (CharSetData *)calloc(sizeof(*w), 1);
129     if (w == NULL) {
130     return NULL;
131     }
132 zmatsuo 10763
133 zmatsuo 10782 w->Op = *op;
134     w->ClientData = client_data;
135    
136 zmatsuo 10763 CharSetInit2(w);
137 zmatsuo 10782 w->GLtmp = 0;
138     w->SSflag = FALSE;
139 zmatsuo 10763
140 zmatsuo 10782 w->DebugFlag = DEBUG_FLAG_NONE;
141    
142 zmatsuo 10763 w->replacement_char = REPLACEMENT_CHARACTER;
143 zmatsuo 10782 w->SSflag = FALSE;
144 zmatsuo 10755
145 zmatsuo 10782 w->KanjiIn = FALSE;
146     w->EUCkanaIn = FALSE;
147     w->EUCsupIn = FALSE;
148     w->ConvJIS = FALSE;
149     w->Fallbacked = FALSE;
150    
151     return w;
152 zmatsuo 10755 }
153    
154 zmatsuo 10782 void CharSetFinish(CharSetData *w)
155     {
156     assert(w != NULL);
157     free(w);
158     }
159    
160 zmatsuo 10755 /**
161     * 1byte���`�F�b�N
162     */
163     static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
164     {
165     switch (lang) {
166     case IdKorean:
167 zmatsuo 10779 return __ismbblead(b, 949);
168 zmatsuo 10755 case IdChinese:
169     if (kanji_code == IdCnGB2312) {
170     return __ismbblead(b, 936);
171     }
172     else if (ts.KanjiCode == IdCnBig5) {
173     return __ismbblead(b, 950);
174     }
175     break;
176     default:
177     assert(FALSE);
178     break;
179     }
180     assert(FALSE);
181     return FALSE;
182     }
183 zmatsuo 10763
184 zmatsuo 10755 /**
185 zmatsuo 10763 * Double-byte Character Sets
186     * SJIS��1byte��?
187     *
188     * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
189     * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
190     */
191     static BOOL ismbbleadSJIS(BYTE b)
192     {
193     if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
194     return TRUE;
195     }
196     return FALSE;
197     }
198    
199     /**
200 zmatsuo 10755 * ts.Language == IdJapanese ��
201     * 1byte���`�F�b�N
202     */
203 zmatsuo 10782 static BOOL CheckKanji(CharSetData *w, BYTE b)
204 zmatsuo 10755 {
205     BOOL Check;
206    
207     if (ts.Language!=IdJapanese)
208     return FALSE;
209    
210 zmatsuo 10782 w->ConvJIS = FALSE;
211 zmatsuo 10755
212     if (ts.KanjiCode==IdSJIS ||
213 zmatsuo 10801 (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) {
214 zmatsuo 10759 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
215 zmatsuo 10782 w->Fallbacked = TRUE;
216 zmatsuo 10755 return TRUE; // SJIS kanji
217     }
218     if ((0xa1<=b) && (b<=0xdf)) {
219     return FALSE; // SJIS katakana
220     }
221     }
222    
223     if ((b>=0x21) && (b<=0x7e)) {
224     Check = (w->Gn[w->Glr[0]] == IdKanji);
225 zmatsuo 10782 w->ConvJIS = Check;
226 zmatsuo 10755 }
227     else if ((b>=0xA1) && (b<=0xFE)) {
228     Check = (w->Gn[w->Glr[1]] == IdKanji);
229     if (ts.KanjiCode==IdEUC) {
230     Check = TRUE;
231     }
232     else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
233     Check = FALSE; // 8-bit katakana
234     }
235 zmatsuo 10782 w->ConvJIS = Check;
236 zmatsuo 10755 }
237     else {
238     Check = FALSE;
239     }
240    
241     return Check;
242     }
243    
244 zmatsuo 10782 static BOOL ParseFirstJP(CharSetData *w, BYTE b)
245 zmatsuo 10755 // returns TRUE if b is processed
246     // (actually allways returns TRUE)
247     {
248 zmatsuo 10782 if (w->KanjiIn) {
249     if (((! w->ConvJIS) && (0x3F<b) && (b<0xFD)) ||
250     (w->ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
251 zmatsuo 10759 ((0xa0<b) && (b<0xff)) )) )
252 zmatsuo 10755 {
253 zmatsuo 10758 unsigned long u32;
254 zmatsuo 10782 w->Kanji = w->Kanji + b;
255     if (w->ConvJIS) {
256 zmatsuo 10758 // JIS -> Shift_JIS(CP932)
257 zmatsuo 10782 w->Kanji = JIS2SJIS((WORD)(w->Kanji & 0x7f7f));
258 zmatsuo 10758 }
259 zmatsuo 10782 u32 = CP932ToUTF32(w->Kanji);
260     w->Op.PutU32(u32, w->ClientData);
261     w->KanjiIn = FALSE;
262 zmatsuo 10755 return TRUE;
263     }
264     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
265 zmatsuo 10782 w->KanjiIn = FALSE;
266 zmatsuo 10755 }
267     }
268    
269 zmatsuo 10782 if (w->SSflag) {
270     if (w->Gn[w->GLtmp] == IdKanji) {
271     w->Kanji = b << 8;
272     w->KanjiIn = TRUE;
273     w->SSflag = FALSE;
274 zmatsuo 10755 return TRUE;
275     }
276 zmatsuo 10782 else if (w->Gn[w->GLtmp] == IdKatakana) {
277 zmatsuo 10755 b = b | 0x80;
278     }
279    
280 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
281     w->SSflag = FALSE;
282 zmatsuo 10755 return TRUE;
283     }
284    
285 zmatsuo 10782 if ((!w->EUCsupIn) && (!w->EUCkanaIn) && (!w->KanjiIn) && CheckKanji(w, b)) {
286     w->Kanji = b << 8;
287     w->KanjiIn = TRUE;
288 zmatsuo 10755 return TRUE;
289     }
290    
291     if (b<=US) {
292 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
293 zmatsuo 10755 }
294     else if (b==0x20) {
295 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
296 zmatsuo 10755 }
297     else if ((b>=0x21) && (b<=0x7E)) {
298 zmatsuo 10782 if (w->EUCsupIn) {
299     w->EUCcount--;
300     w->EUCsupIn = (w->EUCcount==0);
301 zmatsuo 10755 return TRUE;
302     }
303    
304 zmatsuo 10782 if ((w->Gn[w->Glr[0]] == IdKatakana) || w->EUCkanaIn) {
305 zmatsuo 10755 b = b | 0x80;
306 zmatsuo 10782 w->EUCkanaIn = FALSE;
307 zmatsuo 10755 {
308     // b��sjis�����p�J�^�J�i
309     unsigned long u32 = CP932ToUTF32(b);
310 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
311 zmatsuo 10755 }
312     return TRUE;
313     }
314 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
315 zmatsuo 10755 }
316     else if (b==0x7f) {
317     return TRUE;
318     }
319     else if ((b>=0x80) && (b<=0x8D)) {
320 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
321 zmatsuo 10755 }
322     else if (b==0x8E) { // SS2
323     switch (ts.KanjiCode) {
324     case IdEUC:
325     if (ts.ISO2022Flag & ISO2022_SS2) {
326 zmatsuo 10782 w->EUCkanaIn = TRUE;
327 zmatsuo 10755 }
328     break;
329     case IdUTF8:
330 zmatsuo 10800 w->Op.PutU32(w->replacement_char, w->ClientData);
331 zmatsuo 10755 break;
332     default:
333 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
334 zmatsuo 10755 }
335     }
336     else if (b==0x8F) { // SS3
337     switch (ts.KanjiCode) {
338     case IdEUC:
339     if (ts.ISO2022Flag & ISO2022_SS3) {
340 zmatsuo 10782 w->EUCcount = 2;
341     w->EUCsupIn = TRUE;
342 zmatsuo 10755 }
343     break;
344     case IdUTF8:
345 zmatsuo 10800 w->Op.PutU32(w->replacement_char, w->ClientData);
346 zmatsuo 10755 break;
347     default:
348 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
349 zmatsuo 10755 }
350     }
351     else if ((b>=0x90) && (b<=0x9F)) {
352 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
353 zmatsuo 10755 }
354     else if (b==0xA0) {
355 zmatsuo 10782 w->Op.PutU32(0x20, w->ClientData);
356 zmatsuo 10755 }
357     else if ((b>=0xA1) && (b<=0xFE)) {
358 zmatsuo 10782 if (w->EUCsupIn) {
359     w->EUCcount--;
360     w->EUCsupIn = (w->EUCcount==0);
361 zmatsuo 10755 return TRUE;
362     }
363    
364     if ((w->Gn[w->Glr[1]] != IdASCII) ||
365 zmatsuo 10782 ((ts.KanjiCode==IdEUC) && w->EUCkanaIn) ||
366 zmatsuo 10755 (ts.KanjiCode==IdSJIS) ||
367 zmatsuo 10759 ((ts.KanjiCode==IdJIS) &&
368     (ts.JIS7Katakana==0) &&
369     ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
370 zmatsuo 10755 // b��sjis�����p�J�^�J�i
371     unsigned long u32 = CP932ToUTF32(b);
372 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
373 zmatsuo 10755 } else {
374     if (w->Gn[w->Glr[1]] == IdASCII) {
375     b = b & 0x7f;
376     }
377 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
378 zmatsuo 10755 }
379 zmatsuo 10782 w->EUCkanaIn = FALSE;
380 zmatsuo 10755 }
381     else {
382 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
383 zmatsuo 10755 }
384    
385     return TRUE;
386     }
387    
388 zmatsuo 10782 static BOOL ParseFirstKR(CharSetData *w, BYTE b)
389 zmatsuo 10755 // returns TRUE if b is processed
390     // (actually allways returns TRUE)
391     {
392 zmatsuo 10782 if (w->KanjiIn) {
393 zmatsuo 10759 if (((0x41<=b) && (b<=0x5A)) ||
394     ((0x61<=b) && (b<=0x7A)) ||
395     ((0x81<=b) && (b<=0xFE)))
396 zmatsuo 10755 {
397 zmatsuo 10758 unsigned long u32 = 0;
398 zmatsuo 10768 if (ts.KanjiCode == IdKoreanCP949) {
399 zmatsuo 10779 // CP949
400 zmatsuo 10782 w->Kanji = w->Kanji + b;
401     u32 = MBCP_UTF32(w->Kanji, 949);
402 zmatsuo 10758 }
403     else {
404     assert(FALSE);
405     }
406 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
407     w->KanjiIn = FALSE;
408 zmatsuo 10755 return TRUE;
409     }
410     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
411 zmatsuo 10782 w->KanjiIn = FALSE;
412 zmatsuo 10755 }
413     }
414    
415 zmatsuo 10782 if ((!w->KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
416     w->Kanji = b << 8;
417     w->KanjiIn = TRUE;
418 zmatsuo 10755 return TRUE;
419     }
420    
421     if (b<=US) {
422 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
423 zmatsuo 10755 }
424     else if (b==0x20) {
425 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
426 zmatsuo 10755 }
427     else if ((b>=0x21) && (b<=0x7E)) {
428     // if (Gn[Glr[0]] == IdKatakana) {
429     // b = b | 0x80;
430     // }
431 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
432 zmatsuo 10755 }
433     else if (b==0x7f) {
434     return TRUE;
435     }
436     else if ((0x80<=b) && (b<=0x9F)) {
437 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
438 zmatsuo 10755 }
439     else if (b==0xA0) {
440 zmatsuo 10782 w->Op.PutU32(0x20, w->ClientData);
441 zmatsuo 10755 }
442     else if ((b>=0xA1) && (b<=0xFE)) {
443     if (w->Gn[w->Glr[1]] == IdASCII) {
444     b = b & 0x7f;
445     }
446 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
447 zmatsuo 10755 }
448     else {
449 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
450 zmatsuo 10755 }
451    
452     return TRUE;
453     }
454    
455 zmatsuo 10782 static BOOL ParseFirstCn(CharSetData *w, BYTE b)
456 zmatsuo 10755 // returns TRUE if b is processed
457     // (actually allways returns TRUE)
458     {
459 zmatsuo 10782 if (w->KanjiIn) {
460 zmatsuo 10755 // TODO
461 zmatsuo 10759 if (((0x40<=b) && (b<=0x7e)) ||
462     ((0xa1<=b) && (b<=0xFE)))
463 zmatsuo 10755 {
464 zmatsuo 10758 unsigned long u32 = 0;
465 zmatsuo 10782 w->Kanji = w->Kanji + b;
466 zmatsuo 10758 if (ts.KanjiCode == IdCnGB2312) {
467     // CP936 GB2312
468 zmatsuo 10782 u32 = MBCP_UTF32(w->Kanji, 936);
469 zmatsuo 10758 }
470     else if (ts.KanjiCode == IdCnBig5) {
471     // CP950 Big5
472 zmatsuo 10782 u32 = MBCP_UTF32(w->Kanji, 950);
473 zmatsuo 10758 }
474     else {
475     assert(FALSE);
476     }
477 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
478     w->KanjiIn = FALSE;
479 zmatsuo 10755 return TRUE;
480     }
481     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
482 zmatsuo 10782 w->KanjiIn = FALSE;
483 zmatsuo 10755 }
484     }
485    
486 zmatsuo 10782 if ((!w->KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
487     w->Kanji = b << 8;
488     w->KanjiIn = TRUE;
489 zmatsuo 10755 return TRUE;
490     }
491    
492     if (b<=US) {
493 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
494 zmatsuo 10755 }
495     else if (b==0x20) {
496 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
497 zmatsuo 10755 }
498     else if ((b>=0x21) && (b<=0x7E)) {
499     // if (Gn[Glr[0]] == IdKatakana) {
500     // b = b | 0x80;
501     // }
502 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
503 zmatsuo 10755 }
504     else if (b==0x7f) {
505     return TRUE;
506     }
507     else if ((0x80<=b) && (b<=0x9F)) {
508 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
509 zmatsuo 10755 }
510     else if (b==0xA0) {
511 zmatsuo 10782 w->Op.PutU32(0x20, w->ClientData);
512 zmatsuo 10755 }
513     else if ((b>=0xA1) && (b<=0xFE)) {
514     if (w->Gn[w->Glr[1]] == IdASCII) {
515     b = b & 0x7f;
516     }
517 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
518 zmatsuo 10755 }
519     else {
520 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
521 zmatsuo 10755 }
522    
523     return TRUE;
524     }
525    
526 zmatsuo 10782 static void ParseASCII(CharSetData *w, BYTE b)
527 zmatsuo 10755 {
528 zmatsuo 10782 if (w->SSflag) {
529     w->Op.PutU32(b, w->ClientData);
530     w->SSflag = FALSE;
531 zmatsuo 10755 return;
532     }
533    
534     if (b<=US) {
535 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
536 zmatsuo 10755 } else if ((b>=0x20) && (b<=0x7E)) {
537 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
538 zmatsuo 10755 } else if ((b==0x8E) || (b==0x8F)) {
539 zmatsuo 10800 w->Op.PutU32(w->replacement_char, w->ClientData);
540 zmatsuo 10755 } else if ((b>=0x80) && (b<=0x9F)) {
541 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
542 zmatsuo 10755 } else if (b>=0xA0) {
543 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
544 zmatsuo 10755 }
545     }
546    
547 zmatsuo 10770 /**
548     * REPLACEMENT_CHARACTER ���\��
549     * UTF-8 �f�R�[�h�����g�p
550     */
551 zmatsuo 10782 static void PutReplacementChr(CharSetData *w, const BYTE *ptr, size_t len, BOOL fallback)
552 zmatsuo 10763 {
553     const char32_t replacement_char = w->replacement_char;
554     int i;
555     for (i = 0; i < len; i++) {
556     BYTE c = *ptr++;
557 zmatsuo 10789 assert(!IsC0(c));
558 zmatsuo 10764 if (fallback) {
559     // fallback ISO8859-1
560 zmatsuo 10782 w->Op.PutU32(c, w->ClientData);
561 zmatsuo 10763 }
562     else {
563 zmatsuo 10764 // fallback������
564     if (c < 0x80) {
565     // �s����UTF-8��������������0x80�������������A
566     // 1������UTF-8�������������������\������
567 zmatsuo 10782 w->Op.PutU32(c, w->ClientData);
568 zmatsuo 10764 }
569     else {
570 zmatsuo 10782 w->Op.PutU32(replacement_char, w->ClientData);
571 zmatsuo 10764 }
572 zmatsuo 10763 }
573     }
574     }
575    
576 zmatsuo 10770 /**
577     * UTF-8�����M�f�[�^����������
578     *
579     * returns TRUE if b is processed
580     */
581 zmatsuo 10782 static BOOL ParseFirstUTF8(CharSetData *w, BYTE b)
582 zmatsuo 10755 {
583 zmatsuo 10766 char32_t code;
584 zmatsuo 10755
585 zmatsuo 10782 if (w->Fallbacked) {
586     BOOL r = ParseFirstJP(w, b);
587     w->Fallbacked = FALSE;
588 zmatsuo 10763 return r;
589 zmatsuo 10755 }
590    
591     // UTF-8�G���R�[�h
592 zmatsuo 10766 // The Unicode Standard Chapter 3
593     // Table 3-7. Well-Formed UTF-8 Byte Sequences
594 zmatsuo 10777 // | Code Points | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
595     // | U+0000..U+007F | 00..7F | | | |
596     // | U+0080..U+07FF | C2..DF | 80..BF | | |
597     // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
598     // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
599     // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
600     // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
601     // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
602     // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
603     // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
604 zmatsuo 10755 // - 1byte��
605 zmatsuo 10766 // - 0x00 - 0x7f ok
606     // - 0x80 - 0xc1 ng
607     // - 0xc2 - 0xf4 ok
608     // - 0xf5 - 0xff ng
609 zmatsuo 10755 // - 2byte�����~
610 zmatsuo 10766 // - 0x00 - 0x7f ng
611     // - 0x80 - 0xbf ok
612     // - 0xc0 - 0xff ng
613     // - 2byte�����O
614     // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
615     // - 1byte == 0xed ������ 0x80 - 0x9f����ok
616     // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
617     // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
618 zmatsuo 10763 recheck:
619 zmatsuo 10755 // 1byte(7bit)
620 zmatsuo 10767 if (w->count == 0) {
621 zmatsuo 10770 if (IsC0(b)) {
622     // U+0000 .. U+001f
623     // C0��������, C0 Coontrols
624 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
625 zmatsuo 10755 return TRUE;
626     }
627 zmatsuo 10770 else if (b <= 0x7f) {
628     // 0x7f����, �������A���������o��
629 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
630 zmatsuo 10770 return TRUE;
631     }
632     else if (0xc2 <= b && b <= 0xf4) {
633 zmatsuo 10766 // 1byte������
634 zmatsuo 10767 w->buf[w->count++] = b;
635 zmatsuo 10755 return TRUE;
636     }
637    
638 zmatsuo 10770 // 0x80 - 0xc1, 0xf5 - 0xff
639 zmatsuo 10766 // UTF-8��1byte���o���������R�[�h������
640 zmatsuo 10801 if (ts.FallbackToCP932) {
641 zmatsuo 10766 // fallback��������
642     if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
643     // ���{�������� && Shift_JIS 1byte��
644     // Shift_JIS �� fallback
645 zmatsuo 10782 w->Fallbacked = TRUE;
646     w->ConvJIS = FALSE;
647     w->Kanji = b << 8;
648     w->KanjiIn = TRUE;
649 zmatsuo 10766 return TRUE;
650 zmatsuo 10755 }
651     }
652 zmatsuo 10801 // fallback������, �s������������
653     w->buf[0] = b;
654     PutReplacementChr(w, w->buf, 1, FALSE);
655 zmatsuo 10766 return TRUE;
656 zmatsuo 10755 }
657    
658 zmatsuo 10764 // 2byte���~����?
659 zmatsuo 10766 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
660     // �s��������, (����2bit�� 0b10xx_xxxx ��������)
661 zmatsuo 10801 PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932);
662 zmatsuo 10767 w->count = 0;
663 zmatsuo 10764 goto recheck;
664     }
665    
666 zmatsuo 10755 // 2byte�����~����
667 zmatsuo 10767 w->buf[w->count++] = b;
668 zmatsuo 10755
669 zmatsuo 10766 // 2byte(11bit)
670 zmatsuo 10767 if (w->count == 2) {
671     if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf)
672 zmatsuo 10766 // 5bit + 6bit
673 zmatsuo 10767 code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f);
674 zmatsuo 10770 if (IsC1(code)) {
675     // U+0080 .. u+009f
676     // C1��������, C1 Controls
677 zmatsuo 10782 w->Op.ParseControl((BYTE)code, w->ClientData);
678 zmatsuo 10770 }
679     else {
680 zmatsuo 10782 w->Op.PutU32(code, w->ClientData);
681 zmatsuo 10770 }
682 zmatsuo 10767 w->count = 0;
683 zmatsuo 10755 return TRUE;
684     }
685 zmatsuo 10766 return TRUE;
686     }
687    
688     // 3byte(16bit)
689 zmatsuo 10767 if (w->count == 3) {
690     if ((w->buf[0] & 0xf0) == 0xe0) {
691     if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) ||
692     (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) {
693 zmatsuo 10766 // �s���� UTF-8
694 zmatsuo 10801 PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932);
695 zmatsuo 10767 w->count = 0;
696 zmatsuo 10766 goto recheck;
697     }
698 zmatsuo 10755 // 4bit + 6bit + 6bit
699 zmatsuo 10767 code = ((w->buf[0] & 0xf) << 12);
700     code |= ((w->buf[1] & 0x3f) << 6);
701     code |= ((w->buf[2] & 0x3f));
702 zmatsuo 10782 w->Op.PutU32(code, w->ClientData);
703 zmatsuo 10767 w->count = 0;
704 zmatsuo 10755 return TRUE;
705     }
706 zmatsuo 10766 return TRUE;
707 zmatsuo 10755 }
708    
709     // 4byte(21bit)
710 zmatsuo 10767 assert(w->count == 4);
711     assert((w->buf[0] & 0xf8) == 0xf0);
712     if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) ||
713     (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) {
714 zmatsuo 10766 // �s���� UTF-8
715 zmatsuo 10801 PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932);
716 zmatsuo 10767 w->count = 0;
717 zmatsuo 10766 goto recheck;
718 zmatsuo 10755 }
719 zmatsuo 10766 // 3bit + 6bit + 6bit + 6bit
720 zmatsuo 10767 code = ((w->buf[0] & 0x07) << 18);
721     code |= ((w->buf[1] & 0x3f) << 12);
722     code |= ((w->buf[2] & 0x3f) << 6);
723     code |= (w->buf[3] & 0x3f);
724 zmatsuo 10782 w->Op.PutU32(code, w->ClientData);
725 zmatsuo 10767 w->count = 0;
726 zmatsuo 10755 return TRUE;
727     }
728    
729 zmatsuo 10782 static BOOL ParseFirstRus(CharSetData *w, BYTE b)
730 zmatsuo 10755 // returns if b is processed
731     {
732 zmatsuo 10770 if (IsC0(b)) {
733 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
734 zmatsuo 10770 return TRUE;
735     }
736 zmatsuo 10756 // CP1251������
737     BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
738     // CP1251->Unicode
739     unsigned long u32 = MBCP_UTF32(c, 1251);
740 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
741 zmatsuo 10756 return TRUE;
742 zmatsuo 10755 }
743    
744 zmatsuo 10782 static BOOL ParseEnglish(CharSetData *w, BYTE b)
745 zmatsuo 10755 {
746     unsigned short u16 = 0;
747     int part = KanjiCodeToISO8859Part(ts.KanjiCode);
748     int r = UnicodeFromISO8859(part, b, &u16);
749     if (r == 0) {
750     return FALSE;
751     }
752     if (u16 < 0x100) {
753 zmatsuo 10782 ParseASCII(w, (BYTE)u16);
754 zmatsuo 10755 }
755     else {
756 zmatsuo 10782 w->Op.PutU32(u16, w->ClientData);
757 zmatsuo 10755 }
758     return TRUE;
759     }
760    
761 zmatsuo 10782 static void PutDebugChar(CharSetData *w, BYTE b)
762 zmatsuo 10771 {
763     int i;
764     BOOL svInsertMode, svAutoWrapMode;
765     TCharAttr svCharAttr;
766     TCharAttr char_attr;
767    
768     svInsertMode = TermGetInsertMode();
769     TermSetInsertMode(FALSE);
770     svAutoWrapMode = TermGetAutoWrapMode();
771     TermSetAutoWrapMode(TRUE);
772    
773     TermGetAttr(&svCharAttr);
774     char_attr = svCharAttr;
775     char_attr.Attr = AttrDefault;
776     TermSetAttr(&char_attr);
777    
778 zmatsuo 10782 if (w->DebugFlag==DEBUG_FLAG_HEXD) {
779 zmatsuo 10771 char buff[3];
780     _snprintf(buff, 3, "%02X", (unsigned int) b);
781    
782     for (i=0; i<2; i++)
783 zmatsuo 10782 w->Op.PutU32(buff[i], w->ClientData);
784     w->Op.PutU32(' ', w->ClientData);
785 zmatsuo 10771 }
786 zmatsuo 10782 else if (w->DebugFlag==DEBUG_FLAG_NORM) {
787 zmatsuo 10771
788     if ((b & 0x80) == 0x80) {
789     //UpdateStr();
790     char_attr.Attr = AttrReverse;
791     TermSetAttr(&char_attr);
792     b = b & 0x7f;
793     }
794    
795     if (b<=US) {
796 zmatsuo 10782 w->Op.PutU32('^', w->ClientData);
797     w->Op.PutU32((char)(b + 0x40), w->ClientData);
798 zmatsuo 10771 }
799     else if (b==DEL) {
800 zmatsuo 10782 w->Op.PutU32('<', w->ClientData);
801     w->Op.PutU32('D', w->ClientData);
802     w->Op.PutU32('E', w->ClientData);
803     w->Op.PutU32('L', w->ClientData);
804     w->Op.PutU32('>', w->ClientData);
805 zmatsuo 10771 }
806     else
807 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
808 zmatsuo 10771 }
809    
810     TermSetAttr(&char_attr);
811     TermSetInsertMode(svInsertMode);
812     TermSetAutoWrapMode(svAutoWrapMode);
813     }
814    
815 zmatsuo 10782 void ParseFirst(CharSetData *w, BYTE b)
816 zmatsuo 10771 {
817     WORD language = ts.Language;
818 zmatsuo 10782 if (w->DebugFlag != DEBUG_FLAG_NONE) {
819 zmatsuo 10771 language = IdDebug;
820     }
821    
822     switch (language) {
823 zmatsuo 10782 default:
824     assert(FALSE);
825     language = IdUtf8;
826     // FALLTHROUGH
827 zmatsuo 10771 case IdUtf8:
828 zmatsuo 10782 ParseFirstUTF8(w, b);
829 zmatsuo 10755 return;
830    
831 zmatsuo 10771 case IdJapanese:
832 zmatsuo 10755 switch (ts.KanjiCode) {
833 zmatsuo 10771 case IdUTF8:
834 zmatsuo 10782 if (ParseFirstUTF8(w, b)) {
835 zmatsuo 10755 return;
836     }
837     break;
838 zmatsuo 10771 default:
839 zmatsuo 10782 if (ParseFirstJP(w, b)) {
840 zmatsuo 10755 return;
841     }
842     }
843     break;
844    
845 zmatsuo 10771 case IdKorean:
846 zmatsuo 10755 switch (ts.KanjiCode) {
847 zmatsuo 10771 case IdUTF8:
848 zmatsuo 10782 if (ParseFirstUTF8(w, b)) {
849 zmatsuo 10755 return;
850     }
851     break;
852 zmatsuo 10771 default:
853 zmatsuo 10782 if (ParseFirstKR(w, b)) {
854 zmatsuo 10755 return;
855     }
856     }
857     break;
858    
859 zmatsuo 10771 case IdRussian:
860 zmatsuo 10782 if (ParseFirstRus(w, b)) {
861 zmatsuo 10755 return;
862     }
863     break;
864    
865     case IdChinese:
866     switch (ts.KanjiCode) {
867     case IdUTF8:
868 zmatsuo 10782 if (ParseFirstUTF8(w, b)) {
869 zmatsuo 10755 return;
870     }
871     break;
872     default:
873 zmatsuo 10782 if (ParseFirstCn(w, b)) {
874 zmatsuo 10755 return;
875     }
876     }
877     break;
878     case IdEnglish: {
879 zmatsuo 10782 if (ParseEnglish(w, b)) {
880 zmatsuo 10755 return;
881     }
882     break;
883     }
884 zmatsuo 10771 case IdDebug: {
885 zmatsuo 10782 PutDebugChar(w, b);
886 zmatsuo 10771 return;
887 zmatsuo 10755 }
888 zmatsuo 10771 }
889 zmatsuo 10755
890 zmatsuo 10782 if (w->SSflag) {
891     w->Op.PutU32(b, w->ClientData);
892     w->SSflag = FALSE;
893 zmatsuo 10755 return;
894     }
895    
896     if (b<=US)
897 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
898 zmatsuo 10755 else if ((b>=0x20) && (b<=0x7E))
899 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
900 zmatsuo 10755 else if ((b>=0x80) && (b<=0x9F))
901 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
902 zmatsuo 10755 else if (b>=0xA0)
903 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
904 zmatsuo 10755 }
905    
906     /**
907     * �w��(Designate)
908     *
909     * @param Gn 0/1/2/3 = G0/G1/G2/G3
910     * @param codeset IdASCII 0
911     * IdKatakana 1
912     * IdKanji 2
913     * IdSpecial 3
914     */
915 zmatsuo 10782 void CharSet2022Designate(CharSetData *w, int gn, int cs)
916 zmatsuo 10755 {
917     w->Gn[gn] = cs;
918     }
919    
920     /**
921     * �����o��(Invoke)
922 zmatsuo 10776 * @param shift
923 zmatsuo 10755 */
924 zmatsuo 10782 void CharSet2022Invoke(CharSetData *w, CharSet2022Shift shift)
925 zmatsuo 10755 {
926 zmatsuo 10776 switch (shift) {
927     case CHARSET_LS0:
928     // Locking Shift 0 (G0->GL)
929     w->Glr[0] = 0;
930     break;
931     case CHARSET_LS1:
932     // Locking Shift 1 (G1->GL)
933     w->Glr[0] = 1;
934     break;
935     case CHARSET_LS2:
936     // Locking Shift 2 (G2->GL)
937     w->Glr[0] = 2;
938     break;
939     case CHARSET_LS3:
940     // Locking Shift 3 (G3->GL)
941     w->Glr[0] = 3;
942     break;
943     case CHARSET_LS1R:
944     // Locking Shift 1 (G1->GR)
945     w->Glr[1] = 1;
946     break;
947     case CHARSET_LS2R:
948     // Locking Shift 2 (G2->GR)
949     w->Glr[1] = 2;
950     break;
951     case CHARSET_LS3R:
952     // Locking Shift 3 (G3->GR)
953     w->Glr[1] = 3;
954     break;
955     case CHARSET_SS2:
956     // Single Shift 2
957 zmatsuo 10782 w->GLtmp = 2;
958     w->SSflag = TRUE;
959 zmatsuo 10776 break;
960     case CHARSET_SS3:
961     // Single Shift 3
962 zmatsuo 10782 w->GLtmp = 3;
963     w->SSflag = TRUE;
964 zmatsuo 10776 break;
965     default:
966     assert(FALSE);
967     break;
968 zmatsuo 10755 }
969     }
970    
971     /**
972     * DEC�����t�H���g(Tera Special font)
973     * 0140(0x60) ... 0176(0x7f) ���r�����A�T�C������������
974 zmatsuo 10760 * (0xe0) ... (0xff) ��?
975 zmatsuo 10755 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
976     * about/emulations.html
977     *
978     * @param b �R�[�h
979 zmatsuo 10760 * @retval TRUE IdSpecial
980     * @retval FALSE IdSpecial��������
981 zmatsuo 10755 */
982 zmatsuo 10782 BOOL CharSetIsSpecial(CharSetData *w, BYTE b)
983 zmatsuo 10755 {
984     BOOL SpecialNew = FALSE;
985    
986     if ((b>0x5F) && (b<0x80)) {
987 zmatsuo 10782 if (w->SSflag)
988     SpecialNew = (w->Gn[w->GLtmp]==IdSpecial);
989 zmatsuo 10755 else
990     SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial);
991     }
992     else if (b>0xDF) {
993 zmatsuo 10782 if (w->SSflag)
994     SpecialNew = (w->Gn[w->GLtmp]==IdSpecial);
995 zmatsuo 10755 else
996     SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial);
997     }
998    
999     return SpecialNew;
1000     }
1001    
1002 zmatsuo 10782 static void CharSetSaveStateLow(CharSetState *state, const CharSetData *w)
1003 zmatsuo 10755 {
1004     int i;
1005     state->infos[0] = w->Glr[0];
1006     state->infos[1] = w->Glr[1];
1007     for (i=0 ; i<=3; i++) {
1008     state->infos[2 + i] = w->Gn[i];
1009     }
1010     }
1011    
1012     /**
1013     * ��������������
1014     */
1015 zmatsuo 10782 void CharSetSaveState(CharSetData *w, CharSetState *state)
1016 zmatsuo 10755 {
1017     CharSetSaveStateLow(state, w);
1018     }
1019    
1020     /**
1021     * ���������A����
1022     */
1023 zmatsuo 10782 void CharSetLoadState(CharSetData *w, const CharSetState *state)
1024 zmatsuo 10755 {
1025     int i;
1026     w->Glr[0] = state->infos[0];
1027     w->Glr[1] = state->infos[1];
1028     for (i=0 ; i<=3; i++) {
1029     w->Gn[i] = state->infos[2 + i];
1030     }
1031     }
1032 zmatsuo 10763
1033     /**
1034     * �t�H�[���o�b�N���I��
1035     * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
1036     *
1037     */
1038 zmatsuo 10782 void CharSetFallbackFinish(CharSetData *w)
1039 zmatsuo 10763 {
1040 zmatsuo 10782 w->Fallbacked = FALSE;
1041 zmatsuo 10763 }
1042 zmatsuo 10773
1043     /**
1044     * �f�o�O�o�����������[�h�����X����
1045     */
1046 zmatsuo 10782 void CharSetSetNextDebugMode(CharSetData *w)
1047 zmatsuo 10773 {
1048     // ts.DebugModes ���� tttypes.h �� DBGF_* �� OR ����������
1049     do {
1050 zmatsuo 10782 w->DebugFlag = (w->DebugFlag + 1) % DEBUG_FLAG_MAXD;
1051     } while (w->DebugFlag != DEBUG_FLAG_NONE && !((ts.DebugModes >> (w->DebugFlag - 1)) & 1));
1052 zmatsuo 10773 }
1053    
1054 zmatsuo 10782 BYTE CharSetGetDebugMode(CharSetData *w)
1055 zmatsuo 10773 {
1056 zmatsuo 10782 return w->DebugFlag;
1057 zmatsuo 10773 }
1058    
1059 zmatsuo 10782 void CharSetSetDebugMode(CharSetData *w, BYTE mode)
1060 zmatsuo 10773 {
1061 zmatsuo 10782 w->DebugFlag = mode % DEBUG_FLAG_MAXD;
1062 zmatsuo 10773 }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26