Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /trunk/teraterm/teraterm/charset.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10794 - (hide annotations) (download) (as text)
Sun Jul 2 16:01:11 2023 UTC (9 months, 1 week ago) by zmatsuo
File MIME type: text/x-c++src
File size: 23863 byte(s)
TERATERM.INI の UTF8Fallback キーの設定を読むよう修正

- TERATERM.INI の UTF8Fallback を追加
  - 従来の実験実装 FallbackToCP932 を拡張した設定
- FallbackToCP932 があった場合、FallbackToCP932の設定が使用される
  - ただし、UTF8Fallback もあった場合は、UTF8Fallback が優先される
- デフォルト off (従来と同じ)
- UTF8Fallback=onのとき
  - 不正な UTF-8 受信時、Shift_JIS または ISO8859-1 としてデコードする
  - 従来は Shift_JIS(CP932) としてデコードしていた
- read only で、書き込み時に保存されない
  - 従来の FallbackToCP932 と同じ
- ドキュメント追加
  - ただし、英語版は日本語のコピー

ticket #48226
1 zmatsuo 10755 /*
2     * (C) 2023- TeraTerm Project
3     * All rights reserved.
4     *
5     * Redistribution and use in source and binary forms, with or without
6     * modification, are permitted provided that the following conditions
7     * are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright
10     * notice, this list of conditions and the following disclaimer.
11     * 2. Redistributions in binary form must reproduce the above copyright
12     * notice, this list of conditions and the following disclaimer in the
13     * documentation and/or other materials provided with the distribution.
14     * 3. The name of the author may not be used to endorse or promote products
15     * derived from this software without specific prior written permission.
16     *
17     * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20     * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27     */
28    
29     #include "teraterm.h"
30     #include "tttypes.h"
31     #include <stdio.h>
32     #include <string.h>
33     #if !defined(_CRTDBG_MAP_ALLOC)
34     #define _CRTDBG_MAP_ALLOC
35     #endif
36     #include <stdlib.h>
37     #include <crtdbg.h>
38     #include <assert.h>
39    
40     #include "ttwinman.h"
41     #include "codeconv.h"
42     #include "unicode.h"
43     #include "language.h" // for JIS2SJIS()
44 zmatsuo 10763 #include "ttcstd.h"
45 zmatsuo 10773 #include "vtterm.h"
46 zmatsuo 10755
47     #include "charset.h"
48    
49 zmatsuo 10763 // Character displayed in place of an invalid UTF-8 value
50     #define REPLACEMENT_CHARACTER '?'
51     //#define REPLACEMENT_CHARACTER 0x2592
52     //#define REPLACEMENT_CHARACTER 0x20
53     //#define REPLACEMENT_CHARACTER 0xfffd
54    
55 zmatsuo 10782 typedef struct CharSetDataTag {
56 zmatsuo 10755 /* GL, GR code group */
57     int Glr[2];
58     /* G0, G1, G2, G3 code group */
59     int Gn[4];
60 zmatsuo 10782 /* GL for single shift 2/3 */
61     int GLtmp;
62     /* single shift 2/3 flag */
63     BOOL SSflag;
64 zmatsuo 10763 //
65     char32_t replacement_char;
66 zmatsuo 10767 // UTF-8 work
67     BYTE buf[4];
68     int count;
69 zmatsuo 10782 BOOL Fallbacked;
70 zmatsuo 10755
71 zmatsuo 10782 // MBCS
72     BOOL KanjiIn; // TRUE = MBCS��1byte�������M��������
73     WORD Kanji;
74 zmatsuo 10755
75 zmatsuo 10782 // EUC
76     BOOL EUCkanaIn;
77     BOOL EUCsupIn;
78     int EUCcount;
79    
80     /* JIS -> SJIS conversion flag */
81     BOOL ConvJIS;
82     BYTE DebugFlag;
83    
84     // Operations
85     CharSetOp Op;
86     void *ClientData;
87     } CharSetData;
88    
89 zmatsuo 10770 static BOOL IsC0(char32_t b)
90     {
91     return (b <= US);
92     }
93    
94     static BOOL IsC1(char32_t b)
95     {
96     return ((b>=0x80) && (b<=0x9F));
97     }
98    
99     /**
100 zmatsuo 10755 * ISO2022�p���[�N������������
101     */
102 zmatsuo 10782 static void CharSetInit2(CharSetData *w)
103 zmatsuo 10755 {
104     if (ts.Language==IdJapanese) {
105     w->Gn[0] = IdASCII;
106     w->Gn[1] = IdKatakana;
107     w->Gn[2] = IdKatakana;
108     w->Gn[3] = IdKanji;
109     w->Glr[0] = 0;
110     if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0))
111     w->Glr[1] = 2; // 8-bit katakana
112     else
113     w->Glr[1] = 3;
114     }
115     else {
116     w->Gn[0] = IdASCII;
117     w->Gn[1] = IdSpecial;
118     w->Gn[2] = IdASCII;
119     w->Gn[3] = IdASCII;
120     w->Glr[0] = 0;
121     w->Glr[1] = 0;
122     }
123     }
124    
125     /**
126     * �������A���[�N������������
127     */
128 zmatsuo 10782 CharSetData *CharSetInit(const CharSetOp *op, void *client_data)
129 zmatsuo 10755 {
130 zmatsuo 10782 CharSetData *w = (CharSetData *)calloc(sizeof(*w), 1);
131     if (w == NULL) {
132     return NULL;
133     }
134 zmatsuo 10763
135 zmatsuo 10782 w->Op = *op;
136     w->ClientData = client_data;
137    
138 zmatsuo 10763 CharSetInit2(w);
139 zmatsuo 10782 w->GLtmp = 0;
140     w->SSflag = FALSE;
141 zmatsuo 10763
142 zmatsuo 10782 w->DebugFlag = DEBUG_FLAG_NONE;
143    
144 zmatsuo 10763 w->replacement_char = REPLACEMENT_CHARACTER;
145 zmatsuo 10782 w->SSflag = FALSE;
146 zmatsuo 10755
147 zmatsuo 10782 w->KanjiIn = FALSE;
148     w->EUCkanaIn = FALSE;
149     w->EUCsupIn = FALSE;
150     w->ConvJIS = FALSE;
151     w->Fallbacked = FALSE;
152    
153     return w;
154 zmatsuo 10755 }
155    
156 zmatsuo 10782 void CharSetFinish(CharSetData *w)
157     {
158     assert(w != NULL);
159     free(w);
160     }
161    
162 zmatsuo 10755 /**
163     * 1byte���`�F�b�N
164     */
165     static BOOL CheckFirstByte(BYTE b, int lang, int kanji_code)
166     {
167     switch (lang) {
168     case IdKorean:
169 zmatsuo 10779 return __ismbblead(b, 949);
170 zmatsuo 10755 case IdChinese:
171     if (kanji_code == IdCnGB2312) {
172     return __ismbblead(b, 936);
173     }
174     else if (ts.KanjiCode == IdCnBig5) {
175     return __ismbblead(b, 950);
176     }
177     break;
178     default:
179     assert(FALSE);
180     break;
181     }
182     assert(FALSE);
183     return FALSE;
184     }
185 zmatsuo 10763
186 zmatsuo 10755 /**
187 zmatsuo 10763 * Double-byte Character Sets
188     * SJIS��1byte��?
189     *
190     * ��1�o�C�g0x81...0x9F or 0xE0...0xEF
191     * ��1�o�C�g0x81...0x9F or 0xE0...0xFC
192     */
193     static BOOL ismbbleadSJIS(BYTE b)
194     {
195     if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
196     return TRUE;
197     }
198     return FALSE;
199     }
200    
201     /**
202 zmatsuo 10755 * ts.Language == IdJapanese ��
203     * 1byte���`�F�b�N
204     */
205 zmatsuo 10782 static BOOL CheckKanji(CharSetData *w, BYTE b)
206 zmatsuo 10755 {
207     BOOL Check;
208    
209     if (ts.Language!=IdJapanese)
210     return FALSE;
211    
212 zmatsuo 10782 w->ConvJIS = FALSE;
213 zmatsuo 10755
214     if (ts.KanjiCode==IdSJIS ||
215 zmatsuo 10794 (ts.UTF8Fallback && ts.KanjiCode==IdUTF8)) {
216 zmatsuo 10759 if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) {
217 zmatsuo 10782 w->Fallbacked = TRUE;
218 zmatsuo 10755 return TRUE; // SJIS kanji
219     }
220     if ((0xa1<=b) && (b<=0xdf)) {
221     return FALSE; // SJIS katakana
222     }
223     }
224    
225     if ((b>=0x21) && (b<=0x7e)) {
226     Check = (w->Gn[w->Glr[0]] == IdKanji);
227 zmatsuo 10782 w->ConvJIS = Check;
228 zmatsuo 10755 }
229     else if ((b>=0xA1) && (b<=0xFE)) {
230     Check = (w->Gn[w->Glr[1]] == IdKanji);
231     if (ts.KanjiCode==IdEUC) {
232     Check = TRUE;
233     }
234     else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) {
235     Check = FALSE; // 8-bit katakana
236     }
237 zmatsuo 10782 w->ConvJIS = Check;
238 zmatsuo 10755 }
239     else {
240     Check = FALSE;
241     }
242    
243     return Check;
244     }
245    
246 zmatsuo 10782 static BOOL ParseFirstJP(CharSetData *w, BYTE b)
247 zmatsuo 10755 // returns TRUE if b is processed
248     // (actually allways returns TRUE)
249     {
250 zmatsuo 10782 if (w->KanjiIn) {
251     if (((! w->ConvJIS) && (0x3F<b) && (b<0xFD)) ||
252     (w->ConvJIS && ( ((0x20<b) && (b<0x7f)) ||
253 zmatsuo 10759 ((0xa0<b) && (b<0xff)) )) )
254 zmatsuo 10755 {
255 zmatsuo 10758 unsigned long u32;
256 zmatsuo 10782 w->Kanji = w->Kanji + b;
257     if (w->ConvJIS) {
258 zmatsuo 10758 // JIS -> Shift_JIS(CP932)
259 zmatsuo 10782 w->Kanji = JIS2SJIS((WORD)(w->Kanji & 0x7f7f));
260 zmatsuo 10758 }
261 zmatsuo 10782 u32 = CP932ToUTF32(w->Kanji);
262     w->Op.PutU32(u32, w->ClientData);
263     w->KanjiIn = FALSE;
264 zmatsuo 10755 return TRUE;
265     }
266     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
267 zmatsuo 10782 w->KanjiIn = FALSE;
268 zmatsuo 10755 }
269     }
270    
271 zmatsuo 10782 if (w->SSflag) {
272     if (w->Gn[w->GLtmp] == IdKanji) {
273     w->Kanji = b << 8;
274     w->KanjiIn = TRUE;
275     w->SSflag = FALSE;
276 zmatsuo 10755 return TRUE;
277     }
278 zmatsuo 10782 else if (w->Gn[w->GLtmp] == IdKatakana) {
279 zmatsuo 10755 b = b | 0x80;
280     }
281    
282 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
283     w->SSflag = FALSE;
284 zmatsuo 10755 return TRUE;
285     }
286    
287 zmatsuo 10782 if ((!w->EUCsupIn) && (!w->EUCkanaIn) && (!w->KanjiIn) && CheckKanji(w, b)) {
288     w->Kanji = b << 8;
289     w->KanjiIn = TRUE;
290 zmatsuo 10755 return TRUE;
291     }
292    
293     if (b<=US) {
294 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
295 zmatsuo 10755 }
296     else if (b==0x20) {
297 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
298 zmatsuo 10755 }
299     else if ((b>=0x21) && (b<=0x7E)) {
300 zmatsuo 10782 if (w->EUCsupIn) {
301     w->EUCcount--;
302     w->EUCsupIn = (w->EUCcount==0);
303 zmatsuo 10755 return TRUE;
304     }
305    
306 zmatsuo 10782 if ((w->Gn[w->Glr[0]] == IdKatakana) || w->EUCkanaIn) {
307 zmatsuo 10755 b = b | 0x80;
308 zmatsuo 10782 w->EUCkanaIn = FALSE;
309 zmatsuo 10755 {
310     // b��sjis�����p�J�^�J�i
311     unsigned long u32 = CP932ToUTF32(b);
312 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
313 zmatsuo 10755 }
314     return TRUE;
315     }
316 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
317 zmatsuo 10755 }
318     else if (b==0x7f) {
319     return TRUE;
320     }
321     else if ((b>=0x80) && (b<=0x8D)) {
322 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
323 zmatsuo 10755 }
324     else if (b==0x8E) { // SS2
325     switch (ts.KanjiCode) {
326     case IdEUC:
327     if (ts.ISO2022Flag & ISO2022_SS2) {
328 zmatsuo 10782 w->EUCkanaIn = TRUE;
329 zmatsuo 10755 }
330     break;
331     case IdUTF8:
332 zmatsuo 10782 w->Op.PutU32(REPLACEMENT_CHARACTER, w->ClientData);
333 zmatsuo 10755 break;
334     default:
335 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
336 zmatsuo 10755 }
337     }
338     else if (b==0x8F) { // SS3
339     switch (ts.KanjiCode) {
340     case IdEUC:
341     if (ts.ISO2022Flag & ISO2022_SS3) {
342 zmatsuo 10782 w->EUCcount = 2;
343     w->EUCsupIn = TRUE;
344 zmatsuo 10755 }
345     break;
346     case IdUTF8:
347 zmatsuo 10782 w->Op.PutU32(REPLACEMENT_CHARACTER, w->ClientData);
348 zmatsuo 10755 break;
349     default:
350 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
351 zmatsuo 10755 }
352     }
353     else if ((b>=0x90) && (b<=0x9F)) {
354 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
355 zmatsuo 10755 }
356     else if (b==0xA0) {
357 zmatsuo 10782 w->Op.PutU32(0x20, w->ClientData);
358 zmatsuo 10755 }
359     else if ((b>=0xA1) && (b<=0xFE)) {
360 zmatsuo 10782 if (w->EUCsupIn) {
361     w->EUCcount--;
362     w->EUCsupIn = (w->EUCcount==0);
363 zmatsuo 10755 return TRUE;
364     }
365    
366     if ((w->Gn[w->Glr[1]] != IdASCII) ||
367 zmatsuo 10782 ((ts.KanjiCode==IdEUC) && w->EUCkanaIn) ||
368 zmatsuo 10755 (ts.KanjiCode==IdSJIS) ||
369 zmatsuo 10759 ((ts.KanjiCode==IdJIS) &&
370     (ts.JIS7Katakana==0) &&
371     ((ts.TermFlag & TF_FIXEDJIS)!=0))) {
372 zmatsuo 10755 // b��sjis�����p�J�^�J�i
373     unsigned long u32 = CP932ToUTF32(b);
374 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
375 zmatsuo 10755 } else {
376     if (w->Gn[w->Glr[1]] == IdASCII) {
377     b = b & 0x7f;
378     }
379 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
380 zmatsuo 10755 }
381 zmatsuo 10782 w->EUCkanaIn = FALSE;
382 zmatsuo 10755 }
383     else {
384 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
385 zmatsuo 10755 }
386    
387     return TRUE;
388     }
389    
390 zmatsuo 10782 static BOOL ParseFirstKR(CharSetData *w, BYTE b)
391 zmatsuo 10755 // returns TRUE if b is processed
392     // (actually allways returns TRUE)
393     {
394 zmatsuo 10782 if (w->KanjiIn) {
395 zmatsuo 10759 if (((0x41<=b) && (b<=0x5A)) ||
396     ((0x61<=b) && (b<=0x7A)) ||
397     ((0x81<=b) && (b<=0xFE)))
398 zmatsuo 10755 {
399 zmatsuo 10758 unsigned long u32 = 0;
400 zmatsuo 10768 if (ts.KanjiCode == IdKoreanCP949) {
401 zmatsuo 10779 // CP949
402 zmatsuo 10782 w->Kanji = w->Kanji + b;
403     u32 = MBCP_UTF32(w->Kanji, 949);
404 zmatsuo 10758 }
405     else {
406     assert(FALSE);
407     }
408 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
409     w->KanjiIn = FALSE;
410 zmatsuo 10755 return TRUE;
411     }
412     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
413 zmatsuo 10782 w->KanjiIn = FALSE;
414 zmatsuo 10755 }
415     }
416    
417 zmatsuo 10782 if ((!w->KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
418     w->Kanji = b << 8;
419     w->KanjiIn = TRUE;
420 zmatsuo 10755 return TRUE;
421     }
422    
423     if (b<=US) {
424 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
425 zmatsuo 10755 }
426     else if (b==0x20) {
427 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
428 zmatsuo 10755 }
429     else if ((b>=0x21) && (b<=0x7E)) {
430     // if (Gn[Glr[0]] == IdKatakana) {
431     // b = b | 0x80;
432     // }
433 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
434 zmatsuo 10755 }
435     else if (b==0x7f) {
436     return TRUE;
437     }
438     else if ((0x80<=b) && (b<=0x9F)) {
439 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
440 zmatsuo 10755 }
441     else if (b==0xA0) {
442 zmatsuo 10782 w->Op.PutU32(0x20, w->ClientData);
443 zmatsuo 10755 }
444     else if ((b>=0xA1) && (b<=0xFE)) {
445     if (w->Gn[w->Glr[1]] == IdASCII) {
446     b = b & 0x7f;
447     }
448 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
449 zmatsuo 10755 }
450     else {
451 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
452 zmatsuo 10755 }
453    
454     return TRUE;
455     }
456    
457 zmatsuo 10782 static BOOL ParseFirstCn(CharSetData *w, BYTE b)
458 zmatsuo 10755 // returns TRUE if b is processed
459     // (actually allways returns TRUE)
460     {
461 zmatsuo 10782 if (w->KanjiIn) {
462 zmatsuo 10755 // TODO
463 zmatsuo 10759 if (((0x40<=b) && (b<=0x7e)) ||
464     ((0xa1<=b) && (b<=0xFE)))
465 zmatsuo 10755 {
466 zmatsuo 10758 unsigned long u32 = 0;
467 zmatsuo 10782 w->Kanji = w->Kanji + b;
468 zmatsuo 10758 if (ts.KanjiCode == IdCnGB2312) {
469     // CP936 GB2312
470 zmatsuo 10782 u32 = MBCP_UTF32(w->Kanji, 936);
471 zmatsuo 10758 }
472     else if (ts.KanjiCode == IdCnBig5) {
473     // CP950 Big5
474 zmatsuo 10782 u32 = MBCP_UTF32(w->Kanji, 950);
475 zmatsuo 10758 }
476     else {
477     assert(FALSE);
478     }
479 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
480     w->KanjiIn = FALSE;
481 zmatsuo 10755 return TRUE;
482     }
483     else if ((ts.TermFlag & TF_CTRLINKANJI)==0) {
484 zmatsuo 10782 w->KanjiIn = FALSE;
485 zmatsuo 10755 }
486     }
487    
488 zmatsuo 10782 if ((!w->KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) {
489     w->Kanji = b << 8;
490     w->KanjiIn = TRUE;
491 zmatsuo 10755 return TRUE;
492     }
493    
494     if (b<=US) {
495 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
496 zmatsuo 10755 }
497     else if (b==0x20) {
498 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
499 zmatsuo 10755 }
500     else if ((b>=0x21) && (b<=0x7E)) {
501     // if (Gn[Glr[0]] == IdKatakana) {
502     // b = b | 0x80;
503     // }
504 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
505 zmatsuo 10755 }
506     else if (b==0x7f) {
507     return TRUE;
508     }
509     else if ((0x80<=b) && (b<=0x9F)) {
510 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
511 zmatsuo 10755 }
512     else if (b==0xA0) {
513 zmatsuo 10782 w->Op.PutU32(0x20, w->ClientData);
514 zmatsuo 10755 }
515     else if ((b>=0xA1) && (b<=0xFE)) {
516     if (w->Gn[w->Glr[1]] == IdASCII) {
517     b = b & 0x7f;
518     }
519 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
520 zmatsuo 10755 }
521     else {
522 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
523 zmatsuo 10755 }
524    
525     return TRUE;
526     }
527    
528 zmatsuo 10782 static void ParseASCII(CharSetData *w, BYTE b)
529 zmatsuo 10755 {
530 zmatsuo 10782 if (w->SSflag) {
531     w->Op.PutU32(b, w->ClientData);
532     w->SSflag = FALSE;
533 zmatsuo 10755 return;
534     }
535    
536     if (b<=US) {
537 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
538 zmatsuo 10755 } else if ((b>=0x20) && (b<=0x7E)) {
539 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
540 zmatsuo 10755 } else if ((b==0x8E) || (b==0x8F)) {
541 zmatsuo 10782 w->Op.PutU32(REPLACEMENT_CHARACTER, w->ClientData);
542 zmatsuo 10755 } else if ((b>=0x80) && (b<=0x9F)) {
543 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
544 zmatsuo 10755 } else if (b>=0xA0) {
545 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
546 zmatsuo 10755 }
547     }
548    
549 zmatsuo 10770 /**
550     * REPLACEMENT_CHARACTER ���\��
551     * UTF-8 �f�R�[�h�����g�p
552     */
553 zmatsuo 10782 static void PutReplacementChr(CharSetData *w, const BYTE *ptr, size_t len, BOOL fallback)
554 zmatsuo 10763 {
555     const char32_t replacement_char = w->replacement_char;
556     int i;
557     for (i = 0; i < len; i++) {
558     BYTE c = *ptr++;
559 zmatsuo 10789 assert(!IsC0(c));
560 zmatsuo 10764 if (fallback) {
561     // fallback ISO8859-1
562 zmatsuo 10782 w->Op.PutU32(c, w->ClientData);
563 zmatsuo 10763 }
564     else {
565 zmatsuo 10764 // fallback������
566     if (c < 0x80) {
567     // �s����UTF-8��������������0x80�������������A
568     // 1������UTF-8�������������������\������
569 zmatsuo 10782 w->Op.PutU32(c, w->ClientData);
570 zmatsuo 10764 }
571     else {
572 zmatsuo 10782 w->Op.PutU32(replacement_char, w->ClientData);
573 zmatsuo 10764 }
574 zmatsuo 10763 }
575     }
576     }
577    
578 zmatsuo 10770 /**
579     * UTF-8�����M�f�[�^����������
580     *
581     * returns TRUE if b is processed
582     */
583 zmatsuo 10782 static BOOL ParseFirstUTF8(CharSetData *w, BYTE b)
584 zmatsuo 10755 {
585 zmatsuo 10766 char32_t code;
586 zmatsuo 10755
587 zmatsuo 10782 if (w->Fallbacked) {
588     BOOL r = ParseFirstJP(w, b);
589     w->Fallbacked = FALSE;
590 zmatsuo 10763 return r;
591 zmatsuo 10755 }
592    
593     // UTF-8�G���R�[�h
594 zmatsuo 10766 // The Unicode Standard Chapter 3
595     // Table 3-7. Well-Formed UTF-8 Byte Sequences
596 zmatsuo 10777 // | Code Points | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
597     // | U+0000..U+007F | 00..7F | | | |
598     // | U+0080..U+07FF | C2..DF | 80..BF | | |
599     // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
600     // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
601     // | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
602     // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
603     // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
604     // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
605     // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
606 zmatsuo 10755 // - 1byte��
607 zmatsuo 10766 // - 0x00 - 0x7f ok
608     // - 0x80 - 0xc1 ng
609     // - 0xc2 - 0xf4 ok
610     // - 0xf5 - 0xff ng
611 zmatsuo 10755 // - 2byte�����~
612 zmatsuo 10766 // - 0x00 - 0x7f ng
613     // - 0x80 - 0xbf ok
614     // - 0xc0 - 0xff ng
615     // - 2byte�����O
616     // - 1byte == 0xe0 ������ 0xa0 - 0xbf����ok
617     // - 1byte == 0xed ������ 0x80 - 0x9f����ok
618     // - 1byte == 0xf0 ������ 0x90 - 0xbf����ok
619     // - 1byte == 0xf4 ������ 0x90 - 0x8f����ok
620 zmatsuo 10763 recheck:
621 zmatsuo 10755 // 1byte(7bit)
622 zmatsuo 10767 if (w->count == 0) {
623 zmatsuo 10770 if (IsC0(b)) {
624     // U+0000 .. U+001f
625     // C0��������, C0 Coontrols
626 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
627 zmatsuo 10755 return TRUE;
628     }
629 zmatsuo 10770 else if (b <= 0x7f) {
630     // 0x7f����, �������A���������o��
631 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
632 zmatsuo 10770 return TRUE;
633     }
634     else if (0xc2 <= b && b <= 0xf4) {
635 zmatsuo 10766 // 1byte������
636 zmatsuo 10767 w->buf[w->count++] = b;
637 zmatsuo 10755 return TRUE;
638     }
639    
640 zmatsuo 10770 // 0x80 - 0xc1, 0xf5 - 0xff
641 zmatsuo 10766 // UTF-8��1byte���o���������R�[�h������
642 zmatsuo 10794 if (ts.UTF8Fallback) {
643 zmatsuo 10766 // fallback��������
644     if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) {
645     // ���{�������� && Shift_JIS 1byte��
646     // Shift_JIS �� fallback
647 zmatsuo 10782 w->Fallbacked = TRUE;
648     w->ConvJIS = FALSE;
649     w->Kanji = b << 8;
650     w->KanjiIn = TRUE;
651 zmatsuo 10766 return TRUE;
652 zmatsuo 10755 }
653 zmatsuo 10766 // fallback ISO8859-1
654 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
655 zmatsuo 10766 return TRUE;
656 zmatsuo 10755 }
657     else {
658 zmatsuo 10766 // fallback������, �s������������
659 zmatsuo 10767 w->buf[0] = b;
660     PutReplacementChr(w, w->buf, 1, FALSE);
661 zmatsuo 10755 }
662 zmatsuo 10766 return TRUE;
663 zmatsuo 10755 }
664    
665 zmatsuo 10764 // 2byte���~����?
666 zmatsuo 10766 if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b)
667     // �s��������, (����2bit�� 0b10xx_xxxx ��������)
668 zmatsuo 10794 PutReplacementChr(w, w->buf, w->count, ts.UTF8Fallback);
669 zmatsuo 10767 w->count = 0;
670 zmatsuo 10764 goto recheck;
671     }
672    
673 zmatsuo 10755 // 2byte�����~����
674 zmatsuo 10767 w->buf[w->count++] = b;
675 zmatsuo 10755
676 zmatsuo 10766 // 2byte(11bit)
677 zmatsuo 10767 if (w->count == 2) {
678     if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf)
679 zmatsuo 10766 // 5bit + 6bit
680 zmatsuo 10767 code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f);
681 zmatsuo 10770 if (IsC1(code)) {
682     // U+0080 .. u+009f
683     // C1��������, C1 Controls
684 zmatsuo 10782 w->Op.ParseControl((BYTE)code, w->ClientData);
685 zmatsuo 10770 }
686     else {
687 zmatsuo 10782 w->Op.PutU32(code, w->ClientData);
688 zmatsuo 10770 }
689 zmatsuo 10767 w->count = 0;
690 zmatsuo 10755 return TRUE;
691     }
692 zmatsuo 10766 return TRUE;
693     }
694    
695     // 3byte(16bit)
696 zmatsuo 10767 if (w->count == 3) {
697     if ((w->buf[0] & 0xf0) == 0xe0) {
698     if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) ||
699     (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) {
700 zmatsuo 10766 // �s���� UTF-8
701 zmatsuo 10794 PutReplacementChr(w, w->buf, 2, ts.UTF8Fallback);
702 zmatsuo 10767 w->count = 0;
703 zmatsuo 10766 goto recheck;
704     }
705 zmatsuo 10755 // 4bit + 6bit + 6bit
706 zmatsuo 10767 code = ((w->buf[0] & 0xf) << 12);
707     code |= ((w->buf[1] & 0x3f) << 6);
708     code |= ((w->buf[2] & 0x3f));
709 zmatsuo 10782 w->Op.PutU32(code, w->ClientData);
710 zmatsuo 10767 w->count = 0;
711 zmatsuo 10755 return TRUE;
712     }
713 zmatsuo 10766 return TRUE;
714 zmatsuo 10755 }
715    
716     // 4byte(21bit)
717 zmatsuo 10767 assert(w->count == 4);
718     assert((w->buf[0] & 0xf8) == 0xf0);
719     if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) ||
720     (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) {
721 zmatsuo 10766 // �s���� UTF-8
722 zmatsuo 10794 PutReplacementChr(w, w->buf, 3, ts.UTF8Fallback);
723 zmatsuo 10767 w->count = 0;
724 zmatsuo 10766 goto recheck;
725 zmatsuo 10755 }
726 zmatsuo 10766 // 3bit + 6bit + 6bit + 6bit
727 zmatsuo 10767 code = ((w->buf[0] & 0x07) << 18);
728     code |= ((w->buf[1] & 0x3f) << 12);
729     code |= ((w->buf[2] & 0x3f) << 6);
730     code |= (w->buf[3] & 0x3f);
731 zmatsuo 10782 w->Op.PutU32(code, w->ClientData);
732 zmatsuo 10767 w->count = 0;
733 zmatsuo 10755 return TRUE;
734     }
735    
736 zmatsuo 10782 static BOOL ParseFirstRus(CharSetData *w, BYTE b)
737 zmatsuo 10755 // returns if b is processed
738     {
739 zmatsuo 10770 if (IsC0(b)) {
740 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
741 zmatsuo 10770 return TRUE;
742     }
743 zmatsuo 10756 // CP1251������
744     BYTE c = RussConv(ts.KanjiCode, IdWindows, b);
745     // CP1251->Unicode
746     unsigned long u32 = MBCP_UTF32(c, 1251);
747 zmatsuo 10782 w->Op.PutU32(u32, w->ClientData);
748 zmatsuo 10756 return TRUE;
749 zmatsuo 10755 }
750    
751 zmatsuo 10782 static BOOL ParseEnglish(CharSetData *w, BYTE b)
752 zmatsuo 10755 {
753     unsigned short u16 = 0;
754     int part = KanjiCodeToISO8859Part(ts.KanjiCode);
755     int r = UnicodeFromISO8859(part, b, &u16);
756     if (r == 0) {
757     return FALSE;
758     }
759     if (u16 < 0x100) {
760 zmatsuo 10782 ParseASCII(w, (BYTE)u16);
761 zmatsuo 10755 }
762     else {
763 zmatsuo 10782 w->Op.PutU32(u16, w->ClientData);
764 zmatsuo 10755 }
765     return TRUE;
766     }
767    
768 zmatsuo 10782 static void PutDebugChar(CharSetData *w, BYTE b)
769 zmatsuo 10771 {
770     int i;
771     BOOL svInsertMode, svAutoWrapMode;
772     TCharAttr svCharAttr;
773     TCharAttr char_attr;
774    
775     svInsertMode = TermGetInsertMode();
776     TermSetInsertMode(FALSE);
777     svAutoWrapMode = TermGetAutoWrapMode();
778     TermSetAutoWrapMode(TRUE);
779    
780     TermGetAttr(&svCharAttr);
781     char_attr = svCharAttr;
782     char_attr.Attr = AttrDefault;
783     TermSetAttr(&char_attr);
784    
785 zmatsuo 10782 if (w->DebugFlag==DEBUG_FLAG_HEXD) {
786 zmatsuo 10771 char buff[3];
787     _snprintf(buff, 3, "%02X", (unsigned int) b);
788    
789     for (i=0; i<2; i++)
790 zmatsuo 10782 w->Op.PutU32(buff[i], w->ClientData);
791     w->Op.PutU32(' ', w->ClientData);
792 zmatsuo 10771 }
793 zmatsuo 10782 else if (w->DebugFlag==DEBUG_FLAG_NORM) {
794 zmatsuo 10771
795     if ((b & 0x80) == 0x80) {
796     //UpdateStr();
797     char_attr.Attr = AttrReverse;
798     TermSetAttr(&char_attr);
799     b = b & 0x7f;
800     }
801    
802     if (b<=US) {
803 zmatsuo 10782 w->Op.PutU32('^', w->ClientData);
804     w->Op.PutU32((char)(b + 0x40), w->ClientData);
805 zmatsuo 10771 }
806     else if (b==DEL) {
807 zmatsuo 10782 w->Op.PutU32('<', w->ClientData);
808     w->Op.PutU32('D', w->ClientData);
809     w->Op.PutU32('E', w->ClientData);
810     w->Op.PutU32('L', w->ClientData);
811     w->Op.PutU32('>', w->ClientData);
812 zmatsuo 10771 }
813     else
814 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
815 zmatsuo 10771 }
816    
817     TermSetAttr(&char_attr);
818     TermSetInsertMode(svInsertMode);
819     TermSetAutoWrapMode(svAutoWrapMode);
820     }
821    
822 zmatsuo 10782 void ParseFirst(CharSetData *w, BYTE b)
823 zmatsuo 10771 {
824     WORD language = ts.Language;
825 zmatsuo 10782 if (w->DebugFlag != DEBUG_FLAG_NONE) {
826 zmatsuo 10771 language = IdDebug;
827     }
828    
829     switch (language) {
830 zmatsuo 10782 default:
831     assert(FALSE);
832     language = IdUtf8;
833     // FALLTHROUGH
834 zmatsuo 10771 case IdUtf8:
835 zmatsuo 10782 ParseFirstUTF8(w, b);
836 zmatsuo 10755 return;
837    
838 zmatsuo 10771 case IdJapanese:
839 zmatsuo 10755 switch (ts.KanjiCode) {
840 zmatsuo 10771 case IdUTF8:
841 zmatsuo 10782 if (ParseFirstUTF8(w, b)) {
842 zmatsuo 10755 return;
843     }
844     break;
845 zmatsuo 10771 default:
846 zmatsuo 10782 if (ParseFirstJP(w, b)) {
847 zmatsuo 10755 return;
848     }
849     }
850     break;
851    
852 zmatsuo 10771 case IdKorean:
853 zmatsuo 10755 switch (ts.KanjiCode) {
854 zmatsuo 10771 case IdUTF8:
855 zmatsuo 10782 if (ParseFirstUTF8(w, b)) {
856 zmatsuo 10755 return;
857     }
858     break;
859 zmatsuo 10771 default:
860 zmatsuo 10782 if (ParseFirstKR(w, b)) {
861 zmatsuo 10755 return;
862     }
863     }
864     break;
865    
866 zmatsuo 10771 case IdRussian:
867 zmatsuo 10782 if (ParseFirstRus(w, b)) {
868 zmatsuo 10755 return;
869     }
870     break;
871    
872     case IdChinese:
873     switch (ts.KanjiCode) {
874     case IdUTF8:
875 zmatsuo 10782 if (ParseFirstUTF8(w, b)) {
876 zmatsuo 10755 return;
877     }
878     break;
879     default:
880 zmatsuo 10782 if (ParseFirstCn(w, b)) {
881 zmatsuo 10755 return;
882     }
883     }
884     break;
885     case IdEnglish: {
886 zmatsuo 10782 if (ParseEnglish(w, b)) {
887 zmatsuo 10755 return;
888     }
889     break;
890     }
891 zmatsuo 10771 case IdDebug: {
892 zmatsuo 10782 PutDebugChar(w, b);
893 zmatsuo 10771 return;
894 zmatsuo 10755 }
895 zmatsuo 10771 }
896 zmatsuo 10755
897 zmatsuo 10782 if (w->SSflag) {
898     w->Op.PutU32(b, w->ClientData);
899     w->SSflag = FALSE;
900 zmatsuo 10755 return;
901     }
902    
903     if (b<=US)
904 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
905 zmatsuo 10755 else if ((b>=0x20) && (b<=0x7E))
906 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
907 zmatsuo 10755 else if ((b>=0x80) && (b<=0x9F))
908 zmatsuo 10782 w->Op.ParseControl(b, w->ClientData);
909 zmatsuo 10755 else if (b>=0xA0)
910 zmatsuo 10782 w->Op.PutU32(b, w->ClientData);
911 zmatsuo 10755 }
912    
913     /**
914     * �w��(Designate)
915     *
916     * @param Gn 0/1/2/3 = G0/G1/G2/G3
917     * @param codeset IdASCII 0
918     * IdKatakana 1
919     * IdKanji 2
920     * IdSpecial 3
921     */
922 zmatsuo 10782 void CharSet2022Designate(CharSetData *w, int gn, int cs)
923 zmatsuo 10755 {
924     w->Gn[gn] = cs;
925     }
926    
927     /**
928     * �����o��(Invoke)
929 zmatsuo 10776 * @param shift
930 zmatsuo 10755 */
931 zmatsuo 10782 void CharSet2022Invoke(CharSetData *w, CharSet2022Shift shift)
932 zmatsuo 10755 {
933 zmatsuo 10776 switch (shift) {
934     case CHARSET_LS0:
935     // Locking Shift 0 (G0->GL)
936     w->Glr[0] = 0;
937     break;
938     case CHARSET_LS1:
939     // Locking Shift 1 (G1->GL)
940     w->Glr[0] = 1;
941     break;
942     case CHARSET_LS2:
943     // Locking Shift 2 (G2->GL)
944     w->Glr[0] = 2;
945     break;
946     case CHARSET_LS3:
947     // Locking Shift 3 (G3->GL)
948     w->Glr[0] = 3;
949     break;
950     case CHARSET_LS1R:
951     // Locking Shift 1 (G1->GR)
952     w->Glr[1] = 1;
953     break;
954     case CHARSET_LS2R:
955     // Locking Shift 2 (G2->GR)
956     w->Glr[1] = 2;
957     break;
958     case CHARSET_LS3R:
959     // Locking Shift 3 (G3->GR)
960     w->Glr[1] = 3;
961     break;
962     case CHARSET_SS2:
963     // Single Shift 2
964 zmatsuo 10782 w->GLtmp = 2;
965     w->SSflag = TRUE;
966 zmatsuo 10776 break;
967     case CHARSET_SS3:
968     // Single Shift 3
969 zmatsuo 10782 w->GLtmp = 3;
970     w->SSflag = TRUE;
971 zmatsuo 10776 break;
972     default:
973     assert(FALSE);
974     break;
975 zmatsuo 10755 }
976     }
977    
978     /**
979     * DEC�����t�H���g(Tera Special font)
980     * 0140(0x60) ... 0176(0x7f) ���r�����A�T�C������������
981 zmatsuo 10760 * (0xe0) ... (0xff) ��?
982 zmatsuo 10755 * <ESC>(0 �������������G�X�P�[�v�V�[�P���X�����`
983     * about/emulations.html
984     *
985     * @param b �R�[�h
986 zmatsuo 10760 * @retval TRUE IdSpecial
987     * @retval FALSE IdSpecial��������
988 zmatsuo 10755 */
989 zmatsuo 10782 BOOL CharSetIsSpecial(CharSetData *w, BYTE b)
990 zmatsuo 10755 {
991     BOOL SpecialNew = FALSE;
992    
993     if ((b>0x5F) && (b<0x80)) {
994 zmatsuo 10782 if (w->SSflag)
995     SpecialNew = (w->Gn[w->GLtmp]==IdSpecial);
996 zmatsuo 10755 else
997     SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial);
998     }
999     else if (b>0xDF) {
1000 zmatsuo 10782 if (w->SSflag)
1001     SpecialNew = (w->Gn[w->GLtmp]==IdSpecial);
1002 zmatsuo 10755 else
1003     SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial);
1004     }
1005    
1006     return SpecialNew;
1007     }
1008    
1009 zmatsuo 10782 static void CharSetSaveStateLow(CharSetState *state, const CharSetData *w)
1010 zmatsuo 10755 {
1011     int i;
1012     state->infos[0] = w->Glr[0];
1013     state->infos[1] = w->Glr[1];
1014     for (i=0 ; i<=3; i++) {
1015     state->infos[2 + i] = w->Gn[i];
1016     }
1017     }
1018    
1019     /**
1020     * ��������������
1021     */
1022 zmatsuo 10782 void CharSetSaveState(CharSetData *w, CharSetState *state)
1023 zmatsuo 10755 {
1024     CharSetSaveStateLow(state, w);
1025     }
1026    
1027     /**
1028     * ���������A����
1029     */
1030 zmatsuo 10782 void CharSetLoadState(CharSetData *w, const CharSetState *state)
1031 zmatsuo 10755 {
1032     int i;
1033     w->Glr[0] = state->infos[0];
1034     w->Glr[1] = state->infos[1];
1035     for (i=0 ; i<=3; i++) {
1036     w->Gn[i] = state->infos[2 + i];
1037     }
1038     }
1039 zmatsuo 10763
1040     /**
1041     * �t�H�[���o�b�N���I��
1042     * ���M�f�[�^UTF-8�����AShift_JIS�o����(fallback����)�����f����
1043     *
1044     */
1045 zmatsuo 10782 void CharSetFallbackFinish(CharSetData *w)
1046 zmatsuo 10763 {
1047 zmatsuo 10782 w->Fallbacked = FALSE;
1048 zmatsuo 10763 }
1049 zmatsuo 10773
1050     /**
1051     * �f�o�O�o�����������[�h�����X����
1052     */
1053 zmatsuo 10782 void CharSetSetNextDebugMode(CharSetData *w)
1054 zmatsuo 10773 {
1055     // ts.DebugModes ���� tttypes.h �� DBGF_* �� OR ����������
1056     do {
1057 zmatsuo 10782 w->DebugFlag = (w->DebugFlag + 1) % DEBUG_FLAG_MAXD;
1058     } while (w->DebugFlag != DEBUG_FLAG_NONE && !((ts.DebugModes >> (w->DebugFlag - 1)) & 1));
1059 zmatsuo 10773 }
1060    
1061 zmatsuo 10782 BYTE CharSetGetDebugMode(CharSetData *w)
1062 zmatsuo 10773 {
1063 zmatsuo 10782 return w->DebugFlag;
1064 zmatsuo 10773 }
1065    
1066 zmatsuo 10782 void CharSetSetDebugMode(CharSetData *w, BYTE mode)
1067 zmatsuo 10773 {
1068 zmatsuo 10782 w->DebugFlag = mode % DEBUG_FLAG_MAXD;
1069 zmatsuo 10773 }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26