Revision: 10775 https://osdn.net/projects/ttssh2/scm/svn/commits/10775 Author: zmatsuo Date: 2023-06-20 23:22:56 +0900 (Tue, 20 Jun 2023) Log Message: ----------- VS2005でビルドできるよう修正 - charset.c を charset.cpp に変更 Modified Paths: -------------- trunk/teraterm/teraterm/CMakeLists.txt trunk/teraterm/teraterm/charset.h trunk/teraterm/teraterm/ttermpro.v16.vcxproj trunk/teraterm/teraterm/ttermpro.v16.vcxproj.filters trunk/teraterm/teraterm/ttermpro.v17.vcxproj trunk/teraterm/teraterm/ttermpro.v17.vcxproj.filters trunk/teraterm/teraterm/vtdisp.c trunk/teraterm/teraterm/vtterm.c Added Paths: ----------- trunk/teraterm/teraterm/charset.cpp Removed Paths: ------------- trunk/teraterm/teraterm/charset.c -------------- next part -------------- Modified: trunk/teraterm/teraterm/CMakeLists.txt =================================================================== --- trunk/teraterm/teraterm/CMakeLists.txt 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/CMakeLists.txt 2023-06-20 14:22:56 UTC (rev 10775) @@ -17,7 +17,7 @@ broadcast.h buffer.c buffer.h - charset.c + charset.cpp charset.h checkeol.cpp checkeol.h Deleted: trunk/teraterm/teraterm/charset.c =================================================================== --- trunk/teraterm/teraterm/charset.c 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/charset.c 2023-06-20 14:22:56 UTC (rev 10775) @@ -1,1047 +0,0 @@ -/* - * (C) 2023- TeraTerm Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "teraterm.h" -#include "tttypes.h" -#include <stdio.h> -#include <string.h> -#if !defined(_CRTDBG_MAP_ALLOC) -#define _CRTDBG_MAP_ALLOC -#endif -#include <stdlib.h> -#include <crtdbg.h> -#include <assert.h> - -#include "buffer.h" // for Wrap -#include "ttwinman.h" -#include "codeconv.h" -#include "unicode.h" -#include "language.h" // for JIS2SJIS() -#include "ttcstd.h" -#include "vtterm.h" - -#include "charset.h" - -// UTF-8\x82\xAA\x95s\x90\xB3\x82Ȓl\x82\xBE\x82\xC1\x82\xBD\x8E\x9E\x82ɕ\\x8E\xA6\x82\xB7\x82镶\x8E\x9A -#define REPLACEMENT_CHARACTER '?' 
-//#define REPLACEMENT_CHARACTER 0x2592 -//#define REPLACEMENT_CHARACTER 0x20 -//#define REPLACEMENT_CHARACTER 0xfffd - -static BOOL KanjiIn; // TRUE = MBCS\x82\xCC1byte\x96ڂ\xF0\x8E\xF3\x90M\x82\xB5\x82Ă\xA2\x82\xE9 -static BOOL EUCkanaIn, EUCsupIn; -static int EUCcount; - -/* GL for single shift 2/3 */ -static int GLtmp; -/* single shift 2/3 flag */ -static BOOL SSflag; -/* JIS -> SJIS conversion flag */ -static BOOL ConvJIS; -static WORD Kanji; -static BOOL Fallbacked; - -static BYTE DebugFlag = DEBUG_FLAG_NONE; - -typedef struct { - /* GL, GR code group */ - int Glr[2]; - /* G0, G1, G2, G3 code group */ - int Gn[4]; - // - char32_t replacement_char; - // UTF-8 work - BYTE buf[4]; - int count; -} VttermKanjiWork; - -static VttermKanjiWork KanjiWork; - -static BOOL IsC0(char32_t b) -{ - return (b <= US); -} - -static BOOL IsC1(char32_t b) -{ - return ((b>=0x80) && (b<=0x9F)); -} - -/** - * PutU32() wrapper - * Unicode\x83x\x81[\x83X\x82ɐ\xE8\x91ւ\xA6 - */ -static void PutChar(BYTE b) -{ - PutU32(b); -} - -/** - * ISO2022\x97p\x83\x8F\x81[\x83N\x82\xF0\x8F\x89\x8A\x{227B0B7}\x82\xE9 - */ -static void CharSetInit2(VttermKanjiWork *w) -{ - if (ts.Language==IdJapanese) { - w->Gn[0] = IdASCII; - w->Gn[1] = IdKatakana; - w->Gn[2] = IdKatakana; - w->Gn[3] = IdKanji; - w->Glr[0] = 0; - if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0)) - w->Glr[1] = 2; // 8-bit katakana - else - w->Glr[1] = 3; - } - else { - w->Gn[0] = IdASCII; - w->Gn[1] = IdSpecial; - w->Gn[2] = IdASCII; - w->Gn[3] = IdASCII; - w->Glr[0] = 0; - w->Glr[1] = 0; - } -} - -/** - * \x8A\xBF\x8E\x9A\x8A֘A\x83\x8F\x81[\x83N\x82\xF0\x8F\x89\x8A\x{227B0B7}\x82\xE9 - */ -void CharSetInit(void) -{ - VttermKanjiWork *w = &KanjiWork; - - CharSetInit2(w); - - w->replacement_char = REPLACEMENT_CHARACTER; - SSflag = FALSE; - - KanjiIn = FALSE; - EUCkanaIn = FALSE; - EUCsupIn = FALSE; - ConvJIS = FALSE; - Fallbacked = FALSE; -} - -/** - * 1byte\x96ڃ`\x83F\x83b\x83N - */ -static BOOL CheckFirstByte(BYTE b, int lang, int 
kanji_code) -{ - switch (lang) { - case IdKorean: - return __ismbblead(b, 51949); - case IdChinese: - if (kanji_code == IdCnGB2312) { - return __ismbblead(b, 936); - } - else if (ts.KanjiCode == IdCnBig5) { - return __ismbblead(b, 950); - } - break; - default: - assert(FALSE); - break; - } - assert(FALSE); - return FALSE; -} - -/** - * Double-byte Character Sets - * SJIS\x82\xCC1byte\x96\xDA? - * - * \x91\xE61\x83o\x83C\x83g0x81...0x9F or 0xE0...0xEF - * \x91\xE61\x83o\x83C\x83g0x81...0x9F or 0xE0...0xFC - */ -static BOOL ismbbleadSJIS(BYTE b) -{ - if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) { - return TRUE; - } - return FALSE; -} - -/** - * ts.Language == IdJapanese \x8E\x9E - * 1byte\x96ڃ`\x83F\x83b\x83N - */ -static BOOL CheckKanji(BYTE b) -{ - VttermKanjiWork *w = &KanjiWork; - BOOL Check; - - if (ts.Language!=IdJapanese) - return FALSE; - - ConvJIS = FALSE; - - if (ts.KanjiCode==IdSJIS || - (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) { - if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) { - Fallbacked = TRUE; - return TRUE; // SJIS kanji - } - if ((0xa1<=b) && (b<=0xdf)) { - return FALSE; // SJIS katakana - } - } - - if ((b>=0x21) && (b<=0x7e)) { - Check = (w->Gn[w->Glr[0]] == IdKanji); - ConvJIS = Check; - } - else if ((b>=0xA1) && (b<=0xFE)) { - Check = (w->Gn[w->Glr[1]] == IdKanji); - if (ts.KanjiCode==IdEUC) { - Check = TRUE; - } - else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) { - Check = FALSE; // 8-bit katakana - } - ConvJIS = Check; - } - else { - Check = FALSE; - } - - return Check; -} - -static BOOL ParseFirstJP(BYTE b) -// returns TRUE if b is processed -// (actually allways returns TRUE) -{ - VttermKanjiWork *w = &KanjiWork; - if (KanjiIn) { - if (((! 
ConvJIS) && (0x3F<b) && (b<0xFD)) || - (ConvJIS && ( ((0x20<b) && (b<0x7f)) || - ((0xa0<b) && (b<0xff)) )) ) - { - unsigned long u32; - Kanji = Kanji + b; - if (ConvJIS) { - // JIS -> Shift_JIS(CP932) - Kanji = JIS2SJIS((WORD)(Kanji & 0x7f7f)); - } - u32 = CP932ToUTF32(Kanji); - PutU32(u32); - KanjiIn = FALSE; - return TRUE; - } - else if ((ts.TermFlag & TF_CTRLINKANJI)==0) { - KanjiIn = FALSE; - } - else if ((b==CR) && Wrap) { - CarriageReturn(FALSE); - LineFeed(LF,FALSE); - Wrap = FALSE; - } - } - - if (SSflag) { - if (w->Gn[GLtmp] == IdKanji) { - Kanji = b << 8; - KanjiIn = TRUE; - SSflag = FALSE; - return TRUE; - } - else if (w->Gn[GLtmp] == IdKatakana) { - b = b | 0x80; - } - - PutChar(b); - SSflag = FALSE; - return TRUE; - } - - if ((!EUCsupIn) && (!EUCkanaIn) && (!KanjiIn) && CheckKanji(b)) { - Kanji = b << 8; - KanjiIn = TRUE; - return TRUE; - } - - if (b<=US) { - ParseControl(b); - } - else if (b==0x20) { - PutChar(b); - } - else if ((b>=0x21) && (b<=0x7E)) { - if (EUCsupIn) { - EUCcount--; - EUCsupIn = (EUCcount==0); - return TRUE; - } - - if ((w->Gn[w->Glr[0]] == IdKatakana) || EUCkanaIn) { - b = b | 0x80; - EUCkanaIn = FALSE; - { - // b\x82\xCDsjis\x82̔\xBC\x8Ap\x83J\x83^\x83J\x83i - unsigned long u32 = CP932ToUTF32(b); - PutU32(u32); - } - return TRUE; - } - PutChar(b); - } - else if (b==0x7f) { - return TRUE; - } - else if ((b>=0x80) && (b<=0x8D)) { - ParseControl(b); - } - else if (b==0x8E) { // SS2 - switch (ts.KanjiCode) { - case IdEUC: - if (ts.ISO2022Flag & ISO2022_SS2) { - EUCkanaIn = TRUE; - } - break; - case IdUTF8: - PutU32(REPLACEMENT_CHARACTER); - break; - default: - ParseControl(b); - } - } - else if (b==0x8F) { // SS3 - switch (ts.KanjiCode) { - case IdEUC: - if (ts.ISO2022Flag & ISO2022_SS3) { - EUCcount = 2; - EUCsupIn = TRUE; - } - break; - case IdUTF8: - PutU32(REPLACEMENT_CHARACTER); - break; - default: - ParseControl(b); - } - } - else if ((b>=0x90) && (b<=0x9F)) { - ParseControl(b); - } - else if (b==0xA0) { - PutChar(0x20); - } - 
else if ((b>=0xA1) && (b<=0xFE)) { - if (EUCsupIn) { - EUCcount--; - EUCsupIn = (EUCcount==0); - return TRUE; - } - - if ((w->Gn[w->Glr[1]] != IdASCII) || - ((ts.KanjiCode==IdEUC) && EUCkanaIn) || - (ts.KanjiCode==IdSJIS) || - ((ts.KanjiCode==IdJIS) && - (ts.JIS7Katakana==0) && - ((ts.TermFlag & TF_FIXEDJIS)!=0))) { - // b\x82\xCDsjis\x82̔\xBC\x8Ap\x83J\x83^\x83J\x83i - unsigned long u32 = CP932ToUTF32(b); - PutU32(u32); - } else { - if (w->Gn[w->Glr[1]] == IdASCII) { - b = b & 0x7f; - } - PutChar(b); - } - EUCkanaIn = FALSE; - } - else { - PutChar(b); - } - - return TRUE; -} - -static BOOL ParseFirstKR(BYTE b) -// returns TRUE if b is processed -// (actually allways returns TRUE) -{ - VttermKanjiWork *w = &KanjiWork; - if (KanjiIn) { - if (((0x41<=b) && (b<=0x5A)) || - ((0x61<=b) && (b<=0x7A)) || - ((0x81<=b) && (b<=0xFE))) - { - unsigned long u32 = 0; - if (ts.KanjiCode == IdKoreanCP949) { - // CP51949 - Kanji = Kanji + b; - u32 = MBCP_UTF32(Kanji, 51949); - } - else { - assert(FALSE); - } - PutU32(u32); - KanjiIn = FALSE; - return TRUE; - } - else if ((ts.TermFlag & TF_CTRLINKANJI)==0) { - KanjiIn = FALSE; - } - else if ((b==CR) && Wrap) { - CarriageReturn(FALSE); - LineFeed(LF,FALSE); - Wrap = FALSE; - } - } - - if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) { - Kanji = b << 8; - KanjiIn = TRUE; - return TRUE; - } - - if (b<=US) { - ParseControl(b); - } - else if (b==0x20) { - PutChar(b); - } - else if ((b>=0x21) && (b<=0x7E)) { -// if (Gn[Glr[0]] == IdKatakana) { -// b = b | 0x80; -// } - PutChar(b); - } - else if (b==0x7f) { - return TRUE; - } - else if ((0x80<=b) && (b<=0x9F)) { - ParseControl(b); - } - else if (b==0xA0) { - PutChar(0x20); - } - else if ((b>=0xA1) && (b<=0xFE)) { - if (w->Gn[w->Glr[1]] == IdASCII) { - b = b & 0x7f; - } - PutChar(b); - } - else { - PutChar(b); - } - - return TRUE; -} - -static BOOL ParseFirstCn(BYTE b) -// returns TRUE if b is processed -// (actually allways returns TRUE) -{ - VttermKanjiWork *w = &KanjiWork; 
- if (KanjiIn) { - // TODO - if (((0x40<=b) && (b<=0x7e)) || - ((0xa1<=b) && (b<=0xFE))) - { - unsigned long u32 = 0; - Kanji = Kanji + b; - if (ts.KanjiCode == IdCnGB2312) { - // CP936 GB2312 - u32 = MBCP_UTF32(Kanji, 936); - } - else if (ts.KanjiCode == IdCnBig5) { - // CP950 Big5 - u32 = MBCP_UTF32(Kanji, 950); - } - else { - assert(FALSE); - } - PutU32(u32); - KanjiIn = FALSE; - return TRUE; - } - else if ((ts.TermFlag & TF_CTRLINKANJI)==0) { - KanjiIn = FALSE; - } - else if ((b==CR) && Wrap) { - CarriageReturn(FALSE); - LineFeed(LF,FALSE); - Wrap = FALSE; - } - } - - if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) { - Kanji = b << 8; - KanjiIn = TRUE; - return TRUE; - } - - if (b<=US) { - ParseControl(b); - } - else if (b==0x20) { - PutChar(b); - } - else if ((b>=0x21) && (b<=0x7E)) { -// if (Gn[Glr[0]] == IdKatakana) { -// b = b | 0x80; -// } - PutChar(b); - } - else if (b==0x7f) { - return TRUE; - } - else if ((0x80<=b) && (b<=0x9F)) { - ParseControl(b); - } - else if (b==0xA0) { - PutChar(0x20); - } - else if ((b>=0xA1) && (b<=0xFE)) { - if (w->Gn[w->Glr[1]] == IdASCII) { - b = b & 0x7f; - } - PutChar(b); - } - else { - PutChar(b); - } - - return TRUE; -} - -static void ParseASCII(BYTE b) -{ - if (SSflag) { - PutChar(b); - SSflag = FALSE; - return; - } - - if (b<=US) { - ParseControl(b); - } else if ((b>=0x20) && (b<=0x7E)) { - PutU32(b); - } else if ((b==0x8E) || (b==0x8F)) { - PutU32(REPLACEMENT_CHARACTER); - } else if ((b>=0x80) && (b<=0x9F)) { - ParseControl(b); - } else if (b>=0xA0) { - PutU32(b); - } -} - -/** - * REPLACEMENT_CHARACTER \x82̕\\x8E\xA6 - * UTF-8 \x83f\x83R\x81[\x83h\x82\xA9\x82\xE7\x8Eg\x97p - */ -static void PutReplacementChr(VttermKanjiWork *w, const BYTE *ptr, size_t len, BOOL fallback) -{ - const char32_t replacement_char = w->replacement_char; - int i; - for (i = 0; i < len; i++) { - BYTE c = *ptr++; - assert(IsC0(c)); - if (fallback) { - // fallback ISO8859-1 - PutU32(c); - } - else { - // fallback\x82\xB5\x82Ȃ\xA2 
- if (c < 0x80) { - // \x95s\x90\xB3\x82\xC8UTF-8\x95\xB6\x8E\x9A\x97\xF1\x82̂Ȃ\xA9\x82\xC90x80\x96\xA2\x96\x9E\x82\xAA\x82\xA0\x82\xEA\x82A - // 1\x95\xB6\x8E\x9A\x82\xCCUTF-8\x95\xB6\x8E\x9A\x82Ƃ\xB5\x82Ă\xBB\x82̂܂ܕ\\x8E\xA6\x82\xB7\x82\xE9 - PutU32(c); - } - else { - PutU32(replacement_char); - } - } - } -} - -/** - * UTF-8\x82Ŏ\xF3\x90M\x83f\x81[\x83^\x82\xF0\x8F\x88\x97\x9D\x82\xB7\x82\xE9 - * - * returns TRUE if b is processed - */ -static BOOL ParseFirstUTF8(BYTE b) -{ - VttermKanjiWork *w = &KanjiWork; - char32_t code; - - if (Fallbacked) { - BOOL r = ParseFirstJP(b); - Fallbacked = FALSE; - return r; - } - - // UTF-8\x83G\x83\x93\x83R\x81[\x83h - // The Unicode Standard Chapter 3 - // Table 3-7. Well-Formed UTF-8 Byte Sequences - // | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte | - // | U+0000..U+007F | 00..7F | | | | - // | U+0080..U+07FF | C2..DF | 80..BF | | | - // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | - // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | | - // | U+D000..U+D7FF | ED | 80..9F | 80..BF | | - // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | | - // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | - // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | - // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | - // UTF-8\x82Ńf\x83R\x81[\x83h\x82ł\xAB\x82Ȃ\xA2\x8Fꍇ - // - 1byte\x96\xDA - // - 0x00 - 0x7f ok - // - 0x80 - 0xc1 ng - // - 0xc2 - 0xf4 ok - // - 0xf5 - 0xff ng - // - 2byte\x96ڈȍ~ - // - 0x00 - 0x7f ng - // - 0x80 - 0xbf ok - // - 0xc0 - 0xff ng - // - 2byte\x96ڗ\xE1\x8AO - // - 1byte == 0xe0 \x82̂Ƃ\xAB 0xa0 - 0xbf\x82̂\xDDok - // - 1byte == 0xed \x82̂Ƃ\xAB 0x80 - 0x9f\x82̂\xDDok - // - 1byte == 0xf0 \x82̂Ƃ\xAB 0x90 - 0xbf\x82̂\xDDok - // - 1byte == 0xf4 \x82̂Ƃ\xAB 0x90 - 0x8f\x82̂\xDDok -recheck: - // 1byte(7bit) - if (w->count == 0) { - if (IsC0(b)) { - // U+0000 .. 
U+001f - // C0\x90\xA7\x8C䕶\x8E\x9A, C0 Coontrols - ParseControl(b); - return TRUE; - } - else if (b <= 0x7f) { - // 0x7f\x88ȉ\xBA, \x82̂Ƃ\xAB\x81A\x82\xBB\x82̂܂o\x97\xCD - PutU32(b); - return TRUE; - } - else if (0xc2 <= b && b <= 0xf4) { - // 1byte\x96ڕۑ\xB6 - w->buf[w->count++] = b; - return TRUE; - } - - // 0x80 - 0xc1, 0xf5 - 0xff - // UTF-8\x82\xC51byte\x82ɏo\x8C\xBB\x82\xB5\x82Ȃ\xA2\x83R\x81[\x83h\x82̂Ƃ\xAB - if (ts.FallbackToCP932) { - // fallback\x82\xB7\x82\xE9\x8Fꍇ - if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) { - // \x93\xFA\x96{\x8C\xEA\x82̏ꍇ && Shift_JIS 1byte\x96\xDA - // Shift_JIS \x82\xC9 fallback - Fallbacked = TRUE; - ConvJIS = FALSE; - Kanji = b << 8; - KanjiIn = TRUE; - return TRUE; - } - // fallback ISO8859-1 - PutU32(b); - return TRUE; - } - else { - // fallback\x82\xB5\x82Ȃ\xA2, \x95s\x90\xB3\x82ȕ\xB6\x8E\x9A\x93\xFC\x97\xCD - w->buf[0] = b; - PutReplacementChr(w, w->buf, 1, FALSE); - } - return TRUE; - } - - // 2byte\x88ȍ~\x90\xB3\x8F\xED? - if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b) - // \x95s\x90\xB3\x82ȕ\xB6\x8E\x9A, (\x8F\xE3\x88\xCA2bit\x82\xAA 0b10xx_xxxx \x82ł͂Ȃ\xA2) - PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932); - w->count = 0; - goto recheck; - } - - // 2byte\x96ڈȍ~\x95ۑ\xB6 - w->buf[w->count++] = b; - - // 2byte(11bit) - if (w->count == 2) { - if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf) - // 5bit + 6bit - code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f); - if (IsC1(code)) { - // U+0080 .. 
u+009f - // C1\x90\xA7\x8C䕶\x8E\x9A, C1 Controls - ParseControl((BYTE)code); - } - else { - PutU32(code); - } - w->count = 0; - return TRUE; - } - return TRUE; - } - - // 3byte(16bit) - if (w->count == 3) { - if ((w->buf[0] & 0xf0) == 0xe0) { - if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) || - (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) { - // \x95s\x90\xB3\x82\xC8 UTF-8 - PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932); - w->count = 0; - goto recheck; - } - // 4bit + 6bit + 6bit - code = ((w->buf[0] & 0xf) << 12); - code |= ((w->buf[1] & 0x3f) << 6); - code |= ((w->buf[2] & 0x3f)); - PutU32(code); - w->count = 0; - return TRUE; - } - return TRUE; - } - - // 4byte(21bit) - assert(w->count == 4); - assert((w->buf[0] & 0xf8) == 0xf0); - if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) || - (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) { - // \x95s\x90\xB3\x82\xC8 UTF-8 - PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932); - w->count = 0; - goto recheck; - } - // 3bit + 6bit + 6bit + 6bit - code = ((w->buf[0] & 0x07) << 18); - code |= ((w->buf[1] & 0x3f) << 12); - code |= ((w->buf[2] & 0x3f) << 6); - code |= (w->buf[3] & 0x3f); - PutU32(code); - w->count = 0; - return TRUE; -} - -static BOOL ParseFirstRus(BYTE b) -// returns if b is processed -{ - if (IsC0(b)) { - ParseControl(b); - return TRUE; - } - // CP1251\x82ɕϊ\xB7 - BYTE c = RussConv(ts.KanjiCode, IdWindows, b); - // CP1251->Unicode - unsigned long u32 = MBCP_UTF32(c, 1251); - PutU32(u32); - return TRUE; -} - -static BOOL ParseEnglish(BYTE b) -{ - unsigned short u16 = 0; - int part = KanjiCodeToISO8859Part(ts.KanjiCode); - int r = UnicodeFromISO8859(part, b, &u16); - if (r == 0) { - return FALSE; - } - if (u16 < 0x100) { - ParseASCII((BYTE)u16); - } - else { - PutU32(u16); - } - return TRUE; -} - -static void PutDebugChar(BYTE b) -{ - int i; - BOOL svInsertMode, svAutoWrapMode; - TCharAttr svCharAttr; - TCharAttr char_attr; - - svInsertMode = 
TermGetInsertMode(); - TermSetInsertMode(FALSE); - svAutoWrapMode = TermGetAutoWrapMode(); - TermSetAutoWrapMode(TRUE); - - TermGetAttr(&svCharAttr); - char_attr = svCharAttr; - char_attr.Attr = AttrDefault; - TermSetAttr(&char_attr); - - if (DebugFlag==DEBUG_FLAG_HEXD) { - char buff[3]; - _snprintf(buff, 3, "%02X", (unsigned int) b); - - for (i=0; i<2; i++) - PutChar(buff[i]); - PutChar(' '); - } - else if (DebugFlag==DEBUG_FLAG_NORM) { - - if ((b & 0x80) == 0x80) { - //UpdateStr(); - char_attr.Attr = AttrReverse; - TermSetAttr(&char_attr); - b = b & 0x7f; - } - - if (b<=US) { - PutChar('^'); - PutChar((char)(b+0x40)); - } - else if (b==DEL) { - PutChar('<'); - PutChar('D'); - PutChar('E'); - PutChar('L'); - PutChar('>'); - } - else - PutChar(b); - } - - TermSetAttr(&char_attr); - TermSetInsertMode(svInsertMode); - TermSetAutoWrapMode(svAutoWrapMode); -} - -void ParseFirst(BYTE b) -{ - WORD language = ts.Language; - if (DebugFlag != DEBUG_FLAG_NONE) { - language = IdDebug; - } - - switch (language) { - case IdUtf8: - ParseFirstUTF8(b); - return; - - case IdJapanese: - switch (ts.KanjiCode) { - case IdUTF8: - if (ParseFirstUTF8(b)) { - return; - } - break; - default: - if (ParseFirstJP(b)) { - return; - } - } - break; - - case IdKorean: - switch (ts.KanjiCode) { - case IdUTF8: - if (ParseFirstUTF8(b)) { - return; - } - break; - default: - if (ParseFirstKR(b)) { - return; - } - } - break; - - case IdRussian: - if (ParseFirstRus(b)) { - return; - } - break; - - case IdChinese: - switch (ts.KanjiCode) { - case IdUTF8: - if (ParseFirstUTF8(b)) { - return; - } - break; - default: - if (ParseFirstCn(b)) { - return; - } - } - break; - case IdEnglish: { - if (ParseEnglish(b)) { - return; - } - break; - } - case IdDebug: { - PutDebugChar(b); - return; - } - } - - if (SSflag) { - PutChar(b); - SSflag = FALSE; - return; - } - - if (b<=US) - ParseControl(b); - else if ((b>=0x20) && (b<=0x7E)) - PutChar(b); - else if ((b>=0x80) && (b<=0x9F)) - ParseControl(b); - else if 
(b>=0xA0) - PutChar(b); -} - -/** - * \x8Ew\x8E\xA6(Designate) - * - * @param Gn 0/1/2/3 = G0/G1/G2/G3 - * @param codeset IdASCII 0 - * IdKatakana 1 - * IdKanji 2 - * IdSpecial 3 - */ -void CharSet2022Designate(int gn, int cs) -{ - VttermKanjiWork *w = &KanjiWork; - w->Gn[gn] = cs; -} - -/** - * \x8CĂяo\x82\xB5(Invoke) - * @param glr 0/1 = GL/GR (Locking shift\x8E\x9E\x82̂ݗL\x8C\xF8) - * @param gn 0/1/2/3 = G0/G1/G2/G3 - * @param single_shift FALSE Locking shift - * TRUE Single shift - */ -void CharSet2022Invoke(int glr, int gn, BOOL single_shift) -{ - VttermKanjiWork *w = &KanjiWork; - if (single_shift == FALSE) { - // Locking shift - w->Glr[glr] = gn; - } - else { - // Single shift - GLtmp = gn; - SSflag = TRUE; - } -} - -/** - * DEC\x93\xC1\x8E\xEA\x83t\x83H\x83\x93\x83g(Tera Special font) - * 0140(0x60) ... 0176(0x7f) \x82Ɍr\x90\xFC\x82ŃA\x83T\x83C\x83\x93\x82\xB3\x82\xEA\x82Ă\xA2\x82\xE9 - * (0xe0) ... (0xff) \x82\xE0? - * <ESC>(0 \x82Ƃ\xA2\x82\xA4\x93\xC1\x8E\xEA\x82ȃG\x83X\x83P\x81[\x83v\x83V\x81[\x83P\x83\x93\x83X\x82Œ\xE8\x8B` - * about/emulations.html - * - * @param b \x83R\x81[\x83h - * @retval TRUE IdSpecial - * @retval FALSE IdSpecial\x82ł͂Ȃ\xA2 - */ -BOOL CharSetIsSpecial(BYTE b) -{ - VttermKanjiWork *w = &KanjiWork; - BOOL SpecialNew = FALSE; - - if ((b>0x5F) && (b<0x80)) { - if (SSflag) - SpecialNew = (w->Gn[GLtmp]==IdSpecial); - else - SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial); - } - else if (b>0xDF) { - if (SSflag) - SpecialNew = (w->Gn[GLtmp]==IdSpecial); - else - SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial); - } - - return SpecialNew; -} - -static void CharSetSaveStateLow(CharSetState *state, const VttermKanjiWork *w) -{ - int i; - state->infos[0] = w->Glr[0]; - state->infos[1] = w->Glr[1]; - for (i=0 ; i<=3; i++) { - state->infos[2 + i] = w->Gn[i]; - } -} - -/** - * \x8F\xF3\x91Ԃ\xF0\x95ۑ\xB6\x82\xB7\x82\xE9 - */ -void CharSetSaveState(CharSetState *state) -{ - VttermKanjiWork *w = &KanjiWork; - CharSetSaveStateLow(state, w); -} - -/** - * 
\x8F\xF3\x91ԂA\x82\xB7\x82\xE9 - */ -void CharSetLoadState(const CharSetState *state) -{ - VttermKanjiWork *w = &KanjiWork; - int i; - w->Glr[0] = state->infos[0]; - w->Glr[1] = state->infos[1]; - for (i=0 ; i<=3; i++) { - w->Gn[i] = state->infos[2 + i]; - } -} - -/** - * \x83t\x83H\x81[\x83\x8B\x83o\x83b\x83N\x82̏I\x97\xB9 - * \x8E\xF3\x90M\x83f\x81[\x83^UTF-8\x8E\x9E\x82ɁAShift_JIS\x8Fo\x97͒\x86(fallback\x8F\xF3\x91\xD4)\x82𒆒f\x82\xB7\x82\xE9 - * - */ -void CharSetFallbackFinish(void) -{ - Fallbacked = FALSE; -} - -/** - * \x83f\x83o\x83O\x8Fo\x97͂\xF0\x8E\x9F\x82̃\x82\x81[\x83h\x82ɕύX\x82\xB7\x82\xE9 - */ -void CharSetSetNextDebugMode(void) -{ - // ts.DebugModes \x82ɂ\xCD tttypes.h \x82\xCC DBGF_* \x82\xAA OR \x82œ\xFC\x82\xC1\x82Ă\xE9 - do { - DebugFlag = (DebugFlag + 1) % DEBUG_FLAG_MAXD; - } while (DebugFlag != DEBUG_FLAG_NONE && !((ts.DebugModes >> (DebugFlag - 1)) & 1)); -} - -BYTE CharSetGetDebugMode(void) -{ - return DebugFlag; -} - -void CharSetSetDebugMode(BYTE mode) -{ - DebugFlag = mode; -} Copied: trunk/teraterm/teraterm/charset.cpp (from rev 10774, trunk/teraterm/teraterm/charset.c) =================================================================== --- trunk/teraterm/teraterm/charset.cpp (rev 0) +++ trunk/teraterm/teraterm/charset.cpp 2023-06-20 14:22:56 UTC (rev 10775) @@ -0,0 +1,1047 @@ +/* + * (C) 2023- TeraTerm Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "teraterm.h" +#include "tttypes.h" +#include <stdio.h> +#include <string.h> +#if !defined(_CRTDBG_MAP_ALLOC) +#define _CRTDBG_MAP_ALLOC +#endif +#include <stdlib.h> +#include <crtdbg.h> +#include <assert.h> + +#include "buffer.h" // for Wrap +#include "ttwinman.h" +#include "codeconv.h" +#include "unicode.h" +#include "language.h" // for JIS2SJIS() +#include "ttcstd.h" +#include "vtterm.h" + +#include "charset.h" + +// UTF-8\x82\xAA\x95s\x90\xB3\x82Ȓl\x82\xBE\x82\xC1\x82\xBD\x8E\x9E\x82ɕ\\x8E\xA6\x82\xB7\x82镶\x8E\x9A +#define REPLACEMENT_CHARACTER '?' 
+//#define REPLACEMENT_CHARACTER 0x2592 +//#define REPLACEMENT_CHARACTER 0x20 +//#define REPLACEMENT_CHARACTER 0xfffd + +static BOOL KanjiIn; // TRUE = MBCS\x82\xCC1byte\x96ڂ\xF0\x8E\xF3\x90M\x82\xB5\x82Ă\xA2\x82\xE9 +static BOOL EUCkanaIn, EUCsupIn; +static int EUCcount; + +/* GL for single shift 2/3 */ +static int GLtmp; +/* single shift 2/3 flag */ +static BOOL SSflag; +/* JIS -> SJIS conversion flag */ +static BOOL ConvJIS; +static WORD Kanji; +static BOOL Fallbacked; + +static BYTE DebugFlag = DEBUG_FLAG_NONE; + +typedef struct { + /* GL, GR code group */ + int Glr[2]; + /* G0, G1, G2, G3 code group */ + int Gn[4]; + // + char32_t replacement_char; + // UTF-8 work + BYTE buf[4]; + int count; +} VttermKanjiWork; + +static VttermKanjiWork KanjiWork; + +static BOOL IsC0(char32_t b) +{ + return (b <= US); +} + +static BOOL IsC1(char32_t b) +{ + return ((b>=0x80) && (b<=0x9F)); +} + +/** + * PutU32() wrapper + * Unicode\x83x\x81[\x83X\x82ɐ\xE8\x91ւ\xA6 + */ +static void PutChar(BYTE b) +{ + PutU32(b); +} + +/** + * ISO2022\x97p\x83\x8F\x81[\x83N\x82\xF0\x8F\x89\x8A\x{227B0B7}\x82\xE9 + */ +static void CharSetInit2(VttermKanjiWork *w) +{ + if (ts.Language==IdJapanese) { + w->Gn[0] = IdASCII; + w->Gn[1] = IdKatakana; + w->Gn[2] = IdKatakana; + w->Gn[3] = IdKanji; + w->Glr[0] = 0; + if ((ts.KanjiCode==IdJIS) && (ts.JIS7Katakana==0)) + w->Glr[1] = 2; // 8-bit katakana + else + w->Glr[1] = 3; + } + else { + w->Gn[0] = IdASCII; + w->Gn[1] = IdSpecial; + w->Gn[2] = IdASCII; + w->Gn[3] = IdASCII; + w->Glr[0] = 0; + w->Glr[1] = 0; + } +} + +/** + * \x8A\xBF\x8E\x9A\x8A֘A\x83\x8F\x81[\x83N\x82\xF0\x8F\x89\x8A\x{227B0B7}\x82\xE9 + */ +void CharSetInit(void) +{ + VttermKanjiWork *w = &KanjiWork; + + CharSetInit2(w); + + w->replacement_char = REPLACEMENT_CHARACTER; + SSflag = FALSE; + + KanjiIn = FALSE; + EUCkanaIn = FALSE; + EUCsupIn = FALSE; + ConvJIS = FALSE; + Fallbacked = FALSE; +} + +/** + * 1byte\x96ڃ`\x83F\x83b\x83N + */ +static BOOL CheckFirstByte(BYTE b, int lang, int 
kanji_code) +{ + switch (lang) { + case IdKorean: + return __ismbblead(b, 51949); + case IdChinese: + if (kanji_code == IdCnGB2312) { + return __ismbblead(b, 936); + } + else if (ts.KanjiCode == IdCnBig5) { + return __ismbblead(b, 950); + } + break; + default: + assert(FALSE); + break; + } + assert(FALSE); + return FALSE; +} + +/** + * Double-byte Character Sets + * SJIS\x82\xCC1byte\x96\xDA? + * + * \x91\xE61\x83o\x83C\x83g0x81...0x9F or 0xE0...0xEF + * \x91\xE61\x83o\x83C\x83g0x81...0x9F or 0xE0...0xFC + */ +static BOOL ismbbleadSJIS(BYTE b) +{ + if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) { + return TRUE; + } + return FALSE; +} + +/** + * ts.Language == IdJapanese \x8E\x9E + * 1byte\x96ڃ`\x83F\x83b\x83N + */ +static BOOL CheckKanji(BYTE b) +{ + VttermKanjiWork *w = &KanjiWork; + BOOL Check; + + if (ts.Language!=IdJapanese) + return FALSE; + + ConvJIS = FALSE; + + if (ts.KanjiCode==IdSJIS || + (ts.FallbackToCP932 && ts.KanjiCode==IdUTF8)) { + if (((0x80<b) && (b<0xa0)) || ((0xdf<b) && (b<0xfd))) { + Fallbacked = TRUE; + return TRUE; // SJIS kanji + } + if ((0xa1<=b) && (b<=0xdf)) { + return FALSE; // SJIS katakana + } + } + + if ((b>=0x21) && (b<=0x7e)) { + Check = (w->Gn[w->Glr[0]] == IdKanji); + ConvJIS = Check; + } + else if ((b>=0xA1) && (b<=0xFE)) { + Check = (w->Gn[w->Glr[1]] == IdKanji); + if (ts.KanjiCode==IdEUC) { + Check = TRUE; + } + else if (ts.KanjiCode==IdJIS && ((ts.TermFlag & TF_FIXEDJIS)!=0) && (ts.JIS7Katakana==0)) { + Check = FALSE; // 8-bit katakana + } + ConvJIS = Check; + } + else { + Check = FALSE; + } + + return Check; +} + +static BOOL ParseFirstJP(BYTE b) +// returns TRUE if b is processed +// (actually allways returns TRUE) +{ + VttermKanjiWork *w = &KanjiWork; + if (KanjiIn) { + if (((! 
ConvJIS) && (0x3F<b) && (b<0xFD)) || + (ConvJIS && ( ((0x20<b) && (b<0x7f)) || + ((0xa0<b) && (b<0xff)) )) ) + { + unsigned long u32; + Kanji = Kanji + b; + if (ConvJIS) { + // JIS -> Shift_JIS(CP932) + Kanji = JIS2SJIS((WORD)(Kanji & 0x7f7f)); + } + u32 = CP932ToUTF32(Kanji); + PutU32(u32); + KanjiIn = FALSE; + return TRUE; + } + else if ((ts.TermFlag & TF_CTRLINKANJI)==0) { + KanjiIn = FALSE; + } + else if ((b==CR) && Wrap) { + CarriageReturn(FALSE); + LineFeed(LF,FALSE); + Wrap = FALSE; + } + } + + if (SSflag) { + if (w->Gn[GLtmp] == IdKanji) { + Kanji = b << 8; + KanjiIn = TRUE; + SSflag = FALSE; + return TRUE; + } + else if (w->Gn[GLtmp] == IdKatakana) { + b = b | 0x80; + } + + PutChar(b); + SSflag = FALSE; + return TRUE; + } + + if ((!EUCsupIn) && (!EUCkanaIn) && (!KanjiIn) && CheckKanji(b)) { + Kanji = b << 8; + KanjiIn = TRUE; + return TRUE; + } + + if (b<=US) { + ParseControl(b); + } + else if (b==0x20) { + PutChar(b); + } + else if ((b>=0x21) && (b<=0x7E)) { + if (EUCsupIn) { + EUCcount--; + EUCsupIn = (EUCcount==0); + return TRUE; + } + + if ((w->Gn[w->Glr[0]] == IdKatakana) || EUCkanaIn) { + b = b | 0x80; + EUCkanaIn = FALSE; + { + // b\x82\xCDsjis\x82̔\xBC\x8Ap\x83J\x83^\x83J\x83i + unsigned long u32 = CP932ToUTF32(b); + PutU32(u32); + } + return TRUE; + } + PutChar(b); + } + else if (b==0x7f) { + return TRUE; + } + else if ((b>=0x80) && (b<=0x8D)) { + ParseControl(b); + } + else if (b==0x8E) { // SS2 + switch (ts.KanjiCode) { + case IdEUC: + if (ts.ISO2022Flag & ISO2022_SS2) { + EUCkanaIn = TRUE; + } + break; + case IdUTF8: + PutU32(REPLACEMENT_CHARACTER); + break; + default: + ParseControl(b); + } + } + else if (b==0x8F) { // SS3 + switch (ts.KanjiCode) { + case IdEUC: + if (ts.ISO2022Flag & ISO2022_SS3) { + EUCcount = 2; + EUCsupIn = TRUE; + } + break; + case IdUTF8: + PutU32(REPLACEMENT_CHARACTER); + break; + default: + ParseControl(b); + } + } + else if ((b>=0x90) && (b<=0x9F)) { + ParseControl(b); + } + else if (b==0xA0) { + PutChar(0x20); + } + 
else if ((b>=0xA1) && (b<=0xFE)) { + if (EUCsupIn) { + EUCcount--; + EUCsupIn = (EUCcount==0); + return TRUE; + } + + if ((w->Gn[w->Glr[1]] != IdASCII) || + ((ts.KanjiCode==IdEUC) && EUCkanaIn) || + (ts.KanjiCode==IdSJIS) || + ((ts.KanjiCode==IdJIS) && + (ts.JIS7Katakana==0) && + ((ts.TermFlag & TF_FIXEDJIS)!=0))) { + // b\x82\xCDsjis\x82̔\xBC\x8Ap\x83J\x83^\x83J\x83i + unsigned long u32 = CP932ToUTF32(b); + PutU32(u32); + } else { + if (w->Gn[w->Glr[1]] == IdASCII) { + b = b & 0x7f; + } + PutChar(b); + } + EUCkanaIn = FALSE; + } + else { + PutChar(b); + } + + return TRUE; +} + +static BOOL ParseFirstKR(BYTE b) +// returns TRUE if b is processed +// (actually allways returns TRUE) +{ + VttermKanjiWork *w = &KanjiWork; + if (KanjiIn) { + if (((0x41<=b) && (b<=0x5A)) || + ((0x61<=b) && (b<=0x7A)) || + ((0x81<=b) && (b<=0xFE))) + { + unsigned long u32 = 0; + if (ts.KanjiCode == IdKoreanCP949) { + // CP51949 + Kanji = Kanji + b; + u32 = MBCP_UTF32(Kanji, 51949); + } + else { + assert(FALSE); + } + PutU32(u32); + KanjiIn = FALSE; + return TRUE; + } + else if ((ts.TermFlag & TF_CTRLINKANJI)==0) { + KanjiIn = FALSE; + } + else if ((b==CR) && Wrap) { + CarriageReturn(FALSE); + LineFeed(LF,FALSE); + Wrap = FALSE; + } + } + + if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) { + Kanji = b << 8; + KanjiIn = TRUE; + return TRUE; + } + + if (b<=US) { + ParseControl(b); + } + else if (b==0x20) { + PutChar(b); + } + else if ((b>=0x21) && (b<=0x7E)) { +// if (Gn[Glr[0]] == IdKatakana) { +// b = b | 0x80; +// } + PutChar(b); + } + else if (b==0x7f) { + return TRUE; + } + else if ((0x80<=b) && (b<=0x9F)) { + ParseControl(b); + } + else if (b==0xA0) { + PutChar(0x20); + } + else if ((b>=0xA1) && (b<=0xFE)) { + if (w->Gn[w->Glr[1]] == IdASCII) { + b = b & 0x7f; + } + PutChar(b); + } + else { + PutChar(b); + } + + return TRUE; +} + +static BOOL ParseFirstCn(BYTE b) +// returns TRUE if b is processed +// (actually allways returns TRUE) +{ + VttermKanjiWork *w = &KanjiWork; 
+ if (KanjiIn) { + // TODO + if (((0x40<=b) && (b<=0x7e)) || + ((0xa1<=b) && (b<=0xFE))) + { + unsigned long u32 = 0; + Kanji = Kanji + b; + if (ts.KanjiCode == IdCnGB2312) { + // CP936 GB2312 + u32 = MBCP_UTF32(Kanji, 936); + } + else if (ts.KanjiCode == IdCnBig5) { + // CP950 Big5 + u32 = MBCP_UTF32(Kanji, 950); + } + else { + assert(FALSE); + } + PutU32(u32); + KanjiIn = FALSE; + return TRUE; + } + else if ((ts.TermFlag & TF_CTRLINKANJI)==0) { + // not a trail byte: discard the pending lead byte + KanjiIn = FALSE; + } + else if ((b==CR) && Wrap) { + CarriageReturn(FALSE); + LineFeed(LF,FALSE); + Wrap = FALSE; + } + } + + if ((!KanjiIn) && CheckFirstByte(b, ts.Language, ts.KanjiCode)) { + // keep the lead byte in the upper 8 bits until the trail byte arrives + Kanji = b << 8; + KanjiIn = TRUE; + return TRUE; + } + + if (b<=US) { + ParseControl(b); + } + else if (b==0x20) { + PutChar(b); + } + else if ((b>=0x21) && (b<=0x7E)) { +// if (Gn[Glr[0]] == IdKatakana) { +// b = b | 0x80; +// } + PutChar(b); + } + else if (b==0x7f) { + // 0x7f produces no output + return TRUE; + } + else if ((0x80<=b) && (b<=0x9F)) { + ParseControl(b); + } + else if (b==0xA0) { + PutChar(0x20); + } + else if ((b>=0xA1) && (b<=0xFE)) { + if (w->Gn[w->Glr[1]] == IdASCII) { + b = b & 0x7f; + } + PutChar(b); + } + else { + PutChar(b); + } + + return TRUE; +} + +/** + * Handles one byte of 8-bit text. + * Bytes up to US and 0x80..0x9F go to ParseControl(); 0x20..0x7E and + * 0xA0..0xFF are written unchanged with PutU32(); 0x8E and 0x8F are shown + * as REPLACEMENT_CHARACTER; 0x7F produces no output. + * A pending single shift (SSflag) sends b to PutChar() and clears the flag. + */ +static void ParseASCII(BYTE b) +{ + if (SSflag) { + PutChar(b); + SSflag = FALSE; + return; + } + + if (b<=US) { + ParseControl(b); + } else if ((b>=0x20) && (b<=0x7E)) { + PutU32(b); + } else if ((b==0x8E) || (b==0x8F)) { + PutU32(REPLACEMENT_CHARACTER); + } else if ((b>=0x80) && (b<=0x9F)) { + ParseControl(b); + } else if (b>=0xA0) { + PutU32(b); + } +} + +/** + * Displays the REPLACEMENT_CHARACTER + * Used by the UTF-8 decoder + * + * @param w work area; w->replacement_char is the character displayed + * @param ptr bytes to report + * @param len number of bytes at ptr + * @param fallback TRUE: write every byte with PutU32() (ISO8859-1 fallback) + * FALSE: write bytes below 0x80 as-is, the replacement character otherwise + */ +static void PutReplacementChr(VttermKanjiWork *w, const BYTE *ptr, size_t len, BOOL fallback) +{ + const char32_t replacement_char = w->replacement_char; + int i; + for (i = 0; i < len; i++) { + BYTE c = *ptr++; + // NOTE(review): this asserts that c IS a C0 control, although the bytes passed in come from a rejected UTF-8 sequence - confirm whether !IsC0(c) was intended + assert(IsC0(c)); + if (fallback) { + // fallback ISO8859-1 + PutU32(c); + } + else { + // no fallback 
+ if (c < 0x80) { + // If the invalid UTF-8 byte sequence contains a byte below 0x80, + // display it as-is as a one-byte UTF-8 character + PutU32(c); + } + else { + PutU32(replacement_char); + } + } + } +} + +/** + * Processes received data as UTF-8 + * + * returns TRUE if b is processed + */ +static BOOL ParseFirstUTF8(BYTE b) +{ + VttermKanjiWork *w = &KanjiWork; + char32_t code; + + if (Fallbacked) { + // the previous byte was taken as a Shift_JIS lead byte (fallback): let ParseFirstJP() handle this byte, then resume UTF-8 decoding + BOOL r = ParseFirstJP(b); + Fallbacked = FALSE; + return r; + } + + // UTF-8 encoding + // The Unicode Standard Chapter 3 + // Table 3-7. Well-Formed UTF-8 Byte Sequences + // | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte | + // | U+0000..U+007F | 00..7F | | | | + // | U+0080..U+07FF | C2..DF | 80..BF | | | + // | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | + // | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | | + // | U+D000..U+D7FF | ED | 80..9F | 80..BF | | + // | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | | + // | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | + // | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | + // | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | + // Cases that cannot be decoded as UTF-8 + // - 1st byte + // - 0x00 - 0x7f ok + // - 0x80 - 0xc1 ng + // - 0xc2 - 0xf4 ok + // - 0xf5 - 0xff ng + // - 2nd and later bytes + // - 0x00 - 0x7f ng + // - 0x80 - 0xbf ok + // - 0xc0 - 0xff ng + // - 2nd byte exceptions + // - 1byte == 0xe0: only 0xa0 - 0xbf ok + // - 1byte == 0xed: only 0x80 - 0x9f ok + // - 1byte == 0xf0: only 0x90 - 0xbf ok + // - 1byte == 0xf4: only 0x80 - 0x8f ok +recheck: + // 1byte(7bit) + if (w->count == 0) { + if (IsC0(b)) { + // U+0000 .. 
U+001f + // C0 control characters (C0 Controls) + ParseControl(b); + return TRUE; + } + else if (b <= 0x7f) { + // 0x7f or below: output as-is + PutU32(b); + return TRUE; + } + else if (0xc2 <= b && b <= 0xf4) { + // save the 1st byte + w->buf[w->count++] = b; + return TRUE; + } + + // 0x80 - 0xc1, 0xf5 - 0xff + // a code that never appears as the 1st byte in UTF-8 + if (ts.FallbackToCP932) { + // fallback enabled + if ((ts.Language == IdJapanese) && ismbbleadSJIS(b)) { + // Japanese && Shift_JIS 1st byte + // fall back to Shift_JIS + Fallbacked = TRUE; + ConvJIS = FALSE; + Kanji = b << 8; + KanjiIn = TRUE; + return TRUE; + } + // fallback ISO8859-1 + PutU32(b); + return TRUE; + } + else { + // no fallback, invalid character input + w->buf[0] = b; + PutReplacementChr(w, w->buf, 1, FALSE); + } + return TRUE; + } + + // is the 2nd or later byte valid? + if((b & 0xc0) != 0x80) { // == (b <= 0x7f || 0xc0 <= b) + // invalid character (the upper 2 bits are not 0b10xx_xxxx) + // report the bytes collected so far, then re-examine b as a new 1st byte + PutReplacementChr(w, w->buf, w->count, ts.FallbackToCP932); + w->count = 0; + goto recheck; + } + + // save the 2nd and later bytes + w->buf[w->count++] = b; + + // 2byte(11bit) + if (w->count == 2) { + if ((w->buf[0] & 0xe0) == 0xc0) { // == (0xc2 <= w->buf[0] && w->buf[0] <= 0xdf) + // 5bit + 6bit + code = ((w->buf[0] & 0x1f) << 6) | (b & 0x3f); + if (IsC1(code)) { + // U+0080 .. 
u+009f + // C1\x90\xA7\x8C䕶\x8E\x9A, C1 Controls + ParseControl((BYTE)code); + } + else { + PutU32(code); + } + w->count = 0; + return TRUE; + } + return TRUE; + } + + // 3byte(16bit) + if (w->count == 3) { + if ((w->buf[0] & 0xf0) == 0xe0) { + if ((w->buf[0] == 0xe0 && (w->buf[1] < 0xa0 || 0xbf < w->buf[1])) || + (w->buf[0] == 0xed && ( 0x9f < w->buf[1]))) { + // \x95s\x90\xB3\x82\xC8 UTF-8 + PutReplacementChr(w, w->buf, 2, ts.FallbackToCP932); + w->count = 0; + goto recheck; + } + // 4bit + 6bit + 6bit + code = ((w->buf[0] & 0xf) << 12); + code |= ((w->buf[1] & 0x3f) << 6); + code |= ((w->buf[2] & 0x3f)); + PutU32(code); + w->count = 0; + return TRUE; + } + return TRUE; + } + + // 4byte(21bit) + assert(w->count == 4); + assert((w->buf[0] & 0xf8) == 0xf0); + if ((w->buf[0] == 0xf0 && (w->buf[1] < 0x90 || 0x9f < w->buf[1])) || + (w->buf[0] == 0xf4 && (w->buf[1] < 0x80 || 0x8f < w->buf[1]))) { + // \x95s\x90\xB3\x82\xC8 UTF-8 + PutReplacementChr(w, w->buf, 3, ts.FallbackToCP932); + w->count = 0; + goto recheck; + } + // 3bit + 6bit + 6bit + 6bit + code = ((w->buf[0] & 0x07) << 18); + code |= ((w->buf[1] & 0x3f) << 12); + code |= ((w->buf[2] & 0x3f) << 6); + code |= (w->buf[3] & 0x3f); + PutU32(code); + w->count = 0; + return TRUE; +} + +static BOOL ParseFirstRus(BYTE b) +// returns if b is processed +{ + if (IsC0(b)) { + ParseControl(b); + return TRUE; + } + // CP1251\x82ɕϊ\xB7 + BYTE c = RussConv(ts.KanjiCode, IdWindows, b); + // CP1251->Unicode + unsigned long u32 = MBCP_UTF32(c, 1251); + PutU32(u32); + return TRUE; +} + +static BOOL ParseEnglish(BYTE b) +{ + unsigned short u16 = 0; + int part = KanjiCodeToISO8859Part(ts.KanjiCode); + int r = UnicodeFromISO8859(part, b, &u16); + if (r == 0) { + return FALSE; + } + if (u16 < 0x100) { + ParseASCII((BYTE)u16); + } + else { + PutU32(u16); + } + return TRUE; +} + +static void PutDebugChar(BYTE b) +{ + int i; + BOOL svInsertMode, svAutoWrapMode; + TCharAttr svCharAttr; + TCharAttr char_attr; + + svInsertMode = 
TermGetInsertMode(); + TermSetInsertMode(FALSE); + svAutoWrapMode = TermGetAutoWrapMode(); + TermSetAutoWrapMode(TRUE); + + // debug output starts with the default attribute; the attribute in effect on entry is kept in svCharAttr + TermGetAttr(&svCharAttr); + char_attr = svCharAttr; + char_attr.Attr = AttrDefault; + TermSetAttr(&char_attr); + + if (DebugFlag==DEBUG_FLAG_HEXD) { + // two hex digits followed by a space + char buff[3]; + _snprintf(buff, 3, "%02X", (unsigned int) b); + + for (i=0; i<2; i++) + PutChar(buff[i]); + PutChar(' '); + } + else if (DebugFlag==DEBUG_FLAG_NORM) { + + if ((b & 0x80) == 0x80) { + // high bit set: show the low 7 bits with the reverse attribute + //UpdateStr(); + char_attr.Attr = AttrReverse; + TermSetAttr(&char_attr); + b = b & 0x7f; + } + + if (b<=US) { + // caret notation: '^' followed by b+0x40 + PutChar('^'); + PutChar((char)(b+0x40)); + } + else if (b==DEL) { + PutChar('<'); + PutChar('D'); + PutChar('E'); + PutChar('L'); + PutChar('>'); + } + else + PutChar(b); + } + + // restore the attribute saved on entry (the working copy char_attr used to be re-applied here, leaving svCharAttr unused) + TermSetAttr(&svCharAttr); + TermSetInsertMode(svInsertMode); + TermSetAutoWrapMode(svAutoWrapMode); +} + +/** + * Dispatches one received byte to the parser selected by ts.Language and + * ts.KanjiCode, or to PutDebugChar() while DebugFlag is not DEBUG_FLAG_NONE. + * A byte that the selected parser does not process falls through to the + * generic single-shift / control / PutChar() handling at the end. + */ +void ParseFirst(BYTE b) +{ + WORD language = ts.Language; + if (DebugFlag != DEBUG_FLAG_NONE) { + language = IdDebug; + } + + switch (language) { + case IdUtf8: + ParseFirstUTF8(b); + return; + + case IdJapanese: + switch (ts.KanjiCode) { + case IdUTF8: + if (ParseFirstUTF8(b)) { + return; + } + break; + default: + if (ParseFirstJP(b)) { + return; + } + } + break; + + case IdKorean: + switch (ts.KanjiCode) { + case IdUTF8: + if (ParseFirstUTF8(b)) { + return; + } + break; + default: + if (ParseFirstKR(b)) { + return; + } + } + break; + + case IdRussian: + if (ParseFirstRus(b)) { + return; + } + break; + + case IdChinese: + switch (ts.KanjiCode) { + case IdUTF8: + if (ParseFirstUTF8(b)) { + return; + } + break; + default: + if (ParseFirstCn(b)) { + return; + } + } + break; + case IdEnglish: { + if (ParseEnglish(b)) { + return; + } + break; + } + case IdDebug: { + PutDebugChar(b); + return; + } + } + + if (SSflag) { + PutChar(b); + SSflag = FALSE; + return; + } + + if (b<=US) + ParseControl(b); + else if ((b>=0x20) && (b<=0x7E)) + PutChar(b); + else if ((b>=0x80) && (b<=0x9F)) + ParseControl(b); + else if 
(b>=0xA0) + PutChar(b); +} + +/** + * Designate: assigns a character set to one of G0..G3 + * + * @param gn 0/1/2/3 = G0/G1/G2/G3 + * @param cs IdASCII 0 + * IdKatakana 1 + * IdKanji 2 + * IdSpecial 3 + */ +void CharSet2022Designate(int gn, int cs) +{ + VttermKanjiWork *w = &KanjiWork; + w->Gn[gn] = cs; +} + +/** + * Invoke + * @param glr 0/1 = GL/GR (only used for a locking shift) + * @param gn 0/1/2/3 = G0/G1/G2/G3 + * @param single_shift FALSE Locking shift + * TRUE Single shift + */ +void CharSet2022Invoke(int glr, int gn, BOOL single_shift) +{ + VttermKanjiWork *w = &KanjiWork; + if (single_shift == FALSE) { + // Locking shift + w->Glr[glr] = gn; + } + else { + // Single shift + GLtmp = gn; + SSflag = TRUE; + } +} + +/** + * DEC special font (Tera Special font) + * 0140(0x60) ... 0176(0x7f) are assigned to ruled-line glyphs + * NOTE(review): octal 0176 is 0x7e; the code below tests 0x60..0x7f - confirm which bound is meant + * (0xe0) ... (0xff) as well? + * Defined by the special escape sequence ESC ( 0 + * about/emulations.html + * + * @param b code + * @retval TRUE IdSpecial + * @retval FALSE not IdSpecial + */ +BOOL CharSetIsSpecial(BYTE b) +{ + VttermKanjiWork *w = &KanjiWork; + BOOL SpecialNew = FALSE; + + if ((b>0x5F) && (b<0x80)) { + // 0x60..0x7f: look at the set selected by a pending single shift, else at Glr[0] + if (SSflag) + SpecialNew = (w->Gn[GLtmp]==IdSpecial); + else + SpecialNew = (w->Gn[w->Glr[0]]==IdSpecial); + } + else if (b>0xDF) { + // 0xe0..0xff: look at the set selected by a pending single shift, else at Glr[1] + if (SSflag) + SpecialNew = (w->Gn[GLtmp]==IdSpecial); + else + SpecialNew = (w->Gn[w->Glr[1]]==IdSpecial); + } + + return SpecialNew; +} + +// Copies Glr[0..1] and Gn[0..3] of w into state->infos[0..5] +static void CharSetSaveStateLow(CharSetState *state, const VttermKanjiWork *w) +{ + int i; + state->infos[0] = w->Glr[0]; + state->infos[1] = w->Glr[1]; + for (i=0 ; i<=3; i++) { + state->infos[2 + i] = w->Gn[i]; + } +} + +/** + * Saves the state + */ +void CharSetSaveState(CharSetState *state) +{ + VttermKanjiWork *w = &KanjiWork; + CharSetSaveStateLow(state, w); +} + +/** + * 
Restores the state (Glr[0..1] and Gn[0..3] are read back from state->infos[0..5]) + */ +void CharSetLoadState(const CharSetState *state) +{ + VttermKanjiWork *w = &KanjiWork; + int i; + w->Glr[0] = state->infos[0]; + w->Glr[1] = state->infos[1]; + for (i=0 ; i<=3; i++) { + w->Gn[i] = state->infos[2 + i]; + } +} + +/** + * Ends the fallback + * While the received data is UTF-8, cancels an in-progress Shift_JIS output (fallback state) + * + */ +void CharSetFallbackFinish(void) +{ + Fallbacked = FALSE; +} + +/** + * Switches the debug output to the next mode + */ +void CharSetSetNextDebugMode(void) +{ + // ts.DebugModes holds the DBGF_* flags from tttypes.h ORed together + // step to the next mode, skipping modes whose bit is not set; DEBUG_FLAG_NONE is always accepted + do { + DebugFlag = (DebugFlag + 1) % DEBUG_FLAG_MAXD; + } while (DebugFlag != DEBUG_FLAG_NONE && !((ts.DebugModes >> (DebugFlag - 1)) & 1)); +} + +// Returns the current debug mode (DebugFlag) +BYTE CharSetGetDebugMode(void) +{ + return DebugFlag; +} + +// Sets the debug mode (DebugFlag) to mode without checking ts.DebugModes +void CharSetSetDebugMode(BYTE mode) +{ + DebugFlag = mode; +} Modified: trunk/teraterm/teraterm/charset.h =================================================================== --- trunk/teraterm/teraterm/charset.h 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/charset.h 2023-06-20 14:22:56 UTC (rev 10775) @@ -26,6 +26,10 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#ifdef __cplusplus +extern "C" { +#endif + typedef struct { int infos[6]; } CharSetState; @@ -57,3 +61,7 @@ void CharSetSetNextDebugMode(void); //BYTE CharSetGetDebugMode(void); void CharSetSetDebugMode(BYTE mode); + +#ifdef __cplusplus +} +#endif Modified: trunk/teraterm/teraterm/ttermpro.v16.vcxproj =================================================================== --- trunk/teraterm/teraterm/ttermpro.v16.vcxproj 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/ttermpro.v16.vcxproj 2023-06-20 14:22:56 UTC (rev 10775) @@ -160,7 +160,7 @@ <ClCompile Include="addsetting.cpp" /> <ClCompile Include="broadcast.cpp" /> <ClCompile Include="buffer.c" /> - <ClCompile Include="charset.c" /> + <ClCompile Include="charset.cpp" /> <ClCompile Include="checkeol.cpp" /> <ClCompile Include="clipboar.c" /> <ClCompile Include="coding_pp.cpp" /> @@ -334,4 +334,4 @@ <UserProperties RESOURCE_FILE="ttermpro.rc" /> </VisualStudio> </ProjectExtensions> -</Project> \ No newline at end of file +</Project> Modified: trunk/teraterm/teraterm/ttermpro.v16.vcxproj.filters =================================================================== --- trunk/teraterm/teraterm/ttermpro.v16.vcxproj.filters 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/ttermpro.v16.vcxproj.filters 2023-06-20 14:22:56 UTC (rev 10775) @@ -225,7 +225,7 @@ <ClCompile Include="scp.cpp"> <Filter>Source Files</Filter> </ClCompile> - <ClCompile Include="charset.c"> + <ClCompile Include="charset.cpp"> <Filter>Source Files</Filter> </ClCompile> </ItemGroup> @@ -526,4 +526,4 @@ <Filter>Header Files</Filter> </ClInclude> </ItemGroup> -</Project> \ No newline at end of file +</Project> Modified: trunk/teraterm/teraterm/ttermpro.v17.vcxproj =================================================================== --- trunk/teraterm/teraterm/ttermpro.v17.vcxproj 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/ttermpro.v17.vcxproj 2023-06-20 14:22:56 UTC (rev 10775) @@ -160,7 +160,7 @@ 
<ClCompile Include="addsetting.cpp" /> <ClCompile Include="broadcast.cpp" /> <ClCompile Include="buffer.c" /> - <ClCompile Include="charset.c" /> + <ClCompile Include="charset.cpp" /> <ClCompile Include="checkeol.cpp" /> <ClCompile Include="clipboar.c" /> <ClCompile Include="coding_pp.cpp" /> @@ -334,4 +334,4 @@ <UserProperties RESOURCE_FILE="ttermpro.rc" /> </VisualStudio> </ProjectExtensions> -</Project> \ No newline at end of file +</Project> Modified: trunk/teraterm/teraterm/ttermpro.v17.vcxproj.filters =================================================================== --- trunk/teraterm/teraterm/ttermpro.v17.vcxproj.filters 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/ttermpro.v17.vcxproj.filters 2023-06-20 14:22:56 UTC (rev 10775) @@ -225,7 +225,7 @@ <ClCompile Include="scp.cpp"> <Filter>Source Files</Filter> </ClCompile> - <ClCompile Include="charset.c"> + <ClCompile Include="charset.cpp"> <Filter>Source Files</Filter> </ClCompile> </ItemGroup> @@ -526,4 +526,4 @@ <Filter>Header Files</Filter> </ClInclude> </ItemGroup> -</Project> \ No newline at end of file +</Project> Modified: trunk/teraterm/teraterm/vtdisp.c =================================================================== --- trunk/teraterm/teraterm/vtdisp.c 2023-06-20 14:22:45 UTC (rev 10774) +++ trunk/teraterm/teraterm/vtdisp.c 2023-06-20 14:22:56 UTC (rev 10775) @@ -1411,10 +1411,10 @@ */ static void DrawBox(HDC hdc, int sx, int sy, int width, int height, COLORREF rgb) { + HPEN red_pen = CreatePen(PS_SOLID, 0, rgb); + HGDIOBJ old_pen = SelectObject(hdc, red_pen); width--; height--; - HPEN red_pen = CreatePen(PS_SOLID, 0, rgb); - HGDIOBJ old_pen = SelectObject(hdc, red_pen); MoveToEx(hdc, sx, sy, NULL); LineTo(hdc, sx + width, sy); LineTo(hdc, sx + width, sy + height); Modified: trunk/teraterm/teraterm/vtterm.c =================================================================== --- trunk/teraterm/teraterm/vtterm.c 2023-06-20 14:22:45 UTC (rev 10774) +++ 
trunk/teraterm/teraterm/vtterm.c 2023-06-20 14:22:56 UTC (rev 10775) @@ -758,9 +758,9 @@ unsigned short cset; int LineEnd; - LastPutCharacter = code; TCharAttr CharAttrTmp; CharAttrTmp = CharAttr; + LastPutCharacter = code; { int r;