(empty log message)
@@ -0,0 +1,94 @@ | ||
1 | += pykf: Kanji code filter = | |
2 | + 2002/3/3 Atsuo Ishimoto<ishimoto@gembook.org> | |
3 | +------------------------------------------------------------------------ | |
4 | + | |
5 | +pykfハ、ShiftJIS, EUC-JP, JISコードヲ相互ニ変換スルタメノモジュールデス。 | |
6 | + | |
7 | +== インストール方法 == | |
8 | + | |
9 | +python setup.py install | |
10 | + | |
11 | +== リファレンス == | |
12 | + | |
13 | +SJIS, EUC, JIS, UNKNOWN: | |
14 | + 変換元ノエンコーディングヲ指定スル際ニ使用シマス。UNKNOWNヲ指定スルト、 | |
15 | + 入力文字列カラエンコーディングヲ推定シテ変換シマス。 | |
16 | + | |
17 | +tojis(encoding, s): | |
18 | + 入力文字列sヲJISニ変換シマス。 | |
19 | + | |
20 | +toeuc(encoding, s): | |
21 | + 入力文字列sヲEUC-JPニ変換シマス。 | |
22 | + | |
23 | +tosjis(encoding, s): | |
24 | + 入力文字列sヲShiftJISニ変換シマス。 | |
25 | + | |
26 | +== sample == | |
27 | + | |
28 | +import pykf | |
29 | +s = "アイウエオ" | |
30 | +s1 = pykf.tojis(pykf.UNKNOWN, s) | |
31 | +s2 = pykf.tosjis(pykf.JIS, s1) | |
32 | +assert(s2 == s) | |
33 | + | |
34 | + | |
35 | +== 実装メモ == | |
36 | + | |
37 | +pykfハ、マイクロソフト漢字コードノ拡張文字ヲ変換スルタメ、JIS X 0213ヲ | |
38 | +中途半端ニサポートシテイマス。規格票ヲ見ナイデ書イテマスノデ、問題点モ | |
39 | +多イカト思イマスガ... | |
40 | +尚、JIS X 0213ノコード表ハ、JISX0213 InfoCenter | |
41 | + http://www.jca.apc.org/~earthian/aozora/0213.html | |
42 | +ノデータヲ使ワセテイタダキマシタ。 | |
43 | + | |
44 | + | |
45 | +・SJISノIBM拡張漢字等ハ、主要WebブラウザデアルInternet Explorer/Mozillaニ | |
46 | + 準拠シ、JIS X 0213 1面(第三水準)ニ変換シマス。コノタメ、OSF 日本ベンダ | |
47 | + 協議会ノ「日本語 EUC ・シフト JIS 間コード変換仕様トコード系 実態調査」 | |
48 | + (*1) ニ準拠シタライブラリ・アプリケーションナドトハ変換結果ガ異ナリマス。 | |
49 | + | |
50 | +・EUC/JIS->SJIS変換時、NEC選定IBM拡張文字ハ使用セズ、スベテIBM拡張文字ト | |
51 | + シテ変換サレマス。 | |
52 | + | |
53 | +・SJIS/EUC->JIS変換時、JIS X 0208デ定義サレタ文字ハ、JIS X 0213 附属書2 | |
54 | + 4.1(e)デ禁止サレテイル文字デアッテモ、ESC $ B デ呼ビ出シマス。 | |
55 | + | |
56 | +・第四水準ノ変換ハサポートシテイマセン。 | |
57 | + | |
58 | +・ユーザ定義外字(SJIS 0xF040-0xF9FC)ハ、全テ'〓'ニ変換シマス。 | |
59 | + | |
60 | +(*1) http://www.opengroup.or.jp/jvc/cde/sjis-euc.html | |
61 | + | |
62 | + | |
63 | +== バージョン情報 == | |
64 | +2002/03/02 0.1.0 初期公開 | |
65 | + | |
66 | + | |
67 | +== ライセンス == | |
68 | +Japanese Kanji filter module | |
69 | + Copyright (c) 2002, Atsuo Ishimoto. All rights reserved. | |
70 | + | |
71 | +Permission to use, copy, modify, and distribute this software and its | |
72 | +documentation for any purpose and without fee is hereby granted, provided that | |
73 | +the above copyright notice appear in all copies and that both that copyright | |
74 | +notice and this permission notice appear in supporting documentation, and | |
75 | +that the name of Atsuo Ishimoto not be used in advertising or publicity | |
76 | +pertaining to distribution of the software without specific, written prior | |
77 | +permission. | |
78 | + | |
79 | +ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
80 | +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
81 | +EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
82 | +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | |
83 | +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
84 | +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
85 | +PERFORMANCE OF THIS SOFTWARE. | |
86 | + | |
87 | +--------------------------------------------------------------------- | |
88 | +This module is based on kf.c written by Haruhiko Okumura. | |
89 | + Copyright (c) 1995-2000 Haruhiko Okumura | |
90 | + This file may be freely modified/redistributed. | |
91 | + | |
92 | +Original kf.c: | |
93 | + http://www.matsusaka-u.ac.jp/~okumura/kf.html | |
94 | + |
@@ -0,0 +1,221 @@ | ||
1 | +from __future__ import nested_scopes | |
2 | + | |
3 | +import unittest | |
4 | +from pykf import * | |
5 | + | |
def readfile(fname):
    """Load a code mapping table and return it as ``(sjis, euc)`` strings.

    Each data line of *fname* holds comma-separated hexadecimal 16-bit
    codes; column 0 is collected into the first result and column 1 into
    the second.  Every code is emitted as two characters (high byte, then
    low byte).  Lines starting with ``#`` and blank lines are skipped.

    Raises ValueError if a field is not valid hexadecimal and IndexError
    if a data line has fewer than two columns.
    """
    f = open(fname)
    try:
        # Skip comments and blank lines; a blank line would otherwise make
        # int() fail on an empty field.
        rows = [l.split(",") for l in f.readlines()
                if l.strip() and l[0:1] != '#']
    finally:
        # try/finally instead of ``with`` keeps the module usable on the
        # old interpreters it targets.
        f.close()

    def pack(codes):
        # Split every 16-bit code into its high and low byte.
        return "".join([chr(c >> 8) + chr(c & 0xff) for c in codes])

    sjis = pack([int(r[0], 16) for r in rows])
    euc = pack([int(r[1], 16) for r in rows])

    return sjis, euc
14 | + | |
class test_kf(unittest.TestCase):
    """Round-trip and table-driven checks for the pykf converters.

    The conversion functions (toeuc, tojis, tosjis, guess) and the
    encoding constants come from the star import of pykf at the top of
    the file.  unittest assertion methods are used instead of bare
    ``assert`` so the checks still run under ``python -O`` and report the
    differing values on failure.
    """

    def _slurp(self, fname):
        """Return the whole content of *fname* and close the file."""
        f = open(fname)
        try:
            return f.read()
        finally:
            f.close()

    def readtbl(self, fname):
        """Load a mapping table and return it as ``(sjis, euc)`` strings.

        Data lines hold comma-separated hexadecimal 16-bit codes (column 0
        and column 1 are used).  Lines starting with ``#`` and blank lines
        are skipped.
        """
        f = open(fname)
        try:
            lines = [l for l in f.readlines()
                     if l.strip() and l[0:1] != '#']
        finally:
            f.close()
        sjis = [int(l.split(",")[0], 16) for l in lines]
        sjis = "".join([chr(s >> 8)+chr(s & 0xff) for s in sjis])
        euc = [int(l.split(",")[1], 16) for l in lines]
        euc = "".join([chr(s >> 8)+chr(s & 0xff) for s in euc])
        return sjis, euc

    def conv(self, sjis):
        """Convert *sjis* through every encoding pair and check consistency.

        The source encoding is not passed to the converters here, so they
        are called in their auto-detecting form.
        """
        euc1 = toeuc(sjis)
        jis1 = tojis(sjis)
        euc2 = toeuc(jis1)
        jis2 = tojis(euc1)
        sjis1 = tosjis(jis1)
        sjis2 = tosjis(euc1)

        # Diagnostic aid only: dump the two-byte slices that differ after
        # the SJIS -> JIS -> SJIS round trip.  The single-argument print()
        # form behaves the same as a print statement on Python 2.
        for i in range(0, len(sjis), 2):
            s = sjis[i:i+2]
            e1 = sjis1[i:i+2]
            if s != e1:
                if len(s) == 2 and len(e1) == 2:
                    print("%s(%x%x) %s(%x%x)" % (s, ord(s[0]), ord(s[1]), e1, ord(e1[0]), ord(e1[1])))
                else:
                    # Short slice (odd length or truncated output): avoid
                    # indexing past the end, the assertions below report
                    # the failure.
                    print("%r %r" % (s, e1))

        self.assertEqual(sjis, sjis1)
        self.assertEqual(sjis1, sjis2)

        # The "encodings must differ / must be detected" checks only make
        # sense when the input holds at least one non-ASCII byte.
        multibyte = not (max(sjis) < '\x80')

        if multibyte:
            self.assertNotEqual(sjis2, euc1)
            self.assertNotEqual(sjis2, jis1)
        self.assertEqual(euc1, euc2)
        if multibyte:
            self.assertNotEqual(euc1, jis1)
        self.assertEqual(jis1, jis2)

        if multibyte:
            self.assertEqual(guess(sjis1), SJIS)
            self.assertEqual(guess(euc1), EUC)
            self.assertEqual(guess(jis1), JIS)

    def testBasic(self):
        sjis = self._slurp("../readme.sjis")
        self.conv(sjis)

    def testHankana(self):
        sjis = self._slurp("hankana.txt")
        self.conv(sjis)

    def testNEC(self):
        sjis, euc = self.readtbl("../misc/nectoeuc.txt")
        self.assertEqual(toeuc(sjis), euc)
        self.assertEqual(toeuc(tojis(sjis)), euc)
        self.assertEqual(tosjis(euc), sjis)

    def testNECIBM(self):
        sjis, euc = self.readtbl("../misc/necibmtoeuc.txt")
        self.assertEqual(toeuc(sjis), euc)
        self.assertEqual(toeuc(tojis(sjis)), euc)
        self.assertEqual(tosjis(euc), sjis)

    def testIBM(self):
        sjis, euc = self.readtbl("../misc/ibmtoeuc.txt")
        self.assertEqual(toeuc(sjis), euc)
        self.assertEqual(toeuc(tojis(sjis)), euc)
        # The byte strings differ after the round trip, yet both decode to
        # the same text with the ms932 codec.
        self.assertNotEqual(tosjis(euc), sjis)
        self.assertEqual(unicode(tosjis(euc), "japanese.ms932"),
                         unicode(sjis, "japanese.ms932"))

    def testGaiji(self):
        # Lead bytes 0xf0-0xf9 are expected to collapse to "\x81\xac"
        # whichever intermediate encoding is used.
        # NOTE(review): the first trail range stops at 0x7d, so trail byte
        # 0x7e is never exercised - confirm whether that is intentional.
        substitute = "\x81\xac"
        for trails in (range(0x40, 0x7e), range(0x80, 0xfd)):
            sjis = "".join([chr(x)+chr(y) for x in range(0xf0, 0xfa) for y in trails])
            # Floor division keeps the repeat count an integer.
            expected = substitute * (len(sjis) // 2)
            self.assertEqual(tosjis(toeuc(sjis)), expected)
            self.assertEqual(tosjis(tojis(sjis)), expected)

    def testUtf8(self):
        utf8 = "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a"
        self.assertEqual(guess(utf8), UTF8)
        # A lone UTF-8 byte order mark must be detected as well.
        self.assertEqual(guess("\xef\xbb\xbf"), UTF8)

    def testJisNormalize(self):
        # JIS output must end with the ESC ( B sequence.
        sjis = "\x82\xa0"
        jis = tojis(sjis, SJIS)
        self.assertEqual(jis[-3:], '\x1b(B')
        self.assertEqual(tosjis(jis, JIS), sjis)

        euc = toeuc("\x82\xa0", SJIS)
        jis = tojis(euc, EUC)
        self.assertEqual(jis[-3:], '\x1b(B')
        self.assertEqual(toeuc(jis, JIS), euc)
105 | + | |
class test_zerolen(unittest.TestCase):
    """Converters must return an empty string for empty input."""

    def test_zerolen(self):
        # unittest assertions are used instead of bare ``assert`` so the
        # checks are not stripped under ``python -O``.
        src = ""

        # Auto-detecting form.
        self.assertEqual(tosjis(src), "")
        self.assertEqual(toeuc(src), "")
        self.assertEqual(tojis(src), "")

        # Explicit source encoding.
        self.assertEqual(tosjis(src, EUC), "")
        self.assertEqual(tosjis(src, JIS), "")
        # A JIS string made only of an escape sequence carries no text.
        self.assertEqual(tosjis("\x1b(I", JIS), "")
        self.assertEqual(toeuc(src, SJIS), "")
        self.assertEqual(toeuc(src, JIS), "")
        self.assertEqual(toeuc("\x1b(I", JIS), "")
        self.assertEqual(tojis(src, SJIS), "")
        self.assertEqual(tojis(src, EUC), "")
121 | + | |
class test_split(unittest.TestCase):
    """Joining the pieces returned by split() must rebuild the input."""

    def test_split(self):
        # ``plain`` replaces the former local name ``ascii``, which
        # shadows a builtin on newer interpreters.
        plain = "abcdefg"
        sjis = "abc\x82\xa0\x82\xa1\x82\xa2\xb1\xb2\xb3abc\x82\xa0"

        self.assertEqual("".join(split(plain)), plain)
        self.assertEqual("".join(split(sjis)), sjis)

        # Same property for the EUC and JIS renderings of the SJIS sample.
        euc = toeuc(sjis)
        self.assertEqual("".join(split(euc)), euc)
        jis = tojis(sjis)
        self.assertEqual("".join(split(jis)), jis)
131 | + | |
132 | + | |
class test_tohalf(unittest.TestCase):
    """tohalf_kana() must turn the full-width fixtures into the half-width ones."""

    # Fixtures are SJIS byte strings: ``sjis`` / ``all_full`` are the
    # inputs, ``sjis_half`` / ``all_half`` the expected results.
    sjis = 'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
    sjis_half = 'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
    all_half = '\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
    all_full = "\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["

    def test_sjis(self):
        self.assertEqual(tohalf_kana(self.sjis, SJIS), self.sjis_half)
        self.assertEqual(tohalf_kana(self.all_full, SJIS), self.all_half)

    def test_euc(self):
        # The EUC expectations are derived from the SJIS fixtures.
        e = toeuc(self.sjis, SJIS)
        self.assertEqual(tohalf_kana(e, EUC), toeuc(self.sjis_half, SJIS))

        e = toeuc(self.all_full, SJIS)
        self.assertEqual(tohalf_kana(e, EUC), toeuc(self.all_half, SJIS))
149 | + | |
150 | + | |
class test_tofull(unittest.TestCase):
    """tofull_kana() must turn the half-width fixtures into the full-width ones."""

    # Fixtures are SJIS byte strings: ``sjis_half`` / ``all_half`` are the
    # inputs, ``sjis`` / ``all_full`` the expected results.
    sjis = 'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
    sjis_half = 'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
    all_half = '\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
    all_full = "\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["

    def test_sjis(self):
        self.assertEqual(tofull_kana(self.sjis_half, SJIS), self.sjis)
        self.assertEqual(tofull_kana(self.all_half, SJIS), self.all_full)

    def test_euc(self):
        # The EUC expectations are derived from the SJIS fixtures.
        e = toeuc(self.sjis_half, SJIS)
        self.assertEqual(tofull_kana(e, EUC), toeuc(self.sjis, SJIS))

        e = toeuc(self.all_half, SJIS)
        self.assertEqual(tofull_kana(e, EUC), toeuc(self.all_full, SJIS))
168 | + | |
class test_strict(unittest.TestCase):
    """guess() in strict and non-strict mode, plus the global strict flag."""

    # Fixtures are spelled as byte escapes so the test does not depend on
    # the encoding this source file happens to be saved in.
    # SJIS bytes of hiragana A I U E O.
    AIUEO = "\x82\xa0\x82\xa2\x82\xa4\x82\xa6\x82\xa8"
    # SJIS bytes of hiragana A I U E O KA KI KU KE KO.
    AIUEO_KAKIKUKEKO = AIUEO + "\x82\xa9\x82\xab\x82\xad\x82\xaf\x82\xb1"
    # Byte pair appended to the valid text; the strict checks below expect
    # guess() to report ERROR once it is present.
    BAD_PAIR = "\xf0\x01"

    def test_sjis(self):
        s1 = self.AIUEO
        self.assertEqual(guess(s1, True), SJIS)
        self.assertEqual(guess(s1, False), SJIS)
        s2 = self.AIUEO_KAKIKUKEKO*1000 + self.BAD_PAIR
        self.assertEqual(guess(s2, False), SJIS)
        self.assertEqual(guess(s2, True), ERROR)

    def test_euc(self):
        s1 = toeuc(self.AIUEO, SJIS)
        self.assertEqual(guess(s1, True), EUC)
        self.assertEqual(guess(s1, False), EUC)
        s2 = toeuc(self.AIUEO_KAKIKUKEKO*1000 + self.BAD_PAIR, SJIS)
        self.assertEqual(guess(s2, False), EUC)
        self.assertEqual(guess(s2, True), ERROR)

    def test_jis(self):
        s1 = tojis(self.AIUEO, SJIS)
        self.assertEqual(guess(s1, True), JIS)
        self.assertEqual(guess(s1, False), JIS)
        # Unlike the SJIS/EUC cases the text is not repeated here, and the
        # non-strict guess is expected to be UNKNOWN.
        s2 = tojis(self.AIUEO_KAKIKUKEKO + self.BAD_PAIR, SJIS)
        self.assertEqual(guess(s2, False), UNKNOWN)
        self.assertEqual(guess(s2, True), ERROR)

    def test_flag(self):
        # The strict flag is module-wide state: always leave it switched
        # off, even when an assertion fails, so other tests are unaffected.
        try:
            setstrict(True)
            if not getstrict():
                self.fail("getstrict() is false after setstrict(True)")

            setstrict(False)
            if getstrict():
                self.fail("getstrict() is true after setstrict(False)")

            # With no explicit argument guess() follows the global flag.
            s2 = self.AIUEO_KAKIKUKEKO*1000 + self.BAD_PAIR
            self.assertEqual(guess(s2), SJIS)
            setstrict(True)
            self.assertEqual(guess(s2), ERROR)
        finally:
            setstrict(False)
206 | + | |
207 | + | |
class test_j0208(unittest.TestCase):
    """The j0208 flag selects the escape sequence used for JIS output."""

    def test_sjis(self):
        # SJIS 0x878A.  The expected JIS payload below is "-j"
        # (0x2D 0x6A), which is the JIS code that SJIS 0x878A maps to;
        # written as byte escapes so the fixture survives any re-encoding
        # of this source file.
        s1 = "\x87\x8a"

        # j0208=False announces the character with ESC $ ( O,
        # j0208=True with ESC $ B.
        self.assertEqual(tojis(s1, SJIS, j0208=False), '\x1b$(O-j\x1b(B')
        self.assertEqual(tojis(s1, SJIS, j0208=True), '\x1b$B-j\x1b(B')

        # Both forms must convert back to the original SJIS bytes.
        self.assertEqual(tosjis(tojis(s1, SJIS, j0208=False)), s1)
        self.assertEqual(tosjis(tojis(s1, SJIS, j0208=True)), s1)
217 | + | |
# Run every TestCase defined in this module when executed as a script.
if __name__ == '__main__':
    unittest.main()
220 | + | |
221 | + |
@@ -0,0 +1,397 @@ | ||
# SJIS code points 0xfa40-0xfc4b, each as a two-character
# (lead byte, trail byte) string.
#
# The table is generated from its code points instead of being spelled
# out as literal characters: literal two-byte characters do not survive a
# re-encoding of this file (they degrade to one-character strings, which
# makes the c[1] lookup below fail).
#
# (lead byte, first trail byte, last trail byte); 0x7f is not a trail byte.
_IBM_RANGES = (
    (0xfa, 0x40, 0xfc),
    (0xfb, 0x40, 0xfc),
    (0xfc, 0x40, 0x4b),
)

IBM = [
    chr(lead) + chr(trail)
    for (lead, first, last) in _IBM_RANGES
    for trail in range(first, last + 1)
    if trail != 0x7f
]

import pykf

# Convert every table entry and print the code before and after.
# NOTE(review): the entries are SJIS codes, yet toeuc() is given pykf.EUC
# and tosjis() pykf.SJIS as the second argument.  If that argument is the
# source encoding, the two constants look swapped - confirm against the
# pykf version this script was written for before relying on the result.
for c in IBM:
    e = pykf.toeuc(c, pykf.EUC)
    s = pykf.tosjis(e, pykf.SJIS)
    # Single-argument print() gives the same output as the print statement
    # on Python 2.
    print("%s %s %s %s" % (c, s, hex(ord(c[0]) << 8 | ord(c[1])), hex(ord(s[0]) << 8 | ord(s[1]))))
    assert c != s
@@ -0,0 +1,18 @@ | ||
1 | +#!/usr/bin/env python | |
2 | + | |
3 | +from distutils.core import setup, Extension | |
4 | + | |
# C sources compiled into the ``pykf`` extension module.
PYKF_SOURCES = [
    "src/pykf.c",
    "src/converter.c",
    "src/jis0213.c",
    "src/mskanji.c",
]

pykf_extension = Extension("pykf", PYKF_SOURCES)

# Package metadata plus the single extension module to build.
setup(
    name="pykf",
    version="0.3.4",
    description="Japanese Kanji code filter",
    author="Atsuo Ishimoto",
    author_email="ishimoto@gembook.org",
    url="http://www.gembook.jp",
    ext_modules=[pykf_extension],
)
@@ -0,0 +1,25 @@ | ||
1 | + | |
2 | + | |
/* Include guard: the anonymous enum below cannot be declared twice in one
   translation unit, so a second inclusion must be a no-op. */
#ifndef PYKF_CONVERSION_DECLS_H
#define PYKF_CONVERSION_DECLS_H

/* kanji conversion tables (defined in another translation unit) */
extern unsigned int tbl_jis0213[];
extern int tbl_sjis2jis[];
extern int tbl_jis2sjis[];


/* Japanese character encodings.
   ERROR is the only negative value; the value 6 is not assigned. */
enum {ERROR=-1, UNKNOWN=0, ASCII=1, SJIS=2, EUC=3, JIS=4, UTF8=5, UTF16_LE=7, UTF16_BE=8};

/* Encoding detection.
   NOTE(review): presumably returns one of the enum values above for the
   imax bytes in buf - confirm against the implementation. */
int guess(int imax, unsigned char buf[], int strict);

/* Converters between SJIS, EUC and JIS.
   All take the input as (len, buf) and hand the result back through
   *ret / *retlen.
   NOTE(review): the meaning of the int return value and the ownership of
   the buffer stored in *ret are not visible here - confirm against the
   implementation. */
int sjistojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int jis0208);
int euctojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int jis0208);
int sjistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int jistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int jistosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int euctosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen);

/* Kana width converters (half-width <-> full-width) for SJIS and EUC
   input; same calling convention as the converters above. */
int sjistohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int euctohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int sjistofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen);
int euctofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen);

#endif /* PYKF_CONVERSION_DECLS_H */
24 | + | |
25 | + |
@@ -0,0 +1,14 @@ | ||
/* Byte classification helpers.
   Every macro may evaluate its argument more than once, so do not pass
   expressions with side effects (e.g. *p++). */

/* byte ranges 0x21-0x7e (isjis) and 0xa1-0xfe (iseuc) */
#define isjis(c) (((c)>=0x21 && (c)<=0x7e))
#define iseuc(c) (((c)>=0xa1 && (c)<=0xfe))

/* SJIS lead bytes 0xf0-0xf9 (gaiji) and 0xfa-0xfc (IBM gaiji) */
#define isgaiji1(c) ((c)>=0xf0 && (c)<=0xf9)
#define isibmgaiji1(c) ((c)>=0xfa && (c)<=0xfc)
/* any SJIS lead byte / any SJIS trail byte (0x7f is not a trail byte) */
#define issjis1(c) (((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xef) || isgaiji1(c) || isibmgaiji1(c))
#define issjis2(c) ((c)>=0x40 && (c)<=0xfc && (c)!=0x7f)

/* NOTE(review): the range starts at 0xa0 - confirm that 0xa0 is meant to
   count as half-width kana. */
#define ishankana(c) ((c)>=0xa0 && (c)<=0xdf)

/* UTF-8 lead/trail byte tests.  The argument is parenthesized so that an
   expression argument such as (x & 0xff) is compared as a whole. */
#define isutf8_2byte(c) (0xc0<=(c) && (c) <= 0xdf)
#define isutf8_3byte(c) (0xe0<=(c) && (c) <= 0xef)
#define isutf8_trail(c) (0x80<=(c) && (c) <= 0xbf)

/* NOTE(review): presumably the substitute code emitted when a character
   cannot be converted - confirm against the converters. */
#define CONV_FAILED 0x222e
@@ -0,0 +1,1447 @@ | ||
1 | +/********************************************************************* | |
2 | + | |
3 | +Japanese Kanji filter module | |
4 | + Copyright (c) 2002, Atsuo Ishimoto. All rights reserved. | |
5 | + | |
6 | +Permission to use, copy, modify, and distribute this software and its | |
7 | +documentation for any purpose and without fee is hereby granted, provided that | |
8 | +the above copyright notice appear in all copies and that both that copyright | |
9 | +notice and this permission notice appear in supporting documentation, and that | |
10 | +the name of Atsuo Ishimoto not be used in advertising or publicity pertaining | |
11 | +to distribution of the software without specific, written prior permission. | |
12 | + | |
13 | +ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
14 | +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
15 | +EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
16 | +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | |
17 | +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
18 | +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
19 | +PERFORMANCE OF THIS SOFTWARE. | |
20 | + | |
21 | +--------------------------------------------------------------------- | |
22 | +This module is based on kf.c written by Haruhiko Okumura. | |
23 | + Copyright (c) 1995-2000 Haruhiko Okumura | |
24 | + This file may be freely modified/redistributed. | |
25 | + | |
26 | +Original kf.c: | |
27 | + http://www.matsusaka-u.ac.jp/~okumura/kf.html | |
28 | + | |
29 | +*********************************************************************/ | |
30 | + | |
31 | +#include <stdio.h> | |
32 | +#include <stdlib.h> | |
33 | +#include <memory.h> | |
34 | +#include <string.h> | |
35 | +#include <assert.h> | |
36 | + | |
37 | +#include "pykf.h" | |
38 | + | |
39 | +#if defined(_MSC_VER) | |
40 | +#define LOCAL_INLINE __inline static | |
41 | +#endif | |
42 | + | |
43 | +#if !defined(__cplusplus) && !defined(inline) | |
44 | +#ifdef __GNUC__ | |
45 | +#define LOCAL_INLINE __inline static | |
46 | +#endif | |
47 | +#endif | |
48 | + | |
49 | +#if !defined(LOCAL_INLINE) | |
50 | +#define LOCAL_INLINE static | |
51 | +#endif | |
52 | + | |
53 | +#include "convert.h" | |
54 | + | |
55 | + | |
56 | + | |
57 | + | |
58 | +int guess(int imax, unsigned char buf[], int strict) | |
59 | +{ | |
60 | + int i; | |
61 | + int ascii, euc, sjis, utf8, bad_euc, bad_sjis, bad_utf8; | |
62 | + int jis, hankana; | |
63 | + int sjis_error, euc_error, utf8_error; | |
64 | + | |
65 | + ascii = 1; | |
66 | + bad_euc=euc=0; | |
67 | + bad_sjis=sjis=0; | |
68 | + bad_utf8 = utf8=0; | |
69 | + jis = 0; | |
70 | + sjis_error = euc_error = utf8_error = 0; | |
71 | + | |
72 | + /* check BOM */ | |
73 | + if (imax >= 2) { | |
74 | + if (buf[0] == 0xff && buf[1] == 0xfe) { | |
75 | + return UTF16_LE; | |
76 | + } | |
77 | + else if (buf[0] == 0xfe && buf[1] == 0xff) { | |
78 | + return UTF16_BE; | |
79 | + } | |
80 | + } | |
81 | + if (imax >= 3 && !memcmp(buf, "\xef\xbb\xbf", 3)) { | |
82 | + return UTF8; | |
83 | + } | |
84 | + | |
85 | + // check SJIS | |
86 | + hankana = 0; | |
87 | + for (i = 0; i < imax; i++) { | |
88 | + | |
89 | + if (buf[i] >= 0x80) { | |
90 | + ascii = 0; | |
91 | + } | |
92 | + | |
93 | + if (buf[i] == 0x1b) { | |
94 | + jis= 1; | |
95 | + } | |
96 | + | |
97 | + if (buf[i] == 0x8e ) { | |
98 | + // looks like euc. | |
99 | + if (i + 2 < imax) { | |
100 | + if (buf[i+2]==0x8e && ishankana(buf[i+1])) { | |
101 | + bad_sjis += 1; | |
102 | + } | |
103 | + } | |
104 | + } | |
105 | + | |
106 | + if (ishankana(buf[i])) { | |
107 | + sjis += 0x10/2-1; | |
108 | + hankana++; | |
109 | + } | |
110 | + else { | |
111 | + if (hankana == 1) { | |
112 | + // single halfwidth-kana is bad sign. | |
113 | + bad_sjis++; | |
114 | + } | |
115 | + hankana = 0; | |
116 | + | |
117 | + if (issjis1(buf[i])) { | |
118 | + if (i+1 >= imax) { | |
119 | + if (strict) { | |
120 | + sjis_error = 1; | |
121 | + break; | |
122 | + } | |
123 | + bad_sjis+=0x100; | |
124 | + } | |
125 | + else if (issjis2(buf[i+1])) { | |
126 | + sjis += 0x10; | |
127 | + i++; | |
128 | + } | |
129 | + else { | |
130 | + if (strict) { | |
131 | + sjis_error = 1; | |
132 | + break; | |
133 | + } | |
134 | + bad_sjis += 0x100; | |
135 | + } | |
136 | + } | |
137 | + else if (buf[i] >= 0x80) { | |
138 | + if (strict) { | |
139 | + sjis_error = 1; | |
140 | + break; | |
141 | + } | |
142 | + bad_sjis += 0x100; | |
143 | + } | |
144 | + } | |
145 | + } | |
146 | + | |
147 | + if (ascii && jis) { | |
148 | + return JIS; | |
149 | + } | |
150 | + | |
151 | + if (ascii) { | |
152 | + return ASCII; | |
153 | + } | |
154 | + | |
155 | + // check EUC-JP | |
156 | + hankana=0; | |
157 | + for (i = 0; i < imax; i++) { | |
158 | + if (buf[i] == 0x8e) { | |
159 | + if (i+1 >= imax) { | |
160 | + if (strict) { | |
161 | + euc_error = 1; | |
162 | + break; | |
163 | + } | |
164 | + bad_euc += 0x100; | |
165 | + } | |
166 | + else if (ishankana(buf[i+1])) { | |
167 | + euc+=10; | |
168 | + i++; | |
169 | + hankana++; | |
170 | + } | |
171 | + else { | |
172 | + if (strict) { | |
173 | + euc_error = 1; | |
174 | + break; | |
175 | + } | |
176 | + bad_euc += 0x100; | |
177 | + } | |
178 | + } | |
179 | + else { | |
180 | + if (hankana == 1) { | |
181 | + bad_euc++; | |
182 | + } | |
183 | + hankana = 0; | |
184 | + if (iseuc(buf[i])) { | |
185 | + if (i+1 >= imax) { | |
186 | + if (strict) { | |
187 | + euc_error = 1; | |
188 | + break; | |
189 | + } | |
190 | + bad_euc+=0x100; | |
191 | + } | |
192 | + else if (iseuc(buf[i+1])) { | |
193 | + i++; | |
194 | + euc+=0x10; | |
195 | + } | |
196 | + else { | |
197 | + if (strict) { | |
198 | + euc_error = 1; | |
199 | + break; | |
200 | + } | |
201 | + bad_euc+=0x100; | |
202 | + } | |
203 | + } | |
204 | + else if (buf[i] == 0x8f) { | |
205 | + if (i+2 >= imax) { | |
206 | + if (strict) { | |
207 | + euc_error = 1; | |
208 | + break; | |
209 | + } | |
210 | + bad_euc+=0x100; | |
211 | + } | |
212 | + else if (iseuc(buf[i+1]) && iseuc(buf[i+2])) { | |
213 | + i+=2; | |
214 | + euc+=0x10; | |
215 | + } | |
216 | + else { | |
217 | + if (strict) { | |
218 | + euc_error = 1; | |
219 | + break; | |
220 | + } | |
221 | + bad_euc+=100; | |
222 | + } | |
223 | + } | |
224 | + else if (buf[i] >= 0x80) { | |
225 | + if (strict) { | |
226 | + euc_error = 1; | |
227 | + break; | |
228 | + } | |
229 | + bad_euc+=0x100; | |
230 | + } | |
231 | + } | |
232 | + } | |
233 | + | |
234 | + // check UTF-8 | |
235 | + for (i = 0; i < imax; i++) { | |
236 | + if (isutf8_2byte(buf[i])) { | |
237 | + if (i+1 >= imax) { | |
238 | + if (strict) { | |
239 | + utf8_error = 1; | |
240 | + break; | |
241 | + } | |
242 | + bad_utf8 += 1000; | |
243 | + } | |
244 | + else if (isutf8_trail(buf[i+1])) { | |
245 | + utf8+=10; | |
246 | + i++; | |
247 | + } | |
248 | + else { | |
249 | + if (strict) { | |
250 | + utf8_error = 1; | |
251 | + break; | |
252 | + } | |
253 | + bad_utf8+=100; | |
254 | + } | |
255 | + } | |
256 | + else if (isutf8_3byte(buf[i])) { | |
257 | + if (i+2 >= imax) { | |
258 | + if (strict) { | |
259 | + utf8_error = 1; | |
260 | + break; | |
261 | + } | |
262 | + bad_utf8 += 1000; | |
263 | + } | |
264 | + else if (isutf8_trail(buf[i+1]) && isutf8_trail(buf[i+2])) { | |
265 | + utf8+=15; | |
266 | + i+=2; | |
267 | + } | |
268 | + else { | |
269 | + if (strict) { | |
270 | + utf8_error = 1; | |
271 | + break; | |
272 | + } | |
273 | + bad_utf8+=1000; | |
274 | + } | |
275 | + } else if (buf[i] >= 0x80) { | |
276 | + if (strict) { | |
277 | + utf8_error = 1; | |
278 | + break; | |
279 | + } | |
280 | + bad_utf8 += 1000; | |
281 | + } | |
282 | + } | |
283 | + | |
284 | + if (sjis_error && euc_error && utf8_error) { | |
285 | + return ERROR; | |
286 | + } | |
287 | + | |
288 | + if (sjis_error) { | |
289 | + if (euc_error) { | |
290 | + return UTF8; | |
291 | + } | |
292 | + if (utf8_error) { | |
293 | + return EUC; | |
294 | + } | |
295 | + if (euc-bad_euc > utf8-bad_utf8) | |
296 | + return EUC; | |
297 | + else if (euc-bad_euc < utf8-bad_utf8) | |
298 | + return UTF8; | |
299 | + } | |
300 | + | |
301 | + if (euc_error) { | |
302 | + if (sjis_error) { | |
303 | + return UTF8; | |
304 | + } | |
305 | + if (utf8_error) { | |
306 | + return SJIS; | |
307 | + } | |
308 | + if (sjis-bad_sjis > utf8-bad_utf8) | |
309 | + return SJIS; | |
310 | + else if (sjis-bad_sjis < utf8-bad_utf8) | |
311 | + return UTF8; | |
312 | + } | |
313 | + | |
314 | + if (utf8_error) { | |
315 | + if (sjis_error) { | |
316 | + return EUC; | |
317 | + } | |
318 | + if (euc_error) { | |
319 | + return SJIS; | |
320 | + } | |
321 | + if (sjis-bad_sjis > euc-bad_euc) | |
322 | + return SJIS; | |
323 | + else | |
324 | + return EUC; | |
325 | + } | |
326 | + | |
327 | + if (sjis-bad_sjis > euc-bad_euc) { | |
328 | + if (sjis-bad_sjis > utf8-bad_utf8) | |
329 | + return SJIS; | |
330 | + else if (sjis-bad_sjis < utf8-bad_utf8) | |
331 | + return UTF8; | |
332 | + } | |
333 | + | |
334 | + if (sjis-bad_sjis < euc-bad_euc) { | |
335 | + if (euc-bad_euc > utf8-bad_utf8) | |
336 | + return EUC; | |
337 | + else if (euc-bad_euc < utf8-bad_utf8) | |
338 | + return UTF8; | |
339 | + } | |
340 | + return UNKNOWN; | |
341 | +} | |
342 | + | |
343 | +LOCAL_INLINE | |
344 | +void jis_to_sjis2(unsigned char *ph, unsigned char *pl); | |
345 | + | |
346 | +LOCAL_INLINE | |
347 | +int isjis0213(unsigned char h, unsigned char l) { | |
348 | + int *p; | |
349 | + int jis = (h << 8 | l) & 0xffff; | |
350 | + | |
351 | + for (p=tbl_jis0213; *(p+2) < jis; p+=2); | |
352 | + | |
353 | + if (*p <= jis && (jis < (p[0] + p[1]))) { | |
354 | + return 1; | |
355 | + } | |
356 | + else { | |
357 | + return 0; | |
358 | + } | |
359 | +} | |
360 | + | |
361 | + | |
362 | +LOCAL_INLINE | |
363 | +int mskanji_to_jis(unsigned char *ph, unsigned char *pl) { | |
364 | + int *p; | |
365 | + int sjis = (*ph << 8 | *pl) & 0xffff; | |
366 | + | |
367 | + if (isgaiji1(*ph)) { | |
368 | + *ph = (CONV_FAILED >> 8) & 0xff; | |
369 | + *pl = CONV_FAILED & 0xff; | |
370 | + return 1; | |
371 | + } | |
372 | + | |
373 | + for (p=tbl_sjis2jis; *p < sjis; p+=2); | |
374 | + | |
375 | + if (*p == sjis) { | |
376 | + *ph = (*(p+1)) >> 8; | |
377 | + *pl = (*(p+1)) & 0xff; | |
378 | + return 1; | |
379 | + } | |
380 | + return 0; | |
381 | +} | |
382 | + | |
383 | +LOCAL_INLINE | |
384 | +void sjis_to_jis(unsigned char *ph, unsigned char *pl) | |
385 | +{ | |
386 | + if (*ph <= 0x9f) { | |
387 | + if (*pl < 0x9f) | |
388 | + *ph = (*ph << 1) - 0xe1; | |
389 | + else | |
390 | + *ph = (*ph << 1) - 0xe0; | |
391 | + } else { | |
392 | + if (*pl < 0x9f) | |
393 | + *ph = (*ph << 1) - 0x161; | |
394 | + else | |
395 | + *ph = (*ph << 1) - 0x160; | |
396 | + } | |
397 | + if (*pl < 0x7f) | |
398 | + *pl -= 0x1f; | |
399 | + else if (*pl < 0x9f) | |
400 | + *pl -= 0x20; | |
401 | + else | |
402 | + *pl -= 0x7e; | |
403 | +} | |
404 | + | |
405 | +LOCAL_INLINE | |
406 | +void sjis_to_jis2(unsigned char *ph, unsigned char *pl) | |
407 | +{ | |
408 | + if (mskanji_to_jis(ph, pl)) | |
409 | + return; | |
410 | + else | |
411 | + sjis_to_jis(ph, pl); | |
412 | +} | |
413 | + | |
414 | + | |
415 | +LOCAL_INLINE | |
416 | +void jis_to_sjis(unsigned char *ph, unsigned char *pl) | |
417 | +{ | |
418 | + if (*ph & 1) { | |
419 | + if (*pl < 0x60) | |
420 | + *pl += 0x1f; | |
421 | + else | |
422 | + *pl += 0x20; | |
423 | + } else | |
424 | + *pl += 0x7e; | |
425 | + | |
426 | + if (*ph < 0x5f) | |
427 | + *ph = (*ph + 0xe1) >> 1; | |
428 | + else | |
429 | + *ph = (*ph + 0x161) >> 1; | |
430 | +} | |
431 | + | |
432 | + | |
433 | +LOCAL_INLINE | |
434 | +int jis_to_mskanji(unsigned char *ph, unsigned char *pl) { | |
435 | + int *p; | |
436 | + int jis = (*ph << 8 | *pl) & 0xffff; | |
437 | + | |
438 | + for (p=tbl_jis2sjis; *p < jis; p+=2); | |
439 | + | |
440 | + if (*p == jis) { | |
441 | + *ph = (*(p+1)) >> 8; | |
442 | + *pl = (*(p+1)) & 0xff; | |
443 | + return 1; | |
444 | + } | |
445 | + return 0; | |
446 | +} | |
447 | + | |
448 | + | |
449 | + | |
450 | +LOCAL_INLINE | |
451 | +void jis_to_sjis2(unsigned char *ph, unsigned char *pl) | |
452 | +{ | |
453 | + if (jis_to_mskanji(ph, pl)) | |
454 | + return; | |
455 | + else | |
456 | + jis_to_sjis(ph, pl); | |
457 | +} | |
458 | + | |
459 | + | |
460 | + | |
461 | + | |
462 | + | |
/*
 * Growable output buffer shared by the six converters below.
 * A failed allocation frees the buffer and sets `failed`; later writes then
 * become no-ops, so callers only need to test the result of kf_out_finish().
 */
struct kf_outbuf {
    unsigned char *data;
    int used;     /* bytes written so far */
    int cap;      /* bytes allocated */
    int step;     /* growth increment: len / 2 + 16 */
    int failed;   /* non-zero after an allocation failure */
};

/* Character set currently selected in a 7-bit JIS stream. */
enum kf_jis_mode {
    KF_MODE_ASCII,
    KF_MODE_KANJI,
    KF_MODE_HANKANA,
    KF_MODE_JIS0213
};

/* Start a buffer sized for an input of len bytes. */
static void kf_out_init(struct kf_outbuf *out, int len)
{
    out->used = 0;
    out->cap = len;
    out->step = len / 2 + 16;
    out->data = (unsigned char *)malloc((size_t)len);
    out->failed = (out->data == NULL);
}

/* Append n bytes, growing the buffer when needed. */
static void kf_out_put(struct kf_outbuf *out, const unsigned char *src, int n)
{
    if (out->failed) {
        return;
    }
    if (out->used + n > out->cap) {
        int newcap = out->cap;
        unsigned char *grown;

        while (out->used + n > newcap) {
            newcap += out->step;
        }
        grown = (unsigned char *)realloc(out->data, (size_t)newcap);
        if (!grown) {
            free(out->data);
            out->data = NULL;
            out->failed = 1;
            return;
        }
        out->data = grown;
        out->cap = newcap;
    }
    memcpy(out->data + out->used, src, (size_t)n);
    out->used += n;
}

/* Append a single byte. */
static void kf_out_putc(struct kf_outbuf *out, unsigned char ch)
{
    kf_out_put(out, &ch, 1);
}

/*
 * Hand the finished buffer to the caller.  Returns 1 on success and 0 after
 * an allocation failure.  *ret is NULL (never a dangling pointer) whenever
 * there is no output or the conversion failed.
 */
static int kf_out_finish(struct kf_outbuf *out, unsigned char **ret, int *retlen)
{
    unsigned char *fit;

    *ret = NULL;
    *retlen = 0;

    if (out->failed) {
        return 0;
    }
    if (out->used == 0) {
        free(out->data);
        out->data = NULL;
        return 1;
    }

    /* Trim to the exact size.  If the shrinking realloc fails the old
       block is still valid, so keep it instead of discarding the result. */
    fit = (unsigned char *)realloc(out->data, (size_t)out->used);
    if (fit) {
        out->data = fit;
    }
    *ret = out->data;
    *retlen = out->used;
    return 1;
}

/* Emit the escape sequence selecting `want`, unless it is already active. */
static void kf_jis_select(struct kf_outbuf *out, enum kf_jis_mode *cur,
                          enum kf_jis_mode want)
{
    static const unsigned char esc_ascii[] = {0x1b, '(', 'B'};
    static const unsigned char esc_kanji[] = {0x1b, '$', 'B'};
    static const unsigned char esc_kana[] = {0x1b, '(', 'I'};
    static const unsigned char esc_0213[] = {0x1b, '$', '(', 'O'};

    if (*cur == want) {
        return;
    }
    *cur = want;

    switch (want) {
    case KF_MODE_KANJI:
        kf_out_put(out, esc_kanji, 3);
        break;
    case KF_MODE_HANKANA:
        kf_out_put(out, esc_kana, 3);
        break;
    case KF_MODE_JIS0213:
        kf_out_put(out, esc_0213, 4);
        break;
    default:
        kf_out_put(out, esc_ascii, 3);
        break;
    }
}

/*
 * Recognise a JIS escape sequence or SO/SI shift at p (remain bytes left).
 * Updates *mode and returns the number of bytes the sequence occupies, or
 * 0 when p does not start a recognised sequence.
 */
static int kf_jis_scan_shift(const unsigned char *p, int remain,
                             enum kf_jis_mode *mode)
{
    if (remain >= 3 && p[0] == 0x1b) {
        if (p[1] == '$' && (p[2] == '@' || p[2] == 'B')) {
            *mode = KF_MODE_KANJI;
            return 3;
        }
        if (remain >= 4 && p[1] == '$' && p[2] == '(' && p[3] == 'O') {
            *mode = KF_MODE_KANJI;
            return 4;
        }
        if (p[1] == '(' && (p[2] == 'B' || p[2] == 'J')) {
            *mode = KF_MODE_ASCII;
            return 3;
        }
        if (p[1] == '(' && p[2] == 'I') {
            *mode = KF_MODE_HANKANA;
            return 3;
        }
    }
    if (p[0] == 0x0e) {          /* SO: shift to halfwidth kana */
        *mode = KF_MODE_HANKANA;
        return 1;
    }
    if (p[0] == 0x0f) {          /* SI: shift back */
        *mode = KF_MODE_ASCII;
        return 1;
    }
    return 0;
}

/*
 * Convert len bytes of Shift_JIS in buf to 7-bit JIS.
 *
 * j0208: when non-zero every double-byte character is announced with
 *        ESC $ B; otherwise characters reported by isjis0213() are
 *        announced with ESC $ ( O.
 *
 * On success returns 1 and stores a malloc'ed buffer in *ret with its
 * length in *retlen; the caller frees it.  Empty input yields *ret == NULL
 * and *retlen == 0.  Returns 0 (with *ret == NULL) if allocation fails.
 */
int sjistojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
{
    struct kf_outbuf out;
    enum kf_jis_mode mode = KF_MODE_ASCII;
    unsigned char pair[2];
    int pos;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    kf_out_init(&out, len);

    for (pos = 0; pos < len && !out.failed; pos++) {
        if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
            pair[0] = buf[pos];
            pair[1] = buf[pos+1];
            pos += 1;
            sjis_to_jis2(&pair[0], &pair[1]);

            if (j0208 || !isjis0213(pair[0], pair[1])) {
                kf_jis_select(&out, &mode, KF_MODE_KANJI);
            }
            else {
                kf_jis_select(&out, &mode, KF_MODE_JIS0213);
            }
            kf_out_put(&out, pair, 2);
        }
        else if (ishankana(buf[pos])) {
            kf_jis_select(&out, &mode, KF_MODE_HANKANA);
            kf_out_putc(&out, buf[pos] & 0x7f);
        }
        else {
            kf_jis_select(&out, &mode, KF_MODE_ASCII);
            kf_out_putc(&out, buf[pos]);
        }
    }

    /* always leave the stream in ASCII */
    kf_jis_select(&out, &mode, KF_MODE_ASCII);
    return kf_out_finish(&out, ret, retlen);
}

/*
 * Convert len bytes of EUC-JP in buf to 7-bit JIS.
 * j0208 and the return/ownership contract are the same as for sjistojis().
 */
int euctojis(int len, unsigned char *buf, unsigned char **ret, int *retlen, int j0208)
{
    struct kf_outbuf out;
    enum kf_jis_mode mode = KF_MODE_ASCII;
    unsigned char pair[2];
    int pos;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    kf_out_init(&out, len);

    for (pos = 0; pos < len && !out.failed; pos++) {
        if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
            pair[0] = buf[pos] & 0x7f;
            pair[1] = buf[pos+1] & 0x7f;
            pos += 1;

            if (j0208 || !isjis0213(pair[0], pair[1])) {
                kf_jis_select(&out, &mode, KF_MODE_KANJI);
            }
            else {
                kf_jis_select(&out, &mode, KF_MODE_JIS0213);
            }
            kf_out_put(&out, pair, 2);
        }
        else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
            kf_jis_select(&out, &mode, KF_MODE_HANKANA);
            kf_out_putc(&out, buf[pos+1] & 0x7f);
            pos += 1;
        }
        else {
            kf_jis_select(&out, &mode, KF_MODE_ASCII);
            kf_out_putc(&out, buf[pos]);
        }
    }

    kf_jis_select(&out, &mode, KF_MODE_ASCII);
    return kf_out_finish(&out, ret, retlen);
}

/*
 * Convert len bytes of Shift_JIS in buf to EUC-JP.
 * Halfwidth kana are prefixed with 0x8e.  Return/ownership contract as
 * for sjistojis().
 */
int sjistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    struct kf_outbuf out;
    unsigned char pair[2];
    int pos;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    kf_out_init(&out, len);

    for (pos = 0; pos < len && !out.failed; pos++) {
        if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) {
            pair[0] = buf[pos];
            pair[1] = buf[pos+1];
            pos += 1;

            sjis_to_jis2(&pair[0], &pair[1]);
            pair[0] |= 0x80;
            pair[1] |= 0x80;
            kf_out_put(&out, pair, 2);
        }
        else if (ishankana(buf[pos])) {
            pair[0] = 0x8e;
            pair[1] = buf[pos];
            kf_out_put(&out, pair, 2);
        }
        else {
            kf_out_putc(&out, buf[pos]);
        }
    }

    return kf_out_finish(&out, ret, retlen);
}

/*
 * Convert len bytes of 7-bit JIS in buf to EUC-JP.
 * Escape sequences and SO/SI are consumed and produce no output, so input
 * consisting only of such sequences yields *ret == NULL, *retlen == 0.
 * Return/ownership contract as for sjistojis().
 */
int jistoeuc(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    struct kf_outbuf out;
    enum kf_jis_mode mode = KF_MODE_ASCII;
    unsigned char pair[2];
    int pos, skip;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    kf_out_init(&out, len);

    for (pos = 0; pos < len && !out.failed; pos++) {
        skip = kf_jis_scan_shift(buf + pos, len - pos, &mode);
        if (skip) {
            pos += skip - 1;
            continue;
        }

        if (mode == KF_MODE_KANJI && isjis(buf[pos]) &&
            (pos + 1 < len) && isjis(buf[pos+1])) {
            pair[0] = buf[pos] | 0x80;
            pair[1] = buf[pos+1] | 0x80;
            pos++;
            kf_out_put(&out, pair, 2);
        }
        else if (mode == KF_MODE_HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
            pair[0] = 0x8e;
            pair[1] = buf[pos] | 0x80;
            kf_out_put(&out, pair, 2);
        }
        else {
            kf_out_putc(&out, buf[pos]);
        }
    }

    return kf_out_finish(&out, ret, retlen);
}

/*
 * Convert len bytes of 7-bit JIS in buf to Shift_JIS.
 * Escape handling and the return/ownership contract match jistoeuc().
 */
int jistosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    struct kf_outbuf out;
    enum kf_jis_mode mode = KF_MODE_ASCII;
    unsigned char pair[2];
    int pos, skip;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    kf_out_init(&out, len);

    for (pos = 0; pos < len && !out.failed; pos++) {
        skip = kf_jis_scan_shift(buf + pos, len - pos, &mode);
        if (skip) {
            pos += skip - 1;
            continue;
        }

        if (mode == KF_MODE_KANJI && isjis(buf[pos]) &&
            (pos + 1 < len) && isjis(buf[pos+1])) {
            pair[0] = buf[pos];
            pair[1] = buf[pos+1];
            pos++;

            jis_to_sjis2(&pair[0], &pair[1]);
            kf_out_put(&out, pair, 2);
        }
        else if (mode == KF_MODE_HANKANA && buf[pos] >= 0x20 && buf[pos] <= 0x5f) {
            kf_out_putc(&out, buf[pos] | 0x80);
        }
        else {
            kf_out_putc(&out, buf[pos]);
        }
    }

    return kf_out_finish(&out, ret, retlen);
}

/*
 * Convert len bytes of EUC-JP in buf to Shift_JIS.
 * 0x8e-prefixed halfwidth kana lose their prefix.  Return/ownership
 * contract as for sjistojis().
 */
int euctosjis(int len, unsigned char *buf, unsigned char **ret, int *retlen)
{
    struct kf_outbuf out;
    unsigned char pair[2];
    int pos;

    if (!len) {
        *ret = NULL;
        *retlen = 0;
        return 1;
    }

    kf_out_init(&out, len);

    for (pos = 0; pos < len && !out.failed; pos++) {
        if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) {
            pair[0] = buf[pos] & 0x7f;
            pair[1] = buf[pos+1] & 0x7f;
            pos += 1;

            jis_to_sjis2(&pair[0], &pair[1]);
            kf_out_put(&out, pair, 2);
        }
        else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) {
            kf_out_putc(&out, buf[pos+1]);
            pos++;
        }
        else {
            kf_out_putc(&out, buf[pos]);
        }
    }

    return kf_out_finish(&out, ret, retlen);
}
1013 | + | |
/*
 * Halfwidth katakana as 8-bit byte strings, some followed by a voiced
 * (0xde) or semi-voiced (0xdf) sound mark.  NULL-terminated.
 * Entry i corresponds to entry i of sjis_f_kana; sjistohankana() relies on
 * that index alignment, so keep the tables in step.
 */
static const unsigned char *h_kana[] = {
    "\xdd", "\xdc", "\xdb", "\xda", "\xd9", "\xd8", "\xd7", "\xd6", "\xd5", "\xd4",
    "\xd3", "\xd2", "\xd1", "\xd0", "\xcf", "\xce\xdf", "\xce\xde", "\xce", "\xcd\xdf",
    "\xcd\xde", "\xcd", "\xcc\xdf", "\xcc\xde", "\xcc", "\xcb\xdf", "\xcb\xde",
    "\xcb", "\xca\xdf", "\xca\xde", "\xca", "\xc9", "\xc8", "\xc7", "\xc6", "\xc5",
    "\xc4\xde", "\xc4", "\xc3\xde", "\xc3", "\xc2\xde", "\xc2", "\xc1\xde", "\xc1",
    "\xc0\xde", "\xc0", "\xbf\xde", "\xbf", "\xbe\xde", "\xbe", "\xbd\xde", "\xbd",
    "\xbc\xde", "\xbc", "\xbb\xde", "\xbb", "\xba\xde", "\xba", "\xb9\xde", "\xb9",
    "\xb8\xde", "\xb8", "\xb7\xde", "\xb7", "\xb6\xde", "\xb6", "\xb5", "\xb4", "\xb3\xde",
    "\xb3", "\xb2", "\xb1", "\xb0", "\xaf", "\xae", "\xad", "\xac", "\xab",
    "\xaa", "\xa9", "\xa8", "\xa7", "\xa6", "\xa5", "\xa4", "\xa3", "\xa2", "\xa1",
    NULL
};
1025 | + | |
/*
 * Halfwidth katakana in EUC-JP: the same sequence as h_kana with every
 * kana byte prefixed by 0x8e.  NULL-terminated.
 */
static const unsigned char *euc_h_kana[] = {
    "\x8e\xdd", "\x8e\xdc", "\x8e\xdb", "\x8e\xda", "\x8e\xd9", "\x8e\xd8", "\x8e\xd7", "\x8e\xd6", "\x8e\xd5", "\x8e\xd4",
    "\x8e\xd3", "\x8e\xd2", "\x8e\xd1", "\x8e\xd0", "\x8e\xcf", "\x8e\xce\x8e\xdf", "\x8e\xce\x8e\xde", "\x8e\xce", "\x8e\xcd\x8e\xdf",
    "\x8e\xcd\x8e\xde", "\x8e\xcd", "\x8e\xcc\x8e\xdf", "\x8e\xcc\x8e\xde", "\x8e\xcc", "\x8e\xcb\x8e\xdf", "\x8e\xcb\x8e\xde",
    "\x8e\xcb", "\x8e\xca\x8e\xdf", "\x8e\xca\x8e\xde", "\x8e\xca", "\x8e\xc9", "\x8e\xc8", "\x8e\xc7", "\x8e\xc6", "\x8e\xc5",
    "\x8e\xc4\x8e\xde", "\x8e\xc4", "\x8e\xc3\x8e\xde", "\x8e\xc3", "\x8e\xc2\x8e\xde", "\x8e\xc2", "\x8e\xc1\x8e\xde", "\x8e\xc1",
    "\x8e\xc0\x8e\xde", "\x8e\xc0", "\x8e\xbf\x8e\xde", "\x8e\xbf", "\x8e\xbe\x8e\xde", "\x8e\xbe", "\x8e\xbd\x8e\xde", "\x8e\xbd",
    "\x8e\xbc\x8e\xde", "\x8e\xbc", "\x8e\xbb\x8e\xde", "\x8e\xbb", "\x8e\xba\x8e\xde", "\x8e\xba", "\x8e\xb9\x8e\xde", "\x8e\xb9",
    "\x8e\xb8\x8e\xde", "\x8e\xb8", "\x8e\xb7\x8e\xde", "\x8e\xb7", "\x8e\xb6\x8e\xde", "\x8e\xb6", "\x8e\xb5", "\x8e\xb4", "\x8e\xb3\x8e\xde",
    "\x8e\xb3", "\x8e\xb2", "\x8e\xb1", "\x8e\xb0", "\x8e\xaf", "\x8e\xae", "\x8e\xad", "\x8e\xac", "\x8e\xab",
    "\x8e\xaa", "\x8e\xa9", "\x8e\xa8", "\x8e\xa7", "\x8e\xa6", "\x8e\xa5", "\x8e\xa4", "\x8e\xa3", "\x8e\xa2", "\x8e\xa1",
    NULL
};
1037 | + | |
/*
 * Fullwidth katakana and related punctuation as Shift_JIS double-byte
 * strings.  NULL-terminated.  Entry i corresponds to entry i of h_kana;
 * sjistohankana() looks a character up here and emits h_kana[i].
 */
static const unsigned char *sjis_f_kana[] = {
    "\x83\x93", "\x83\x8f", "\x83\x8d", "\x83\x8c", "\x83\x8b", "\x83\x8a",
    "\x83\x89", "\x83\x88", "\x83\x86", "\x83\x84", "\x83\x82", "\x83\x81",
    "\x83\x80", "\x83\x7e", "\x83\x7d", "\x83\x7c", "\x83\x7b", "\x83\x7a",
    "\x83\x79", "\x83\x78", "\x83\x77", "\x83\x76", "\x83\x75", "\x83\x74",
    "\x83\x73", "\x83\x72", "\x83\x71", "\x83\x70", "\x83\x6f", "\x83\x6e",
    "\x83\x6d", "\x83\x6c", "\x83\x6b", "\x83\x6a", "\x83\x69", "\x83\x68",
    "\x83\x67", "\x83\x66", "\x83\x65", "\x83\x64", "\x83\x63", "\x83\x61",
    "\x83\x60", "\x83\x5f", "\x83\x5e", "\x83\x5d", "\x83\x5c", "\x83\x5b",
    "\x83\x5a", "\x83\x59", "\x83\x58", "\x83\x57", "\x83\x56", "\x83\x55",
    "\x83\x54", "\x83\x53", "\x83\x52", "\x83\x51", "\x83\x50", "\x83\x4f",
    "\x83\x4e", "\x83\x4d", "\x83\x4c", "\x83\x4b", "\x83\x4a", "\x83\x49",
    "\x83\x47", "\x83\x94", "\x83\x45", "\x83\x43", "\x83\x41", "\x81\x5b",
    "\x83\x62", "\x83\x87", "\x83\x85", "\x83\x83", "\x83\x48", "\x83\x46",
    "\x83\x44", "\x83\x42", "\x83\x40", "\x83\x92", "\x81\x45", "\x81\x41",
    "\x81\x76", "\x81\x75", "\x81\x42",
    NULL
};
1054 | + | |
1055 | + | |
/*
 * Fullwidth katakana and related punctuation as EUC-JP double-byte
 * strings, in the same order as sjis_f_kana.  NULL-terminated.
 */
static const unsigned char *euc_f_kana[] = {
    "\xa5\xf3", "\xa5\xef", "\xa5\xed", "\xa5\xec", "\xa5\xeb", "\xa5\xea",
    "\xa5\xe9", "\xa5\xe8", "\xa5\xe6", "\xa5\xe4", "\xa5\xe2", "\xa5\xe1",
    "\xa5\xe0", "\xa5\xdf", "\xa5\xde", "\xa5\xdd", "\xa5\xdc", "\xa5\xdb",
    "\xa5\xda", "\xa5\xd9", "\xa5\xd8", "\xa5\xd7", "\xa5\xd6", "\xa5\xd5",
    "\xa5\xd4", "\xa5\xd3", "\xa5\xd2", "\xa5\xd1", "\xa5\xd0", "\xa5\xcf",
    "\xa5\xce", "\xa5\xcd", "\xa5\xcc", "\xa5\xcb", "\xa5\xca", "\xa5\xc9",
    "\xa5\xc8", "\xa5\xc7", "\xa5\xc6", "\xa5\xc5", "\xa5\xc4", "\xa5\xc2",
    "\xa5\xc1", "\xa5\xc0", "\xa5\xbf", "\xa5\xbe", "\xa5\xbd", "\xa5\xbc",
    "\xa5\xbb", "\xa5\xba", "\xa5\xb9", "\xa5\xb8", "\xa5\xb7", "\xa5\xb6",
    "\xa5\xb5", "\xa5\xb4", "\xa5\xb3", "\xa5\xb2", "\xa5\xb1", "\xa5\xb0",
    "\xa5\xaf", "\xa5\xae", "\xa5\xad", "\xa5\xac", "\xa5\xab", "\xa5\xaa",
    "\xa5\xa8", "\xa5\xf4", "\xa5\xa6", "\xa5\xa4", "\xa5\xa2", "\xa1\xbc",
    "\xa5\xc3", "\xa5\xe7", "\xa5\xe5", "\xa5\xe3", "\xa5\xa9", "\xa5\xa7",
    "\xa5\xa5", "\xa5\xa3", "\xa5\xa1", "\xa5\xf2", "\xa1\xa6", "\xa1\xa2",
    "\xa1\xd7", "\xa1\xd6", "\xa1\xa3",
    NULL
};
1072 | + | |
1073 | + | |
1074 | +int sjistohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) { | |
1075 | + int pos, tmplen, retpos=0; | |
1076 | + char tmp[10]; | |
1077 | + char *newbuf; | |
1078 | + int i; | |
1079 | + | |
1080 | + if (!len) { | |
1081 | + *retlen = 0; | |
1082 | + return 1; | |
1083 | + } | |
1084 | + | |
1085 | + *retlen = len; | |
1086 | + *ret = malloc(*retlen); | |
1087 | + if (!*ret) { | |
1088 | + return 0; | |
1089 | + } | |
1090 | + | |
1091 | + for (pos = 0; pos < len; pos++) { | |
1092 | + tmplen=0; | |
1093 | + | |
1094 | + if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) { | |
1095 | + for (i = 0; sjis_f_kana[i]; i++) { | |
1096 | + if (buf[pos] == sjis_f_kana[i][0] && buf[pos+1] == sjis_f_kana[i][1]) { | |
1097 | + tmp[tmplen++] = h_kana[i][0]; | |
1098 | + if (h_kana[i][1]) { | |
1099 | + tmp[tmplen++] = h_kana[i][1]; | |
1100 | + } | |
1101 | + break; | |
1102 | + } | |
1103 | + } | |
1104 | + if (!sjis_f_kana[i]) { | |
1105 | + tmp[tmplen++] = buf[pos]; | |
1106 | + tmp[tmplen++] = buf[pos+1]; | |
1107 | + } | |
1108 | + | |
1109 | + pos++; | |
1110 | + } else { | |
1111 | + tmp[tmplen++] = buf[pos]; | |
1112 | + } | |
1113 | + | |
1114 | + if (tmplen) { | |
1115 | + if (retpos + tmplen > *retlen) { | |
1116 | + *retlen = *retlen + len / 2 + 16; | |
1117 | + newbuf = realloc(*ret, *retlen); | |
1118 | + if (!newbuf) { | |
1119 | + free(*ret); | |
1120 | + return 0; | |
1121 | + } | |
1122 | + *ret = newbuf; | |
1123 | + } | |
1124 | + memcpy(*ret+retpos, tmp, tmplen); | |
1125 | + retpos += tmplen; | |
1126 | + } | |
1127 | + } | |
1128 | + | |
1129 | + if (!retpos) { | |
1130 | + *retlen = 0; | |
1131 | + free(*ret); | |
1132 | + return 1; | |
1133 | + } | |
1134 | + | |
1135 | + newbuf = realloc(*ret, retpos); | |
1136 | + if (!newbuf) { | |
1137 | + free(*ret); | |
1138 | + return 0; | |
1139 | + } | |
1140 | + *ret = newbuf; | |
1141 | + *retlen = retpos; | |
1142 | + | |
1143 | + return 1; | |
1144 | +} | |
1145 | + | |
1146 | + | |
1147 | +int sjistofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) { | |
1148 | + int pos, tmplen, retpos=0; | |
1149 | + char tmp[10]; | |
1150 | + char *newbuf; | |
1151 | + int i, j; | |
1152 | + | |
1153 | + if (!len) { | |
1154 | + *retlen = 0; | |
1155 | + return 1; | |
1156 | + } | |
1157 | + | |
1158 | + *retlen = len; | |
1159 | + *ret = malloc(*retlen); | |
1160 | + if (!*ret) { | |
1161 | + return 0; | |
1162 | + } | |
1163 | + | |
1164 | + for (pos = 0; pos < len; pos++) { | |
1165 | + tmplen=0; | |
1166 | + | |
1167 | + if (ishankana(buf[pos])) { | |
1168 | + for (i = 0; h_kana[i]; i++) { | |
1169 | + for (j = 0; h_kana[i][j] && buf[pos+j]; j++) { | |
1170 | + if (h_kana[i][j] != buf[pos+j]) { | |
1171 | + break; | |
1172 | + } | |
1173 | + } | |
1174 | + if (!h_kana[i][j]) { | |
1175 | + const char *p; | |
1176 | + for (p = sjis_f_kana[i]; *p; p++) { | |
1177 | + tmp[tmplen++] = *p; | |
1178 | + } | |
1179 | + pos += j-1; | |
1180 | + break; | |
1181 | + } | |
1182 | + } | |
1183 | + | |
1184 | + if (!h_kana[i]) { | |
1185 | + tmp[tmplen++] = buf[pos]; | |
1186 | + } | |
1187 | + } | |
1188 | + else if (issjis1(buf[pos]) && (pos + 1 < len) && issjis2(buf[pos+1])) { | |
1189 | + tmp[tmplen++] = buf[pos]; | |
1190 | + tmp[tmplen++] = buf[pos+1]; | |
1191 | + pos += 1; | |
1192 | + } else { | |
1193 | + tmp[tmplen++] = buf[pos]; | |
1194 | + } | |
1195 | + | |
1196 | + if (tmplen) { | |
1197 | + if (retpos + tmplen > *retlen) { | |
1198 | + *retlen = *retlen + len / 2 + 16; | |
1199 | + newbuf = realloc(*ret, *retlen); | |
1200 | + if (!newbuf) { | |
1201 | + free(*ret); | |
1202 | + return 0; | |
1203 | + } | |
1204 | + *ret = newbuf; | |
1205 | + } | |
1206 | + memcpy(*ret+retpos, tmp, tmplen); | |
1207 | + retpos += tmplen; | |
1208 | + } | |
1209 | + } | |
1210 | + | |
1211 | + if (!retpos) { | |
1212 | + *retlen = 0; | |
1213 | + free(*ret); | |
1214 | + return 1; | |
1215 | + } | |
1216 | + | |
1217 | + newbuf = realloc(*ret, retpos); | |
1218 | + if (!newbuf) { | |
1219 | + free(*ret); | |
1220 | + return 0; | |
1221 | + } | |
1222 | + *ret = newbuf; | |
1223 | + *retlen = retpos; | |
1224 | + | |
1225 | + return 1; | |
1226 | +} | |
1227 | + | |
1228 | +int euctohankana(int len, unsigned char *buf, unsigned char **ret, int *retlen) { | |
1229 | + int pos, tmplen, retpos=0; | |
1230 | + char tmp[10]; | |
1231 | + char *newbuf; | |
1232 | + int i; | |
1233 | + | |
1234 | + if (!len) { | |
1235 | + *retlen = 0; | |
1236 | + return 1; | |
1237 | + } | |
1238 | + | |
1239 | + *retlen = len; | |
1240 | + *ret = malloc(*retlen); | |
1241 | + if (!*ret) { | |
1242 | + return 0; | |
1243 | + } | |
1244 | + | |
1245 | + for (pos = 0; pos < len; pos++) { | |
1246 | + tmplen=0; | |
1247 | + | |
1248 | + if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) { | |
1249 | + for (i = 0; euc_f_kana[i]; i++) { | |
1250 | + if (buf[pos] == euc_f_kana[i][0] && buf[pos+1] == euc_f_kana[i][1]) { | |
1251 | + tmp[tmplen++] = '\x8e'; | |
1252 | + tmp[tmplen++] = h_kana[i][0]; | |
1253 | + if (h_kana[i][1]) { | |
1254 | + tmp[tmplen++] = '\x8e'; | |
1255 | + tmp[tmplen++] = h_kana[i][1]; | |
1256 | + } | |
1257 | + break; | |
1258 | + } | |
1259 | + } | |
1260 | + if (!euc_f_kana[i]) { | |
1261 | + tmp[tmplen++] = buf[pos]; | |
1262 | + tmp[tmplen++] = buf[pos+1]; | |
1263 | + } | |
1264 | + pos++; | |
1265 | + } | |
1266 | + else if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) { | |
1267 | + tmp[tmplen++] = buf[pos]; | |
1268 | + tmp[tmplen++] = buf[pos+1]; | |
1269 | + pos++; | |
1270 | + } else { | |
1271 | + tmp[tmplen++] = buf[pos]; | |
1272 | + } | |
1273 | + | |
1274 | + if (tmplen) { | |
1275 | + if (retpos + tmplen > *retlen) { | |
1276 | + *retlen = *retlen + len / 2 + 16; | |
1277 | + newbuf = realloc(*ret, *retlen); | |
1278 | + if (!newbuf) { | |
1279 | + free(*ret); | |
1280 | + return 0; | |
1281 | + } | |
1282 | + *ret = newbuf; | |
1283 | + } | |
1284 | + memcpy(*ret+retpos, tmp, tmplen); | |
1285 | + retpos += tmplen; | |
1286 | + } | |
1287 | + } | |
1288 | + | |
1289 | + if (!retpos) { | |
1290 | + *retlen = 0; | |
1291 | + free(*ret); | |
1292 | + return 1; | |
1293 | + } | |
1294 | + | |
1295 | + newbuf = realloc(*ret, retpos); | |
1296 | + if (!newbuf) { | |
1297 | + free(*ret); | |
1298 | + return 0; | |
1299 | + } | |
1300 | + *ret = newbuf; | |
1301 | + *retlen = retpos; | |
1302 | + | |
1303 | + return 1; | |
1304 | +} | |
1305 | + | |
1306 | + | |
1307 | +int euctofullkana(int len, unsigned char *buf, unsigned char **ret, int *retlen) { | |
1308 | + int pos, tmplen, retpos=0; | |
1309 | + char tmp[10]; | |
1310 | + char *newbuf; | |
1311 | + int i, j; | |
1312 | + | |
1313 | + if (!len) { | |
1314 | + *retlen = 0; | |
1315 | + return 1; | |
1316 | + } | |
1317 | + | |
1318 | + *retlen = len; | |
1319 | + *ret = malloc(*retlen); | |
1320 | + if (!*ret) { | |
1321 | + return 0; | |
1322 | + } | |
1323 | + | |
1324 | + for (pos = 0; pos < len; pos++) { | |
1325 | + tmplen=0; | |
1326 | + | |
1327 | + if ((buf[pos] == 0x8e) && (pos + 1 < len) && ishankana(buf[pos+1])) { | |
1328 | + for (i = 0; euc_h_kana[i]; i++) { | |
1329 | + for (j = 0; euc_h_kana[i][j] && buf[pos+j]; j++) { | |
1330 | + if (euc_h_kana[i][j] != buf[pos+j]) { | |
1331 | + break; | |
1332 | + } | |
1333 | + } | |
1334 | + if (!euc_h_kana[i][j]) { | |
1335 | + const char *p; | |
1336 | + for (p = euc_f_kana[i]; *p; p++) { | |
1337 | + tmp[tmplen++] = *p; | |
1338 | + } | |
1339 | + pos += j-1; | |
1340 | + break; | |
1341 | + } | |
1342 | + } | |
1343 | + | |
1344 | + if (!h_kana[i]) { | |
1345 | + tmp[tmplen++] = buf[pos]; | |
1346 | + } | |
1347 | + } | |
1348 | + else if (iseuc(buf[pos]) && (pos + 1 < len) && iseuc(buf[pos+1])) { | |
1349 | + tmp[tmplen++] = buf[pos]; | |
1350 | + tmp[tmplen++] = buf[pos+1]; | |
1351 | + pos += 1; | |
1352 | + } else { | |
1353 | + tmp[tmplen++] = buf[pos]; | |
1354 | + } | |
1355 | + | |
1356 | + if (tmplen) { | |
1357 | + if (retpos + tmplen > *retlen) { | |
1358 | + *retlen = *retlen + len / 2 + 16; | |
1359 | + newbuf = realloc(*ret, *retlen); | |
1360 | + if (!newbuf) { | |
1361 | + free(*ret); | |
1362 | + return 0; | |
1363 | + } | |
1364 | + *ret = newbuf; | |
1365 | + } | |
1366 | + memcpy(*ret+retpos, tmp, tmplen); | |
1367 | + retpos += tmplen; | |
1368 | + } | |
1369 | + } | |
1370 | + | |
1371 | + if (!retpos) { | |
1372 | + *retlen = 0; | |
1373 | + free(*ret); | |
1374 | + return 1; | |
1375 | + } | |
1376 | + | |
1377 | + newbuf = realloc(*ret, retpos); | |
1378 | + if (!newbuf) { | |
1379 | + free(*ret); | |
1380 | + return 0; | |
1381 | + } | |
1382 | + *ret = newbuf; | |
1383 | + *retlen = retpos; | |
1384 | + | |
1385 | + return 1; | |
1386 | +} | |
1387 | + | |
1388 | + | |
#ifdef PYKF_MAIN


/*
 * Stand-alone smoke test, built only when PYKF_MAIN is defined.
 * Runs a single Shift_JIS -> JIS conversion and releases the result.
 */
int main(void) {
/*
    NOTE(review): the disabled checks below call the converters with four
    arguments, while the live call further down passes a fifth one to
    sjistojis(); they need updating before they can be re-enabled.

    char *ret, *ret2, *ret3, *ret4, *ret5, *ret6, *ret7, *ret8;
    int retlen, retlen2, retlen3, retlen4, retlen5, retlen6, retlen7, retlen8;
    char *s1 = "\x82\xa0\xb1\x88\x9f\x61\x82\xa2\xb2\x8b\x8f\x62\x82\xa4\xb3\x89\x4b\x63\x82\xa6\xb4\x93\xbe\x64\x82\xa8\xb5\x94\xf6\x6f";
    char *s2 = "アイウエオ";
    char *gaiji = "\xf0\x40";
    char *s3 = "あいうえお\x81";
    char *s4 = "アイウエオカ";
    char *s5 = "アイ";
    int guessed;

    guess(strlen(s1), s1, 1);
    sjistohankana(strlen(s2), s2, &ret7, &retlen7);


    sjistojis(strlen(s1), s1, &ret, &retlen);
    jistoeuc(retlen, ret, &ret2, &retlen2);
    guess(retlen2, ret2, 1);

    euctosjis(retlen2, ret2, &ret3, &retlen3);
    assert(strncmp(s1, ret3, strlen(s1))==0);

    euctojis(retlen2, ret2, &ret4, &retlen4);
    assert(strncmp(ret, ret4, retlen)==0);

    sjistoeuc(strlen(s1), s1, &ret5, &retlen5);
    assert(strncmp(ret2, ret5, strlen(ret2))==0);

    jistosjis(retlen4, ret4, &ret6, &retlen6);
    assert(strncmp(s1, ret6, strlen(s1))==0);

    sjistoeuc(strlen(gaiji), gaiji, &ret7, &retlen7);

    sjistojis(strlen(s5), s5, &ret8, &retlen8);

    guessed = guess(strlen(s3), s3, 1);
    assert(guessed == ERROR);

    guessed = guess(strlen(s3), s3, 0);
    assert(guessed == SJIS);

    guessed = guess(strlen(s4), s4, 0);


*/
    char *s = "?";
    unsigned char *ret = NULL;
    int retlen = 0;

    /* A result buffer is only handed out for a successful, non-empty
       conversion (the module wrappers free it under the same condition). */
    if (sjistojis((int)strlen(s), (unsigned char *)s, &ret, &retlen, 0) && retlen) {
        free(ret);
    }

    return 0;
}

#endif
@@ -0,0 +1,650 @@ | ||
1 | +/********************************************************************* | |
2 | + | |
3 | +Japanese Kanji filter module | |
4 | + Copyright (c) 2002, Atsuo Ishimoto. All rights reserved. | |
5 | + | |
6 | +Permission to use, copy, modify, and distribute this software and its | |
7 | +documentation for any purpose and without fee is hereby granted, provided that | |
8 | +the above copyright notice appear in all copies and that both that copyright | |
9 | +notice and this permission notice appear in supporting documentation, and that | |
10 | +the name of Atsuo Ishimoto not be used in advertising or publicity pertaining | |
11 | +to distribution of the software without specific, written prior permission. | |
12 | + | |
13 | +ATSUO ISHIMOTO DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
14 | +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
15 | +EVENT SHALL ATSUO ISHIMOTO BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
16 | +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | |
17 | +USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
18 | +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
19 | +PERFORMANCE OF THIS SOFTWARE. | |
20 | + | |
21 | +--------------------------------------------------------------------- | |
22 | +This module is based on kf.c written by Haruhiko Okumura. | |
23 | + Copyright (c) 1995-2000 Haruhiko Okumura | |
24 | + This file may be freely modified/redistributed. | |
25 | + | |
26 | +Original kf.c: | |
27 | + http://www.matsusaka-u.ac.jp/~okumura/kf.html | |
28 | +*********************************************************************/ | |
29 | + | |
30 | +#include <Python.h> | |
31 | +#include "pykf.h" | |
32 | +#include "convert.h" | |
33 | + | |
34 | +static PyObject *EncodingError; | |
35 | +#define BADENCODING(d) {PyErr_Format(EncodingError, "%d", d);} | |
36 | +#define GUESSFAILED() {PyErr_Format(EncodingError, "Failed to detect encodnig");} | |
37 | + | |
38 | + | |
39 | + | |
40 | +#if defined(MS_WIN32) || defined(macintosh) | |
41 | +static int default_enc = SJIS; | |
42 | +#else | |
43 | +static int default_enc = EUC; | |
44 | +#endif | |
45 | + | |
46 | +#define SETDEFAULT_DOC "setdefault(enc) -> None\n\ | |
47 | +\tSet default input encoding" | |
48 | + | |
49 | +static PyObject* | |
50 | +pykf_setdefault(PyObject* self, PyObject* args) | |
51 | +{ | |
52 | + int enc; | |
53 | + if (!PyArg_ParseTuple(args, "i:setdefalult", &enc)) | |
54 | + return NULL; | |
55 | + | |
56 | + switch (enc) { | |
57 | + case UNKNOWN: case ASCII: case SJIS: case EUC: case JIS: | |
58 | + default_enc = enc; | |
59 | + break; | |
60 | + default: | |
61 | + BADENCODING(enc); return NULL; | |
62 | + } | |
63 | + Py_INCREF(Py_None); | |
64 | + return Py_None; | |
65 | +} | |
66 | + | |
67 | + | |
68 | +#define GETDEFAULT_DOC "getdefault() -> enc\n\ | |
69 | +\tGet default input encoding" | |
70 | + | |
71 | +static PyObject* | |
72 | +pykf_getdefault(PyObject* self, PyObject* args) | |
73 | +{ | |
74 | + if (!PyArg_ParseTuple(args, ":getdefault")) | |
75 | + return NULL; | |
76 | + | |
77 | + return PyInt_FromLong(default_enc); | |
78 | +} | |
79 | + | |
80 | +static int check_strict = 0; | |
81 | + | |
82 | +#define SETSTRICT_DOC "setstrict(True/False) -> None\n\ | |
83 | +\tSet strict check mode." | |
84 | + | |
85 | +static PyObject* | |
86 | +pykf_setstrict(PyObject* self, PyObject* args) | |
87 | +{ | |
88 | + if (!PyArg_ParseTuple(args, "i:setstrict", &check_strict)) | |
89 | + return NULL; | |
90 | + Py_INCREF(Py_None); | |
91 | + return Py_None; | |
92 | +} | |
93 | + | |
94 | +#define GETSTRICT_DOC "getstrict() -> int\n\ | |
95 | +\tGet strict check mode." | |
96 | + | |
97 | +static PyObject* | |
98 | +pykf_getstrict(PyObject* self, PyObject* args) | |
99 | +{ | |
100 | + if (!PyArg_ParseTuple(args, ":getstrict")) | |
101 | + return NULL; | |
102 | + | |
103 | + return PyInt_FromLong(check_strict); | |
104 | +} | |
105 | + | |
106 | + | |
107 | +#define GUESS_DOC "guess(s) -> encoding\n\ | |
108 | +\tGuess string encoding" | |
109 | + | |
110 | +static PyObject* | |
111 | +pykf_guess(PyObject* self, PyObject* args) | |
112 | +{ | |
113 | + char *s; | |
114 | + int ret, len; | |
115 | + int strict = check_strict; | |
116 | + | |
117 | + if (!PyArg_ParseTuple(args, "s#|i:guess", &s, &len, &strict)) | |
118 | + return NULL; | |
119 | + | |
120 | + ret = guess(len, s, strict); | |
121 | + return PyInt_FromLong(ret); | |
122 | +} | |
123 | + | |
124 | + | |
125 | + | |
126 | +#define TOJIS_DOC "tojis(s[, enc]) -> converted string\n\ | |
127 | +\tConvet string to JIS encoding" | |
128 | + | |
129 | +static PyObject* | |
130 | +pykf_tojis(PyObject* self, PyObject* args, PyObject* kwds) | |
131 | +{ | |
132 | + unsigned char *s, *conv; | |
133 | + int enc=UNKNOWN, len, convlen; | |
134 | + PyObject *ret; | |
135 | + int strict = check_strict; | |
136 | + int j0208 = 0; | |
137 | + static char *kwlist[] = {"s", "enc", "strict", "j0208", NULL}; | |
138 | + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|iii:tojis", kwlist, &s, &len, &enc, &strict, &j0208)) | |
139 | + return NULL; | |
140 | + | |
141 | +// if (!PyArg_ParseTuple(args, "s#|ii:tojis", &s, &len, &enc, &strict)) | |
142 | + | |
143 | + if (enc == UNKNOWN) { | |
144 | + enc = guess(len, s, strict); | |
145 | + if (strict && enc == ERROR) { | |
146 | + GUESSFAILED(); return NULL; | |
147 | + } | |
148 | + if (enc == UNKNOWN) | |
149 | + enc = default_enc; | |
150 | + if (enc == UNKNOWN) { | |
151 | + GUESSFAILED(); return NULL; | |
152 | + } | |
153 | + } | |
154 | + | |
155 | + switch (enc) { | |
156 | + case SJIS: | |
157 | + if (sjistojis(len, s, &conv, &convlen, j0208)) { | |
158 | + if (convlen) { | |
159 | + ret = PyString_FromStringAndSize(conv, convlen); | |
160 | + free(conv); | |
161 | + } | |
162 | + else { | |
163 | + ret = PyString_FromStringAndSize("", 0); | |
164 | + } | |
165 | + return ret; | |
166 | + } | |
167 | + break; | |
168 | + case EUC: | |
169 | + if (euctojis(len, s, &conv, &convlen, j0208)) { | |
170 | + if (convlen) { | |
171 | + ret = PyString_FromStringAndSize(conv, convlen); | |
172 | + free(conv); | |
173 | + } | |
174 | + else { | |
175 | + ret = PyString_FromStringAndSize("", 0); | |
176 | + } | |
177 | + return ret; | |
178 | + } | |
179 | + break; | |
180 | + case JIS: | |
181 | + case ASCII: | |
182 | + return PyString_FromStringAndSize(s, len); | |
183 | + default: | |
184 | + BADENCODING(enc); return NULL; | |
185 | + } | |
186 | + return PyErr_NoMemory(); | |
187 | +} | |
188 | + | |
189 | + | |
190 | +#define TOEUC_DOC "toeuc(s[, enc]) -> converted string\n\ | |
191 | +\tConvet string to EUC encoding" | |
192 | + | |
193 | +static PyObject* | |
194 | +pykf_toeuc(PyObject* self, PyObject* args, PyObject* kwds) | |
195 | +{ | |
196 | + unsigned char *s, *conv; | |
197 | + int enc=UNKNOWN, len, convlen; | |
198 | + PyObject *ret; | |
199 | + int strict = check_strict; | |
200 | + | |
201 | + static char *kwlist[] = {"s", "enc", "strict", NULL}; | |
202 | + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:toeuc", kwlist, &s, &len, &enc, &strict)) | |
203 | + return NULL; | |
204 | + | |
205 | + if (enc == UNKNOWN) { | |
206 | + enc = guess(len, s, strict); | |
207 | + if (strict && enc == ERROR) { | |
208 | + GUESSFAILED(); return NULL; | |
209 | + } | |
210 | + if (enc == UNKNOWN) | |
211 | + enc = default_enc; | |
212 | + if (enc == UNKNOWN) { | |
213 | + GUESSFAILED(); return NULL; | |
214 | + } | |
215 | + } | |
216 | + | |
217 | + switch (enc) { | |
218 | + case SJIS: | |
219 | + if (sjistoeuc(len, s, &conv, &convlen)) { | |
220 | + if (convlen) { | |
221 | + ret = PyString_FromStringAndSize(conv, convlen); | |
222 | + free(conv); | |
223 | + } | |
224 | + else { | |
225 | + ret = PyString_FromStringAndSize("", 0); | |
226 | + } | |
227 | + return ret; | |
228 | + } | |
229 | + break; | |
230 | + case JIS: | |
231 | + if (jistoeuc(len, s, &conv, &convlen)) { | |
232 | + if (convlen) { | |
233 | + ret = PyString_FromStringAndSize(conv, convlen); | |
234 | + free(conv); | |
235 | + } | |
236 | + else { | |
237 | + ret = PyString_FromStringAndSize("", 0); | |
238 | + } | |
239 | + return ret; | |
240 | + } | |
241 | + break; | |
242 | + case EUC: | |
243 | + case ASCII: | |
244 | + return PyString_FromStringAndSize(s, len); | |
245 | + default: | |
246 | + BADENCODING(enc); return NULL; | |
247 | + } | |
248 | + | |
249 | + return PyErr_NoMemory(); | |
250 | +} | |
251 | + | |
252 | + | |
253 | +#define TOSJIS_DOC "tosjis(s[, enc]) -> converted string\n\ | |
254 | +\tConvet string to SJIS encoding" | |
255 | + | |
256 | +static PyObject* | |
257 | +pykf_tosjis(PyObject* self, PyObject* args, PyObject *kwds) | |
258 | +{ | |
259 | + unsigned char *s, *conv; | |
260 | + int enc=UNKNOWN, len, convlen; | |
261 | + PyObject *ret; | |
262 | + int strict = check_strict; | |
263 | + | |
264 | + static char *kwlist[] = {"s", "enc", "strict", NULL}; | |
265 | + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:tosjis", kwlist, &s, &len, &enc, &strict)) | |
266 | + return NULL; | |
267 | + | |
268 | + | |
269 | + if (enc == UNKNOWN) { | |
270 | + enc = guess(len, s, strict); | |
271 | + if (strict && enc == ERROR) { | |
272 | + GUESSFAILED(); return NULL; | |
273 | + } | |
274 | + if (enc == UNKNOWN) | |
275 | + enc = default_enc; | |
276 | + if (enc == UNKNOWN) { | |
277 | + GUESSFAILED(); return NULL; | |
278 | + } | |
279 | + } | |
280 | + | |
281 | + switch (enc) { | |
282 | + case SJIS: | |
283 | + case ASCII: | |
284 | + return PyString_FromStringAndSize(s, len); | |
285 | + case JIS: | |
286 | + if (jistosjis(len, s, &conv, &convlen)) { | |
287 | + if (convlen) { | |
288 | + ret = PyString_FromStringAndSize(conv, convlen); | |
289 | + free(conv); | |
290 | + } | |
291 | + else { | |
292 | + ret = PyString_FromStringAndSize("", 0); | |
293 | + } | |
294 | + return ret; | |
295 | + } | |
296 | + break; | |
297 | + case EUC: | |
298 | + if (euctosjis(len, s, &conv, &convlen)) { | |
299 | + if (convlen) { | |
300 | + ret = PyString_FromStringAndSize(conv, convlen); | |
301 | + free(conv); | |
302 | + } | |
303 | + else { | |
304 | + ret = PyString_FromStringAndSize("", 0); | |
305 | + } | |
306 | + return ret; | |
307 | + } | |
308 | + break; | |
309 | + default: | |
310 | + BADENCODING(enc); return NULL; | |
311 | + } | |
312 | + | |
313 | + return PyErr_NoMemory(); | |
314 | +} | |
315 | + | |
316 | +#define TOHALF_DOC "tohalf(s[, enc]) -> converted string\n\ | |
317 | +\tConvet string to half width character" | |
318 | + | |
319 | +static PyObject* | |
320 | +pykf_tohalfkana(PyObject* self, PyObject* args, PyObject *kwds) | |
321 | +{ | |
322 | + unsigned char *s, *conv; | |
323 | + int enc=UNKNOWN, len, convlen; | |
324 | + PyObject *ret; | |
325 | + int strict = check_strict; | |
326 | + | |
327 | + static char *kwlist[] = {"s", "enc", "strict", NULL}; | |
328 | + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:tohalf", kwlist, &s, &len, &enc, &strict)) | |
329 | + return NULL; | |
330 | + | |
331 | + if (enc == UNKNOWN) { | |
332 | + enc = guess(len, s, strict); | |
333 | + if (strict && enc == ERROR) { | |
334 | + GUESSFAILED(); return NULL; | |
335 | + } | |
336 | + if (enc == UNKNOWN) | |
337 | + enc = default_enc; | |
338 | + if (enc == UNKNOWN) { | |
339 | + GUESSFAILED(); return NULL; | |
340 | + } | |
341 | + } | |
342 | + | |
343 | + switch (enc) { | |
344 | + case SJIS: | |
345 | + if (sjistohankana(len, s, &conv, &convlen)) { | |
346 | + if (convlen) { | |
347 | + ret = PyString_FromStringAndSize(conv, convlen); | |
348 | + free(conv); | |
349 | + } | |
350 | + else { | |
351 | + ret = PyString_FromStringAndSize("", 0); | |
352 | + } | |
353 | + return ret; | |
354 | + } | |
355 | + break; | |
356 | + case EUC: | |
357 | + if (euctohankana(len, s, &conv, &convlen)) { | |
358 | + if (convlen) { | |
359 | + ret = PyString_FromStringAndSize(conv, convlen); | |
360 | + free(conv); | |
361 | + } | |
362 | + else { | |
363 | + ret = PyString_FromStringAndSize("", 0); | |
364 | + } | |
365 | + return ret; | |
366 | + } | |
367 | + break; | |
368 | + default: | |
369 | + BADENCODING(enc); return NULL; | |
370 | + } | |
371 | + | |
372 | + return PyErr_NoMemory(); | |
373 | +} | |
374 | + | |
375 | + | |
376 | +#define TOFULL_DOC "tofull(s[, enc]) -> converted string\n\ | |
377 | +\tConvet string to full width character" | |
378 | + | |
379 | +static PyObject* | |
380 | +pykf_tofullkana(PyObject* self, PyObject* args, PyObject *kwds) | |
381 | +{ | |
382 | + unsigned char *s, *conv; | |
383 | + int enc=UNKNOWN, len, convlen; | |
384 | + int strict = check_strict; | |
385 | + PyObject *ret; | |
386 | + | |
387 | + static char *kwlist[] = {"s", "enc", "strict", NULL}; | |
388 | + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:tofull", kwlist, &s, &len, &enc, &strict)) | |
389 | + return NULL; | |
390 | + | |
391 | + | |
392 | + if (enc == UNKNOWN) { | |
393 | + enc = guess(len, s, strict); | |
394 | + if (strict && enc == ERROR) { | |
395 | + GUESSFAILED(); return NULL; | |
396 | + } | |
397 | + if (enc == UNKNOWN) | |
398 | + enc = default_enc; | |
399 | + if (enc == UNKNOWN) { | |
400 | + GUESSFAILED(); return NULL; | |
401 | + } | |
402 | + } | |
403 | + | |
404 | + switch (enc) { | |
405 | + case SJIS: | |
406 | + if (sjistofullkana(len, s, &conv, &convlen)) { | |
407 | + if (convlen) { | |
408 | + ret = PyString_FromStringAndSize(conv, convlen); | |
409 | + free(conv); | |
410 | + } | |
411 | + else { | |
412 | + ret = PyString_FromStringAndSize("", 0); | |
413 | + } | |
414 | + return ret; | |
415 | + } | |
416 | + break; | |
417 | + case EUC: | |
418 | + if (euctofullkana(len, s, &conv, &convlen)) { | |
419 | + if (convlen) { | |
420 | + ret = PyString_FromStringAndSize(conv, convlen); | |
421 | + free(conv); | |
422 | + } | |
423 | + else { | |
424 | + ret = PyString_FromStringAndSize("", 0); | |
425 | + } | |
426 | + return ret; | |
427 | + } | |
428 | + break; | |
429 | + default: | |
430 | + BADENCODING(enc); return NULL; | |
431 | + } | |
432 | + | |
433 | + return PyErr_NoMemory(); | |
434 | +} | |
435 | + | |
436 | + | |
437 | +#define SPLIT_DOC "tosjis(s[, enc]) -> list of chars\n\ | |
438 | +\tConvet string to list of chars" | |
439 | + | |
440 | +static PyObject* | |
441 | +pykf_split(PyObject* self, PyObject* args, PyObject *kwds) | |
442 | +{ | |
443 | + unsigned char *s; | |
444 | + int enc=UNKNOWN, len; | |
445 | + int pos; | |
446 | + PyObject *ret, *o; | |
447 | + int strict = check_strict; | |
448 | + enum {NORMAL, KANJI, HANKANA} mode = NORMAL; | |
449 | + | |
450 | + static char *kwlist[] = {"s", "enc", "strict", NULL}; | |
451 | + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|ii:split", kwlist, &s, &len, &enc, &strict)) | |
452 | + return NULL; | |
453 | + | |
454 | + if (enc == UNKNOWN) { | |
455 | + enc = guess(len, s, strict); | |
456 | + if (strict && enc == ERROR) { | |
457 | + GUESSFAILED(); return NULL; | |
458 | + } | |
459 | + if (enc == UNKNOWN) | |
460 | + enc = default_enc; | |
461 | + if (enc == UNKNOWN) { | |
462 | + GUESSFAILED(); return NULL; | |
463 | + } | |
464 | + } | |
465 | + | |
466 | + ret = PyList_New(0); | |
467 | + if (!ret) { | |
468 | + return NULL; | |
469 | + } | |
470 | + switch (enc) { | |
471 | + case SJIS: | |
472 | + for (pos = 0; pos < len; pos++) { | |
473 | + if (issjis1(s[pos]) && (pos + 1 < len) && issjis2(s[pos+1])) { | |
474 | + o = PyString_FromStringAndSize(s+pos, 2); | |
475 | + pos++; | |
476 | + } | |
477 | + else { | |
478 | + o = PyString_FromStringAndSize(s+pos, 1); | |
479 | + } | |
480 | + if (!o) { | |
481 | + Py_DECREF(ret); | |
482 | + return NULL; | |
483 | + } | |
484 | + if (-1 == PyList_Append(ret, o)) { | |
485 | + Py_DECREF(ret); | |
486 | + return NULL; | |
487 | + } | |
488 | + Py_DECREF(o); | |
489 | + } | |
490 | + return ret; | |
491 | + case ASCII: | |
492 | + for (pos = 0; pos < len; pos++) { | |
493 | + o = PyString_FromStringAndSize(s+pos, 1); | |
494 | + if (!o) { | |
495 | + Py_DECREF(ret); | |
496 | + return NULL; | |
497 | + } | |
498 | + if (-1 == PyList_Append(ret, o)) { | |
499 | + Py_DECREF(ret); | |
500 | + return NULL; | |
501 | + } | |
502 | + Py_DECREF(o); | |
503 | + } | |
504 | + return ret; | |
505 | + case JIS: | |
506 | + for (pos = 0; pos < len; pos++) { | |
507 | + | |
508 | + if ((pos + 2 < len) && | |
509 | + (!memcmp(s+pos, "\x1b$@", 3) || | |
510 | + !memcmp(s+pos, "\x1b$B", 3))) { | |
511 | + | |
512 | + mode = KANJI; | |
513 | + o = PyString_FromStringAndSize(s+pos, 3); | |
514 | + pos += 2; | |
515 | + } | |
516 | + else if ((pos + 3 < len) && !memcmp(s+pos, "\x1b$(O", 4)) { | |
517 | + mode = KANJI; | |
518 | + o = PyString_FromStringAndSize(s+pos, 3); | |
519 | + pos += 3; | |
520 | + } | |
521 | + else if ((pos + 2 < len) && | |
522 | + (!memcmp(s+pos, "\x1b(B", 3) || | |
523 | + !memcmp(s+pos, "\x1b(J", 3))) { | |
524 | + | |
525 | + mode = NORMAL; | |
526 | + o = PyString_FromStringAndSize(s+pos, 3); | |
527 | + pos += 2; | |
528 | + } | |
529 | + else if ((pos + 2 < len) && !memcmp(s+pos, "\x1b(I", 3)) { | |
530 | + mode = HANKANA; | |
531 | + o = PyString_FromStringAndSize(s+pos, 3); | |
532 | + pos += 2; | |
533 | + } | |
534 | + else if (s[pos] == '\x0e') { | |
535 | + mode = HANKANA; | |
536 | + o = PyString_FromStringAndSize(s+pos, 1); | |
537 | + } | |
538 | + else if (s[pos] == '\x0f') { | |
539 | + mode = NORMAL; | |
540 | + o = PyString_FromStringAndSize(s+pos, 1); | |
541 | + } | |
542 | + else if (mode == KANJI && isjis(s[pos]) && (pos+1 < len) && isjis(s[pos+1])) { | |
543 | + o = PyString_FromStringAndSize(s+pos, 2); | |
544 | + pos++; | |
545 | + } else if (mode == HANKANA && s[pos] >= 0x20 && s[pos] <= 0x5f) { | |
546 | + o = PyString_FromStringAndSize(s+pos, 1); | |
547 | + } else { | |
548 | + o = PyString_FromStringAndSize(s+pos, 1); | |
549 | + } | |
550 | + if (!o) { | |
551 | + Py_DECREF(ret); | |
552 | + return NULL; | |
553 | + } | |
554 | + if (-1 == PyList_Append(ret, o)) { | |
555 | + Py_DECREF(ret); | |
556 | + return NULL; | |
557 | + } | |
558 | + Py_DECREF(o); | |
559 | + } | |
560 | + return ret; | |
561 | + case EUC: | |
562 | + for (pos = 0; pos < len; pos++) { | |
563 | + if (iseuc(s[pos]) && (pos + 1 < len) && iseuc(s[pos+1])) { | |
564 | + o = PyString_FromStringAndSize(s+pos, 2); | |
565 | + pos++; | |
566 | + } else if ((s[pos] == 0x8e) && (pos + 1 < len) && ishankana(s[pos+1])) { | |
567 | + o = PyString_FromStringAndSize(s+pos, 2); | |
568 | + pos++; | |
569 | + } | |
570 | + else { | |
571 | + o = PyString_FromStringAndSize(s+pos, 1); | |
572 | + } | |
573 | + if (!o) { | |
574 | + Py_DECREF(ret); | |
575 | + return NULL; | |
576 | + } | |
577 | + if (-1 == PyList_Append(ret, o)) { | |
578 | + Py_DECREF(ret); | |
579 | + return NULL; | |
580 | + } | |
581 | + Py_DECREF(o); | |
582 | + } | |
583 | + return ret; | |
584 | + default: | |
585 | + BADENCODING(enc); return NULL; | |
586 | + } | |
587 | + | |
588 | + return PyErr_NoMemory(); | |
589 | +} | |
590 | + | |
591 | + | |
592 | + | |
593 | + | |
594 | +static PyMethodDef pykf_methods[] = { | |
595 | + {"setdefault", (PyCFunction)pykf_setdefault, METH_VARARGS, SETDEFAULT_DOC}, | |
596 | + {"getdefault", (PyCFunction)pykf_getdefault, METH_VARARGS, GETDEFAULT_DOC}, | |
597 | + {"guess", (PyCFunction)pykf_guess, METH_VARARGS, GUESS_DOC}, | |
598 | + {"tojis", (PyCFunction)pykf_tojis, METH_VARARGS|METH_KEYWORDS, TOJIS_DOC}, | |
599 | + {"tosjis", (PyCFunction)pykf_tosjis, METH_VARARGS|METH_KEYWORDS, TOSJIS_DOC}, | |
600 | + {"toeuc", (PyCFunction)pykf_toeuc, METH_VARARGS|METH_KEYWORDS, TOEUC_DOC}, | |
601 | + {"tohalf_kana", (PyCFunction)pykf_tohalfkana, METH_VARARGS|METH_KEYWORDS, TOHALF_DOC}, | |
602 | + {"tofull_kana", (PyCFunction)pykf_tofullkana, METH_VARARGS|METH_KEYWORDS, TOFULL_DOC}, | |
603 | + {"split", (PyCFunction)pykf_split, METH_VARARGS|METH_KEYWORDS, SPLIT_DOC}, | |
604 | + {"setstrict", (PyCFunction)pykf_setstrict, METH_VARARGS|METH_KEYWORDS, SETSTRICT_DOC}, | |
605 | + {"getstrict", (PyCFunction)pykf_getstrict, METH_VARARGS|METH_KEYWORDS, GETSTRICT_DOC}, | |
606 | + {NULL, NULL} /* sentinel */ | |
607 | +}; | |
608 | + | |
609 | + | |
610 | +static void _setint(PyObject* dict, char *name, int value) | |
611 | +{ | |
612 | + PyObject* v; | |
613 | + v = PyInt_FromLong((long) value); | |
614 | + PyDict_SetItemString(dict, name, v); | |
615 | + Py_XDECREF(v); | |
616 | +} | |
617 | + | |
618 | + | |
619 | +DL_EXPORT(void) initpykf(void) | |
620 | +{ | |
621 | + PyObject *m, *d; | |
622 | + int one = 1; | |
623 | + int is_little_endian = (int)*(char*)&one; | |
624 | + | |
625 | + m = Py_InitModule("pykf", pykf_methods); | |
626 | + d = PyModule_GetDict(m); | |
627 | + | |
628 | + EncodingError = PyErr_NewException("pykf.IllegalEncoding", NULL, NULL); | |
629 | + PyDict_SetItemString(d, "IllegalEncoding", EncodingError); | |
630 | + | |
631 | + _setint(d, "ERROR", ERROR); | |
632 | + _setint(d, "UNKNOWN", UNKNOWN); | |
633 | + _setint(d, "ASCII", ASCII); | |
634 | + _setint(d, "SJIS", SJIS); | |
635 | + _setint(d, "EUC", EUC); | |
636 | + _setint(d, "JIS", JIS); | |
637 | + _setint(d, "UTF8", UTF8); | |
638 | + _setint(d, "UTF16_LE", UTF16_LE); | |
639 | + _setint(d, "UTF16_BE", UTF16_BE); | |
640 | + if (is_little_endian) { | |
641 | + _setint(d, "UTF16", UTF16_LE); | |
642 | + } | |
643 | + else { | |
644 | + _setint(d, "UTF16", UTF16_BE); | |
645 | + } | |
646 | + | |
647 | +} | |
648 | + | |
649 | + | |
650 | + |
@@ -0,0 +1,49 @@ | ||
1 | + | |
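2 | +/* JIS X 0213 char table: flat array of value pairs in ascending order of the first value, terminated by 0xffff, 0xffff. Presumably each pair is (first code, number of consecutive codes) -- confirm against the lookup code. */ | |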
3 | + | |
4 | +unsigned int tbl_jis0213[] = { | |
5 | + | |
6 | + 0x222f, 11, | |
7 | + 0x2242, 8, | |
8 | + 0x2251, 11, | |
9 | + 0x226b, 7, | |
10 | + 0x227a, 4, | |
11 | + 0x2321, 15, | |
12 | + 0x233a, 7, | |
13 | + 0x235b, 6, | |
14 | + 0x237b, 4, | |
15 | + 0x2474, 8, | |
16 | + 0x2577, 8, | |
17 | + 0x2639, 8, | |
18 | + 0x2659, 38, | |
19 | + 0x2742, 15, | |
20 | + 0x2772, 13, | |
21 | + 0x2841, 30, | |
22 | + 0x2867, 22, | |
23 | + 0x2921, 94, | |
24 | + 0x2a21, 94, | |
25 | + 0x2b21, 94, | |
26 | + 0x2c21, 83, | |
27 | + 0x2c7d, 2, | |
28 | + 0x2d21, 55, | |
29 | + 0x2d5f, 17, | |
30 | + 0x2d73, 1, | |
31 | + 0x2d78, 2, | |
32 | + 0x2d7d, 2, | |
33 | + 0x2e22, 93, | |
34 | + 0x2f21, 93, | |
35 | + 0x4f55, 41, | |
36 | + 0x7428, 87, | |
37 | + 0x7521, 94, | |
38 | + 0x7621, 94, | |
39 | + 0x7721, 94, | |
40 | + 0x7821, 94, | |
41 | + 0x7921, 94, | |
42 | + 0x7a21, 94, | |
43 | + 0x7b21, 94, | |
44 | + 0x7c21, 94, | |
45 | + 0x7d21, 94, | |
46 | + | |
47 | + 0xffff, 0xffff | |
48 | +}; | |
49 | + |
@@ -0,0 +1,808 @@ | ||
1 | + | |
2 | +/* SJIS -> JIS conversion table: flat array of (SJIS code, JIS code) pairs in ascending SJIS order, terminated by 0xffff, 0xffff */ | |
3 | + | |
4 | +unsigned int tbl_sjis2jis[] = { | |
5 | + | |
6 | + 0x8790, 0x2262, | |
7 | + 0x8791, 0x2261, | |
8 | + 0x8792, 0x2269, | |
9 | + 0x8795, 0x2265, | |
10 | + 0x8796, 0x225d, | |
11 | + 0x8797, 0x225c, | |
12 | + 0x879a, 0x2268, | |
13 | + 0x879b, 0x2241, | |
14 | + 0x879c, 0x2240, | |
15 | + 0xeef9, 0x224c, | |
16 | + 0xfa40, 0x7c71, | |
17 | + 0xfa41, 0x7c72, | |
18 | + 0xfa42, 0x7c73, | |
19 | + 0xfa43, 0x7c74, | |
20 | + 0xfa44, 0x7c75, | |
21 | + 0xfa45, 0x7c76, | |
22 | + 0xfa46, 0x7c77, | |
23 | + 0xfa47, 0x7c78, | |
24 | + 0xfa48, 0x7c79, | |
25 | + 0xfa49, 0x7c7a, | |
26 | + 0xfa4a, 0x2d35, | |
27 | + 0xfa4b, 0x2d36, | |
28 | + 0xfa4c, 0x2d37, | |
29 | + 0xfa4d, 0x2d38, | |
30 | + 0xfa4e, 0x2d39, | |
31 | + 0xfa4f, 0x2d3a, | |
32 | + 0xfa50, 0x2d3b, | |
33 | + 0xfa51, 0x2d3c, | |
34 | + 0xfa52, 0x2d3d, | |
35 | + 0xfa53, 0x2d3e, | |
36 | + 0xfa54, 0x224c, | |
37 | + 0xfa55, 0x7c7c, | |
38 | + 0xfa56, 0x7c7d, | |
39 | + 0xfa57, 0x7c7e, | |
40 | + 0xfa58, 0x2d6a, | |
41 | + 0xfa59, 0x2d62, | |
42 | + 0xfa5a, 0x2d64, | |
43 | + 0xfa5b, 0x2268, | |
44 | + 0xfa5c, 0x7921, | |
45 | + 0xfa5d, 0x7922, | |
46 | + 0xfa5e, 0x7923, | |
47 | + 0xfa5f, 0x7924, | |
48 | + 0xfa60, 0x7925, | |
49 | + 0xfa61, 0x7926, | |
50 | + 0xfa62, 0x7927, | |
51 | + 0xfa63, 0x7928, | |
52 | + 0xfa64, 0x7929, | |
53 | + 0xfa65, 0x792a, | |
54 | + 0xfa66, 0x792b, | |
55 | + 0xfa67, 0x792c, | |
56 | + 0xfa68, 0x792d, | |
57 | + 0xfa69, 0x792e, | |
58 | + 0xfa6a, 0x792f, | |
59 | + 0xfa6b, 0x7930, | |
60 | + 0xfa6c, 0x7931, | |
61 | + 0xfa6d, 0x7932, | |
62 | + 0xfa6e, 0x7933, | |
63 | + 0xfa6f, 0x7934, | |
64 | + 0xfa70, 0x7935, | |
65 | + 0xfa71, 0x7936, | |
66 | + 0xfa72, 0x7937, | |
67 | + 0xfa73, 0x7938, | |
68 | + 0xfa74, 0x7939, | |
69 | + 0xfa75, 0x793a, | |
70 | + 0xfa76, 0x793b, | |
71 | + 0xfa77, 0x793c, | |
72 | + 0xfa78, 0x793d, | |
73 | + 0xfa79, 0x793e, | |
74 | + 0xfa7a, 0x793f, | |
75 | + 0xfa7b, 0x7940, | |
76 | + 0xfa7c, 0x7941, | |
77 | + 0xfa7d, 0x7942, | |
78 | + 0xfa7e, 0x7943, | |
79 | + 0xfa80, 0x7944, | |
80 | + 0xfa81, 0x7945, | |
81 | + 0xfa82, 0x7946, | |
82 | + 0xfa83, 0x7947, | |
83 | + 0xfa84, 0x7948, | |
84 | + 0xfa85, 0x7949, | |
85 | + 0xfa86, 0x794a, | |
86 | + 0xfa87, 0x794b, | |
87 | + 0xfa88, 0x794c, | |
88 | + 0xfa89, 0x794d, | |
89 | + 0xfa8a, 0x794e, | |
90 | + 0xfa8b, 0x794f, | |
91 | + 0xfa8c, 0x7950, | |
92 | + 0xfa8d, 0x7951, | |
93 | + 0xfa8e, 0x7952, | |
94 | + 0xfa8f, 0x7953, | |
95 | + 0xfa90, 0x7954, | |
96 | + 0xfa91, 0x7955, | |
97 | + 0xfa92, 0x7956, | |
98 | + 0xfa93, 0x7957, | |
99 | + 0xfa94, 0x7958, | |
100 | + 0xfa95, 0x7959, | |
101 | + 0xfa96, 0x795a, | |
102 | + 0xfa97, 0x795b, | |
103 | + 0xfa98, 0x795c, | |
104 | + 0xfa99, 0x795d, | |
105 | + 0xfa9a, 0x795e, | |
106 | + 0xfa9b, 0x795f, | |
107 | + 0xfa9c, 0x7960, | |
108 | + 0xfa9d, 0x7961, | |
109 | + 0xfa9e, 0x7962, | |
110 | + 0xfa9f, 0x7963, | |
111 | + 0xfaa0, 0x7964, | |
112 | + 0xfaa1, 0x7965, | |
113 | + 0xfaa2, 0x7966, | |
114 | + 0xfaa3, 0x7967, | |
115 | + 0xfaa4, 0x7968, | |
116 | + 0xfaa5, 0x7969, | |
117 | + 0xfaa6, 0x796a, | |
118 | + 0xfaa7, 0x796b, | |
119 | + 0xfaa8, 0x796c, | |
120 | + 0xfaa9, 0x796d, | |
121 | + 0xfaaa, 0x796e, | |
122 | + 0xfaab, 0x796f, | |
123 | + 0xfaac, 0x7970, | |
124 | + 0xfaad, 0x7971, | |
125 | + 0xfaae, 0x7972, | |
126 | + 0xfaaf, 0x7973, | |
127 | + 0xfab0, 0x7974, | |
128 | + 0xfab1, 0x7975, | |
129 | + 0xfab2, 0x7976, | |
130 | + 0xfab3, 0x7977, | |
131 | + 0xfab4, 0x7978, | |
132 | + 0xfab5, 0x7979, | |
133 | + 0xfab6, 0x797a, | |
134 | + 0xfab7, 0x797b, | |
135 | + 0xfab8, 0x797c, | |
136 | + 0xfab9, 0x797d, | |
137 | + 0xfaba, 0x797e, | |
138 | + 0xfabb, 0x7a21, | |
139 | + 0xfabc, 0x7a22, | |
140 | + 0xfabd, 0x7a23, | |
141 | + 0xfabe, 0x7a24, | |
142 | + 0xfabf, 0x7a25, | |
143 | + 0xfac0, 0x7a26, | |
144 | + 0xfac1, 0x7a27, | |
145 | + 0xfac2, 0x7a28, | |
146 | + 0xfac3, 0x7a29, | |
147 | + 0xfac4, 0x7a2a, | |
148 | + 0xfac5, 0x7a2b, | |
149 | + 0xfac6, 0x7a2c, | |
150 | + 0xfac7, 0x7a2d, | |
151 | + 0xfac8, 0x7a2e, | |
152 | + 0xfac9, 0x7a2f, | |
153 | + 0xfaca, 0x7a30, | |
154 | + 0xfacb, 0x7a31, | |
155 | + 0xfacc, 0x7a32, | |
156 | + 0xfacd, 0x7a33, | |
157 | + 0xface, 0x7a34, | |
158 | + 0xfacf, 0x7a35, | |
159 | + 0xfad0, 0x7a36, | |
160 | + 0xfad1, 0x7a37, | |
161 | + 0xfad2, 0x7a38, | |
162 | + 0xfad3, 0x7a39, | |
163 | + 0xfad4, 0x7a3a, | |
164 | + 0xfad5, 0x7a3b, | |
165 | + 0xfad6, 0x7a3c, | |
166 | + 0xfad7, 0x7a3d, | |
167 | + 0xfad8, 0x7a3e, | |
168 | + 0xfad9, 0x7a3f, | |
169 | + 0xfada, 0x7a40, | |
170 | + 0xfadb, 0x7a41, | |
171 | + 0xfadc, 0x7a42, | |
172 | + 0xfadd, 0x7a43, | |
173 | + 0xfade, 0x7a44, | |
174 | + 0xfadf, 0x7a45, | |
175 | + 0xfae0, 0x7a46, | |
176 | + 0xfae1, 0x7a47, | |
177 | + 0xfae2, 0x7a48, | |
178 | + 0xfae3, 0x7a49, | |
179 | + 0xfae4, 0x7a4a, | |
180 | + 0xfae5, 0x7a4b, | |
181 | + 0xfae6, 0x7a4c, | |
182 | + 0xfae7, 0x7a4d, | |
183 | + 0xfae8, 0x7a4e, | |
184 | + 0xfae9, 0x7a4f, | |
185 | + 0xfaea, 0x7a50, | |
186 | + 0xfaeb, 0x7a51, | |
187 | + 0xfaec, 0x7a52, | |
188 | + 0xfaed, 0x7a53, | |
189 | + 0xfaee, 0x7a54, | |
190 | + 0xfaef, 0x7a55, | |
191 | + 0xfaf0, 0x7a56, | |
192 | + 0xfaf1, 0x7a57, | |
193 | + 0xfaf2, 0x7a58, | |
194 | + 0xfaf3, 0x7a59, | |
195 | + 0xfaf4, 0x7a5a, | |
196 | + 0xfaf5, 0x7a5b, | |
197 | + 0xfaf6, 0x7a5c, | |
198 | + 0xfaf7, 0x7a5d, | |
199 | + 0xfaf8, 0x7a5e, | |
200 | + 0xfaf9, 0x7a5f, | |
201 | + 0xfafa, 0x7a60, | |
202 | + 0xfafb, 0x7a61, | |
203 | + 0xfafc, 0x7a62, | |
204 | + 0xfb40, 0x7a63, | |
205 | + 0xfb41, 0x7a64, | |
206 | + 0xfb42, 0x7a65, | |
207 | + 0xfb43, 0x7a66, | |
208 | + 0xfb44, 0x7a67, | |
209 | + 0xfb45, 0x7a68, | |
210 | + 0xfb46, 0x7a69, | |
211 | + 0xfb47, 0x7a6a, | |
212 | + 0xfb48, 0x7a6b, | |
213 | + 0xfb49, 0x7a6c, | |
214 | + 0xfb4a, 0x7a6d, | |
215 | + 0xfb4b, 0x7a6e, | |
216 | + 0xfb4c, 0x7a6f, | |
217 | + 0xfb4d, 0x7a70, | |
218 | + 0xfb4e, 0x7a71, | |
219 | + 0xfb4f, 0x7a72, | |
220 | + 0xfb50, 0x7a73, | |
221 | + 0xfb51, 0x7a74, | |
222 | + 0xfb52, 0x7a75, | |
223 | + 0xfb53, 0x7a76, | |
224 | + 0xfb54, 0x7a77, | |
225 | + 0xfb55, 0x7a78, | |
226 | + 0xfb56, 0x7a79, | |
227 | + 0xfb57, 0x7a7a, | |
228 | + 0xfb58, 0x7a7b, | |
229 | + 0xfb59, 0x7a7c, | |
230 | + 0xfb5a, 0x7a7d, | |
231 | + 0xfb5b, 0x7a7e, | |
232 | + 0xfb5c, 0x7b21, | |
233 | + 0xfb5d, 0x7b22, | |
234 | + 0xfb5e, 0x7b23, | |
235 | + 0xfb5f, 0x7b24, | |
236 | + 0xfb60, 0x7b25, | |
237 | + 0xfb61, 0x7b26, | |
238 | + 0xfb62, 0x7b27, | |
239 | + 0xfb63, 0x7b28, | |
240 | + 0xfb64, 0x7b29, | |
241 | + 0xfb65, 0x7b2a, | |
242 | + 0xfb66, 0x7b2b, | |
243 | + 0xfb67, 0x7b2c, | |
244 | + 0xfb68, 0x7b2d, | |
245 | + 0xfb69, 0x7b2e, | |
246 | + 0xfb6a, 0x7b2f, | |
247 | + 0xfb6b, 0x7b30, | |
248 | + 0xfb6c, 0x7b31, | |
249 | + 0xfb6d, 0x7b32, | |
250 | + 0xfb6e, 0x7b33, | |
251 | + 0xfb6f, 0x7b34, | |
252 | + 0xfb70, 0x7b35, | |
253 | + 0xfb71, 0x7b36, | |
254 | + 0xfb72, 0x7b37, | |
255 | + 0xfb73, 0x7b38, | |
256 | + 0xfb74, 0x7b39, | |
257 | + 0xfb75, 0x7b3a, | |
258 | + 0xfb76, 0x7b3b, | |
259 | + 0xfb77, 0x7b3c, | |
260 | + 0xfb78, 0x7b3d, | |
261 | + 0xfb79, 0x7b3e, | |
262 | + 0xfb7a, 0x7b3f, | |
263 | + 0xfb7b, 0x7b40, | |
264 | + 0xfb7c, 0x7b41, | |
265 | + 0xfb7d, 0x7b42, | |
266 | + 0xfb7e, 0x7b43, | |
267 | + 0xfb80, 0x7b44, | |
268 | + 0xfb81, 0x7b45, | |
269 | + 0xfb82, 0x7b46, | |
270 | + 0xfb83, 0x7b47, | |
271 | + 0xfb84, 0x7b48, | |
272 | + 0xfb85, 0x7b49, | |
273 | + 0xfb86, 0x7b4a, | |
274 | + 0xfb87, 0x7b4b, | |
275 | + 0xfb88, 0x7b4c, | |
276 | + 0xfb89, 0x7b4d, | |
277 | + 0xfb8a, 0x7b4e, | |
278 | + 0xfb8b, 0x7b4f, | |
279 | + 0xfb8c, 0x7b50, | |
280 | + 0xfb8d, 0x7b51, | |
281 | + 0xfb8e, 0x7b52, | |
282 | + 0xfb8f, 0x7b53, | |
283 | + 0xfb90, 0x7b54, | |
284 | + 0xfb91, 0x7b55, | |
285 | + 0xfb92, 0x7b56, | |
286 | + 0xfb93, 0x7b57, | |
287 | + 0xfb94, 0x7b58, | |
288 | + 0xfb95, 0x7b59, | |
289 | + 0xfb96, 0x7b5a, | |
290 | + 0xfb97, 0x7b5b, | |
291 | + 0xfb98, 0x7b5c, | |
292 | + 0xfb99, 0x7b5d, | |
293 | + 0xfb9a, 0x7b5e, | |
294 | + 0xfb9b, 0x7b5f, | |
295 | + 0xfb9c, 0x7b60, | |
296 | + 0xfb9d, 0x7b61, | |
297 | + 0xfb9e, 0x7b62, | |
298 | + 0xfb9f, 0x7b63, | |
299 | + 0xfba0, 0x7b64, | |
300 | + 0xfba1, 0x7b65, | |
301 | + 0xfba2, 0x7b66, | |
302 | + 0xfba3, 0x7b67, | |
303 | + 0xfba4, 0x7b68, | |
304 | + 0xfba5, 0x7b69, | |
305 | + 0xfba6, 0x7b6a, | |
306 | + 0xfba7, 0x7b6b, | |
307 | + 0xfba8, 0x7b6c, | |
308 | + 0xfba9, 0x7b6d, | |
309 | + 0xfbaa, 0x7b6e, | |
310 | + 0xfbab, 0x7b6f, | |
311 | + 0xfbac, 0x7b70, | |
312 | + 0xfbad, 0x7b71, | |
313 | + 0xfbae, 0x7b72, | |
314 | + 0xfbaf, 0x7b73, | |
315 | + 0xfbb0, 0x7b74, | |
316 | + 0xfbb1, 0x7b75, | |
317 | + 0xfbb2, 0x7b76, | |
318 | + 0xfbb3, 0x7b77, | |
319 | + 0xfbb4, 0x7b78, | |
320 | + 0xfbb5, 0x7b79, | |
321 | + 0xfbb6, 0x7b7a, | |
322 | + 0xfbb7, 0x7b7b, | |
323 | + 0xfbb8, 0x7b7c, | |
324 | + 0xfbb9, 0x7b7d, | |
325 | + 0xfbba, 0x7b7e, | |
326 | + 0xfbbb, 0x7c21, | |
327 | + 0xfbbc, 0x7c22, | |
328 | + 0xfbbd, 0x7c23, | |
329 | + 0xfbbe, 0x7c24, | |
330 | + 0xfbbf, 0x7c25, | |
331 | + 0xfbc0, 0x7c26, | |
332 | + 0xfbc1, 0x7c27, | |
333 | + 0xfbc2, 0x7c28, | |
334 | + 0xfbc3, 0x7c29, | |
335 | + 0xfbc4, 0x7c2a, | |
336 | + 0xfbc5, 0x7c2b, | |
337 | + 0xfbc6, 0x7c2c, | |
338 | + 0xfbc7, 0x7c2d, | |
339 | + 0xfbc8, 0x7c2e, | |
340 | + 0xfbc9, 0x7c2f, | |
341 | + 0xfbca, 0x7c30, | |
342 | + 0xfbcb, 0x7c31, | |
343 | + 0xfbcc, 0x7c32, | |
344 | + 0xfbcd, 0x7c33, | |
345 | + 0xfbce, 0x7c34, | |
346 | + 0xfbcf, 0x7c35, | |
347 | + 0xfbd0, 0x7c36, | |
348 | + 0xfbd1, 0x7c37, | |
349 | + 0xfbd2, 0x7c38, | |
350 | + 0xfbd3, 0x7c39, | |
351 | + 0xfbd4, 0x7c3a, | |
352 | + 0xfbd5, 0x7c3b, | |
353 | + 0xfbd6, 0x7c3c, | |
354 | + 0xfbd7, 0x7c3d, | |
355 | + 0xfbd8, 0x7c3e, | |
356 | + 0xfbd9, 0x7c3f, | |
357 | + 0xfbda, 0x7c40, | |
358 | + 0xfbdb, 0x7c41, | |
359 | + 0xfbdc, 0x7c42, | |
360 | + 0xfbdd, 0x7c43, | |
361 | + 0xfbde, 0x7c44, | |
362 | + 0xfbdf, 0x7c45, | |
363 | + 0xfbe0, 0x7c46, | |
364 | + 0xfbe1, 0x7c47, | |
365 | + 0xfbe2, 0x7c48, | |
366 | + 0xfbe3, 0x7c49, | |
367 | + 0xfbe4, 0x7c4a, | |
368 | + 0xfbe5, 0x7c4b, | |
369 | + 0xfbe6, 0x7c4c, | |
370 | + 0xfbe7, 0x7c4d, | |
371 | + 0xfbe8, 0x7c4e, | |
372 | + 0xfbe9, 0x7c4f, | |
373 | + 0xfbea, 0x7c50, | |
374 | + 0xfbeb, 0x7c51, | |
375 | + 0xfbec, 0x7c52, | |
376 | + 0xfbed, 0x7c53, | |
377 | + 0xfbee, 0x7c54, | |
378 | + 0xfbef, 0x7c55, | |
379 | + 0xfbf0, 0x7c56, | |
380 | + 0xfbf1, 0x7c57, | |
381 | + 0xfbf2, 0x7c58, | |
382 | + 0xfbf3, 0x7c59, | |
383 | + 0xfbf4, 0x7c5a, | |
384 | + 0xfbf5, 0x7c5b, | |
385 | + 0xfbf6, 0x7c5c, | |
386 | + 0xfbf7, 0x7c5d, | |
387 | + 0xfbf8, 0x7c5e, | |
388 | + 0xfbf9, 0x7c5f, | |
389 | + 0xfbfa, 0x7c60, | |
390 | + 0xfbfb, 0x7c61, | |
391 | + 0xfbfc, 0x7c62, | |
392 | + 0xfc40, 0x7c63, | |
393 | + 0xfc41, 0x7c64, | |
394 | + 0xfc42, 0x7c65, | |
395 | + 0xfc43, 0x7c66, | |
396 | + 0xfc44, 0x7c67, | |
397 | + 0xfc45, 0x7c68, | |
398 | + 0xfc46, 0x7c69, | |
399 | + 0xfc47, 0x7c6a, | |
400 | + 0xfc48, 0x7c6b, | |
401 | + 0xfc49, 0x7c6c, | |
402 | + 0xfc4a, 0x7c6d, | |
403 | + 0xfc4b, 0x7c6e, | |
404 | + 0xffff, 0xffff, | |
405 | +}; | |
406 | + | |
407 | +/* JIS -> SJIS conversion table: flat array of (JIS code, SJIS code) pairs in ascending JIS order, terminated by 0xffff, 0xffff */ | |
408 | + | |
409 | +unsigned int tbl_jis2sjis[] = { | |
410 | + | |
411 | + 0x2240, 0x879c, | |
412 | + 0x2241, 0x879b, | |
413 | + 0x224c, 0xeef9, | |
414 | + 0x225c, 0x8797, | |
415 | + 0x225d, 0x8796, | |
416 | + 0x2261, 0x8791, | |
417 | + 0x2262, 0x8790, | |
418 | + 0x2265, 0x8795, | |
419 | + 0x2268, 0x879a, | |
420 | + 0x2269, 0x8792, | |
421 | + 0x2d35, 0x8754, | |
422 | + 0x2d36, 0x8755, | |
423 | + 0x2d37, 0x8756, | |
424 | + 0x2d38, 0x8757, | |
425 | + 0x2d39, 0x8758, | |
426 | + 0x2d3a, 0x8759, | |
427 | + 0x2d3b, 0x875a, | |
428 | + 0x2d3c, 0x875b, | |
429 | + 0x2d3d, 0x875c, | |
430 | + 0x2d3e, 0x875d, | |
431 | + 0x2d62, 0x8782, | |
432 | + 0x2d64, 0x8784, | |
433 | + 0x2d6a, 0x878a, | |
434 | + 0x7921, 0xed40, | |
435 | + 0x7922, 0xed41, | |
436 | + 0x7923, 0xed42, | |
437 | + 0x7924, 0xed43, | |
438 | + 0x7925, 0xed44, | |
439 | + 0x7926, 0xed45, | |
440 | + 0x7927, 0xed46, | |
441 | + 0x7928, 0xed47, | |
442 | + 0x7929, 0xed48, | |
443 | + 0x792a, 0xed49, | |
444 | + 0x792b, 0xed4a, | |
445 | + 0x792c, 0xed4b, | |
446 | + 0x792d, 0xed4c, | |
447 | + 0x792e, 0xed4d, | |
448 | + 0x792f, 0xed4e, | |
449 | + 0x7930, 0xed4f, | |
450 | + 0x7931, 0xed50, | |
451 | + 0x7932, 0xed51, | |
452 | + 0x7933, 0xed52, | |
453 | + 0x7934, 0xed53, | |
454 | + 0x7935, 0xed54, | |
455 | + 0x7936, 0xed55, | |
456 | + 0x7937, 0xed56, | |
457 | + 0x7938, 0xed57, | |
458 | + 0x7939, 0xed58, | |
459 | + 0x793a, 0xed59, | |
460 | + 0x793b, 0xed5a, | |
461 | + 0x793c, 0xed5b, | |
462 | + 0x793d, 0xed5c, | |
463 | + 0x793e, 0xed5d, | |
464 | + 0x793f, 0xed5e, | |
465 | + 0x7940, 0xed5f, | |
466 | + 0x7941, 0xed60, | |
467 | + 0x7942, 0xed61, | |
468 | + 0x7943, 0xed62, | |
469 | + 0x7944, 0xed63, | |
470 | + 0x7945, 0xed64, | |
471 | + 0x7946, 0xed65, | |
472 | + 0x7947, 0xed66, | |
473 | + 0x7948, 0xed67, | |
474 | + 0x7949, 0xed68, | |
475 | + 0x794a, 0xed69, | |
476 | + 0x794b, 0xed6a, | |
477 | + 0x794c, 0xed6b, | |
478 | + 0x794d, 0xed6c, | |
479 | + 0x794e, 0xed6d, | |
480 | + 0x794f, 0xed6e, | |
481 | + 0x7950, 0xed6f, | |
482 | + 0x7951, 0xed70, | |
483 | + 0x7952, 0xed71, | |
484 | + 0x7953, 0xed72, | |
485 | + 0x7954, 0xed73, | |
486 | + 0x7955, 0xed74, | |
487 | + 0x7956, 0xed75, | |
488 | + 0x7957, 0xed76, | |
489 | + 0x7958, 0xed77, | |
490 | + 0x7959, 0xed78, | |
491 | + 0x795a, 0xed79, | |
492 | + 0x795b, 0xed7a, | |
493 | + 0x795c, 0xed7b, | |
494 | + 0x795d, 0xed7c, | |
495 | + 0x795e, 0xed7d, | |
496 | + 0x795f, 0xed7e, | |
497 | + 0x7960, 0xed80, | |
498 | + 0x7961, 0xed81, | |
499 | + 0x7962, 0xed82, | |
500 | + 0x7963, 0xed83, | |
501 | + 0x7964, 0xed84, | |
502 | + 0x7965, 0xed85, | |
503 | + 0x7966, 0xed86, | |
504 | + 0x7967, 0xed87, | |
505 | + 0x7968, 0xed88, | |
506 | + 0x7969, 0xed89, | |
507 | + 0x796a, 0xed8a, | |
508 | + 0x796b, 0xed8b, | |
509 | + 0x796c, 0xed8c, | |
510 | + 0x796d, 0xed8d, | |
511 | + 0x796e, 0xed8e, | |
512 | + 0x796f, 0xed8f, | |
513 | + 0x7970, 0xed90, | |
514 | + 0x7971, 0xed91, | |
515 | + 0x7972, 0xed92, | |
516 | + 0x7973, 0xed93, | |
517 | + 0x7974, 0xed94, | |
518 | + 0x7975, 0xed95, | |
519 | + 0x7976, 0xed96, | |
520 | + 0x7977, 0xed97, | |
521 | + 0x7978, 0xed98, | |
522 | + 0x7979, 0xed99, | |
523 | + 0x797a, 0xed9a, | |
524 | + 0x797b, 0xed9b, | |
525 | + 0x797c, 0xed9c, | |
526 | + 0x797d, 0xed9d, | |
527 | + 0x797e, 0xed9e, | |
528 | + 0x7a21, 0xed9f, | |
529 | + 0x7a22, 0xeda0, | |
530 | + 0x7a23, 0xeda1, | |
531 | + 0x7a24, 0xeda2, | |
532 | + 0x7a25, 0xeda3, | |
533 | + 0x7a26, 0xeda4, | |
534 | + 0x7a27, 0xeda5, | |
535 | + 0x7a28, 0xeda6, | |
536 | + 0x7a29, 0xeda7, | |
537 | + 0x7a2a, 0xeda8, | |
538 | + 0x7a2b, 0xeda9, | |
539 | + 0x7a2c, 0xedaa, | |
540 | + 0x7a2d, 0xedab, | |
541 | + 0x7a2e, 0xedac, | |
542 | + 0x7a2f, 0xedad, | |
543 | + 0x7a30, 0xedae, | |
544 | + 0x7a31, 0xedaf, | |
545 | + 0x7a32, 0xedb0, | |
546 | + 0x7a33, 0xedb1, | |
547 | + 0x7a34, 0xedb2, | |
548 | + 0x7a35, 0xedb3, | |
549 | + 0x7a36, 0xedb4, | |
550 | + 0x7a37, 0xedb5, | |
551 | + 0x7a38, 0xedb6, | |
552 | + 0x7a39, 0xedb7, | |
553 | + 0x7a3a, 0xedb8, | |
554 | + 0x7a3b, 0xedb9, | |
555 | + 0x7a3c, 0xedba, | |
556 | + 0x7a3d, 0xedbb, | |
557 | + 0x7a3e, 0xedbc, | |
558 | + 0x7a3f, 0xedbd, | |
559 | + 0x7a40, 0xedbe, | |
560 | + 0x7a41, 0xedbf, | |
561 | + 0x7a42, 0xedc0, | |
562 | + 0x7a43, 0xedc1, | |
563 | + 0x7a44, 0xedc2, | |
564 | + 0x7a45, 0xedc3, | |
565 | + 0x7a46, 0xedc4, | |
566 | + 0x7a47, 0xedc5, | |
567 | + 0x7a48, 0xedc6, | |
568 | + 0x7a49, 0xedc7, | |
569 | + 0x7a4a, 0xedc8, | |
570 | + 0x7a4b, 0xedc9, | |
571 | + 0x7a4c, 0xedca, | |
572 | + 0x7a4d, 0xedcb, | |
573 | + 0x7a4e, 0xedcc, | |
574 | + 0x7a4f, 0xedcd, | |
575 | + 0x7a50, 0xedce, | |
576 | + 0x7a51, 0xedcf, | |
577 | + 0x7a52, 0xedd0, | |
578 | + 0x7a53, 0xedd1, | |
579 | + 0x7a54, 0xedd2, | |
580 | + 0x7a55, 0xedd3, | |
581 | + 0x7a56, 0xedd4, | |
582 | + 0x7a57, 0xedd5, | |
583 | + 0x7a58, 0xedd6, | |
584 | + 0x7a59, 0xedd7, | |
585 | + 0x7a5a, 0xedd8, | |
586 | + 0x7a5b, 0xedd9, | |
587 | + 0x7a5c, 0xedda, | |
588 | + 0x7a5d, 0xeddb, | |
589 | + 0x7a5e, 0xeddc, | |
590 | + 0x7a5f, 0xeddd, | |
591 | + 0x7a60, 0xedde, | |
592 | + 0x7a61, 0xeddf, | |
593 | + 0x7a62, 0xede0, | |
594 | + 0x7a63, 0xede1, | |
595 | + 0x7a64, 0xede2, | |
596 | + 0x7a65, 0xede3, | |
597 | + 0x7a66, 0xede4, | |
598 | + 0x7a67, 0xede5, | |
599 | + 0x7a68, 0xede6, | |
600 | + 0x7a69, 0xede7, | |
601 | + 0x7a6a, 0xede8, | |
602 | + 0x7a6b, 0xede9, | |
603 | + 0x7a6c, 0xedea, | |
604 | + 0x7a6d, 0xedeb, | |
605 | + 0x7a6e, 0xedec, | |
606 | + 0x7a6f, 0xeded, | |
607 | + 0x7a70, 0xedee, | |
608 | + 0x7a71, 0xedef, | |
609 | + 0x7a72, 0xedf0, | |
610 | + 0x7a73, 0xedf1, | |
611 | + 0x7a74, 0xedf2, | |
612 | + 0x7a75, 0xedf3, | |
613 | + 0x7a76, 0xedf4, | |
614 | + 0x7a77, 0xedf5, | |
615 | + 0x7a78, 0xedf6, | |
616 | + 0x7a79, 0xedf7, | |
617 | + 0x7a7a, 0xedf8, | |
618 | + 0x7a7b, 0xedf9, | |
619 | + 0x7a7c, 0xedfa, | |
620 | + 0x7a7d, 0xedfb, | |
621 | + 0x7a7e, 0xedfc, | |
622 | + 0x7b21, 0xee40, | |
623 | + 0x7b22, 0xee41, | |
624 | + 0x7b23, 0xee42, | |
625 | + 0x7b24, 0xee43, | |
626 | + 0x7b25, 0xee44, | |
627 | + 0x7b26, 0xee45, | |
628 | + 0x7b27, 0xee46, | |
629 | + 0x7b28, 0xee47, | |
630 | + 0x7b29, 0xee48, | |
631 | + 0x7b2a, 0xee49, | |
632 | + 0x7b2b, 0xee4a, | |
633 | + 0x7b2c, 0xee4b, | |
634 | + 0x7b2d, 0xee4c, | |
635 | + 0x7b2e, 0xee4d, | |
636 | + 0x7b2f, 0xee4e, | |
637 | + 0x7b30, 0xee4f, | |
638 | + 0x7b31, 0xee50, | |
639 | + 0x7b32, 0xee51, | |
640 | + 0x7b33, 0xee52, | |
641 | + 0x7b34, 0xee53, | |
642 | + 0x7b35, 0xee54, | |
643 | + 0x7b36, 0xee55, | |
644 | + 0x7b37, 0xee56, | |
645 | + 0x7b38, 0xee57, | |
646 | + 0x7b39, 0xee58, | |
647 | + 0x7b3a, 0xee59, | |
648 | + 0x7b3b, 0xee5a, | |
649 | + 0x7b3c, 0xee5b, | |
650 | + 0x7b3d, 0xee5c, | |
651 | + 0x7b3e, 0xee5d, | |
652 | + 0x7b3f, 0xee5e, | |
653 | + 0x7b40, 0xee5f, | |
654 | + 0x7b41, 0xee60, | |
655 | + 0x7b42, 0xee61, | |
656 | + 0x7b43, 0xee62, | |
657 | + 0x7b44, 0xee63, | |
658 | + 0x7b45, 0xee64, | |
659 | + 0x7b46, 0xee65, | |
660 | + 0x7b47, 0xee66, | |
661 | + 0x7b48, 0xee67, | |
662 | + 0x7b49, 0xee68, | |
663 | + 0x7b4a, 0xee69, | |
664 | + 0x7b4b, 0xee6a, | |
665 | + 0x7b4c, 0xee6b, | |
666 | + 0x7b4d, 0xee6c, | |
667 | + 0x7b4e, 0xee6d, | |
668 | + 0x7b4f, 0xee6e, | |
669 | + 0x7b50, 0xee6f, | |
670 | + 0x7b51, 0xee70, | |
671 | + 0x7b52, 0xee71, | |
672 | + 0x7b53, 0xee72, | |
673 | + 0x7b54, 0xee73, | |
674 | + 0x7b55, 0xee74, | |
675 | + 0x7b56, 0xee75, | |
676 | + 0x7b57, 0xee76, | |
677 | + 0x7b58, 0xee77, | |
678 | + 0x7b59, 0xee78, | |
679 | + 0x7b5a, 0xee79, | |
680 | + 0x7b5b, 0xee7a, | |
681 | + 0x7b5c, 0xee7b, | |
682 | + 0x7b5d, 0xee7c, | |
683 | + 0x7b5e, 0xee7d, | |
684 | + 0x7b5f, 0xee7e, | |
685 | + 0x7b60, 0xee80, | |
686 | + 0x7b61, 0xee81, | |
687 | + 0x7b62, 0xee82, | |
688 | + 0x7b63, 0xee83, | |
689 | + 0x7b64, 0xee84, | |
690 | + 0x7b65, 0xee85, | |
691 | + 0x7b66, 0xee86, | |
692 | + 0x7b67, 0xee87, | |
693 | + 0x7b68, 0xee88, | |
694 | + 0x7b69, 0xee89, | |
695 | + 0x7b6a, 0xee8a, | |
696 | + 0x7b6b, 0xee8b, | |
697 | + 0x7b6c, 0xee8c, | |
698 | + 0x7b6d, 0xee8d, | |
699 | + 0x7b6e, 0xee8e, | |
700 | + 0x7b6f, 0xee8f, | |
701 | + 0x7b70, 0xee90, | |
702 | + 0x7b71, 0xee91, | |
703 | + 0x7b72, 0xee92, | |
704 | + 0x7b73, 0xee93, | |
705 | + 0x7b74, 0xee94, | |
706 | + 0x7b75, 0xee95, | |
707 | + 0x7b76, 0xee96, | |
708 | + 0x7b77, 0xee97, | |
709 | + 0x7b78, 0xee98, | |
710 | + 0x7b79, 0xee99, | |
711 | + 0x7b7a, 0xee9a, | |
712 | + 0x7b7b, 0xee9b, | |
713 | + 0x7b7c, 0xee9c, | |
714 | + 0x7b7d, 0xee9d, | |
715 | + 0x7b7e, 0xee9e, | |
716 | + 0x7c21, 0xee9f, | |
717 | + 0x7c22, 0xeea0, | |
718 | + 0x7c23, 0xeea1, | |
719 | + 0x7c24, 0xeea2, | |
720 | + 0x7c25, 0xeea3, | |
721 | + 0x7c26, 0xeea4, | |
722 | + 0x7c27, 0xeea5, | |
723 | + 0x7c28, 0xeea6, | |
724 | + 0x7c29, 0xeea7, | |
725 | + 0x7c2a, 0xeea8, | |
726 | + 0x7c2b, 0xeea9, | |
727 | + 0x7c2c, 0xeeaa, | |
728 | + 0x7c2d, 0xeeab, | |
729 | + 0x7c2e, 0xeeac, | |
730 | + 0x7c2f, 0xeead, | |
731 | + 0x7c30, 0xeeae, | |
732 | + 0x7c31, 0xeeaf, | |
733 | + 0x7c32, 0xeeb0, | |
734 | + 0x7c33, 0xeeb1, | |
735 | + 0x7c34, 0xeeb2, | |
736 | + 0x7c35, 0xeeb3, | |
737 | + 0x7c36, 0xeeb4, | |
738 | + 0x7c37, 0xeeb5, | |
739 | + 0x7c38, 0xeeb6, | |
740 | + 0x7c39, 0xeeb7, | |
741 | + 0x7c3a, 0xeeb8, | |
742 | + 0x7c3b, 0xeeb9, | |
743 | + 0x7c3c, 0xeeba, | |
744 | + 0x7c3d, 0xeebb, | |
745 | + 0x7c3e, 0xeebc, | |
746 | + 0x7c3f, 0xeebd, | |
747 | + 0x7c40, 0xeebe, | |
748 | + 0x7c41, 0xeebf, | |
749 | + 0x7c42, 0xeec0, | |
750 | + 0x7c43, 0xeec1, | |
751 | + 0x7c44, 0xeec2, | |
752 | + 0x7c45, 0xeec3, | |
753 | + 0x7c46, 0xeec4, | |
754 | + 0x7c47, 0xeec5, | |
755 | + 0x7c48, 0xeec6, | |
756 | + 0x7c49, 0xeec7, | |
757 | + 0x7c4a, 0xeec8, | |
758 | + 0x7c4b, 0xeec9, | |
759 | + 0x7c4c, 0xeeca, | |
760 | + 0x7c4d, 0xeecb, | |
761 | + 0x7c4e, 0xeecc, | |
762 | + 0x7c4f, 0xeecd, | |
763 | + 0x7c50, 0xeece, | |
764 | + 0x7c51, 0xeecf, | |
765 | + 0x7c52, 0xeed0, | |
766 | + 0x7c53, 0xeed1, | |
767 | + 0x7c54, 0xeed2, | |
768 | + 0x7c55, 0xeed3, | |
769 | + 0x7c56, 0xeed4, | |
770 | + 0x7c57, 0xeed5, | |
771 | + 0x7c58, 0xeed6, | |
772 | + 0x7c59, 0xeed7, | |
773 | + 0x7c5a, 0xeed8, | |
774 | + 0x7c5b, 0xeed9, | |
775 | + 0x7c5c, 0xeeda, | |
776 | + 0x7c5d, 0xeedb, | |
777 | + 0x7c5e, 0xeedc, | |
778 | + 0x7c5f, 0xeedd, | |
779 | + 0x7c60, 0xeede, | |
780 | + 0x7c61, 0xeedf, | |
781 | + 0x7c62, 0xeee0, | |
782 | + 0x7c63, 0xeee1, | |
783 | + 0x7c64, 0xeee2, | |
784 | + 0x7c65, 0xeee3, | |
785 | + 0x7c66, 0xeee4, | |
786 | + 0x7c67, 0xeee5, | |
787 | + 0x7c68, 0xeee6, | |
788 | + 0x7c69, 0xeee7, | |
789 | + 0x7c6a, 0xeee8, | |
790 | + 0x7c6b, 0xeee9, | |
791 | + 0x7c6c, 0xeeea, | |
792 | + 0x7c6d, 0xeeeb, | |
793 | + 0x7c6e, 0xeeec, | |
794 | + 0x7c71, 0xeeef, | |
795 | + 0x7c72, 0xeef0, | |
796 | + 0x7c73, 0xeef1, | |
797 | + 0x7c74, 0xeef2, | |
798 | + 0x7c75, 0xeef3, | |
799 | + 0x7c76, 0xeef4, | |
800 | + 0x7c77, 0xeef5, | |
801 | + 0x7c78, 0xeef6, | |
802 | + 0x7c79, 0xeef7, | |
803 | + 0x7c7a, 0xeef8, | |
804 | + 0x7c7c, 0xeefa, | |
805 | + 0x7c7d, 0xeefb, | |
806 | + 0x7c7e, 0xeefc, | |
807 | + 0xffff, 0xffff, | |
808 | +}; |