Added tests for python3
@@ -0,0 +1,216 @@ | ||
1 | +# -*- coding:cp932 -*- | |
2 | +from __future__ import nested_scopes | |
3 | + | |
4 | +import unittest | |
5 | +from pykf import * | |
6 | + | |
class test_kf(unittest.TestCase):
    """Conversion and encoding-detection tests for the pykf converters."""

    def readtbl(self, fname):
        """Load a code mapping table and return it as two byte strings.

        Each data line of *fname* holds comma-separated hexadecimal codes:
        field 0 is the Shift_JIS code and field 1 the EUC code.  Lines that
        start with ``#`` and blank lines are skipped.  Every code is emitted
        as two bytes (high byte first) and the codes are concatenated in
        file order.

        Returns:
            A ``(sjis, euc)`` tuple of ``bytes``.

        Raises:
            ValueError: if a data field is not a hexadecimal number.
            IndexError: if a data line has fewer than two fields.
        """
        with open(fname) as f:
            # The file is read in text mode, so every line is a ``str``; the
            # comment marker must be a ``str`` too (a ``bytes`` marker never
            # compares equal and would let comment lines through).
            rows = [line for line in f
                    if line.strip() and not line.startswith('#')]

        def pack(column):
            # Two bytes per code, big-endian.
            codes = (int(row.split(",")[column], 16) for row in rows)
            return b"".join(bytes((code >> 8, code & 0xff)) for code in codes)

        return pack(0), pack(1)

    def conv(self, sjis):
        """Check that *sjis* survives every SJIS/EUC/JIS conversion path.

        Converts the Shift_JIS input to EUC and JIS, converts those results
        into each other and back to Shift_JIS, and asserts that all paths
        agree.  When the input contains any byte >= 0x80 it additionally
        asserts that the three encodings differ from one another and that
        ``guess`` identifies each of them.
        """
        euc1 = toeuc(sjis)
        jis1 = tojis(sjis)
        euc2 = toeuc(jis1)
        jis2 = tojis(euc1)
        sjis1 = tosjis(jis1)
        sjis2 = tosjis(euc1)

        # Diagnostic aid: show every two-byte slice that changed in the
        # SJIS -> JIS -> SJIS round trip before the assertion below fires.
        # Indexing ``bytes`` yields ``int`` in Python 3, so the slices are
        # formatted with ``bytes.hex()`` rather than ``ord()``.
        for i in range(0, len(sjis), 2):
            s = sjis[i:i+2]
            e1 = sjis1[i:i+2]
            if s != e1:
                print("%r(%s) %r(%s)" % (s, s.hex(), e1, e1.hex()))

        assert sjis == sjis1
        assert sjis1 == sjis2

        # Pure 7-bit input looks the same in every encoding, so the
        # "encodings differ" and detection checks only apply otherwise.
        ascii_only = max(sjis) < 0x80

        assert ascii_only or sjis2 != euc1
        assert ascii_only or sjis2 != jis1
        assert euc1 == euc2
        assert ascii_only or euc1 != jis1
        assert jis1 == jis2

        assert ascii_only or guess(sjis1) == SJIS
        assert ascii_only or guess(euc1) == EUC
        assert ascii_only or guess(jis1) == JIS

    def testBasic(self):
        """Round-trip the contents of ``./readme.sjis``."""
        with open("./readme.sjis", "rb") as f:
            sjis = f.read()
        self.conv(sjis)

    def testHankana(self):
        """Round-trip the contents of ``test/hankana.txt``."""
        with open("test/hankana.txt", "rb") as f:
            sjis = f.read()
        self.conv(sjis)

    def testNEC(self):
        """Codes in ``nectoeuc.txt`` must map SJIS <-> EUC in both directions."""
        sjis, euc = self.readtbl("../misc/nectoeuc.txt")
        assert toeuc(sjis) == euc
        assert toeuc(tojis(sjis)) == euc
        assert tosjis(euc) == sjis

    def testNECIBM(self):
        """Codes in ``necibmtoeuc.txt`` must map SJIS <-> EUC in both directions."""
        sjis, euc = self.readtbl("../misc/necibmtoeuc.txt")
        assert toeuc(sjis) == euc
        assert toeuc(tojis(sjis)) == euc
        assert tosjis(euc) == sjis

    def testIBM(self):
        """Codes in ``ibmtoeuc.txt`` map to EUC but do not round-trip byte-for-byte.

        Converting back yields different Shift_JIS bytes that nevertheless
        decode (as cp932) to the same text.
        """
        sjis, euc = self.readtbl("../misc/ibmtoeuc.txt")
        assert toeuc(sjis) == euc
        assert toeuc(tojis(sjis)) == euc
        assert tosjis(euc) != sjis
        assert str(tosjis(euc), "cp932") == str(sjis, "cp932")

    def testGaiji(self):
        """Lead bytes 0xF0-0xF9 must come back as 0x81AC after a round trip."""
        sjis = b"".join(bytes((x, y)) for x in range(0xf0, 0xfa) for y in range(0x40, 0x7e))
        assert tosjis(toeuc(sjis)) == b"\x81\xac" * (len(sjis)//2)
        assert tosjis(tojis(sjis)) == b"\x81\xac" * (len(sjis)//2)

        sjis = b"".join(bytes((x, y)) for x in range(0xf0, 0xfa) for y in range(0x80, 0xfd))
        assert tosjis(toeuc(sjis)) == b"\x81\xac" * (len(sjis)//2)
        assert tosjis(tojis(sjis)) == b"\x81\xac" * (len(sjis)//2)

    def testUtf8(self):
        """``guess`` must recognise UTF-8 text and a bare UTF-8 BOM."""
        # These are raw UTF-8 byte sequences, so they must be ``bytes``
        # literals; as ``str`` the escapes would denote the code points
        # U+00E3, U+0081, ... instead of the intended encoded bytes.
        utf8 = b"\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a"
        assert guess(utf8) == UTF8
        assert guess(b"\xef\xbb\xbf") == UTF8

    def testJisNormalize(self):
        """JIS output must end with ``ESC ( B`` and convert back unchanged."""
        sjis = b"\x82\xa0"
        jis = tojis(sjis, SJIS)
        assert jis[-3:] == b'\x1b(B'
        assert tosjis(jis, JIS) == sjis

        euc = toeuc(b"\x82\xa0", SJIS)
        jis = tojis(euc, EUC)
        assert jis[-3:] == b'\x1b(B'
        assert toeuc(jis, JIS) == euc
100 | + | |
class test_zerolen(unittest.TestCase):
    """Converters must return empty ``bytes`` when there is nothing to convert."""

    def test_zerolen(self):
        """Empty input, or a lone JIS escape sequence, yields ``b""``."""
        # Inputs are ``bytes`` literals, matching what every other test in
        # this module passes to the converters.
        # NOTE(review): the previous version passed ``str`` here; if str
        # input is meant to be covered, add a dedicated test for it.
        src = b""
        assert tosjis(src) == b""
        assert toeuc(src) == b""
        assert tojis(src) == b""

        assert tosjis(src, EUC) == b""
        assert tosjis(src, JIS) == b""
        # Input consisting only of the escape sequence ESC ( I carries no
        # characters, so the output must be empty as well.
        assert tosjis(b"\x1b(I", JIS) == b""
        assert toeuc(src, SJIS) == b""
        assert toeuc(src, JIS) == b""
        assert toeuc(b"\x1b(I", JIS) == b""
        assert tojis(src, SJIS) == b""
        assert tojis(src, EUC) == b""
116 | + | |
class test_split(unittest.TestCase):
    """``split`` must break a byte string into pieces that re-join losslessly."""

    def test_split(self):
        """Joining the pieces returned by ``split`` restores the input."""
        plain = b"abcdefg"
        mixed = b"abc\x82\xa0\x82\xa1\x82\xa2\xb1\xb2\xb3abc\x82\xa0"

        def rejoins(data):
            # True when concatenating the split pieces gives the input back.
            return b"".join(split(data)) == data

        assert rejoins(plain)
        assert rejoins(mixed)
        assert rejoins(toeuc(mixed))
        assert rejoins(tojis(mixed))
126 | + | |
127 | + | |
class test_tohalf(unittest.TestCase):
    """Tests for ``tohalf_kana`` on Shift_JIS and EUC input."""

    # Short Shift_JIS sample and the result expected from tohalf_kana.
    sjis = b'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
    sjis_half = b'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
    # Larger Shift_JIS table (all_full) and the result expected for it (all_half).
    all_half = b'\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
    all_full = b"\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["

    def test_sjis(self):
        """Shift_JIS input is converted to the expected half-width form."""
        pairs = ((self.sjis, self.sjis_half), (self.all_full, self.all_half))
        for full, half in pairs:
            assert tohalf_kana(full, SJIS) == half

    def test_euc(self):
        """EUC input gives the EUC form of the expected Shift_JIS result."""
        pairs = ((self.sjis, self.sjis_half), (self.all_full, self.all_half))
        for full, half in pairs:
            converted = tohalf_kana(toeuc(full, SJIS), EUC)
            assert converted == toeuc(half, SJIS)
144 | + | |
145 | + | |
class test_tofull(unittest.TestCase):
    """Tests for ``tofull_kana`` on Shift_JIS and EUC input."""

    # Short Shift_JIS sample (sjis_half) and the result expected from
    # tofull_kana (sjis).
    sjis = b'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
    sjis_half = b'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
    # Larger Shift_JIS table (all_half) and the result expected for it (all_full).
    all_half = b'\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
    all_full = b"\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["

    def test_sjis(self):
        """Shift_JIS input is converted to the expected full-width form."""
        pairs = ((self.sjis_half, self.sjis), (self.all_half, self.all_full))
        for half, full in pairs:
            assert tofull_kana(half, SJIS) == full

    def test_euc(self):
        """EUC input gives the EUC form of the expected Shift_JIS result."""
        pairs = ((self.sjis_half, self.sjis), (self.all_half, self.all_full))
        for half, full in pairs:
            converted = tofull_kana(toeuc(half, SJIS), EUC)
            assert converted == toeuc(full, SJIS)
163 | + | |
class test_strict(unittest.TestCase):
    """Tests for the strict mode of ``guess``.

    Each encoding test first checks clean text, then text with the bytes
    ``F0 01`` appended.  In strict mode ``guess`` must report ``ERROR`` for
    the latter; the expected non-strict result is stated in each test.
    """

    def test_sjis(self):
        """Strict and non-strict detection of Shift_JIS input."""
        s1 = "あいうえお".encode("ShiftJIS")
        assert guess(s1, True) == SJIS
        assert guess(s1, False) == SJIS
        s2 = ("あいうえおかきくけこ"*1000).encode("ShiftJIS") + b"\xf0\x01"
        assert guess(s2, False) == SJIS
        assert guess(s2, True) == ERROR

    def test_euc(self):
        """Strict and non-strict detection of EUC input."""
        s1 = toeuc("あいうえお".encode("ShiftJIS"), SJIS)
        assert guess(s1, True) == EUC
        assert guess(s1, False) == EUC
        s2 = toeuc(("あいうえおかきくけこ"*1000).encode("ShiftJIS") + b"\xf0\x01", SJIS)
        assert guess(s2, False) == EUC
        assert guess(s2, True) == ERROR

    def test_jis(self):
        """Strict and non-strict detection of JIS input.

        Unlike the other encodings, the non-strict result for the damaged
        input is ``UNKNOWN``.
        """
        s1 = tojis("あいうえお".encode("ShiftJIS"), SJIS)
        assert guess(s1, True) == JIS
        assert guess(s1, False) == JIS
        s2 = tojis("あいうえおかきくけこ".encode("ShiftJIS") + b"\xf0\x01", SJIS)
        assert guess(s2, False) == UNKNOWN
        assert guess(s2, True) == ERROR

    def test_flag(self):
        """``setstrict`` changes the default used by ``guess`` and ``getstrict``."""
        # The strict flag is set through a module-level call, so make sure
        # it is switched off again even when an assertion below fails;
        # otherwise the failure would leak strict mode into later tests.
        self.addCleanup(setstrict, False)

        setstrict(True)
        assert getstrict()

        setstrict(False)
        assert not getstrict()

        s2 = ("あいうえおかきくけこ"*1000).encode("ShiftJIS") + b"\xf0\x01"
        assert guess(s2) == SJIS
        setstrict(True)
        assert guess(s2) == ERROR
201 | + | |
202 | + | |
class test_j0208(unittest.TestCase):
    """Tests for the ``j0208`` option of ``tojis``."""

    def test_sjis(self):
        """The ``j0208`` flag selects the escape sequence used for the character.

        With ``j0208=False`` the character is announced with ``ESC $ ( O``;
        with ``j0208=True`` it is announced with ``ESC $ B``.  Either form
        must convert back to the original Shift_JIS bytes.
        """
        # CP932 0x878A, i.e. U+3231 (PARENTHESIZED IDEOGRAPH STOCK).  Its JIS
        # code is 0x2D6A, which is the "-j" expected below.  Written as
        # explicit bytes so the test does not depend on how this source file
        # is encoded; a plain "?" here can never produce the expected output.
        # NOTE(review): character reconstructed from the expected JIS bytes.
        s1 = b"\x87\x8a"

        assert tojis(s1, SJIS, j0208=False) == b'\x1b$(O-j\x1b(B'
        assert tojis(s1, SJIS, j0208=True) == b'\x1b$B-j\x1b(B'

        assert tosjis(tojis(s1, SJIS, j0208=False)) == s1
        assert tosjis(tojis(s1, SJIS, j0208=True)) == s1
212 | + | |
# Run every TestCase defined in this module when executed as a script.
if __name__ == '__main__':
    unittest.main()
215 | + | |
216 | + |