• R/O
  • SSH
  • HTTPS

pykf: Commit


Commit MetaInfo

Revision: 20 (tree)
Time: 2012-03-24 21:53:41
Author: ishimoto

Log Message

Added tests for python3

Change Summary

Incremental Difference

--- trunk/test\test_kf_py3.py (nonexistent)
+++ trunk/test\test_kf_py3.py (revision 20)
@@ -0,0 +1,216 @@
1+# -*- coding:cp932 -*-
2+from __future__ import nested_scopes
3+
4+import unittest
5+from pykf import *
6+
class test_kf(unittest.TestCase):
    """Conversion and encoding-detection tests for pykf under Python 3.

    Every input and expected value handled here is a ``bytes`` object.
    """

    def readtbl(self, fname):
        """Read a hex mapping table and return ``(sjis, euc)`` byte strings.

        Every line that is neither blank nor starts with ``#`` must begin
        with two comma-separated hexadecimal numbers.  The first column is
        collected into the first returned value and the second column into
        the second; each number contributes two bytes, high byte first.

        The file is opened in binary mode so that the ``#`` test compares
        bytes with bytes (a ``str``/``bytes`` comparison is always unequal
        in Python 3, which let comment lines through) and so that no
        locale-dependent decoding is applied to the table.
        """
        with open(fname, "rb") as f:
            rows = [
                line.split(b",")
                for line in f
                if line.strip() and not line.startswith(b"#")
            ]

        def column(index):
            # int() accepts ASCII bytes and ignores surrounding whitespace.
            codes = (int(row[index], 16) for row in rows)
            return b"".join(bytes((code >> 8, code & 0xFF)) for code in codes)

        return column(0), column(1)

    def conv(self, sjis):
        """Round-trip *sjis* through every converter pair and check the results.

        For input containing at least one byte >= 0x80 the three encodings
        must also differ from each other and ``guess`` must identify each of
        them.
        """
        euc1 = toeuc(sjis)
        jis1 = tojis(sjis)
        euc2 = toeuc(jis1)
        jis2 = tojis(euc1)
        sjis1 = tosjis(jis1)
        sjis2 = tosjis(euc1)

        # Diagnostic only: print the two-byte windows that differ before the
        # assertions below report the failure.  Indexing bytes yields ints in
        # Python 3, so the bytes are formatted directly instead of via ord().
        for i in range(0, len(sjis), 2):
            s = sjis[i:i + 2]
            e1 = sjis1[i:i + 2]
            if s != e1:
                print("%r(%s) %r(%s)" % (
                    s, "".join("%02x" % b for b in s),
                    e1, "".join("%02x" % b for b in e1)))

        # all() rather than max() so that empty input does not raise.
        ascii_only = all(b < 0x80 for b in sjis)

        self.assertEqual(sjis, sjis1)
        self.assertEqual(sjis1, sjis2)
        if not ascii_only:
            self.assertNotEqual(sjis2, euc1)
            self.assertNotEqual(sjis2, jis1)
        self.assertEqual(euc1, euc2)
        if not ascii_only:
            self.assertNotEqual(euc1, jis1)
        self.assertEqual(jis1, jis2)

        if not ascii_only:
            self.assertEqual(guess(sjis1), SJIS)
            self.assertEqual(guess(euc1), EUC)
            self.assertEqual(guess(jis1), JIS)

    # NOTE(review): the fixture paths below are relative to different
    # directories ("./", "test/", "../misc/") -- confirm the working
    # directory the suite is expected to run from.

    def testBasic(self):
        """Round-trip the contents of ``./readme.sjis``."""
        with open("./readme.sjis", "rb") as f:
            sjis = f.read()
        self.conv(sjis)

    def testHankana(self):
        """Round-trip the contents of ``test/hankana.txt``."""
        with open("test/hankana.txt", "rb") as f:
            sjis = f.read()
        self.conv(sjis)

    def testNEC(self):
        """Check both directions against the ``nectoeuc.txt`` table."""
        sjis, euc = self.readtbl("../misc/nectoeuc.txt")
        self.assertEqual(toeuc(sjis), euc)
        self.assertEqual(toeuc(tojis(sjis)), euc)
        self.assertEqual(tosjis(euc), sjis)

    def testNECIBM(self):
        """Check both directions against the ``necibmtoeuc.txt`` table."""
        sjis, euc = self.readtbl("../misc/necibmtoeuc.txt")
        self.assertEqual(toeuc(sjis), euc)
        self.assertEqual(toeuc(tojis(sjis)), euc)
        self.assertEqual(tosjis(euc), sjis)

    def testIBM(self):
        """Check the ``ibmtoeuc.txt`` table.

        The reverse conversion must yield different bytes from the table's
        SJIS column, yet both must decode to the same text as cp932.
        """
        sjis, euc = self.readtbl("../misc/ibmtoeuc.txt")
        self.assertEqual(toeuc(sjis), euc)
        self.assertEqual(toeuc(tojis(sjis)), euc)
        self.assertNotEqual(tosjis(euc), sjis)
        self.assertEqual(str(tosjis(euc), "cp932"), str(sjis, "cp932"))

    def testGaiji(self):
        """Lead bytes 0xF0-0xF9 must come back as repeated ``0x81 0xAC``."""
        for trail_bytes in (range(0x40, 0x7e), range(0x80, 0xfd)):
            sjis = b"".join(
                bytes((lead, trail))
                for lead in range(0xf0, 0xfa)
                for trail in trail_bytes
            )
            expected = b"\x81\xac" * (len(sjis) // 2)
            self.assertEqual(tosjis(toeuc(sjis)), expected)
            self.assertEqual(tosjis(tojis(sjis)), expected)

    def testUtf8(self):
        """``guess`` must recognise UTF-8 text and a lone UTF-8 BOM."""
        # bytes literals: in a str literal these escapes are the code points
        # U+00E3, U+0081, ... and not the UTF-8 octets the test is about.
        utf8 = b"\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a"
        self.assertEqual(guess(utf8), UTF8)
        self.assertEqual(guess(b"\xef\xbb\xbf"), UTF8)

    def testJisNormalize(self):
        """JIS output must end with ``ESC ( B`` and convert back unchanged."""
        sjis = b"\x82\xa0"
        jis = tojis(sjis, SJIS)
        self.assertEqual(jis[-3:], b'\x1b(B')
        self.assertEqual(tosjis(jis, JIS), sjis)

        euc = toeuc(b"\x82\xa0", SJIS)
        jis = tojis(euc, EUC)
        self.assertEqual(jis[-3:], b'\x1b(B')
        self.assertEqual(toeuc(jis, JIS), euc)
100+
class test_zerolen(unittest.TestCase):
    """Converters must return empty bytes when there is nothing to convert."""

    def test_zerolen(self):
        """Call each converter with empty or escape-only input."""
        empty = ""
        escape_only = "\x1b(I"

        # (converter, positional arguments), in the order they are exercised.
        cases = (
            (tosjis, (empty,)),
            (toeuc, (empty,)),
            (tojis, (empty,)),
            (tosjis, (empty, EUC)),
            (tosjis, (empty, JIS)),
            (tosjis, (escape_only, JIS)),
            (toeuc, (empty, SJIS)),
            (toeuc, (empty, JIS)),
            (toeuc, (escape_only, JIS)),
            (tojis, (empty, SJIS)),
            (tojis, (empty, EUC)),
        )
        for convert, args in cases:
            assert convert(*args) == b""
116+
class test_split(unittest.TestCase):
    """Joining the pieces returned by ``split`` must rebuild the input."""

    def test_split(self):
        """Check plain ASCII, mixed SJIS, and its EUC and JIS conversions."""
        plain = b"abcdefg"
        mixed = b"abc\x82\xa0\x82\xa1\x82\xa2\xb1\xb2\xb3abc\x82\xa0"

        for raw in (plain, mixed):
            pieces = split(raw)
            assert b"".join(pieces) == raw

        # The converted forms are produced afresh for each side of the check.
        for convert in (toeuc, tojis):
            pieces = split(convert(mixed))
            assert b"".join(pieces) == convert(mixed)
126+
127+
class test_tohalf(unittest.TestCase):
    """Checks ``tohalf_kana`` against fixed SJIS fixtures."""

    # Fixture pairs: ``sjis`` <-> ``sjis_half`` and ``all_full`` <-> ``all_half``.
    sjis = b'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
    sjis_half = b'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
    all_half = b'\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
    all_full = b"\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["

    def test_sjis(self):
        """Convert each SJIS source fixture and compare with its counterpart."""
        fixtures = ((self.sjis, self.sjis_half), (self.all_full, self.all_half))
        for source, expected in fixtures:
            converted = tohalf_kana(source, SJIS)
            assert converted == expected

    def test_euc(self):
        """Same check with both sides first converted from SJIS to EUC."""
        fixtures = ((self.sjis, self.sjis_half), (self.all_full, self.all_half))
        for source, counterpart in fixtures:
            euc_source = toeuc(source, SJIS)
            converted = tohalf_kana(euc_source, EUC)
            assert converted == toeuc(counterpart, SJIS)
144+
145+
class test_tofull(unittest.TestCase):
    """Checks ``tofull_kana`` against fixed SJIS fixtures."""

    # Fixture pairs: ``sjis_half`` <-> ``sjis`` and ``all_half`` <-> ``all_full``.
    sjis = b'abc\x83A\x83C\x83E\x83G\x83I\x83K\x83M\x83O\x83Q\x83S\x82`\x82a\x82b'
    sjis_half = b'abc\xb1\xb2\xb3\xb4\xb5\xb6\xde\xb7\xde\xb8\xde\xb9\xde\xba\xde\x82`\x82a\x82b'
    all_half = b'\xa1\xa2\xa3\xa4\xa5\xa7\xb1\xa8\xb2\xa9\xb3\xaa\xb4\xab\xb5\xb6\xb6\xde\xb7\xb7\xde\xb8\xb8\xde\xb9\xb9\xde\xba\xba\xde\xbb\xbb\xde\xbc\xbc\xde\xbd\xbd\xde\xbe\xbe\xde\xbf\xbf\xde\xc0\xc0\xde\xc1\xc1\xde\xaf\xc2\xc2\xde\xc3\xc3\xde\xc4\xc4\xde\xc5\xc6\xc7\xc8\xc9\xca\xca\xde\xca\xdf\xcb\xcb\xde\xcb\xdf\xcc\xcc\xde\xcc\xdf\xcd\xcd\xde\xcd\xdf\xce\xce\xde\xce\xdf\xcf\xd0\xd1\xd2\xd3\xac\xd4\xad\xd5\xae\xd6\xd7\xd8\xd9\xda\xdb\x83\x8e\xdc\x83\x90\x83\x91\xa6\xdd\xb3\xde\x83\x95\xb0'
    all_full = b"\x81B\x81u\x81v\x81A\x81E\x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G\x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O\x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W\x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_\x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g\x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o\x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w\x83x\x83y\x83z\x83{\x83|\x83}\x83~\x83\x80\x83\x81\x83\x82\x83\x83\x83\x84\x83\x85\x83\x86\x83\x87\x83\x88\x83\x89\x83\x8a\x83\x8b\x83\x8c\x83\x8d\x83\x8e\x83\x8f\x83\x90\x83\x91\x83\x92\x83\x93\x83\x94\x83\x95\x81["

    def test_sjis(self):
        """Convert each SJIS source fixture and compare with its counterpart."""
        fixtures = ((self.sjis_half, self.sjis), (self.all_half, self.all_full))
        for source, expected in fixtures:
            converted = tofull_kana(source, SJIS)
            assert converted == expected

    def test_euc(self):
        """Same check with both sides first converted from SJIS to EUC."""
        fixtures = ((self.sjis_half, self.sjis), (self.all_half, self.all_full))
        for source, counterpart in fixtures:
            euc_source = toeuc(source, SJIS)
            converted = tofull_kana(euc_source, EUC)
            assert converted == toeuc(counterpart, SJIS)
163+
class test_strict(unittest.TestCase):
    """Tests for the strict and lenient modes of ``guess``.

    The "bad" samples append ``b"\\xf0\\x01"`` to otherwise valid text; the
    assertions below require strict mode to report ``ERROR`` for them.
    """

    def test_sjis(self):
        """Shift_JIS input: strict mode rejects the sample with trailing bytes."""
        s1 = "あいうえお".encode("ShiftJIS")
        self.assertEqual(guess(s1, True), SJIS)
        self.assertEqual(guess(s1, False), SJIS)

        s2 = ("あいうえおかきくけこ" * 1000).encode("ShiftJIS") + b"\xf0\x01"
        self.assertEqual(guess(s2, False), SJIS)
        self.assertEqual(guess(s2, True), ERROR)

    def test_euc(self):
        """EUC input produced by converting the Shift_JIS samples."""
        s1 = toeuc("あいうえお".encode("ShiftJIS"), SJIS)
        self.assertEqual(guess(s1, True), EUC)
        self.assertEqual(guess(s1, False), EUC)

        s2 = toeuc(("あいうえおかきくけこ" * 1000).encode("ShiftJIS") + b"\xf0\x01", SJIS)
        self.assertEqual(guess(s2, False), EUC)
        self.assertEqual(guess(s2, True), ERROR)

    def test_jis(self):
        """JIS input; the bad sample is ``UNKNOWN`` when not strict."""
        s1 = tojis("あいうえお".encode("ShiftJIS"), SJIS)
        self.assertEqual(guess(s1, True), JIS)
        self.assertEqual(guess(s1, False), JIS)

        s2 = tojis("あいうえおかきくけこ".encode("ShiftJIS") + b"\xf0\x01", SJIS)
        self.assertEqual(guess(s2, False), UNKNOWN)
        self.assertEqual(guess(s2, True), ERROR)

    def test_flag(self):
        """``setstrict`` must change the default mode used by ``guess``."""
        # The flag is shared by every caller of guess(); restore whatever was
        # set before, even if an assertion below fails, so a failure here
        # cannot leave strict mode switched on for later tests.
        self.addCleanup(setstrict, getstrict())

        setstrict(True)
        self.assertTrue(getstrict())

        setstrict(False)
        self.assertFalse(getstrict())

        s2 = ("あいうえおかきくけこ" * 1000).encode("ShiftJIS") + b"\xf0\x01"
        self.assertEqual(guess(s2), SJIS)
        setstrict(True)
        self.assertEqual(guess(s2), ERROR)
201+
202+
class test_j0208(unittest.TestCase):
    """Tests for the ``j0208`` keyword of ``tojis``."""

    def test_sjis(self):
        """The escape sequence must depend on ``j0208``; both forms round-trip."""
        # U+3231 encodes to 0x87 0x8A in cp932, which is 0x2D 0x6A ("-j") in
        # JIS -- the payload both expected values below contain.  The
        # character is written as an escape because a literal copy had been
        # garbled to "?", which can never produce these sequences.
        s1 = "\u3231".encode("cp932")

        self.assertEqual(tojis(s1, SJIS, j0208=False), b'\x1b$(O-j\x1b(B')
        self.assertEqual(tojis(s1, SJIS, j0208=True), b'\x1b$B-j\x1b(B')

        self.assertEqual(tosjis(tojis(s1, SJIS, j0208=False)), s1)
        self.assertEqual(tosjis(tojis(s1, SJIS, j0208=True)), s1)
212+
# Run every TestCase defined in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()
215+
216+
Show on old repository browser