• R/O
  • SSH
  • HTTPS

unf: Commit


Commit MetaInfo

Revision56 (tree)
Time2011-11-19 19:34:27
Authorphjgt

Log Message

変換テーブルソース生成用のスクリプトを登録

Change Summary

Incremental Difference

--- trunk/lisp/gen-table.lisp (nonexistent)
+++ trunk/lisp/gen-table.lisp (revision 56)
@@ -0,0 +1,192 @@
1+;; TODO: 整理
2+(require :asdf)
3+
4+;;
5+(unless (= (length sb-ext:*posix-argv*) 3)
6+ (format *error-output* "~&Usage: sbcl --script gen-table.lisp DATA_DIR OUTPUT_TABLE_FILE~%")
7+ (sb-ext:quit))
8+
9+(defparameter *data-dir* (pathname (second sb-ext:*posix-argv*)))
10+(defparameter *table-hh* (pathname (third sb-ext:*posix-argv*)))
11+
12+;;
13+(defun load-local-system (package &optional (package-directory #P"./"))
14+ (let #.`((asdf:*central-registry* (directory package-directory))
15+ ;; or #+ASDF2
16+ ,@(when #.#1=(find-symbol "*DEFAULT-SOURCE-REGISTRIES*" :asdf)
17+ `((,#1# nil))))
18+ (asdf:load-system package)))
19+
20+(defmacro each-file-line ((line filepath &rest keys) &body body)
21+ `(with-open-file (#1=#:in ,filepath ,@keys)
22+ (let (,line)
23+ (loop while (setf ,line (read-line #1# nil nil nil))
24+ DO (locally ,@body)))))
25+
26+(defun s (&rest args)
27+ "ARGSを連接した文字列に変換する"
28+ (with-output-to-string (s)
29+ (dolist (a args)
30+ (typecase a
31+ (string (write-string a s))
32+ (character (write-char a s))
33+ (otherwise (princ a s))))))
34+
35+(defun flatten (lst &aux acc)
36+ (labels ((self (x)
37+ (if (consp x)
38+ (progn (self (car x)) (self (cdr x)))
39+ (when x
40+ (push x acc)))))
41+ (self lst)
42+ (nreverse acc)))
43+
44+(load-local-system :dict #P"lib/dict-0.0.2/")
45+(load-local-system :dawg #P"lib/cl-dawg-0.2.2-unf/")
46+
47+;;
48+(defun read-attr-def (path &aux acc)
49+ (each-file-line (line path)
50+ (push (list (subseq line 3) (parse-integer line :end 2 :radix 16)) acc))
51+ (sort (nreverse acc) #'string< :key #'first))
52+
53+(defun read-map-def (path &aux acc)
54+ (each-file-line (line path)
55+ (let ((p (position #\Tab line)))
56+ (push (list (subseq line 0 p) (subseq line (1+ p))) acc)))
57+ (sort (nreverse acc) #'string< :key #'first))
58+
59+(let ((*default-pathname-defaults* (probe-file *data-dir*)))
60+ (defparameter *cac*
61+ (read-map-def "canonical-composition.def"))
62+
63+ (defparameter *cad*
64+ (read-map-def "canonical-decomposition.def"))
65+
66+ (defparameter *cod*
67+ (read-map-def "compatibility-decomposition.def"))
68+
69+ (defparameter *ccc*
70+ (read-attr-def "canonical-combining-class.def"))
71+
72+ (defparameter *nic*
73+ (read-attr-def "nfc-illegal-char.def"))
74+
75+ (defparameter *nfic*
76+ (read-attr-def "nfkc-illegal-char.def")))
77+
78+;;
79+(defun add-prefix (prefix)
80+ (lambda (s)
81+ (s prefix (car s))))
82+
83+(defun cat (strs)
84+ (reduce (lambda (acc s)
85+ (declare (simple-string s acc))
86+ (let ((p (search s acc)))
87+ (if (null p)
88+ (concatenate 'string s acc)
89+ acc)))
90+ strs
91+ :initial-value ""))
92+
93+(defparameter *keys*
94+ (flatten
95+ (list (mapcar (add-prefix "0") *cac*)
96+ (mapcar (add-prefix "1") *cad*)
97+ (mapcar (add-prefix "2") *cod*)
98+ (mapcar (add-prefix "3") *ccc*)
99+ (mapcar (add-prefix "4") *nic*)
100+ (mapcar (add-prefix "5") *nfic*))))
101+
102+(defparameter *strs*
103+ (cat
104+ (sort
105+ (flatten
106+ (list (mapcar #'second *cac*)
107+ (mapcar #'second *cad*)
108+ (mapcar #'second *cod*)))
109+ #'> :key #'length)))
110+
111+(defparameter *octets* (sb-ext:string-to-octets *strs*))
112+
113+(with-open-file (out "/tmp/unf.str.dat" :direction :output
114+ :if-exists :supersede
115+ :element-type '(unsigned-byte 8))
116+ (write-sequence *octets* out)
117+ 'done)
118+
119+(defparameter *vals*
120+ (flatten
121+ (list
122+ (loop FOR as IN (list *cac* *cad* *cod*)
123+ COLLECT
124+ (loop FOR (_ v) IN as
125+ FOR bv = (string-to-octets v)
126+ FOR p = (search bv *octets*)
127+ COLLECT (progn
128+ (assert (and (<= (integer-length p) 18)
129+ (<= (integer-length (length bv)) 6)))
130+ (dpb (length bv) (byte 6 18) p))))
131+
132+ (loop FOR (_ attr) IN *ccc* COLLECT attr)
133+
134+ (loop REPEAT (+ (length *nic*) (length *nfic*)) COLLECT 0))))
135+
136+(defparameter *kvs* (mapcar (lambda (x y)
137+ (cons (s x (code-char 0)) y))
138+ *keys* *vals*))
139+
140+;;
141+(dawg:build :input *kvs* :output "/tmp/unf.key.idx")
142+
143+(defun gen-source (path)
144+ (with-open-file (out path :direction :output :if-exists :supersede)
145+ (format out "#ifndef UNF_TABLE_HH~%")
146+ (format out "#define UNF_TABLE_HH~%")
147+ (format out "namespace UNF {~%")
148+ (format out "namespace TABLE {~%")
149+
150+ (with-open-file (in "/tmp/unf.key.idx" :element-type '(unsigned-byte 32))
151+ (let ((base (ldb (byte 24 0) (progn #1=(read-byte in nil nil) #1# #1#))))
152+ (format out "const unsigned CANONICAL_COM_ROOT = ~d;~%" (+ base (char-code #\0)))
153+ (format out "const unsigned CANONICAL_DECOM_ROOT = ~d;~%" (+ base (char-code #\1)))
154+ (format out "const unsigned COMPATIBILITY_DECOM_ROOT = ~d;~%" (+ base (char-code #\2)))
155+ (format out "const unsigned CANONICAL_CLASS_ROOT = ~d;~%" (+ base (char-code #\3)))
156+ (format out "const unsigned NFC_ILLEGAL_ROOT = ~d;~%" (+ base (char-code #\4)))
157+ (format out "const unsigned NFKC_ILLEGAL_ROOT = ~d;~%" (+ base (char-code #\5)))))
158+
159+ (with-open-file (in "/tmp/unf.key.idx" :element-type '(unsigned-byte 32))
160+ (format out "~%const unsigned NODES[]={")
161+ (read-byte in nil nil)
162+ (read-byte in nil nil)
163+ (loop FOR v = (read-byte in nil nil)
164+ WHILE v
165+ FOR i FROM 0
166+ DO
167+ (when (zerop (mod i 10))
168+ (terpri out))
169+ (format out "0x~8,'0x" v)
170+ (when (listen in)
171+ (format out ",")))
172+ (format out "};~%"))
173+
174+ (with-open-file (in "/tmp/unf.str.dat" :element-type '(signed-byte 8))
175+ (format out "~%const char STRINGS[]={")
176+ (loop FOR c = (read-byte in nil nil)
177+ WHILE c
178+ FOR i FROM 0
179+ DO
180+ (when (zerop (mod i 20))
181+ (terpri out))
182+ (format out "~4d" c)
183+ (when (listen in)
184+ (format out ",")))
185+ (format out "};~%"))
186+
187+ (format out "}~%")
188+ (format out "}~%")
189+ (format out "#endif~%")))
190+
191+;;
192+(gen-source *table-hh*)
--- trunk/README (revision 55)
+++ trunk/README (revision 56)
@@ -1,3 +1,9 @@
11 - unfの開発版
22 - 変換テーブル生成ソース/データを含む
33 - 本番用(?)のREADMEおよびMakefileには'.production'が付いている
4+
5+[変換テーブル生成方法]
6+# ※ SBCLが必要
7+# ※ gen-table.lispはいろいろ未整理
8+$ cd lisp
9+$ sbcl --script gen-table.lisp ../data/ ../src/unf/table.hh
Show on old repository browser