• R/O
  • SSH
  • HTTPS

creole: Commit


Commit MetaInfo

Revision14 (tree)
Time2010-05-03 02:25:06
Authorphjgt

Log Message

コード整理終了

Change Summary

Incremental Difference

--- package.lisp (revision 13)
+++ package.lisp (revision 14)
@@ -6,7 +6,7 @@
66 *default-external-format*))
77 (in-package :creole)
88
9-(defparameter *data-dir*
9+(defvar *data-dir*
1010 (make-pathname
1111 :directory (pathname-directory
1212 (merge-pathnames #P"data/" *load-pathname*))))
@@ -13,13 +13,10 @@
1313
1414 (deftype octet () '(unsigned-byte 8))
1515 (deftype simple-octets () '(simple-array octet))
16-(deftype positive-fixnum () '(integer 1 #.most-positive-fixnum))
16+(deftype simple-characters () '(simple-array character (*)))
1717 (deftype array-index () '(integer 0 #.(1- array-total-size-limit)))
18-(deftype simple-characters () '(simple-array character (*)))
19-
2018 (deftype optimize-hack-array-index () #+SBCL '(integer 0 100)
2119 #-SBCL 'array-index)
2220
23-(defvar *fastest* '(optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)))
24-(defvar *interface* '(optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
25-
21+(defvar *fastest* '(optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)))
22+(defvar *interface* '(optimize (speed 3) (debug 1) (safety 1) (compilation-speed 0)))
\ No newline at end of file
--- util.lisp (revision 13)
+++ util.lisp (revision 14)
@@ -11,12 +11,6 @@
1111 #+SBCL (declare (sb-ext:muffle-conditions sb-ext:compiler-note))
1212 ,@body))
1313
14-;; TODO: なくす
15-(defmacro ensure-function (function-desginator)
16- `(etypecase ,function-desginator
17- (function)
18- (symbol (setf ,function-desginator (symbol-function ,function-desginator)))))
19-
2014 (defmacro each-char-code ((code string &optional return) &body body)
2115 (let ((char (gensym)))
2216 `(loop FOR ,char ACROSS ,string
--- COPYING (revision 13)
+++ COPYING (revision 14)
@@ -1 +1,21 @@
1-
\ No newline at end of file
1+The MIT License
2+
3+Copyright (c) 2010 Takeru Ohta <phjgt308@ybb.ne.jp>
4+
5+Permission is hereby granted, free of charge, to any person obtaining a copy
6+of this software and associated documentation files (the "Software"), to deal
7+in the Software without restriction, including without limitation the rights
8+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+copies of the Software, and to permit persons to whom the Software is
10+furnished to do so, subject to the following conditions:
11+
12+The above copyright notice and this permission notice shall be included in
13+all copies or substantial portions of the Software.
14+
15+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+THE SOFTWARE.
--- trie.lisp (revision 13)
+++ trie.lisp (revision 14)
@@ -6,6 +6,7 @@
66 (base #() :type (simple-array (or character fixnum)) :read-only t)
77 (chck #() :type (simple-array (signed-byte 16)) :read-only t))
88
9+
910 (defun load-trie (path)
1011 (with-open-file (in path :element-type 'octet)
1112 (let ((size (/ (file-length in) 6)))
@@ -19,11 +20,6 @@
1920 (+ node code))
2021
2122 (defun to-unicode (octets start trie)
22- (declare #.*fastest*
23- (simple-octets octets)
24- (array-index start)
25- (trie trie))
26-
2723 (when (< (aref octets start) #x80)
2824 (return-from to-unicode (values (code-char (aref octets start)) (1+ start))))
2925
--- octets-to-string.lisp (revision 13)
+++ octets-to-string.lisp (revision 14)
@@ -20,8 +20,6 @@
2020 (merge-pathnames #P"decode/" *data-dir*))))))
2121
2222 (defun general-octets-to-string (octets trie)
23- (declare #.*fastest*
24- (simple-octets octets))
2523 (let* ((len (length octets))
2624 (buf (make-array len :element-type 'character))
2725 (tail-pos -1)
@@ -39,11 +37,11 @@
3937 ;;; external function
4038 (defun octets-to-string (octets &key (external-format *default-external-format*))
4139 (declare #.*interface*)
42- (check-type octets simple-octets)
40+ #-SBCL (check-type octets simple-octets)
4341 (locally
44- (declare (simple-octets octets))
42+ (declare #.*fastest*)
4543 (case (external-format-key external-format)
46- (:|utf-8| (utf8-octets-to-string octets))
47- (:|utf-16be| (utf16-octets-to-string octets :be))
48- (:|utf-16le| (utf16-octets-to-string octets :le))
49- (t (general-octets-to-string octets (get-decode-trie external-format))))))
44+ (:|utf-8| (utf8-octets-to-string octets))
45+ (:|utf-16be| (utf16-octets-to-string octets :be))
46+ (:|utf-16le| (utf16-octets-to-string octets :le))
47+ (t (general-octets-to-string octets (get-decode-trie external-format))))))
\ No newline at end of file
--- string-to-octets.lisp (revision 13)
+++ string-to-octets.lisp (revision 14)
@@ -50,22 +50,24 @@
5050 ;;; external function
5151 (defun string-to-octets (string &key (external-format *default-external-format*))
5252 (declare #.*interface*)
53- (ensure-simple-characters string
54- (case (external-format-key external-format)
55- (:|utf-8| (utf8-string-to-octets string))
56- (:|utf-16be| (utf16-string-to-octets string :be))
57- (:|utf-16le| (utf16-string-to-octets string :le))
58- (t
59- (let* ((table (get-encode-table external-format))
60- (code-limit (length table))
61- (including-illegal-character? nil)
62- (len 0))
63- (declare (fixnum len))
64- (each-char-code (code string)
65- (let ((octets (and (or (< code code-limit) (svref table code))
66- (progn (setf including-illegal-character? t)
67- +UNKNOWN-OCTETS+))))
68- (incf len (length (the simple-octets octets)))))
69- (if including-illegal-character?
70- (illegal-string-to-octets string len table)
71- (legal-string-to-octets string len table)))))))
\ No newline at end of file
53+ (locally
54+ (declare #.*fastest*)
55+ (ensure-simple-characters string
56+ (case (external-format-key external-format)
57+ (:|utf-8| (utf8-string-to-octets string))
58+ (:|utf-16be| (utf16-string-to-octets string :be))
59+ (:|utf-16le| (utf16-string-to-octets string :le))
60+ (t
61+ (let* ((table (get-encode-table external-format))
62+ (code-limit (length table))
63+ (including-illegal-character? nil)
64+ (len 0))
65+ (declare (fixnum len))
66+ (each-char-code (code string)
67+ (let ((octets (and (or (< code code-limit) (svref table code))
68+ (progn (setf including-illegal-character? t)
69+ +UNKNOWN-OCTETS+))))
70+ (incf len (length (the simple-octets octets)))))
71+ (if including-illegal-character?
72+ (illegal-string-to-octets string len table)
73+ (legal-string-to-octets string len table))))))))
\ No newline at end of file
--- utf16.lisp (revision 13)
+++ utf16.lisp (revision 14)
@@ -3,7 +3,7 @@
33 ;;;;;;;;;;;
44 ;;; declaim
55 (declaim (inline low-surrogate high-surrogate
6- decode-surrogate-pair to-utf16le-code to-utf16be-code))
6+ to-utf16le-code to-utf16be-code))
77
88 ;;;;;;;;;;;;;;;;;;;;;;
99 ;;; auxiliary function
@@ -21,10 +21,11 @@
2121 (+ (ash (aref octets (+ start 0)) 0)
2222 (ash (aref octets (+ start 1)) 8)))
2323
24-(defun decode-surrogate-pair (high low)
25- (code-char (+ #x10000
26- (ash (ldb (byte 10 0) high) 10)
27- (ash (ldb (byte 10 0) low) 00))))
24+;; see: http://d.hatena.ne.jp/sile/20100502/1272815686
25+(defmacro decode-surrogate-pair (high low)
26+ `(code-char (+ #x10000
27+ (ash (ldb (byte 10 0) ,high) 10)
28+ (ash (ldb (byte 10 0) ,low) 00))))
2829
2930 ;;;;;;;;;;;;;;;;;;;;
3031 ;;; string => octets
@@ -35,23 +36,25 @@
3536 (when (>= cd #x10000)
3637 (return t))))
3738 (buf-len (* (if has-surrogate? 4 2) (length ,string)))
39+ (legal-octets? t)
3840 (octets (make-array buf-len :element-type 'octet))
3941 (i 0))
4042 (declare (optimize-hack-array-index i))
4143 (macrolet ((add-octets (&rest octet-list &aux (n (length octet-list)))
42- (declare (optimize (speed 0)))
43- `(progn ,@(loop FOR i FROM 0 BELOW n
44- FOR o IN octet-list COLLECT
45- `(setf (aref octets (+ i ,i)) ,o))
46- (incf i ,n))))
44+ (declare (optimize (speed 0)))
45+ `(progn ,@(loop FOR i FROM 0 BELOW n
46+ FOR o IN octet-list COLLECT
47+ `(setf (aref octets (+ i ,i)) ,o))
48+ (incf i ,n))))
4749 (if (not has-surrogate?)
48- (each-char-code (cd ,string octets)
49- #1=(if (<= #xD800 cd #xDFFF)
50- (add-octets (ldb (byte 8 ,p1) +UNKNOWN-CODE+)
51- (ldb (byte 8 ,p2) +UNKNOWN-CODE+))
50+ (each-char-code (cd ,string (values octets legal-octets?))
51+ #1=(progn
52+ (when (<= #xD800 cd #xDFFF)
53+ (setf cd +UNKNOWN-CODE+
54+ legal-octets? nil))
5255 (add-octets (ldb (byte 8 ,p1) cd)
5356 (ldb (byte 8 ,p2) cd))))
54- (each-char-code (cd ,string (subseq octets 0 i))
57+ (each-char-code (cd ,string (values (subseq octets 0 i) legal-octets?))
5558 (if (< cd #x10000)
5659 #1#
5760 (let ((low (low-surrogate cd))
@@ -60,7 +63,6 @@
6063 (ldb (byte 8 ,p2) high)
6164 (ldb (byte 8 ,p1) low)
6265 (ldb (byte 8 ,p2) low))))))))))
63-
6466 ;;;;;;;;;;;;;;;;;;;;
6567 ;;; octets => string
6668 (defmacro utf16-octets-to-string (octets endian)
@@ -68,11 +70,11 @@
6870 (buf-len (ceiling octets-len 2))
6971 (buf (make-array buf-len :element-type 'character))
7072 (pos -1)
71- (including-illegal-octet? nil))
73+ (legal-octets? t))
7274 (declare (fixnum pos))
7375 (flet ((add-char (char) (setf (aref buf (incf pos)) char))
7476 (add-unk-char () (setf (aref buf (incf pos)) +UNKNOWN-CHAR+
75- including-illegal-octet? t)))
77+ legal-octets? nil)))
7678 (declare (inline add-char add-unk-char))
7779 (loop WITH surrogate = nil
7880 FOR i FROM 0 BELOW (1- octets-len) BY 2
@@ -94,4 +96,4 @@
9496 (when (oddp octets-len)
9597 (add-unk-char)))
9698 (values (if (= (1+ pos) buf-len) buf (subseq buf 0 (1+ pos)))
97- (not including-illegal-octet?))))
\ No newline at end of file
99+ legal-octets?)))
\ No newline at end of file
--- utf8.lisp (revision 13)
+++ utf8.lisp (revision 14)
@@ -3,11 +3,11 @@
33 ;;;;;;;;;;;
44 ;;; declaim
55 (declaim (inline bit-off? bit-val 10xxxxxx-p
6- utf8-octets-length
7- utf8-octets-to-unicode utf8-octets-to-string utf8-string-to-octets))
6+ utf8-octets-length utf8-octets-to-unicode
7+ utf8-octets-to-string utf8-string-to-octets))
88
9-;;;;;;;;;;;;;;;;;;;;;
10-;;; internal function
9+;;;;;;;;;;;;;;;;;;;;;;
10+;;; auxiliary function
1111 (defun bit-off? (pos octet)
1212 (not (ldb-test (byte 1 pos) octet)))
1313
@@ -17,21 +17,20 @@
1717 (defun 10xxxxxx-p (octet)
1818 (= (ldb (byte 2 6) octet) #b10))
1919
20+;;;;;;;;;;;;;;;;;;;;
21+;;; octets => string
2022 (defun utf8-octets-to-unicode(octets pos string j octets-len &aux (os octets))
21- (declare #.*fastest*
22- (simple-octets os)
23- (optimize-hack-array-index pos j octets-len)
24- (simple-characters string))
2523 (macrolet ((with-validate (num exp)
26- `(if (and (< (+ ,num pos) octets-len)
27- (/= (bit-val ,(- 6 num) octet) 0)
28- ,@(loop FOR i fixnum FROM 1 TO num
24+ (declare (optimize (speed 0)))
25+ `(if (and (< (+ ,num pos) octets-len) ; out of bounds
26+ (/= (bit-val ,(- 6 num) octet) 0) ; redundant expression
27+ ,@(loop FOR i fixnum FROM 1 TO num ; octet since the second
2928 COLLECT `(10xxxxxx-p (aref os (+ ,i pos)))))
3029 (values ,exp
31- (the positive-fixnum (+ ,(1+ num) pos))
30+ (the fixnum (+ ,(1+ num) pos))
3231 t)
3332 (values +UNKNOWN-CODE+
34- (the positive-fixnum (+ ,(1+ num) pos))
33+ (the fixnum (+ ,(1+ num) pos))
3534 nil))))
3635 (let ((octet (aref os pos)))
3736 (multiple-value-bind (code new-pos legal?)
@@ -58,27 +57,22 @@
5857 (bit-val 6 (aref os (+ 1 pos)) 12)
5958 (bit-val 6 (aref os (+ 2 pos)) 6)
6059 (bit-val 6 (aref os (+ 3 pos))))))
61-
60+
6261 (t (values +UNKNOWN-CODE+ (1+ pos) nil)))
6362 (setf (aref string j) (code-char code))
6463 (values new-pos legal?)))))
6564
66-;;;;;;;;;;;;;;;;;;;;
67-;;; octets => string
6865 (defun utf8-octets-to-string (octets &aux (len (length octets)))
69- (declare #.*fastest*
70- (simple-octets octets))
7166 (let ((buf (make-array len :element-type 'character))
72- (including-illegal-octet? nil)
67+ (legal-octets? t)
7368 (legal? t))
7469 (do ((i 0)
7570 (j 0 (1+ j)))
76- ((>= i len) (values (subseq buf 0 j)
77- (not including-illegal-octet?)))
71+ ((>= i len) (values (subseq buf 0 j) legal-octets?))
7872 (declare (optimize-hack-array-index j i))
7973 (setf (values i legal?) (utf8-octets-to-unicode octets i buf j len))
8074 (unless legal?
81- (setf including-illegal-octet? t)))))
75+ (setf legal-octets? nil)))))
8276
8377 ;;;;;;;;;;;;;;;;;;;;
8478 ;;; string => octets
@@ -91,8 +85,6 @@
9185 (t 4)))))
9286
9387 (defun utf8-string-to-octets (string)
94- (declare #.*fastest*
95- (simple-characters string))
9688 (let ((octets (make-array (utf8-octets-length string) :element-type 'octet))
9789 (i 0))
9890 (declare (optimize-hack-array-index i))
--- external-format.lisp (revision 13)
+++ external-format.lisp (revision 14)
@@ -1,11 +1,9 @@
11 (in-package :creole)
22
3-;;;;;;;;;;;
4-;;; declaim
53 (declaim (inline external-format-filename external-format-key))
64
7-;;;;;;;;;;;;;;;;;;;;
8-;;; special variable
5+
6+(defvar *default-external-format* :utf-8)
97 (defvar *external-formats*
108 (loop FOR path IN (remove-if-not #'pathname-name
119 (directory (merge-pathnames "name/*" *data-dir*)))
@@ -13,8 +11,6 @@
1311 (with-open-file (in path)
1412 (read in))))
1513
16-(defvar *default-external-format* :utf-8)
17-
1814 (defvar *external-format=>filename-map*
1915 (let ((map (make-hash-table :test #'eq)))
2016 (loop FOR path IN (remove-if-not #'pathname-name
@@ -24,8 +20,7 @@
2420 (setf (gethash external-format map) (intern (pathname-name path) :keyword)))))
2521 map))
2622
27-;;;;;;;;;;;;;;;;;;;;;
28-;;; internal function
23+
2924 (defun external-format-filename (external-format)
3025 (assert #1=(gethash external-format *external-format=>filename-map*)
3126 (external-format) "Undefined external-format ~S" external-format)
@@ -32,4 +27,4 @@
3227 (values (symbol-name #1#)))
3328
3429 (defun external-format-key (external-format)
35- (values (gethash external-format *external-format=>filename-map*)))
30+ (values (gethash external-format *external-format=>filename-map*)))
\ No newline at end of file
Show on old repository browser