• R/O
  • SSH
  • HTTPS

creole: Commit


Commit MetaInfo

Revision6 (tree)
Time2010-04-19 14:04:53
Authorphjgt

Log Message

commit

Change Summary

Incremental Difference

--- package.lisp (revision 5)
+++ package.lisp (revision 6)
@@ -18,4 +18,4 @@
1818 (deftype octet () '(unsigned-byte 8))
1919 (deftype simple-octets () '(simple-array octet))
2020 (deftype positive-fixnum () '(integer 1 #.most-positive-fixnum))
21-(deftype array-index () '(integer 0 #.array-total-size-limit))
\ No newline at end of file
21+(deftype array-index () '(integer 0 #.(1- array-total-size-limit)))
\ No newline at end of file
--- trie.lisp (revision 5)
+++ trie.lisp (revision 6)
@@ -32,7 +32,7 @@
3232 (let ((node (aref base 0)))
3333 (loop FOR i fixnum FROM start BELOW (length octets)
3434 FOR code OF-TYPE octet = (aref octets i)
35- FOR idx OF-TYPE fixnum = (next-index (the fixnum node) code) DO
35+ FOR idx OF-TYPE array-index = (next-index (the fixnum node) code) DO
3636 (setf node (aref base idx))
3737
3838 (unless (= (aref chck idx) code)
--- utf8.lisp (revision 5)
+++ utf8.lisp (revision 6)
@@ -3,31 +3,8 @@
33 ;;;;;;;;;;;
44 ;;; declaim
55 (declaim (inline bit-off? bit-val 10xxxxxx-p
6- utf8-octets-to-unicode utf8-octets-to-string
7- utf8-string-to-octets))
6+ utf8-octets-to-unicode utf8-octets-to-string))
87
9-;;;;;;;;;;;;
10-;;; constant
11-#-SBCL
12-(defconst-onceonly +UNICODE=>UTF8+
13- (let ((table (make-array #x10000)))
14- (loop FOR code FROM #x0 BELOW #x100 DO
15- (setf (aref table code)
16- (coerce `(,code)
17- '(vector octet))))
18- (loop FOR code FROM #x100 BELOW #x800 DO
19- (setf (aref table code)
20- (coerce `(,(+ #b11000000 (ldb (byte 5 6) code))
21- ,(+ #b10000000 (ldb (byte 6 0) code)))
22- '(vector octet))))
23- (loop FOR code FROM #x800 BELOW #x10000 DO
24- (setf (aref table code)
25- (coerce `(,(+ #b11100000 (ldb (byte 4 12) code))
26- ,(+ #b10000000 (ldb (byte 6 6) code))
27- ,(+ #b10000000 (ldb (byte 6 0) code)))
28- '(vector octet))))
29- table))
30-
318 ;;;;;;;;;;;;;;;;;;;;;
329 ;;; internal function
3310 (defun bit-off? (pos octet)
@@ -108,33 +85,51 @@
10885
10986 ;;;;;;;;;;;;;;;;;;;;
11087 ;;; string => octets
111-(defun utf8-string-to-octets (string)
88+(declaim (inline utf8-octets-length))
89+(defun utf8-octets-length (string)
11290 (declare (optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0))
11391 (simple-string string))
114- ;; TODO: 全部自作する
115- #+SBCL (sb-ext:string-to-octets string :external-format :utf-8)
116- #-SBCL
11792 (let ((len 0))
11893 (declare (fixnum len))
119-
120- (loop FOR ch ACROSS string
94+ (loop FOR ch ACROSS string
12195 FOR cd = (char-code ch) DO
122- (cond ((< cd #x80) (incf len 1))
123- ((< cd #x800) (incf len 2))
124- ((< cd #x10000) (incf len 3))
125- (t (incf len 4))))
96+ (incf len
97+ (cond ((<= cd #x80) 1)
98+ ((<= cd #x800) 2)
99+ ((<= cd #x10000) 3)
100+ (t 4))))
101+ len))
126102
127- ;; TODO: if len = str-len
128- (let ((octets (make-array len :element-type 'octet))
129- (i -1))
130- (declare (fixnum i))
103+(defun utf8-string-to-octets (string)
104+ (declare (optimize (speed 3) (debug 0) (safety 0))
105+ (simple-string string))
106+ (let* ((octets (make-array (utf8-octets-length string) :element-type 'octet))
107+ (len (length octets))
108+ (i 0))
109+ (declare (array-index i))
110+ (macrolet ((add-octets (&rest octet-list &aux (n (length octet-list)))
111+ (declare (optimize (speed 0)))
112+ `(progn ,@(loop FOR i FROM 0 BELOW n
113+ FOR o IN octet-list COLLECT
114+ `(setf (aref octets (+ i ,i)) ,o))
115+ (incf i ,n))))
131116 (loop FOR ch ACROSS string
132117 FOR cd = (char-code ch) DO
133- (if (< cd #x10000)
134- (loop FOR o ACROSS (the simple-octets (svref +UNICODE=>UTF8+ cd)) DO
135- (setf (aref octets (incf i)) o))
136- (setf (aref octets (incf i)) (+ #b11110000 (ldb (byte 3 18) cd))
137- (aref octets (incf i)) (+ #b10000000 (ldb (byte 6 12) cd))
138- (aref octets (incf i)) (+ #b10000000 (ldb (byte 6 6) cd))
139- (aref octets (incf i)) (+ #b10000000 (ldb (byte 6 0) cd)))))
140- octets)))
\ No newline at end of file
118+ (cond ((<= cd #x80)
119+ (add-octets cd))
120+ ((<= cd #x800)
121+ (add-octets (+ #b11000000 (ldb (byte 5 6) cd))
122+ (+ #b10000000 (ldb (byte 6 0) cd))))
123+ ((<= cd #x10000)
124+ #+C (when (<= #xd800 cd #xdfff)
125+ (error ""))
126+ (assert (<= (+ i 3) len))
127+ (add-octets (+ #b11100000 (ldb (byte 4 12) cd))
128+ (+ #b10000000 (ldb (byte 6 6) cd))
129+ (+ #b10000000 (ldb (byte 6 0) cd))))
130+ (t
131+ (add-octets (+ #b11110000 (ldb (byte 3 18) cd))
132+ (+ #b10000000 (ldb (byte 6 12) cd))
133+ (+ #b10000000 (ldb (byte 6 6) cd))
134+ (+ #b10000000 (ldb (byte 6 0) cd)))))))
135+ octets))
\ No newline at end of file
Show on old repository browser