• R/O
  • SSH
  • HTTPS

creole: Commit


Commit MetaInfo

Revision13 (tree)
Time2010-05-02 21:55:07
Authorphjgt

Log Message

octets-to-string: 一通り実装終了

Change Summary

Incremental Difference

--- package.lisp (revision 12)
+++ package.lisp (revision 13)
@@ -21,4 +21,5 @@
2121 #-SBCL 'array-index)
2222
2323 (defvar *fastest* '(optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)))
24+(defvar *interface* '(optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
2425
--- trie.lisp (revision 12)
+++ trie.lisp (revision 13)
@@ -19,9 +19,9 @@
1919 (+ node code))
2020
2121 (defun to-unicode (octets start trie)
22- (declare (optimize (speed 3) (debug 0) (safety 0))
22+ (declare #.*fastest*
2323 (simple-octets octets)
24- (fixnum start)
24+ (array-index start)
2525 (trie trie))
2626
2727 (when (< (aref octets start) #x80)
--- octets-to-string.lisp (revision 12)
+++ octets-to-string.lisp (revision 13)
@@ -2,7 +2,8 @@
22
33 ;;;;;;;;;;;
44 ;;; declaim
5-(declaim (inline get-decode-trie general-octets-to-string))
5+(declaim (inline get-decode-trie general-octets-to-string)
6+ (ftype (function (simple-octets &key (:external-format t)) (values simple-characters boolean)) octets-to-string))
67
78 ;;;;;;;;;;;;;;;;;;;;
89 ;;; special variable
@@ -36,13 +37,13 @@
3637
3738 ;;;;;;;;;;;;;;;;;;;;;
3839 ;;; external function
39-(defun octets-to-string (octets &key (external-format *default-external-format*) (safe t))
40- (declare #.*fastest*)
40+(defun octets-to-string (octets &key (external-format *default-external-format*))
41+ (declare #.*interface*)
4142 (check-type octets simple-octets)
4243 (locally
4344 (declare (simple-octets octets))
4445 (case (external-format-key external-format)
45- (:|utf-8| (utf8-octets-to-string octets safe))
46+ (:|utf-8| (utf8-octets-to-string octets))
4647 (:|utf-16be| (utf16-octets-to-string octets :be))
4748 (:|utf-16le| (utf16-octets-to-string octets :le))
4849 (t (general-octets-to-string octets (get-decode-trie external-format))))))
--- string-to-octets.lisp (revision 12)
+++ string-to-octets.lisp (revision 13)
@@ -2,7 +2,7 @@
22
33 ;;;;;;;;;;;
44 ;;; declaim
5-(declaim (inline get-encode-table #+C string-to-octets legal-string-to-octets illegal-string-to-octets)
5+(declaim (inline get-encode-table legal-string-to-octets illegal-string-to-octets)
66 (ftype (function (string &key (:external-format t)) (values simple-octets boolean)) string-to-octets)
77 (ftype (function (t) simple-vector) get-encode-table))
88
@@ -41,15 +41,15 @@
4141 (code-limit (length table))
4242 (i -1))
4343 (each-char-code (code string (values buf nil))
44- (loop FOR o ACROSS (the simple-octets
45- (or (and (< code code-limit) (svref table code))
46- +UNKNOWN-OCTETS+)) DO
47- (setf (aref buf (incf (the fixnum i))) o)))))
44+ (let ((octets (or (and (< code code-limit) (svref table code))
45+ +UNKNOWN-OCTETS+)))
46+ (loop FOR o ACROSS (the simple-octets octets) DO
47+ (setf (aref buf (incf (the fixnum i))) o))))))
4848
4949 ;;;;;;;;;;;;;;;;;;;;;
5050 ;;; external function
5151 (defun string-to-octets (string &key (external-format *default-external-format*))
52- (declare #.*fastest*)
52+ (declare #.*interface*)
5353 (ensure-simple-characters string
5454 (case (external-format-key external-format)
5555 (:|utf-8| (utf8-string-to-octets string))
@@ -62,10 +62,9 @@
6262 (len 0))
6363 (declare (fixnum len))
6464 (each-char-code (code string)
65- (let ((octets (and (< code code-limit) (svref table code))))
66- (when (null octets)
67- (setf octets +UNKNOWN-OCTETS+
68- including-illegal-character? t))
65+ (let ((octets (and (or (< code code-limit) (svref table code))
66+ (progn (setf including-illegal-character? t)
67+ +UNKNOWN-OCTETS+))))
6968 (incf len (length (the simple-octets octets)))))
7069 (if including-illegal-character?
7170 (illegal-string-to-octets string len table)
--- utf8.lisp (revision 12)
+++ utf8.lisp (revision 13)
@@ -3,9 +3,8 @@
33 ;;;;;;;;;;;
44 ;;; declaim
55 (declaim (inline bit-off? bit-val 10xxxxxx-p
6- utf8-octets-length utf8-string-length
7- utf8-octets-to-string-safe utf8-octets-to-string-unsafe
8- utf8-octets-to-unicode utf8-octets-to-string))
6+ utf8-octets-length
7+ utf8-octets-to-unicode utf8-octets-to-string utf8-string-to-octets))
98
109 ;;;;;;;;;;;;;;;;;;;;;
1110 ;;; internal function
@@ -21,11 +20,11 @@
2120 (defun utf8-octets-to-unicode(octets pos string j octets-len &aux (os octets))
2221 (declare #.*fastest*
2322 (simple-octets os)
24- (fixnum pos j octets-len)
23+ (optimize-hack-array-index pos j octets-len)
2524 (simple-characters string))
2625 (macrolet ((with-validate (num exp)
2726 `(if (and (< (+ ,num pos) octets-len)
28- ;; TODO: 一番初めは0ではいけない -> check
27+ (/= (bit-val ,(- 6 num) octet) 0)
2928 ,@(loop FOR i fixnum FROM 1 TO num
3029 COLLECT `(10xxxxxx-p (aref os (+ ,i pos)))))
3130 (values ,exp
@@ -62,9 +61,13 @@
6261
6362 (t (values +UNKNOWN-CODE+ (1+ pos) nil)))
6463 (setf (aref string j) (code-char code))
65- (values (the fixnum new-pos) legal?)))))
64+ (values new-pos legal?)))))
6665
67-(defun utf8-octets-to-string-safe (octets &aux (len (length octets)))
66+;;;;;;;;;;;;;;;;;;;;
67+;;; octets => string
68+(defun utf8-octets-to-string (octets &aux (len (length octets)))
69+ (declare #.*fastest*
70+ (simple-octets octets))
6871 (let ((buf (make-array len :element-type 'character))
6972 (including-illegal-octet? nil)
7073 (legal? t))
@@ -77,55 +80,7 @@
7780 (unless legal?
7881 (setf including-illegal-octet? t)))))
7982
80-(defun utf8-string-length (octets octets-len)
81- (do ((i 0)
82- (len 0 (1+ len)))
83- ((= i octets-len) len)
84- (declare (optimize-hack-array-index i len))
85- (let ((octet (aref octets i)))
86- (cond ((bit-off? 7 octet) (incf i))
87- ((bit-off? 5 octet) (incf i 2))
88- ((bit-off? 4 octet) (incf i 3))
89- ((bit-off? 3 octet) (incf i 4))))))
90-
91-(defun utf8-octets-to-string-unsafe (octets &aux (len (length octets)))
92- (let ((buf (make-array len #+C(utf8-string-length octets len) :element-type 'character)))
93- (do ((i 0 (1+ i))
94- (j 0))
95- ((= j len) (values (subseq buf 0 i) t))
96- (declare (optimize-hack-array-index i j))
97- (flet ((add-char (code) (setf (aref buf i) (code-char code))))
98- (declare (inline add-char))
99- (let ((octet (aref octets j)))
100- (cond ((bit-off? 7 octet)
101- (add-char octet)
102- (incf j))
103- ((bit-off? 5 octet)
104- (add-char (+ (bit-val 5 octet 6)
105- (bit-val 6 (aref octets (+ 1 j)))))
106- (incf j 2))
107- ((bit-off? 4 octet)
108- (add-char (+ (bit-val 4 octet 12)
109- (bit-val 6 (aref octets (+ 1 j)) 6)
110- (bit-val 6 (aref octets (+ 2 j)))))
111- (incf j 3))
112- ((bit-off? 3 octet)
113- (add-char (+ (bit-val 3 octet 18)
114- (bit-val 6 (aref octets (+ 1 j)) 12)
115- (bit-val 6 (aref octets (+ 2 j)) 6)
116- (bit-val 6 (aref octets (+ 3 j)))))
117- (incf j 4))))))))
118-
11983 ;;;;;;;;;;;;;;;;;;;;
120-;;; octets => string
121-(defun utf8-octets-to-string (octets safe)
122- (declare #.*fastest*
123- (simple-octets octets))
124- (if safe
125- (utf8-octets-to-string-safe octets)
126- (utf8-octets-to-string-unsafe octets)))
127-
128-;;;;;;;;;;;;;;;;;;;;
12984 ;;; string => octets
13085 (defun utf8-octets-length (string &aux (len 0))
13186 (each-char-code (cd string len)
Show on old repository browser