• R/O
  • SSH
  • HTTPS

creole: Commit


Commit MetaInfo

Revision24 (tree)
Time2010-05-09 09:00:54
Authorphjgt

Log Message

charseqを使用しての実験中

Change Summary

Incremental Difference

--- util.lisp (revision 23)
+++ util.lisp (revision 24)
@@ -19,6 +19,11 @@
1919 (declare (array-index ,i))
2020 ,@body)))
2121
22+(defmacro each-code ((code charseq &key return) &body body)
23+ `(charseq:each (#1=#:char ,charseq ,return)
24+ (let ((,code (char-code #1#)))
25+ ,@body)))
26+
2227 (defun to-simple-characters (source start end)
2328 (let ((dist (make-array (- end start) :element-type 'character)))
2429 (loop FOR i FROM start BELOW end
@@ -26,6 +31,7 @@
2631 (setf (aref dist j) (muffle-warn (aref source i))))
2732 dist))
2833
34+;; TODO: delete
2935 (defmacro ensure-simple-characters ((s start end) &body body)
3036 `(multiple-value-bind (,s ,start ,end)
3137 (etypecase ,s
--- string-to-octets.lisp (revision 23)
+++ string-to-octets.lisp (revision 24)
@@ -4,8 +4,8 @@
44 ;;; declaim
55 (declaim (inline get-encode-table legal-string-to-octets illegal-string-to-octets)
66 (ftype (function (string &key (:external-format t)
7- (:start array-index)
8- (:end array-index))
7+ (:start charseq:index)
8+ (:end charseq:index))
99 (values simple-octets boolean)) string-to-octets)
1010 (ftype (function (t) simple-vector) get-encode-table))
1111
@@ -32,18 +32,21 @@
3232 it
3333 (setf #1# (load-encode-table external-format))))
3434
35-(defun legal-string-to-octets (string octets-length table start end)
35+(defun legal-string-to-octets (charseq octets-length table)
36+ (declare #.*fastest*
37+ (charseq:charseq charseq)
38+ (optimize-hack-array-index octets-length))
3639 (let ((buf (make-array octets-length :element-type 'octet))
3740 (i -1))
38- (each-char-code (code string :start start :end end :return (values buf t))
41+ (each-code (code charseq :return (values buf t))
3942 (loop FOR o ACROSS (the simple-octets (svref table code)) DO
4043 (setf (aref buf (incf (the fixnum i))) o)))))
4144
42-(defun illegal-string-to-octets (string octets-length table start end)
45+(defun illegal-string-to-octets (charseq octets-length table)
4346 (let ((buf (make-array octets-length :element-type 'octet))
4447 (code-limit (length table))
4548 (i -1))
46- (each-char-code (code string :start start :end end :return (values buf nil))
49+ (each-code (code charseq :return (values buf nil))
4750 (let ((octets (or (and (< code code-limit) (svref table code))
4851 +UNKNOWN-OCTETS+)))
4952 (loop FOR o ACROSS (the simple-octets octets) DO
@@ -55,27 +58,24 @@
5558 (start 0)
5659 (end (length string)))
5760 (declare #.*interface*)
58- (ensure-simple-characters (string start end)
59- (let ((end (min end (length string))))
60- (declare #.*fastest*)
61- (when (> start end)
62- (return-from string-to-octets (values (coerce '() '(vector octet)) t)))
63-
64- (case (external-format-key external-format)
65- (:|utf-8| (utf8-string-to-octets string start end))
66- (:|utf-16be| (utf16-string-to-octets string start end :be))
67- (:|utf-16le| (utf16-string-to-octets string start end :le))
68- (t
69- (let* ((table (get-encode-table external-format))
70- (code-limit (length table))
71- (including-illegal-character? nil)
72- (len 0))
73- (declare (fixnum len))
74- (each-char-code (code string :start start :end end)
75- (let ((octets (or (and (< code code-limit) (svref table code))
76- (progn (setf including-illegal-character? t)
77- +UNKNOWN-OCTETS+))))
78- (incf len (length (the simple-octets octets)))))
79- (if including-illegal-character?
80- (illegal-string-to-octets string len table start end)
81- (legal-string-to-octets string len table start end))))))))
\ No newline at end of file
61+ (let ((charseq (charseq:make string :start start :end end)))
62+ (declare #.*fastest*)
63+
64+ (case (external-format-key external-format)
65+ (:|utf-8| (utf8-string-to-octets charseq))
66+ (:|utf-16be| (utf16-string-to-octets charseq :be))
67+ (:|utf-16le| (utf16-string-to-octets charseq :le))
68+ (t
69+ (let* ((table (get-encode-table external-format))
70+ (code-limit (length table))
71+ (including-illegal-character? nil)
72+ (len 0))
73+ (declare (fixnum len))
74+ (each-code (code charseq)
75+ (let ((octets (or (and (< code code-limit) (svref table code))
76+ (progn (setf including-illegal-character? t)
77+ +UNKNOWN-OCTETS+))))
78+ (incf len (length (the simple-octets octets)))))
79+ (if including-illegal-character?
80+ (illegal-string-to-octets charseq len table)
81+ (legal-string-to-octets charseq len table)))))))
\ No newline at end of file
--- utf16.lisp (revision 23)
+++ utf16.lisp (revision 24)
@@ -29,13 +29,13 @@
2929
3030 ;;;;;;;;;;;;;;;;;;;;
3131 ;;; string => octets
32-(defmacro utf16-string-to-octets (string start end endian)
32+(defmacro utf16-string-to-octets (charseq endian)
3333 (symbol-macrolet ((p1 (if (eq endian :be) 8 0))
3434 (p2 (if (eq endian :be) 0 8)))
35- `(let* ((has-surrogate? (each-char-code (cd ,string :return nil)
35+ `(let* ((has-surrogate? (each-code (cd ,charseq :return nil)
3636 (when (>= cd #x10000)
3737 (return t))))
38- (buf-len (* (if has-surrogate? 4 2) (- ,end ,start)))
38+ (buf-len (* (if has-surrogate? 4 2) (charseq:length ,charseq)))
3939 (legal-octets? t)
4040 (octets (make-array buf-len :element-type 'octet))
4141 (i 0))
@@ -47,8 +47,7 @@
4747 `(setf (aref octets (+ i ,i)) ,o))
4848 (incf i ,n))))
4949 (if (not has-surrogate?)
50- (each-char-code (cd ,string :start ,start :end ,end
51- :return (values octets legal-octets?))
50+ (each-code (cd ,charseq :return (values octets legal-octets?))
5251 #1=(progn
5352 (when (<= #xD800 cd #xDFFF)
5453 (setf cd +UNKNOWN-CODE+
@@ -55,8 +54,7 @@
5554 legal-octets? nil))
5655 (add-octets (ldb (byte 8 ,p1) cd)
5756 (ldb (byte 8 ,p2) cd))))
58- (each-char-code (cd ,string :start ,start :end ,end
59- :return (values (subseq octets 0 i) legal-octets?))
57+ (each-code (cd ,charseq :return (values (subseq octets 0 i) legal-octets?))
6058 (if (< cd #x10000)
6159 #1#
6260 (let ((low (low-surrogate cd))
--- utf8.lisp (revision 23)
+++ utf8.lisp (revision 24)
@@ -77,8 +77,8 @@
7777
7878 ;;;;;;;;;;;;;;;;;;;;
7979 ;;; string => octets
80-(defun utf8-octets-length (string start end &aux (len 0))
81- (each-char-code (cd string :start start :end end :return len)
80+(defun utf8-octets-length (charseq &aux (len 0))
81+ (each-code (cd charseq :return len)
8282 (incf (the array-index len)
8383 (cond ((< cd #x80) 1)
8484 ((< cd #x800) 2)
@@ -85,8 +85,8 @@
8585 ((< cd #x10000) 3)
8686 (t 4)))))
8787
88-(defun utf8-string-to-octets (string start end)
89- (let ((octets (make-array (utf8-octets-length string start end) :element-type 'octet))
88+(defun utf8-string-to-octets (charseq)
89+ (let ((octets (make-array (utf8-octets-length charseq) :element-type 'octet))
9090 (i 0))
9191 (declare (optimize-hack-array-index i))
9292 (macrolet ((add-octets (&rest octet-list &aux (n (length octet-list)))
@@ -95,7 +95,7 @@
9595 FOR o IN octet-list COLLECT
9696 `(setf (aref octets (+ i ,i)) ,o))
9797 (incf i ,n))))
98- (each-char-code (cd string :start start :end end)
98+ (each-code (cd charseq)
9999 (cond ((< cd #x80)
100100 (add-octets cd))
101101 ((< cd #x800)
Show on old repository browser