commit
@@ -17,4 +17,5 @@ | ||
17 | 17 | |
18 | 18 | (deftype octet () '(unsigned-byte 8)) |
19 | 19 | (deftype simple-octets () '(simple-array octet)) |
20 | -(deftype positive-fixnum () '(integer 1 #.most-positive-fixnum)) | |
\ No newline at end of file | ||
20 | +(deftype positive-fixnum () '(integer 1 #.most-positive-fixnum)) | |
21 | +(deftype array-index () '(integer 0 #.array-total-size-limit)) | |
\ No newline at end of file |
@@ -73,8 +73,8 @@ | ||
73 | 73 | |
74 | 74 | (case (external-format-key external-format) |
75 | 75 | (:|utf-8| (utf8-string-to-octets string)) |
76 | - (:|utf-16be| (utf16-string-to-octets string :be)) | |
77 | - (:|utf-16le| (utf16-string-to-octets string :le)) | |
76 | + (:|utf-16be| (utf16-string-to-octets string :be replace-fn)) | |
77 | + (:|utf-16le| (utf16-string-to-octets string :le replace-fn)) | |
78 | 78 | (t |
79 | 79 | (let* ((table (get-encode-table external-format)) |
80 | 80 | (code-limit (length (the simple-vector table))) |
@@ -28,18 +28,22 @@ | ||
28 | 28 | |
29 | 29 | ;;;;;;;;;;;;;;;;;;;; |
30 | 30 | ;;; string => octets |
31 | -(defmacro utf16-string-to-octets (string endian) | |
31 | +(defmacro utf16-string-to-octets (string endian replace-fn) | |
32 | 32 | (symbol-macrolet ((p1 (if (eq endian :be) 8 0)) |
33 | 33 | (p2 (if (eq endian :be) 0 8))) |
34 | - `(let ((octets (make-array (* 4 (length ,string)) | |
35 | - :element-type 'octet)) | |
34 | + `(let ((octets (make-array (* 4 (length ,string)) :element-type 'octet)) | |
36 | 35 | (i -1)) |
37 | 36 | (declare (fixnum i)) |
38 | - (loop FOR char ACROSS string | |
37 | + (loop FOR char ACROSS ,string | |
39 | 38 | FOR code = (char-code char) DO |
40 | 39 | (if (< code #x10000) |
41 | - (setf (aref octets (incf i)) (ldb (byte 8 ,p1) code) | |
42 | - (aref octets (incf i)) (ldb (byte 8 ,p2) code)) | |
40 | + (progn | |
41 | + (loop WHILE (<= #xD800 code #xDFFF) DO | |
42 | + (multiple-value-bind (new-char) (funcall ,replace-fn code) | |
43 | + (check-type new-char character) | |
44 | + (setf code (char-code new-char)))) | |
45 | + (setf (aref octets (incf i)) (ldb (byte 8 ,p1) code) | |
46 | + (aref octets (incf i)) (ldb (byte 8 ,p2) code))) | |
43 | 47 | (let ((low (low-surrogate code)) |
44 | 48 | (high (high-surrogate code))) |
45 | 49 | (setf (aref octets (incf i)) (ldb (byte 8 ,p1) high) |
@@ -122,4 +126,4 @@ | ||
122 | 126 | (setf surrogate code)) |
123 | 127 | (t |
124 | 128 | (setf (aref buf (incf pos)) (code-char code))))) |
125 | - (subseq buf 0 (1+ pos))))) | |
129 | + (subseq buf 0 (1+ pos))))) | |
\ No newline at end of file |
@@ -8,6 +8,7 @@ | ||
8 | 8 | |
9 | 9 | ;;;;;;;;;;;; |
10 | 10 | ;;; constant |
11 | +#-SBCL | |
11 | 12 | (defconst-onceonly +UNICODE=>UTF8+ |
12 | 13 | (let ((table (make-array #x10000))) |
13 | 14 | (loop FOR code FROM #x0 BELOW #x100 DO |
@@ -109,6 +110,9 @@ | ||
109 | 110 | (defun utf8-string-to-octets (string) |
110 | 111 | (declare (optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)) |
111 | 112 | (simple-string string)) |
113 | + ;; TODO: 全部自作する | |
114 | + #+SBCL (sb-ext:string-to-octets string :external-format :utf-8) | |
115 | + #-SBCL | |
112 | 116 | (let ((len 0)) |
113 | 117 | (declare (fixnum len)) |
114 | 118 |
@@ -119,6 +123,7 @@ | ||
119 | 123 | ((< cd #x10000) (incf len 3)) |
120 | 124 | (t (incf len 4)))) |
121 | 125 | |
126 | + ;; TODO: if len = str-len | |
122 | 127 | (let ((octets (make-array len :element-type 'octet)) |
123 | 128 | (i -1)) |
124 | 129 | (declare (fixnum i)) |