-
Notifications
You must be signed in to change notification settings - Fork 12
/
utf8.cl
132 lines (100 loc) · 3.48 KB
/
utf8.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
;; -*- mode: common-lisp -*-
;; See the file LICENSE for the full license governing this code.
(in-package :user)
(defmacro mi-to-fixnum (value)
`(comp::ll :mi-to-fixnum ,value))
(defmacro mi (value)
`(comp::ll :fixnum-to-mi ,value))
(defmacro mi-incf (var &optional (amt 1))
`(setf ,var (comp::ll :+ ,var (mi ,amt))))
(defmacro mi-= (var const)
`(comp::ll := ,var (mi ,const)))
(defmacro mi-<= (var const)
`(comp::ll :<= ,var (mi ,const)))
(defmacro mi-sub (expr1 expr2)
`(comp::ll :- ,expr1 ,expr2))
(defmacro mi-or (expr1 expr2 &rest exprs)
(if (constantp expr1)
(setf expr1 `(mi ,expr1)))
(if (constantp expr2)
(setf expr2 `(mi ,expr2)))
(if* (zerop (length exprs))
then `(comp::ll :logior ,expr1 ,expr2)
else `(mi-or (comp::ll :logior ,expr1 ,expr2) ,@exprs)))
(defmacro mi-and (expr1 expr2)
(if (constantp expr1)
(setf expr1 `(mi ,expr1)))
(if (constantp expr2)
(setf expr2 `(mi ,expr2)))
`(comp::ll :logand ,expr1 ,expr2))
(defmacro mi-lsr (value amount)
`(comp::ll :lsr ,value (mi ,amount)))
(defmacro mi-lsl (value amount)
`(comp::ll :lsl ,value (mi ,amount)))
(defmacro aref-ubyte-vec (vec)
`(comp::ll :aref-ubyte ,vec (mi #.(sys::mdparam 'comp::md-lvector-data0-norm))))
(defmacro aref-uword-vec (vec)
`(comp::ll :aref-uword ,vec (mi #.(sys::mdparam 'comp::md-lvector-data0-norm))))
(defmacro aset-byte-vec (vec value)
`(comp::ll :aset-byte ,vec (mi #.(sys::mdparam 'comp::md-lvector-data0-norm)) ,value))
(defmacro aset-word-vec (vec value)
`(comp::ll :aset-word ,vec (mi #.(sys::mdparam 'comp::md-lvector-data0-norm)) ,value))
;; Expects 16-bit chars
;; Returns number of bytes encoded.
(defun string-to-utf8 (string vec pos)
(declare (optimize (speed 3) (safety 0))
(simple-string string)
((simple-array (unsigned-byte 8) (*)) vec))
(let ((remaining (length string)))
(declare (fixnum remaining))
(mi-incf vec pos)
(let ((orig-vec vec))
(while (not (zerop remaining))
(macrolet ((put (value)
`(progn (aset-byte-vec vec ,value)
(mi-incf vec))))
(let ((code (aref-uword-vec string)))
(mi-incf string 2)
(decf remaining)
(if* (mi-<= code #x7f)
then ;; simple-ascii
(put code)
elseif (mi-<= code #x7ff)
then ;; two byte encoding
(put (mi-or #xc0 (mi-lsr code 6)))
(put (mi-or #x80 (mi-and #x3f code)))
else ;; three byte encoding
(put (mi-or #xe0 (mi-lsr code 12)))
(put (mi-or #x80 (mi-and #x3f (mi-lsr code 6))))
(put (mi-or #x80 (mi-and #x3f code)))))))
(mi-to-fixnum (mi-sub vec orig-vec)))))
;; Returns # of characters decoded.
(defun utf8-to-string (vec start len out)
(declare (optimize (speed 3) (safety 0))
(fixnum len))
(mi-incf vec start)
(let ((orig-out out))
(while (not (zerop len))
(macrolet ((nextbyte ()
`(prog1 (aref-ubyte-vec vec)
(mi-incf vec)
(decf len)))
(outchar (code)
`(progn
(aset-word-vec out ,code)
(mi-incf out 2)))
(lowsix (value)
`(mi-and #x3f ,value)))
(let ((b (nextbyte)))
(if* (mi-<= b #x7f)
then (outchar b)
elseif (mi-= (mi-and #xe0 b) #xc0)
then ;; 2 byte encoding
(outchar (mi-or (mi-lsl (mi-and #b11111 b) 6)
(lowsix (nextbyte))))
else ;; 3 byte encoding
(outchar (mi-or
(mi-lsl (mi-and b #xf) 12)
(mi-lsl (lowsix (nextbyte)) 6)
(lowsix (nextbyte))))))))
(mi-to-fixnum (mi-lsr (mi-sub out orig-out) 1))))