1 | ;;;; unicode-utils.scm -*- Scheme -*- |
---|
2 | ;;;; Kon Lovett, Jul '18 |
---|
3 | ;;;; Kon Lovett, Aug '10 |
---|
4 | |
---|
5 | ;; Issues |
---|
6 | ;; |
---|
7 | ;; - Implies Unicode support that is not present. |
---|
8 | |
---|
9 | (declare |
---|
10 | (bound-to-procedure |
---|
11 | ##sys#string-append |
---|
12 | ##sys#char->utf8-string |
---|
13 | ##sys#unicode-surrogate? |
---|
14 | ##sys#surrogates->codepoint)) |
---|
15 | |
---|
16 | (module unicode-utils |
---|
17 | |
---|
18 | (;export |
---|
19 | ascii-codepoint? |
---|
20 | unicode-char->string |
---|
21 | unicode-string *unicode-string |
---|
22 | generic-make-string |
---|
23 | unicode-make-string *unicode-make-string |
---|
24 | unicode-surrogate? |
---|
25 | unicode-surrogates->codepoint) |
---|
26 | |
---|
27 | (import scheme |
---|
28 | (chicken base) |
---|
29 | (chicken fixnum) |
---|
30 | (chicken type) |
---|
31 | (only srfi-1 every make-list) |
---|
32 | (only srfi-13 string-concatenate) |
---|
33 | (only type-checks check-natural-fixnum check-char)) |
---|
34 | |
---|
35 | ;; Simple UTF 8 |
---|
36 | |
---|
37 | ;nul is not accepted! |
---|
38 | (: ascii-codepoint? (* -> boolean : char)) |
---|
39 | ; |
---|
40 | (define (ascii-codepoint? ch) |
---|
41 | (and |
---|
42 | (char? ch) |
---|
43 | (let ((x (char->integer ch))) |
---|
44 | ;[1 7f] = (1 80) |
---|
45 | (and (fx< 0 x) (fx< x #x80)) ) ) ) |
---|
46 | |
---|
47 | (: unicode-char->string (char --> string)) |
---|
48 | ; |
---|
49 | (define (unicode-char->string ch) |
---|
50 | (##sys#char->utf8-string (check-char 'unicode-char->string ch)) ) |
---|
51 | |
---|
52 | ;inefficient |
---|
53 | (: unicode-string (#!rest (list-of char) --> string)) |
---|
54 | ; |
---|
55 | (define (unicode-string . chs) |
---|
56 | (if (null? chs) |
---|
57 | "" |
---|
58 | (begin |
---|
59 | (if (null? (cdr chs)) |
---|
60 | (check-char 'unicode-string (car chs)) |
---|
61 | (every (cut check-char 'unicode-string <>) chs) ) |
---|
62 | (*unicode-string chs) ) ) ) |
---|
63 | |
---|
64 | (: unicode-make-string (fixnum #!optional char --> string)) |
---|
65 | ; |
---|
66 | (define (unicode-make-string len #!optional (fill #\space)) |
---|
67 | (*unicode-make-string |
---|
68 | (check-natural-fixnum 'unicode-make-string len) |
---|
69 | (check-char 'unicode-make-string fill)) ) |
---|
70 | |
---|
71 | (define generic-make-string unicode-make-string) |
---|
72 | |
---|
73 | (: unicode-surrogate? (* -> boolean : fixnum)) |
---|
74 | ; |
---|
75 | (define (unicode-surrogate? n) |
---|
76 | (and |
---|
77 | (fixnum? n) |
---|
78 | (##sys#unicode-surrogate? n) ) ) |
---|
79 | |
---|
80 | (: unicode-surrogates->codepoint (fixnum fixnum --> (or boolean fixnum))) |
---|
81 | ; |
---|
82 | (define (unicode-surrogates->codepoint hi lo) |
---|
83 | (##sys#surrogates->codepoint |
---|
84 | (check-natural-fixnum 'unicode-surrogates->codepoint hi "high") |
---|
85 | (check-natural-fixnum 'unicode-surrogates->codepoint lo "low")) ) |
---|
86 | |
---|
87 | (: *unicode-make-string (fixnum char --> string)) |
---|
88 | ; |
---|
89 | (define (*unicode-make-string len fill) |
---|
90 | (cond |
---|
91 | ((fx= 0 len) |
---|
92 | "" ) |
---|
93 | ((ascii-codepoint? fill) |
---|
94 | (##sys#make-string len fill) ) |
---|
95 | (else |
---|
96 | (*unicode-string (the (list-of char) (make-list len fill))) ) ) ) |
---|
97 | |
---|
98 | ;inefficient |
---|
99 | (: *unicode-string ((list-of char) --> string)) |
---|
100 | ; |
---|
101 | (define (*unicode-string chs) |
---|
102 | (cond |
---|
103 | ((null? chs) |
---|
104 | "" ) |
---|
105 | ((null? (cdr chs)) |
---|
106 | (##sys#char->utf8-string (car chs)) ) |
---|
107 | (else |
---|
108 | (string-concatenate (map ##sys#char->utf8-string chs)) ) ) ) |
---|
109 | |
---|
110 | ) ;module unicode-utils |
---|