source: project/release/5/string-utils/trunk/unicode-utils.scm @ 37677

Last change on this file since 37677 was 37677, checked in by Kon Lovett, 4 months ago

fake subdomain of char

File size: 2.7 KB
Line 
1;;;; unicode-utils.scm  -*- Scheme -*-
2;;;; Kon Lovett, Jul '18
3;;;; Kon Lovett, Aug '10
4
5;; Issues
6;;
7;; - Implies Unicode support that is not present.
8
9(declare
10  (bound-to-procedure
11    ##sys#string-append
12    ##sys#char->utf8-string
13    ##sys#unicode-surrogate?
14    ##sys#surrogates->codepoint))
15
16(module unicode-utils
17
18(;export
19  ascii-codepoint?
20  unicode-char->string
21  unicode-string *unicode-string
22  generic-make-string
23  unicode-make-string *unicode-make-string
24  unicode-surrogate?
25  unicode-surrogates->codepoint)
26
27(import scheme
28  (chicken base)
29  (chicken fixnum)
30  (chicken type)
31  (only srfi-1 every make-list)
32  (only srfi-13 string-concatenate)
33  (only type-checks check-natural-fixnum check-char))
34
35;; Simple UTF 8
36
37;nul is not accepted!
38(: ascii-codepoint? (* -> boolean : char))
39;
40(define (ascii-codepoint? ch)
41  (and
42    (char? ch)
43    (let ((x (char->integer ch)))
44      ;[1 7f] = (1 80)
45      (and (fx< 0 x) (fx< x #x80)) ) ) )
46
47(: unicode-char->string (char -> string))
48;
49(define (unicode-char->string ch)
50  (##sys#char->utf8-string (check-char 'unicode-char->string ch)) )
51
52(: *unicode-string ((list-of char) -> string))
53;
54(define (*unicode-string chs)
55  (cond
56    ((null? chs)
57      "" )
58    ((null? (cdr chs))
59      (##sys#char->utf8-string (car chs)) )
60    (else
61      (let ((chs (map ##sys#char->utf8-string chs)))
62        (##sys#fragments->string
63          (foldl (lambda (l s) (fx+ l (##sys#size s))) 0 chs)
64          chs) ) ) ) )
65
66;inefficient
67(: unicode-string (#!rest -> string))
68;
69(define (unicode-string . chs)
70  (if (null? chs)
71    ""
72    (begin
73      (if (null? (cdr chs))
74        (check-char 'unicode-string (car chs))
75        (every (cut check-char 'unicode-string <>) chs) )
76      (*unicode-string chs) ) ) )
77
78(: *unicode-make-string (fixnum char -> string))
79;
80(define (*unicode-make-string len fill)
81  (cond
82    ((fx= 0 len)
83      "" )
84    ;ascii-codepoint < char
85    ((not (ascii-codepoint? fill))
86      (*unicode-string (the (list-of char) (make-list len fill))) )
87    (else
88      (##sys#make-string len fill) ) ) )
89
90(: unicode-make-string (fixnum #!optional char -> string))
91;
92(define (unicode-make-string len #!optional (fill #\space))
93  (*unicode-make-string
94    (check-natural-fixnum 'unicode-make-string len)
95    (check-char 'unicode-make-string fill)) )
96
97(define generic-make-string unicode-make-string)
98
99(: unicode-surrogate? (* -> boolean : fixnum))
100;
101(define (unicode-surrogate? n)
102  (and
103    (fixnum? n)
104    (##sys#unicode-surrogate? n) ) )
105
106(: unicode-surrogates->codepoint (fixnum fixnum -> (or boolean fixnum)))
107;
108(define (unicode-surrogates->codepoint hi lo)
109  (##sys#surrogates->codepoint
110    (check-natural-fixnum 'unicode-surrogates->codepoint hi "high")
111    (check-natural-fixnum 'unicode-surrogates->codepoint lo "low")) )
112
113) ;module unicode-utils
Note: See TracBrowser for help on using the repository browser.