source: project/release/5/string-utils/trunk/unicode-utils.scm @ 35791

Last change on this file since 35791 was 35791, checked in by kon, 4 months ago

C5 initial

File size: 2.6 KB
Line 
1;;;; unicode-utils.scm  -*- Scheme -*-
2;;;; Kon Lovett, Jul '18
3;;;; Kon Lovett, Aug '10
4
5;; Issues
6;;
7;; - Implies Unicode support that is not present.
8
9(declare
10  (bound-to-procedure
11    ##sys#string-append
12    ##sys#char->utf8-string
13    ##sys#unicode-surrogate?
14    ##sys#surrogates->codepoint))
15
16(module unicode-utils
17
18(;export
19  ascii-codepoint?
20  unicode-char->string
21  unicode-string *unicode-string
22  generic-make-string
23  unicode-make-string *unicode-make-string
24  unicode-surrogate?
25  unicode-surrogates->codepoint)
26
27(import scheme
28  (chicken base)
29  (chicken fixnum)
30  (chicken type)
31  (only srfi-1 every make-list)
32  (only srfi-13 string-concatenate)
33  (only type-checks check-natural-fixnum check-char))
34
35;; Simple UTF 8
36
37;nul is not accepted!
38(: ascii-codepoint? (* -> boolean : char))
39;
40(define (ascii-codepoint? ch)
41  (and
42    (char? ch)
43    (let ((x (char->integer ch)))
44      ;[1 7f] = (1 80)
45      (and (fx< 0 x) (fx< x #x80)) ) ) )
46
47(: unicode-char->string (char --> string))
48;
49(define (unicode-char->string ch)
50  (##sys#char->utf8-string (check-char 'unicode-char->string ch)) )
51
52;inefficient
53(: unicode-string (#!rest (list-of char) --> string))
54;
55(define (unicode-string . chs)
56  (if (null? chs)
57    ""
58    (begin
59      (if (null? (cdr chs))
60        (check-char 'unicode-string (car chs))
61        (every (cut check-char 'unicode-string <>) chs) )
62      (*unicode-string chs) ) ) )
63
64(: unicode-make-string (fixnum #!optional char --> string))
65;
66(define (unicode-make-string len #!optional (fill #\space))
67  (*unicode-make-string
68    (check-natural-fixnum 'unicode-make-string len)
69    (check-char 'unicode-make-string fill)) )
70
71(define generic-make-string unicode-make-string)
72
73(: unicode-surrogate? (* -> boolean : fixnum))
74;
75(define (unicode-surrogate? n)
76  (and
77    (fixnum? n)
78    (##sys#unicode-surrogate? n) ) )
79
80(: unicode-surrogates->codepoint (fixnum fixnum --> (or boolean fixnum)))
81;
82(define (unicode-surrogates->codepoint hi lo)
83  (##sys#surrogates->codepoint
84    (check-natural-fixnum 'unicode-surrogates->codepoint hi "high")
85    (check-natural-fixnum 'unicode-surrogates->codepoint lo "low")) )
86
87(: *unicode-make-string (fixnum char --> string))
88;
89(define (*unicode-make-string len fill)
90  (cond
91    ((fx= 0 len)
92      "" )
93    ((ascii-codepoint? fill)
94      (##sys#make-string len fill) )
95    (else
96      (*unicode-string (the (list-of char) (make-list len fill))) ) ) )
97
98;inefficient
99(: *unicode-string ((list-of char) --> string))
100;
101(define (*unicode-string chs)
102  (cond
103    ((null? chs)
104      "" )
105    ((null? (cdr chs))
106      (##sys#char->utf8-string (car chs)) )
107    (else
108      (string-concatenate (map ##sys#char->utf8-string chs)) ) ) )
109
110) ;module unicode-utils
Note: See TracBrowser for help on using the repository browser.