Changeset 9608 in project


Ignore:
Timestamp:
03/14/08 18:05:34 (12 years ago)
Author:
Alex Shinn
Message:

No longer using syntax-case modules, relying on default integrations
to provide separation of core and unicode procedures.

Location:
release/3/utf8
Files:
2 added
10 edited

Legend:

Unmodified
Added
Removed
  • release/3/utf8/byte-string-srfi-13.scm

    r4332 r9608  
    1818
    1919(declare
     20  (usual-integrations)
    2021  (export
    2122    byte-string-upcase!
  • release/3/utf8/byte-string.scm

    r4332 r9608  
    11;;;; byte-string.scm -- renaming of string routines
    22;;
    3 ;; Copyright (c) 2004-2005 Alex Shinn. All rights reserved.
     3;; Copyright (c) 2004-2007 Alex Shinn. All rights reserved.
    44;; BSD-style license: http://synthcode.com/license.txt
    55
     
    1818
    1919(declare
     20  (usual-integrations)
    2021  (export
    2122    small-char-alphabetic?
     
    3940    byte-substring-index
    4041    byte-substring-index-ci
     42    byte-string-compare3
     43    byte-string-compare3-ci
    4144    byte-string-translate
    4245    byte-string-split
     
    5659    byte-display
    5760    read-byte-char
    58     write-byte-char ) )
     61    write-byte-char))
    5962
    6063(define small-char-alphabetic? char-alphabetic?)
     
    9295(define byte-substring-index substring-index)
    9396(define byte-substring-index-ci substring-index-ci)
     97(define byte-string-compare3 string-compare3)
     98(define byte-string-compare3-ci string-compare3-ci)
    9499
    95100(define byte-grep grep)
  • release/3/utf8/string-pointer.scm

    r3138 r9608  
    11;;;; string-pointer.scm -- mimic the interface provided by utf8
    22;;
    3 ;; Copyright (c) 2004-2005 Alex Shinn
     3;; Copyright (c) 2004-2007 Alex Shinn
    44;; All rights reserved.
    55;;
     
    1010  (declare
    1111   (fixnum)
     12   (usual-integrations)
    1213   (export
    1314    make-string-pointer string-pointer? sp-copy
  • release/3/utf8/utf8-lolevel.scm

    r7852 r9608  
    1818(declare
    1919  (fixnum) ; no chars above 2^21
     20  (usual-integrations)
    2021  (export
    2122    ;; utils
     
    2728    utf8-string-ref utf8-string-set! utf8-string-length
    2829    utf8-string->list utf8-prev-char utf8-next-char
    29     with-substring-offsets
     30    make-utf8-string
     31    with-substring-offsets with-two-substring-offsets
    3032    ;; string-pointers
    3133    make-string-pointer string-pointer? sp-copy
     
    3436    sp-check? sp-check-lo? sp-check-hi?
    3537    ;; I/O
    36     read-utf8-char write-utf8-char char->utf8-string ) )
    37 
    38 (cond-expand
    39   (chicken)
    40   (else
    41     (declare
    42       (export
    43         ;; I/O
    44         read-byte write-byte ) ) ) )
     38    read-utf8-char write-utf8-char char->utf8-string))
    4539
    4640;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    145139    (let lp ((i 0) (res 0))
    146140      (if (>= i limit)
    147         res
    148         (lp (+ i (utf8-start-byte->length (string-int-ref s i))) (+ res 1))))))
    149 
    150 (define (with-substring-offsets proc s . opt)
     141          res
     142          (lp (+ i (utf8-start-byte->length (string-int-ref s i)))
     143              (+ res 1))))))
     144
     145(define (with-substring-offsets proc s opt)
    151146  (let* ((start (if (pair? opt) (car opt) 0))
    152147         (b1 (utf8-index->offset s start))
     
    164159      (proc s b1 (string-length s)))))
    165160
     161(define (with-two-substring-offsets proc s1 s2 opt)
     162  (with-substring-offsets
     163      (lambda (s1 start1 end1)
     164        (with-substring-offsets
     165            (lambda (s2 start2 end2)
     166              (proc s1 s2 start1 end1 start2 end2))
     167            s2 (if (and (pair? opt) (pair? (cdr opt))) (cddr opt) '())))
     168      s1 opt))
     169
    166170(define (utf8-string->list str)
    167171  (let ((limit (string-length str)))
    168     (let loop ((i 0) (res '()))
     172    (let lp ((i 0) (res '()))
    169173      (if (>= i limit)
    170         (reverse res)
    171         (loop (+ i (utf8-start-byte->length (string-int-ref str i)))
     174          (reverse res)
     175          (lp (+ i (utf8-start-byte->length (string-int-ref str i)))
    172176              (cons (sp-ref str i) res))))))
     177
     178(define (make-utf8-string len . opt)
     179  (if (pair? opt)
     180      (let* ((c (car opt))
     181             (c-i (char->integer c))
     182             (c-len (ucs-integer->length c-i)))
     183        (if (<= c-len 1)
     184            (make-byte-string len c)
     185            (let* ((size (* len c-len))
     186                   (res (make-byte-string size)))
     187              (let lp ((i 0))
     188                (if (>= i size)
     189                    res
     190                    (begin
     191                      (string-set-at-byte-in-place! res size c-len i c-i)
     192                      (lp (+ i c-len))))))))
     193      (make-byte-string len)))
    173194
    174195;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    199220    (if (> end limit)
    200221      (error "utf8 trailing char overflow" s off)
    201       (substring s off end))))
     222      (byte-substring s off end))))
    202223
    203224(define (utf8-string-ref s pos)
     
    300321
    301322(define (sp-before s sp)
    302   (substring s 0 sp))
     323  (byte-substring s 0 sp))
    303324
    304325(define (sp-after s sp)
    305   (substring s sp))
     326  (byte-substring s sp))
    306327
    307328(define (sp-substring s . opt)
    308329  (if (null? opt)
    309     (substring s 0)
    310     (apply substring s opt)))
     330    (byte-substring s 0)
     331    (apply byte-substring s opt)))
    311332
    312333;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    313334;; Basic I/O
    314 
    315 (cond-expand
    316   (chicken)
    317   (else
    318     (define (read-byte . opt)
    319       (let ((res (read-char (if (pair? opt) (car opt) (current-input-port)))))
    320         (if (char? res)
    321           (char->integer res)
    322           res)))
    323 
    324     (define (write-byte b . opt)
    325       (apply write-char (integer->char b) opt)) ) )
    326335
    327336;; now in the core library
  • release/3/utf8/utf8-srfi-13.scm

    r4356 r9608  
    11;;;; utf8-srfi-13.scm -- Unicode-aware SRFI-13
    22;;
    3 ;; Copyright (c) 2004-2005 Alex Shinn. All rights reserved.
     3;; Copyright (c) 2004-2008 Alex Shinn. All rights reserved.
    44;; BSD-style license: http://synthcode.com/license.txt
    55
     
    1010;;
    1111;;   (require-extension utf8-srfi-13)
    12 ;;   (module ()
    13 ;;     (import utf8-srfi-13)
    14 ;;
    15 ;;     ... ; unicode-aware SRFI-13
    16 ;;
    17 ;;     )
    1812;;
    1913;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    2014
    21 (require-extension utf8 utf8-lolevel byte-string byte-string-srfi-13
    22                    utf8-srfi-14)
    23 (require-extension iset)
    24 
    25 (declare (fixnum) ) ; no chars above 2^21
    26 
    27 (module utf8-srfi-13
    28   (
     15(require-extension utf8-lolevel utf8-srfi-14 iset utf8-case-map)
     16
     17(register-feature! 'srfi-13)
     18
     19(declare (fixnum)) ; no chars above 2^21
     20
     21(declare
     22 (usual-integrations)
     23 (export
    2924  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    3025  ;; srfi-13
     
    6156  ;; filtering & deleting
    6257  string-filter string-delete
    63   )
    64 
    65 (import utf8)
    66 (import utf8-srfi-14)
    67 
    68 (define string-null? byte-string-null?)
     58  ))
     59
     60(define (string-null? s) (equal? s ""))
    6961
    7062(define (string-fold kons knil s . opt)
    71   (let-optionals* opt ((start 0) (end (string-length s)))
    72     (let loop ((i start) (b (utf8-index->offset s start)) (acc knil))
     63  (let-optionals* opt ((start 0) (end (utf8-string-length s)))
     64    (let lp ((i start) (b (utf8-index->offset s start)) (acc knil))
    7365      (if (>= i end)
    74         acc
    75         (loop (+ i 1)
     66          acc
     67          (lp (+ i 1)
    7668              (+ b (utf8-start-byte->length (string-int-ref s b)))
    7769              (kons (sp-ref s b) acc))))))
    7870
    7971(define (string-fold-right kons knil s . opt)
    80   (let-optionals* opt ((start 0) (end (string-length s)))
    81     (let loop ((i (- end 1))
    82                (b (utf8-prev-char s (utf8-index->offset s end)))
    83                (acc knil))
     72  (let-optionals* opt ((start 0) (end (utf8-string-length s)))
     73    (let lp ((i (- end 1))
     74             (b (utf8-prev-char s (utf8-index->offset s end)))
     75             (acc knil))
    8476      (if (< i start)
    85         acc
    86         (loop (- i 1)
     77          acc
     78          (lp (- i 1)
    8779              (utf8-prev-char s b)
    8880              (kons (sp-ref s b) acc))))))
     
    9183  (let-optionals* opt ((base "")
    9284                       (make-final (lambda (x) "")))
    93     (let lp ((seed seed) (ans base))
    94       (if (p seed)
    95         (string-append ans (make-final seed))
    96         (lp (g seed) (string-append ans (string (f seed))))))))
     85    (let ((out (open-output-string)))
     86      (display base out)
     87      (let lp ((seed seed))
     88        (if (p seed)
     89            (display (make-final seed) out)
     90            (begin
     91              (write-utf8-char (f seed) out)
     92              (lp (g seed)))))
     93      (get-output-string out))))
    9794
    9895(define (string-unfold-right p f g seed . opt)
    9996  (let-optionals* opt ((base "")
    10097                       (make-final (lambda (x) "")))
    101     (let lp ((seed seed) (ans base))
     98    (let lp ((seed seed) (ans (list base)))
    10299      (if (p seed)
    103         (string-append (make-final seed) ans)
    104         (lp (g seed) (string-append (string (f seed)) ans))))))
     100          (string-intersperse (cons (make-final seed) ans) "")
     101          (lp (g seed) (cons (char->utf8-string (f seed)) ans))))))
    105102
    106103(define (string-map proc s . opt)
    107   (reverse-list->string
    108    (apply string-fold (lambda (c acc) (cons (proc c) acc)) '() s opt)))
     104  (string-intersperse
     105   (reverse
     106    (map char->utf8-string
     107         (apply string-fold (lambda (c acc) (cons (proc c) acc)) '() s opt)))
     108   ""))
    109109
    110110(define string-map! string-map)
    111111
    112112(define (string-for-each proc s . opt)
    113   (apply string-fold (lambda (c acc) (proc c)) #f s opt))
     113  (apply string-fold (lambda (c acc) (proc c)) #f s opt)
     114  (if #f #f))
    114115
    115116(define (string-for-each-index proc s . opt)
     
    117118        (end (if (and (pair? opt) (pair? (cdr opt)))
    118119               (cadr opt)
    119                (string-length s))))
     120               (utf8-string-length s))))
    120121    (do ((i start (+ i 1)))
    121122        ((= i end))
     
    145146
    146147(define (string-tabulate proc len)
    147   (let loop ((i 0) (acc '()))
    148     (if (= i len)
    149       (reverse-list->string acc)
    150       (loop (+ i 1) (cons (proc i) acc)))))
     148  (let ((out (open-output-string)))
     149    (let lp ((i 0))
     150      (cond
     151        ((< i len)
     152         (write-utf8-char (proc i) out)
     153         (lp (+ i 1)))))
     154    (get-output-string out)))
    151155
    152156(define (string-copy s . opt)
    153   (if (null? opt)
    154     (byte-string-copy s)
    155     (apply substring s opt)))
    156 
    157 (define (substring/shared s start . opt)
    158   (apply with-substring-offsets byte-substring/shared s start opt))
     157  (with-substring-offsets (lambda (s start end) (##sys#substring s start end)) s opt))
     158
     159(define (substring/shared s . opt)
     160  (with-substring-offsets (lambda (s start end) (##sys#substring s start end)) s opt))
     161
     162(define (byte-string-copy! target t-off str start end)
     163  (if (> start t-off)
     164      (do ((i start (+ i 1))
     165           (j t-off (+ j 1)))
     166          ((>= i end))
     167        (string-set! target j (string-ref str i)))
     168      (do ((i (- end 1) (- i 1))
     169           (j (+ -1 t-off (- end start)) (- j 1)))
     170          ((< i start))
     171        (string-set! target j (string-ref str i)))))
    159172
    160173(define (string-copy! target tstart str . opt)
    161   (let-optionals* opt ((start 0) (end (string-length str)))
     174  (let-optionals* opt ((start 0) (end (utf8-string-length str)))
    162175    (let* ((str (substring str start end))
    163176           (len (- end start))
    164            (s-size (byte-string-length str))
    165            (t-total-size (byte-string-length target))
     177           (s-size (string-length str))
     178           (t-total-size (string-length target))
    166179           (t-off (utf8-index->offset target tstart))
    167180           (t-end-off (utf8-index->offset target (+ tstart len)))
    168181           (t-size (- t-end-off t-off)))
    169182      (if (= s-size t-size)
    170         (byte-string-copy! target t-off str)
     183        (byte-string-copy! target t-off str 0 len)
    171184        (let ((res (string-append
    172                     (byte-substring target 0 t-off)
     185                    (##sys#substring target 0 t-off)
    173186                    str
    174                     (byte-substring target t-end-off t-total-size))))
     187                    (##sys#substring target t-end-off t-total-size))))
    175188          (##sys#become! (list (cons target res))))))))
    176189
    177190(define (string-take s n) (substring s 0 n))
    178191(define (string-drop s n) (substring s n))
    179 (define (string-take-right s n) (substring s (- (string-length s) n)))
    180 (define (string-drop-right s n) (substring s 0 (- (string-length s) n)))
     192(define (string-take-right s n) (substring s (- (utf8-string-length s) n)))
     193(define (string-drop-right s n) (substring s 0 (- (utf8-string-length s) n)))
    181194
    182195(define (string-pad s len . opt)
    183   (let-optionals* opt ((ch #\space) (start 0) (end (string-length s)))
     196  (let-optionals* opt ((ch #\space) (start 0) (end (utf8-string-length s)))
    184197    (let ((diff (- end start)))
    185198      (if (<= len diff)
    186         (substring/shared s (- end len) end)
    187         (string-append (make-string (- len diff) ch)
    188                        (substring/shared s start end))))))
     199        (substring s (- end len) end)
     200        (string-append (make-utf8-string (- len diff) ch)
     201                       (substring s start end))))))
    189202
    190203(define (string-pad-right s len . opt)
    191   (let-optionals* opt ((ch #\space) (start 0) (end (string-length s)))
     204  (let-optionals* opt ((ch #\space) (start 0) (end (utf8-string-length s)))
    192205    (let ((diff (- end start)))
    193206      (if (<= len diff)
    194         (substring/shared s start (+ start len))
    195         (string-append (substring/shared s start end)
    196                        (make-string (- len diff) ch))))))
     207        (substring s start (+ start len))
     208        (string-append (substring s start end)
     209                       (make-utf8-string (- len diff) ch))))))
    197210
    198211(define (string-trim s . opt)
    199212  (let-optionals* opt ((trimmer #\space) (start 0) (end #f))
    200     (if (and (char? trimmer) (< (char->integer trimmer) #x80))
    201       (if end
    202         (byte-string-trim s trimmer start (utf8-index->offset s end))
    203         (byte-string-trim s trimmer start))
    204       (let ((pred (char-predicate trimmer))
    205             (end-off
    206              (if end (utf8-index->offset s end) (byte-string-length s))))
    207         (let lp ((i (utf8-index->offset s start)))
    208           (if (or (>= i end-off) (not (pred (sp-ref s i))))
    209             (byte-substring s i)
    210             (lp (sp-next s i))))))))
     213    (let* ((pred (char-predicate trimmer))
     214           (end-off
     215            (if end (utf8-index->offset s end) (string-length s))))
     216      (let lp ((i (utf8-index->offset s start)))
     217        (if (or (>= i end-off) (not (pred (sp-ref s i))))
     218            (##sys#substring s i end-off)
     219            (lp (sp-next s i)))))))
    211220
    212221(define (string-trim-right s . opt)
    213222  (let-optionals* opt ((trimmer #\space) (start 0) (end #f))
    214     (if (and (char? trimmer) (< (char->integer trimmer) #x80))
    215       (if end
    216         (byte-string-trim s trimmer start (utf8-index->offset s end))
    217         (byte-string-trim s trimmer start))
    218       (let ((pred (char-predicate trimmer))
    219             (end-off
    220              (if end (utf8-index->offset s end) (byte-string-length s))))
    221         (let lp ((i (sp-prev s end-off)) (j end-off))
    222           (if (or (negative? i) (not (pred (sp-ref s i))))
    223             (byte-substring s (utf8-index->offset s start) j)
    224             (lp (sp-prev s i) i)))))))
     223    (let ((pred (char-predicate trimmer))
     224          (end-off
     225           (if end (utf8-index->offset s end) (string-length s))))
     226      (let lp ((i (sp-prev s end-off)) (j end-off))
     227        (if (or (negative? i) (not (pred (sp-ref s i))))
     228            (##sys#substring s (utf8-index->offset s start) j)
     229            (lp (sp-prev s i) i))))))
    225230
    226231(define (string-trim-both s . opt)
     
    228233    (string-trim (apply string-trim-right s opt) trimmer)))
    229234
    230 (define (with-two-substring-offsets proc s1 s2 opt)
    231   (apply with-substring-offsets
    232    (lambda (s1 start1 end1)
    233      (apply with-substring-offsets
    234       (lambda (s2 start2 end2)
    235         (proc s1 s2 start1 end1 start2 end2))
    236       s2 (if (and (pair? opt) (pair? (cdr opt))) (cddr opt) '())))
    237    s1 opt))
     235;; alas, can't use string-compare3 because the predicates get the
     236;; index as an argument
    238237
    239238(define (string-compare s1 s2 proc< proc= proc> . opt)
    240239  (with-two-substring-offsets
    241    (lambda (s1 s2 start1 end1 start2 end2)
    242      (byte-string-compare
    243       s1 s2
    244       (lambda (i) (proc< (utf8-offset->index s1 i)))
    245       (lambda (i) (proc= (utf8-offset->index s1 i)))
    246       (lambda (i) (proc> (utf8-offset->index s1 i)))
    247       start1 end1 start2 end2))
    248    s1 s2 opt))
     240      (lambda (s1 s2 start1 end1 start2 end2)
     241        (let lp ((i start1) (j start2))
     242          (cond
     243            ((>= i end1)
     244             ((if (>= j end2) proc= proc<)
     245              (utf8-offset->index s1 i)))
     246            ((>= j end2)
     247             (utf8-offset->index s1 i))
     248            ((char<? (string-ref s1 i) (string-ref s2 i))
     249             (proc< (utf8-offset->index s1 i)))
     250            ((char>? (string-ref s1 i) (string-ref s2 i))
     251             (proc> (utf8-offset->index s1 i)))
     252            (else
     253             (lp (+ i 1) (+ j 1))))))
     254      s1 s2 opt))
    249255
    250256(define (string-compare-ci s1 s2 proc< proc= proc> . opt)
    251257  (with-two-substring-offsets
    252    (lambda (s1 s2 start1 end1 start2 end2)
    253      (byte-string-compare-ci
    254       s1 s2
    255       (lambda (i) (proc< (utf8-offset->index s1 i)))
    256       (lambda (i) (proc= (utf8-offset->index s1 i)))
    257       (lambda (i) (proc> (utf8-offset->index s1 i)))
    258       start1 end1 start2 end2))
    259    s1 s2 opt))
    260 
    261 (define (make-string-comparator proc)
     258      (lambda (s1 s2 start1 end1 start2 end2)
     259        (let lp ((i start1) (j start2))
     260          (cond
     261            ((>= i end1)
     262             ((if (>= j end2) proc= proc<)
     263              (utf8-offset->index s1 i)))
     264            ((>= j end2)
     265             (utf8-offset->index s1 i))
     266            ((char-ci<? (string-ref s1 i) (string-ref s2 i))
     267             (proc< (utf8-offset->index s1 i)))
     268            ((char-ci>? (string-ref s1 i) (string-ref s2 i))
     269             (proc> (utf8-offset->index s1 i)))
     270            (else
     271             (lp (+ i 1) (+ j 1))))))
     272      s1 s2 opt))
     273
     274(define (make-string-comparator proc pred)
    262275  (lambda (s1 s2 . opt)
    263     (with-two-substring-offsets proc s1 s2 opt)))
    264 
    265 (define string= (make-string-comparator byte-string=))
    266 (define string<> (make-string-comparator byte-string<>))
    267 (define string< (make-string-comparator byte-string<))
    268 (define string> (make-string-comparator byte-string>))
    269 (define string<= (make-string-comparator byte-string<=))
    270 (define string>= (make-string-comparator byte-string>=))
    271 
    272 (define string-ci= (make-string-comparator byte-string-ci=))
    273 (define string-ci<> (make-string-comparator byte-string-ci<>))
    274 (define string-ci< (make-string-comparator byte-string-ci<))
    275 (define string-ci> (make-string-comparator byte-string-ci>))
    276 (define string-ci<= (make-string-comparator byte-string-ci<=))
    277 (define string-ci>= (make-string-comparator byte-string-ci>=))
     276    (if (null? opt)
     277        (pred (proc s1 s2))
     278        (pred (with-two-substring-offsets
     279                  (lambda (s1 s2 start1 end1 start2 end2)
     280                    (proc (##sys#substring s1 start1 end1)
     281                          (##sys#substring s2 start2 end2)))
     282                  s1 s2 opt)))))
     283
     284(define string= (make-string-comparator string-compare3 zero?))
     285(define string<> (make-string-comparator string-compare3 (complement zero?)))
     286(define string< (make-string-comparator string-compare3 negative?))
     287(define string> (make-string-comparator string-compare3 positive?))
     288(define string<= (make-string-comparator string-compare3 (complement positive?)))
     289(define string>= (make-string-comparator string-compare3 (complement negative?)))
     290
     291(define string-ci= (make-string-comparator string-compare3-ci zero?))
     292(define string-ci<> (make-string-comparator string-compare3-ci (complement zero?)))
     293(define string-ci< (make-string-comparator string-compare3-ci negative?))
     294(define string-ci> (make-string-comparator string-compare3-ci positive?))
     295(define string-ci<= (make-string-comparator string-compare3-ci (complement positive?)))
     296(define string-ci>= (make-string-comparator string-compare3-ci (complement negative?)))
    278297
    279298(define (utf8-substring-length s start . opt)
    280   (let ((end (if (pair? opt) (car opt) (byte-string-length s))))
     299  (let ((end (if (pair? opt) (car opt) (string-length s))))
    281300    (let lp ((i start) (res 0))
    282301      (if (>= i end)
    283         res
    284         (lp (+ i (utf8-start-byte->length (string-int-ref s i))) (+ res 1))))))
     302          res
     303          (lp (+ i (utf8-start-byte->length (string-int-ref s i)))
     304              (+ res 1))))))
    285305
    286306(define (make-string-fix-length proc)
    287307  (lambda (s1 s2 . opt)
    288308    (with-two-substring-offsets
    289      (lambda (s1 s2 start1 start2 end1 end2)
     309     (lambda (s1 s2 start1 end1 start2 end2)
    290310       (let ((res (proc s1 s2 start1 start2 end1 end2)))
    291          (and res (+ (utf8-substring-length s1 start1 res)
    292                      (utf8-offset->index s1 start1)))))
     311         (if (zero? res)
     312             res
     313             (utf8-substring-length s1 start1 (+ start1 res)))))
    293314     s1 s2 opt)))
     315
     316(define (byte-string-prefix-length s1 s2 start1 start2 end1 end2)
     317  (let lp ((i start1) (j start2))
     318    (cond
     319      ((>= i end1) (- i start1))
     320      ((>= j end2) (- j start2))
     321      ((char=? (string-ref s1 i) (string-ref s2 j))
     322       (lp (+ i 1) (+ j 1)))
     323      (else (- i start1)))))
     324
     325(define (byte-string-prefix-length-ci s1 s2 start1 start2 end1 end2)
     326  (let lp ((i start1) (j start2))
     327    (cond
     328      ((>= i end1) (- i start1))
     329      ((>= j end2) (- j start2))
     330      ((char-ci=? (string-ref s1 i) (string-ref s2 j))
     331       (lp (+ i 1) (+ j 1)))
     332      (else (- i start1)))))
     333
     334(define (byte-string-suffix-length s1 s2 start1 start2 end1 end2)
     335  (let lp ((i (- end1 1)) (j (- end2 1)))
     336    (cond
     337      ((< i start1) (- end1 i 1))
     338      ((< j start2) (- end2 j 1))
     339      ((char=? (string-ref s1 i) (string-ref s2 j))
     340       (lp (- i 1) (- j 1)))
     341      (else (- end1 i 1)))))
     342
     343(define (byte-string-suffix-length-ci s1 s2 start1 start2 end1 end2)
     344  (let lp ((i (- end1 1)) (j (- end2 1)))
     345    (cond
     346      ((< i start1) (- end1 i 1))
     347      ((< j start2) (- end2 j 1))
     348      ((char-ci=? (string-ref s1 i) (string-ref s2 j))
     349       (lp (- i 1) (- j 1)))
     350      (else (- end1 i 1)))))
    294351
    295352(define string-prefix-length (make-string-fix-length byte-string-prefix-length))
     
    298355(define string-suffix-length-ci (make-string-fix-length byte-string-suffix-length-ci))
    299356
    300 (define (make-string-fix-test proc)
     357(define (make-string-prefix-test proc)
    301358  (lambda (s1 s2 . opt)
    302     (with-two-substring-offsets
    303      (lambda (s1 s2 start1 start2 end1 end2)
    304        (proc s1 s2 start1 start2 end1 end2))
    305      s1 s2 opt)))
    306 
    307 (define string-prefix? (make-string-fix-test byte-string-prefix?))
    308 (define string-prefix-ci? (make-string-fix-test byte-string-prefix-ci?))
    309 (define string-suffix? (make-string-fix-test byte-string-suffix?))
    310 (define string-suffix-ci? (make-string-fix-test byte-string-suffix-ci?))
     359    (cond
     360      ((null? opt) (proc s1 s2))
     361      ((null? (cdr opt)) (proc s1 s2 (car opt)))
     362      (else
     363       (with-two-substring-offsets
     364           (lambda (s1 s2 start1 end1 start2 end2)
     365             (proc (##sys#substring s1 start1 end1)
     366                   (##sys#substring s2 start2 end2)))
     367           s1 s2 opt)))))
     368
     369(define string-prefix? (make-string-prefix-test substring=?))
     370(define string-prefix-ci? (make-string-prefix-test substring-ci=?))
     371
     372(define (string-suffix? s1 s2 . opt)
     373  (with-two-substring-offsets
     374      (lambda (s1 s2 start1 end1 start2 end2)
     375        (and (>= (- end2 start2) (- end1 start1))
     376             (let lp ((i (- end1 1)) (j (- end2 1)))
     377               (or (< i start1)
     378                   (if (char=? (string-ref s1 i) (string-ref s2 j))
     379                       (lp (- i 1) (- j 1))
     380                       #f)))))
     381      s1 s2 opt))
     382
     383(define (string-suffix-ci? s1 s2 . opt)
     384  (with-two-substring-offsets
     385      (lambda (s1 s2 start1 end1 start2 end2)
     386        (and (>= (- end2 start2) (- end1 start1))
     387             (let lp ((i (- end1 1)) (j (- end2 1)))
     388               (or (< i start1)
     389                   (if (char-ci=? (string-ref s1 i) (string-ref s2 j))
     390                       (lp (- i 1) (- j 1))
     391                       #f)))))
     392      s1 s2 opt))
    311393
    312394(define (make-string-hasher proc)
    313395  (lambda (s . opt)
    314     (if (null? opt)
    315       (proc s)
    316       (apply
    317        with-substring-offsets
    318        (lambda (s start end)
    319          (proc s (car opt) start end))
    320        s (cdr opt)))))
    321 
    322 (define string-hash (make-string-hasher byte-string-hash))
    323 (define string-hash-ci (make-string-hasher byte-string-hash-ci))
     396    (cond
     397      ((null? opt)
     398       (proc s))
     399      ((null? (cdr opt))
     400       (proc s (car opt)))
     401      (else
     402       (with-substring-offsets
     403           (lambda (s start end)
     404             (proc (##sys#substring s start end) (car opt)))
     405           s (cdr opt))))))
     406
     407(define string-hash
     408  (make-string-hasher hash))
     409(define string-hash-ci
     410  (make-string-hasher (lambda (s) (hash (string-downcase s)))))
    324411
    325412(define (with-string-index+offset proc s x . opt)
    326413  (if (equal? s "")
    327     #f
    328     (let-optionals* opt ((start 0) (end -1))
    329       (let ((size (byte-string-length s))
    330             (pred (char-predicate x)))
    331         (let lp ((i start) (off (utf8-index->offset s start)))
    332           (if (or (= i end) (= off size))
    333             (proc #f #f)
    334             (let ((ch (sp-ref s off)))
    335               (if (pred ch)
    336                 (proc i off)
    337                 (lp (+ i 1)
    338                     (+ off (ucs-integer->length (char->integer ch))))))))))))
     414      #f
     415      (let-optionals* opt ((start 0) (end -1))
     416        (let ((size (string-length s))
     417              (pred (char-predicate x)))
     418          (let lp ((i start) (off (utf8-index->offset s start)))
     419            (if (or (= i end) (= off size))
     420                (proc #f #f)
     421                (let ((ch (sp-ref s off)))
     422                  (if (pred ch)
     423                      (proc i off)
     424                      (lp (+ i 1)
     425                          (+ off (ucs-integer->length
     426                                  (char->integer ch))))))))))))
    339427
    340428(define (with-string-index+offset-right proc s x . opt)
    341429  (if (equal? s "")
    342     #f
    343     (let-optionals* opt ((start 0) (end (string-length s)))
    344       (let* ((size (byte-string-length s))
    345              (pred (char-predicate x)))
    346         (let lp ((i (- end 1)) (off (utf8-index->offset s (- end 1))))
    347           (if (< i start)
    348             (proc #f #f)
    349             (let ((ch (sp-ref s off)))
    350               (if (pred ch)
    351                 (proc i off)
    352                 (if (zero? i)
    353                   (lp -1 -1)
    354                   (lp (- i 1) (utf8-prev-char s off)))))))))))
     430      #f
     431      (let-optionals* opt ((start 0) (end (utf8-string-length s)))
     432        (let* ((size (string-length s))
     433               (pred (char-predicate x)))
     434          (let lp ((i (- end 1)) (off (utf8-index->offset s (- end 1))))
     435            (if (< i start)
     436                (proc #f #f)
     437                (let ((ch (sp-ref s off)))
     438                  (if (pred ch)
     439                      (proc i off)
     440                      (if (zero? i)
     441                          (lp -1 -1)
     442                          (lp (- i 1) (utf8-prev-char s off)))))))))))
    355443
    356444(define (arg1 a b) a)
    357 (define (arg2 a b) b)
     445;;(define (arg2 a b) b)
    358446
    359447(define (string-index s x . opt)
    360448  (apply with-string-index+offset arg1 s x opt))
    361 (define (string-offset s x . opt)
    362   (apply with-string-index+offset arg2 s x opt))
     449;; (define (string-offset s x . opt)
     450;;   (apply with-string-index+offset arg2 s x opt))
    363451(define (string-index-right s x . opt)
    364452  (apply with-string-index+offset-right arg1 s x opt))
    365 (define (string-offset-right s x . opt)
    366   (apply with-string-index+offset-right arg2 s x opt))
     453;; (define (string-offset-right s x . opt)
     454;;   (apply with-string-index+offset-right arg2 s x opt))
    367455
    368456(define (string-skip s x . opt)
     
    375463    (apply string-fold (lambda (c sum) (if (pred c) (+ sum 1) sum)) 0 s opt)))
    376464
     465;; cleaner to loop ourselves, but the byte-oriented substring-index
     466;; uses memcmp directly, so we go out of our way to make use of that,
     467;; while avoiding substring if at all possible
    377468(define (string-contains s1 s2 . opt)
    378   (with-two-substring-offsets
    379    (lambda (s1 s2 start1 start2 end1 end2)
    380      (let ((res (byte-string-contains s1 s2 start1 start2 end1 end2)))
    381        (and res (utf8-offset->index s1 res))))
    382    s1 s2 opt))
    383 
     469  (define (return offset index)
     470    (and offset (+ (utf8-offset->index s1 offset) index)))
     471  (if (null? opt)
     472      (return (substring-index s2 s1) 0)
     473      (let* ((start1-index (car opt))
     474             (opt (cdr opt))
     475             (start1 (utf8-index->offset s1 start1-index)))
     476        (if (null? opt)
     477            (return (substring-index s2 s1 start1) 0)
     478            (let* ((end1 (utf8-index->offset s1 (car opt)))
     479                   (opt (cdr opt))
     480                   (s2 (if (null? opt)
     481                           s2
     482                           (with-substring-offsets ##sys#substring s2 opt))))
     483              (if (= end1 (string-length s1))
     484                  (return (substring-index s2 s1 start1) 0)
     485                  (return (substring-index
     486                           s2
     487                           (##sys#substring s1 start1 end1))
     488                          start1-index)))))))
     489
     490;; XXXX consider using full unicode case mappings
    384491(define (string-contains-ci s1 s2 . opt)
    385   (with-two-substring-offsets
    386    (lambda (s1 s2 start1 start2 end1 end2)
    387      (let ((res (byte-string-contains-ci s1 s2 start1 start2 end1 end2)))
    388        (and res (utf8-offset->index s1 res))))
    389    s1 s2 opt))
     492  (define (return offset index)
     493    (and offset (+ (utf8-offset->index s1 offset) index)))
     494  (if (null? opt)
     495      (return (substring-index s2 s1) 0)
     496      (let* ((start1-index (car opt))
     497             (opt (cdr opt))
     498             (start1 (utf8-index->offset s1 start1-index)))
     499        (if (null? opt)
     500            (return (substring-index-ci s2 s1 start1) 0)
     501            (let* ((end1 (utf8-index->offset s1 (car opt)))
     502                   (opt (cdr opt))
     503                   (s2 (if (null? opt)
     504                           s2
     505                           (with-substring-offsets ##sys#substring s2 opt))))
     506              (if (= end1 (string-length s1))
     507                  (return (substring-index-ci s2 s1 start1) 0)
     508                  (return (substring-index-ci
     509                           s2
     510                           (##sys#substring s1 start1 end1))
     511                          start1-index)))))))
    390512
    391513;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    392514;; case mapping
    393515
    394 (define (make-string1+offsets proc)
    395   (lambda (s . opt)
    396     (apply with-substring-offsets proc s opt)))
    397 
    398 (define string-titlecase (make-string1+offsets byte-string-titlecase))
    399 (define string-titlecase! (make-string1+offsets byte-string-titlecase!))
    400 (define string-downcase (make-string1+offsets byte-string-downcase))
    401 (define string-downcase! (make-string1+offsets byte-string-downcase!))
    402 (define string-upcase (make-string1+offsets byte-string-upcase))
    403 (define string-upcase! (make-string1+offsets byte-string-upcase!))
     516(define string-titlecase utf8-string-titlecase)
     517(define string-titlecase! utf8-string-titlecase)
     518(define string-downcase utf8-string-downcase)
     519(define string-downcase! utf8-string-downcase)
     520(define string-upcase utf8-string-upcase)
     521(define string-upcase! utf8-string-upcase)
    404522
    405523;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    413531      (let ((i2 (utf8-prev-char s i)))
    414532        (if (not i2)
    415           res
    416           (let lp2 ((i3 i2) (j j))
    417             (if (eqv? i3 i)
    418               (lp i2 j)
    419               (begin
    420                 (byte-string-set! res j (byte-string-ref s i3))
    421                 (lp2 (+ i3 1) (+ j 1))))))))))
     533            res
     534            (let lp2 ((i3 i2) (j j))
     535              (if (eqv? i3 i)
     536                  (lp i2 j)
     537                  (begin
     538                    (string-set! res j (string-ref s i3))
     539                    (lp2 (+ i3 1) (+ j 1))))))))))
    422540
    423541(define (string-reverse s . opt)
    424   (apply with-substring-offsets %string-reverse s opt))
     542  (with-substring-offsets %string-reverse s opt))
    425543
    426544(define (string-reverse! s . opt)
    427   (apply
    428    with-substring-offsets
     545  (with-substring-offsets
    429546   (lambda (s start end)
    430547     (let ((s2 (%string-reverse s start end)))
    431        (byte-string-copy! s start s2)))
     548       (byte-string-copy! s start s2 0 (string-length s2))))
    432549   s opt))
    433550
    434 (define string-append/shared byte-string-append/shared)
    435 (define string-concatenate byte-string-concatenate)
    436 (define string-concatenate-reverse byte-string-concatenate-reverse)
    437 (define string-concatenate-reverse/shared byte-string-concatenate-reverse/shared)
    438 (define string-concatenate/shared byte-string-concatenate/shared)
     551(define string-append/shared string-append)
     552(define (string-concatenate ls) (string-intersperse ls ""))
     553(define (string-concatenate-reverse ls) (string-intersperse (reverse ls) ""))
     554(define string-concatenate-reverse/shared string-concatenate-reverse)
     555(define string-concatenate/shared string-concatenate)
    439556
    440557(define (xsubstring s1 from . opt)
    441   (let-optionals* opt ((to1 #f) (start 0) (end (string-length s1)))
    442     (let* ((s (substring/shared s1 start end))
     558  (let-optionals* opt ((to1 #f) (start 0) (end (utf8-string-length s1)))
     559    (let* ((s (substring s1 start end))
    443560           (len (- end start))
    444            (to (or to1 (+ from len))))
    445       (let lp ((i from) (res '()))
    446         (if (= i to)
    447           (reverse-list->string res)
    448           (lp (+ i 1) (cons (string-ref s (modulo i len)) res)))))))
     561           (to (or to1 (+ from len)))
     562           (out (open-output-string)))
     563      (let lp ((i from))
     564        (cond
     565          ((< i to)
     566           (write-utf8-char (utf8-string-ref s (modulo i len)) out)
     567           (lp (+ i 1)))))
     568      (get-output-string out))))
    449569
    450570(define (string-xcopy target tstart s from . opt)
    451   (let-optionals* opt ((to1 #f) (start 0) (end (string-length s)))
     571  (let-optionals* opt ((to1 #f) (start 0) (end (utf8-string-length s)))
    452572    (let ((to (or to1 (+ from - end start))))
    453       (string-append (substring/shared target 0 tstart)
     573      (string-append (substring target 0 tstart)
    454574                     (xsubstring s from to start end)
    455                      (substring/shared target (+ tstart (- to from)))))))
     575                     (substring target
     576                                (+ tstart (- to from))
     577                                (utf8-string-length target))))))
    456578
    457579(define (string-xcopy! target tstart s from . opt)
     
    465587        (string-for-each
    466588         (lambda (c) (if (pred c) (display c)))
    467          (apply substring/shared s (if (pair? opt) opt '(0))))))))
     589         (if (pair? opt)
     590             (apply substring s opt)
     591             s))))))
    468592
    469593(define (string-delete s filt . opt)
     
    474598  (let ((start1 (utf8-index->offset s1 start1))
    475599        (end1 (utf8-index->offset s1 end1)))
    476     (apply
    477      with-substring-offsets
     600    (with-substring-offsets
    478601     (lambda (s2 start2 end2)
    479        (string-append (byte-substring/shared s1 0 start1)
    480                       (byte-substring/shared s2 start2 end2)
    481                       (byte-substring/shared s1 end1 (byte-string-length s1))))
     602       (string-append (##sys#substring s1 0 start1)
     603                      (##sys#substring s2 start2 end2)
     604                      (##sys#substring s1 end1 (string-length s1))))
    482605     s2 opt)))
    483606
     
    485608  (let-optionals* opt ((token-set char-set:graphic)
    486609                       o2)
    487     (apply
    488      with-substring-offsets
     610    (with-substring-offsets
    489611     (lambda (s start end)
    490612       (letrec
     
    510632     s o2)))
    511633
    512 ) ; utf8-srfi-13
  • release/3/utf8/utf8-srfi-14.scm

    r4332 r9608  
    11;;;; utf8-srfi-14.scm -- Unicode capable char-sets
    22;;
    3 ;; Copyright (c) 2004-2005 Alex Shinn.  All rights reserved.
     3;; Copyright (c) 2004-2008 Alex Shinn.  All rights reserved.
    44;; BSD-style license: http://synthcode.com/license.txt
    55
     
    88;;
    99;;   (require-extension utf8-srfi-14)
    10 ;;   (module ()
    11 ;;     (import utf8-srfi-14)
    12 ;;
    13 ;;     ... ; unicode-capable SRFI-14
    14 ;;
    15 ;;     )
    1610;;
    1711;; This module provides an alternative to the standard Chicken SRFI-14.
     
    2216;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    2317
    24 (require-extension utf8-lolevel)
    25 (require-extension iset)
    26 
    27 (declare (fixnum) ) ; no chars above 2^21
    28 
    29 (module utf8-srfi-14
    30   (
     18(require-extension utf8-lolevel iset)
     19
     20(register-feature! 'srfi-14)
     21
     22(declare (fixnum)) ; no chars above 2^21
     23
     24(declare
     25 (export
    3126  ;; srfi-14
    3227  char-set char-set? char-set-copy char-set-hash
     
    5045  char-set:hex-digit char-set:blank char-set:ascii
    5146  char-set:empty char-set:full
    52   )
     47  ))
    5348
    5449(define char-set? iset?)
    5550(define char-set-copy iset-copy)
    5651
    57 (define (make-null-boundary proc default)
    58   (lambda args (if (null? args) default (apply proc args))))
    59 
    6052(define char-set-union iset-union)
    6153(define char-set-difference iset-difference)
    62 (define char-set-intersection
    63   (make-null-boundary iset-intersection char-set:full))
     54(define (char-set-intersection . args)
     55  (if (null? args)
     56      char-set:full
     57      (apply iset-intersection args)))
    6458(define char-set-xor iset-xor)
    6559(define char-set-diff+intersection iset-diff+intersection)
     
    7064(define char-set-union! iset-union!)
    7165(define char-set-difference! iset-difference!)
    72 (define char-set-intersection!
    73   (make-null-boundary iset-intersection! char-set:full))
     66(define (char-set-intersection! . args)
     67  (if (null? args)
     68      char-set:full
     69      (apply iset-intersection! args)))
    7470(define char-set-xor! iset-xor!)
    7571(define char-set-diff+intersection! iset-diff+intersection!)
     
    10298
    10399(define (char-set->string cs)
    104   (list->string (char-set->list cs)))
     100  (let ((out (open-output-string)))
     101    (char-set-for-each (lambda (ch) (write-utf8-char ch out)) cs)
     102    (get-output-string out)))
    105103
    106104(define char-set= iset=)
     
    171169
    172170(define (ucs-range->char-set! lo hi error? base-cs)
    173   (iset-union! base-cs (make-iset lo hi)))
     171  (iset-union! base-cs (make-iset lo (- hi 1))))
    174172
    175173(define (ucs-range->char-set lo hi . opt)
    176174  (let-optionals* opt ((error? #f) (base-is #f))
    177175    (if base-is
    178       (iset-union base-is (make-iset lo hi))
    179       (make-iset lo hi))))
     176      (iset-union base-is (make-iset lo (- hi 1)))
     177      (make-iset lo (- hi 1)))))
    180178
    181179;; cursors
     
    207205(define char-set:ascii #f)
    208206(define char-set:empty #f)
    209 (define char-set:full #f)
     207(define char-set:full (make-iset 0 (- (expt 2 21) 1)))
    210208
    211209(set! char-set:lower-case (make-iset 97 122))
     
    232230(set! char-set:ascii (make-iset 0 127))
    233231(set! char-set:empty (make-iset))
    234 (set! char-set:full (make-iset 0 (- (expt 2 21) 1)))
    235 
    236 ) ; utf8-srfi-14
  • release/3/utf8/utf8.html

    r7852 r9608  
    4040<h3>Version:</h3>
    4141<ul>
     42<li>2.0 No longer using syntax-case modules, relying on default integrations to provide separation of core and unicode procedures.</li>
    4243<li>1.14 Split modules into separate extensions. [Kon Lovett]</li>
    4344<li>1.13 Removed read/write-byte definition. [Kon Lovett]</li>
     
    6869<pre>
    6970  (require-extension utf8)
    70   (module mymodule ()
    71     (import utf8)
    72 
    73     ... ; unicode-aware code
    74 
    75     )
    7671</pre>
    7772
     
    9085Currently all existing eggs are UTF-8 safe to my knowledge.
    9186
    92 <p>Alternately, you may import utf8 at the top-level:
    93 
    94 <pre>
    95   ; require modules using byte-semantics
    96   ...
    97   (require-extension utf8)
    98   (import utf8)
    99   ...
    100   ; require modules using utf8-semantics
    101   ...
    102   ; unicode-aware code
    103 </pre>
    104 
    105 <p>By importing directly into the top-level, any subsequently loaded
    106 code will also use Unicode-aware semantics, even if it was not
    107 written with Unicode in mind.  This is more powerful but slightly
    108 less safe, since third party units may make assumptions about
    109 character ranges or string size.
     87<p>To make a compiled library optionally Unicode aware, so that it
     88will honor utf8 semantics if and only if the utf8 egg has been loaded
     89into the top-level or by some other extension, you need to
     90
     91<pre>
     92  (declare (not usual-integrations))
     93</pre>
    11094
    11195<p>To use Unicode-aware SRFI-13 and SRFI-14 using UTF-8 semantics:
     
    11397<pre>
    11498  (require-extension utf8-srfi-13)
    115   (module ()
    116     (import utf8-srfi-13)
    117 
    118     ... ; unicode-aware SRFI-13
    119     )
    120 
    12199  (require-extension utf8-srfi-14)
    122   (module ()
    123     (import utf8-srfi-14)
    124 
    125      ... ; unicode-capable SRFI-14
    126     )
     100</pre>
     101
     102<p>The utf8 egg provides a <code>'utf8-strings</code> feature which
     103can be used to conditionally support such utf8 extensions:
     104
     105<pre>
     106  (cond-expand
     107    (utf8-strings (use utf8-srfi-13 utf8-srfi-14))
     108    (else (use srfi-13 srfi-24)))
    127109</pre>
    128110
     
    136118<p>The default SRFI-14 char-sets are defined using ASCII-only
    137119characters, since this is both useful and lighter-weight.  To obtain
    138 full Unicode char-set definitions, use the char-set unit:
    139 
    140 <pre>
    141   (require-extension char-set)
    142 </pre>
     120full Unicode char-set definitions, use the unicode-char-sets unit:
     121
     122<pre>
     123  (require-extension unicode-char-sets)
     124</pre>
     125
     126<p>[Note this is the only extension in this egg with a
     127<code>unicode-</code> prefix, because the char-set handling only
     128depends on individual characters and is independent of the character
     129encoding used in strings.]
    143130
    144131<p>The following char-sets are provided based on the Unicode properties:
     
    242229
    243230<pre>
    244   (require-extension case-map)
     231  (require-extension utf8-case-map)
    245232</pre>
    246233
     
    280267library that directly uses the old srfi-14).
    281268
     269<p>Attempting to mutate literal strings will result in an error if the
     270mutated size does not occupy the same number of bytes as the original.
     271This is standards compliant, since the programmer is not supposed to
     272attempt to mutate literal values, but it may be a little confusing
     273since the error is inconsistent.
    282274
    283275<p>PERFORMANCE
     
    353345
    354346<pre>
    355 Copyright (c) 2004-2005, Alex Shinn
     347Copyright (c) 2004-2008, Alex Shinn
    356348All rights reserved.
    357349
  • release/3/utf8/utf8.meta

    r1 r9608  
    44 (synopsis "Unicode support")
    55 (category parsing)
    6  (needs iset syntax-case)
     6 (needs iset)
    77 (license "BSD")
    88 (author "Alex Shinn")
     
    1010        "case-map-1.dat"
    1111        "case-map-2.dat"
    12         "case-map.scm"
    13         "char-set.scm"
    14         "byte-string-srfi-13.scm"
     12        "utf8-case-map.scm"
     13        "unicode-char-sets.scm"
    1514        "utf8-srfi-13.scm"
    1615        "utf8-srfi-14.scm"
    1716        "string-pointer.scm"
    1817        "utf8-lolevel.scm"
     18        "utf8-support.scm"
    1919        "utf8.scm"
    2020        "utf8.setup"
  • release/3/utf8/utf8.scm

    r4586 r9608  
    11;;;; utf8.scm -- Unicode support for Chicken
    22;;
    3 ;; Copyright (c) 2004-2005 Alex Shinn. All rights reserved.
     3;; Copyright (c) 2004-2008 Alex Shinn. All rights reserved.
    44;; BSD-style license: http://synthcode.com/license.txt
    55
    6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    7 ;; USAGE
    8 ;;
    9 ;; To make your code Unicode aware, just do the following:
    10 ;;
    11 ;;   (require-extension utf8)
    12 ;;   (module mymodule ()
    13 ;;     (import utf8)
    14 ;;
    15 ;;     ... ; unicode-aware code
    16 ;;
    17 ;;     )
    18 ;;
    19 ;; then all core, extra, regex and SRFI-13 string operations will be
    20 ;; Unicode aware.  string-length will return the number of codepoints,
    21 ;; not the number of bytes, string-ref will index by codepoints and
    22 ;; return a char with an integer value up to 2^21, regular expressions
    23 ;; will match single codepoints rather than bytes and understand Unicode
    24 ;; character classes, etc.
    25 ;;
    26 ;; Strings are still native strings and may be passed to external
    27 ;; libraries (either Scheme or foreign) perfectly safely.  Libraries
    28 ;; that do parsing invariably do so on ASCII character boundaries and
    29 ;; are thus guaranteed to be compatible.  Libraries that reference
    30 ;; strings by index would need to be modified with a UTF-8 version.
    31 ;; Currently all existing eggs are UTF-8 safe to my knowledge.
    32 ;;
    33 ;; Alternately, you may import utf8 at the top-level:
    34 ;;
    35 ;;   ; require modules using byte-semantics
    36 ;;   ...
    37 ;;   (require-extension utf8)
    38 ;;   (import utf8)
    39 ;;   ...
    40 ;;   ; require modules using utf8-semantics
    41 ;;   ...
    42 ;;   ; unicode-aware code
    43 ;;
    44 ;; By importing directly into the top-level, any subsequently loaded
    45 ;; code will also use Unicode-aware semantics, even if it was not
    46 ;; written with Unicode in mind.  This is more powerful but slightly
    47 ;; less safe, since third party units may make assumptions about
    48 ;; character ranges or string size.
    49 ;;
    50 ;;
    51 ;; UNICODE CHAR-SETS
    52 ;;
    53 ;; The default SRFI-14 char-sets are defined using ASCII-only
    54 ;; characters, since this is both useful and lighter-weight.  To obtain
    55 ;; full Unicode char-set definitions, use the char-set unit:
    56 ;;
    57 ;;   (require-extension char-set)
    58 ;;
    59 ;; The following char-sets are provided based on the Unicode properties:
    60 ;;
    61 ;;   char-set:alphabetic
    62 ;;   char-set:arabic
    63 ;;   char-set:armenian
    64 ;;   char-set:ascii-hex-digit
    65 ;;   char-set:bengali
    66 ;;   char-set:bidi-control
    67 ;;   char-set:bopomofo
    68 ;;   char-set:braille
    69 ;;   char-set:buhid
    70 ;;   char-set:canadian-aboriginal
    71 ;;   char-set:cherokee
    72 ;;   char-set:common
    73 ;;   char-set:cypriot
    74 ;;   char-set:cyrillic
    75 ;;   char-set:dash
    76 ;;   char-set:default-ignorable-code-point
    77 ;;   char-set:deprecated
    78 ;;   char-set:deseret
    79 ;;   char-set:devanagari
    80 ;;   char-set:diacritic
    81 ;;   char-set:ethiopic
    82 ;;   char-set:extender
    83 ;;   char-set:georgian
    84 ;;   char-set:gothic
    85 ;;   char-set:grapheme-base
    86 ;;   char-set:grapheme-extend
    87 ;;   char-set:grapheme-link
    88 ;;   char-set:greek
    89 ;;   char-set:gujarati
    90 ;;   char-set:gurmukhi
    91 ;;   char-set:han
    92 ;;   char-set:hangul
    93 ;;   char-set:hanunoo
    94 ;;   char-set:hebrew
    95 ;;   char-set:hex-digit
    96 ;;   char-set:hiragana
    97 ;;   char-set:hyphen
    98 ;;   char-set:id-continue
    99 ;;   char-set:id-start
    100 ;;   char-set:ideographic
    101 ;;   char-set:ids-binary-operator
    102 ;;   char-set:ids-trinary-operator
    103 ;;   char-set:inherited
    104 ;;   char-set:join-control
    105 ;;   char-set:kannada
    106 ;;   char-set:katakana
    107 ;;   char-set:katakana-or-hiragana
    108 ;;   char-set:khmer
    109 ;;   char-set:lao
    110 ;;   char-set:latin
    111 ;;   char-set:limbu
    112 ;;   char-set:linear-b
    113 ;;   char-set:logical-order-exception
    114 ;;   char-set:lowercase
    115 ;;   char-set:malayalam
    116 ;;   char-set:math
    117 ;;   char-set:mongolian
    118 ;;   char-set:myanmar
    119 ;;   char-set:noncharacter-code-point
    120 ;;   char-set:ogham
    121 ;;   char-set:old-italic
    122 ;;   char-set:oriya
    123 ;;   char-set:osmanya
    124 ;;   char-set:quotation-mark
    125 ;;   char-set:radical
    126 ;;   char-set:runic
    127 ;;   char-set:shavian
    128 ;;   char-set:sinhala
    129 ;;   char-set:soft-dotted
    130 ;;   char-set:sterm
    131 ;;   char-set:syriac
    132 ;;   char-set:tagalog
    133 ;;   char-set:tagbanwa
    134 ;;   char-set:tai-le
    135 ;;   char-set:tamil
    136 ;;   char-set:telugu
    137 ;;   char-set:terminal-punctuation
    138 ;;   char-set:thaana
    139 ;;   char-set:thai
    140 ;;   char-set:tibetan
    141 ;;   char-set:ugaritic
    142 ;;   char-set:unified-ideograph
    143 ;;   char-set:uppercase
    144 ;;   char-set:variation-selector
    145 ;;   char-set:white-space
    146 ;;   char-set:xid-continue
    147 ;;   char-set:xid-start
    148 ;;   char-set:yi
    149 ;;
    150 ;;
    151 ;; BYTE-STRINGS
    152 ;;
    153 ;; Sometimes you may need access to the original string primitives so
    154 ;; you can directly access bytes, such as if you were implementing your
    155 ;; own regex library or text buffer and wanted optimal performance.  For
    156 ;; these cases we have renamed the original primitives by replacing
    157 ;; "string" with "byte-string".  Thus byte-string-length is the length
    158 ;; in bytes, not characters, of the strings (the equivalent of Gauche's
    159 ;; string-size).  byte-string-set! can corrupt the UTF-8 encoding and
    160 ;; should be used sparingly if at all.
    161 ;;
    162 ;;
    163 ;; LOW LEVEL API
    164 ;;
    165 ;; Direct manipulation of the utf8 encoding is factored away in the
    166 ;; utf8-lolevel unit.  This includes an abstract string-pointer API, and
    167 ;; an analogous string-pointer implementation for ASCII strings in the
    168 ;; string-pointer unit, however as the API is not fixed you use these at
    169 ;; your own risk.
    170 ;;
    171 ;; LIMITATIONS
    172 ;;
    173 ;; peek-char currently does not have Unicode semantics (i.e. it peeks
    174 ;; only a single byte) to avoid problems with port buffering.
    175 ;;
    176 ;; char-sets are not interchangeable between the existing srfi-14 code
    177 ;; and Unicode code (i.e. do not pass a Unicode char-set to an external
    178 ;; library that directly uses the old srfi-14).
    179 ;;
    180 ;;
    181 ;; PERFORMANCE
    182 ;;
    183 ;; string-length, string-ref and string-set! are all O(n) operations as
    184 ;; opposed to the usual O(1) since UTF-8 is a variable width encoding.
    185 ;; Use of these should be discouraged - it is much cleaner to use the
    186 ;; high-level SRFI-13 procedures and string ports.  For examples of how
    187 ;; to do common idioms without these procedures look at any string-based
    188 ;; code in Gauche.
    189 ;;
    190 ;; Furthermore, string-set! and other procedures that modify strings in
    191 ;; place may invoke gc if the mutated result does not fit within the
    192 ;; same UTF-8 encoding size as the original string.  If only mutating
    193 ;; 7-bit ASCII strings (or only mutating within fixed encoding sizes
    194 ;; such as Cyrillic->Cyrillic) then no gc will occur.
    195 ;;
    196 ;; string?, string=?, string-append, all R5RS string comparisons, and
    197 ;; read-line are unmodified.
    198 ;;
    199 ;; Regular expression matching will be just as fast except in the case
    200 ;; of Unicode character classes (which were not possible before anyway).
    201 ;;
    202 ;; All other procedures incur zero to minor overhead, but keep the same
    203 ;; asymptotic performance.
    204 ;;
    205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    206 
    207 (require-extension regex)
    208 (require-extension utf8-lolevel byte-string)
    209 
    210 (declare (fixnum) ) ; no chars above 2^21
    211 
    212 (register-feature! 'utf8-strings)
    213 
    214 (module utf8
    215   (
    216   ;; R5RS
    217   string-length string-ref string-set! make-string string substring
    218   string->list list->string string-fill! write-char read-char display
    219   ;; R5RS, should maybe be changed in Chicken core
    220   char-alphabetic? char-numeric? char-whitespace?
    221   char-upper-case? char-lower-case? char-upcase char-downcase
    222   ;; library
    223   reverse-list->string print print*
    224   ;; extras
    225   read-string write-string read-token ->string conc string-chop string-split
    226   string-translate substring=? substring-ci=? substring-index substring-index-ci
    227   ;; regexp
    228   grep regexp string-substitute string-substitute* string-split-fields
    229   string-match string-match-positions string-match-offsets
    230   string-search string-search-positions string-search-offsets
    231   ;; new
    232   string-set
    233   )
    234 
    235 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    236 ;; redefine char primitives
    237 
    238 (define (make-small-char-predicate pred)
    239   (lambda (c) (and (< (char->integer c) 128) (pred c))))
    240 
    241 (define char-alphabetic? (make-small-char-predicate small-char-alphabetic?))
    242 (define char-numeric? (make-small-char-predicate small-char-numeric?))
    243 (define char-whitespace? (make-small-char-predicate small-char-whitespace?))
    244 (define char-upper-case? (make-small-char-predicate small-char-upper-case?))
    245 (define char-lower-case? (make-small-char-predicate small-char-lower-case?))
    246 
    247 (define (char-upcase c)
    248   (if (< (char->integer c) 128)
    249     (small-char-upcase c)
    250     c))
    251 
    252 (define (char-downcase c)
    253   (if (< (char->integer c) 128)
    254     (small-char-downcase c)
    255     c))
    256 
    257 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    258 ;; redefine string primitives
    259 
    260 (define string-length utf8-string-length)
    261 
    262 (define char->string char->utf8-string)
    263 
    264 (define (string . args)
    265   (list->string args))
    266 
    267 (define (substring s start . opt)
    268   (apply with-substring-offsets byte-substring s start opt))
    269 
    270 (define (make-string len . opt)
    271   (if (pair? opt)
    272     (let* ((c (car opt))
    273            (c-i (char->integer c))
    274            (c-len (ucs-integer->length c-i)))
    275       (if (<= c-len 1)
    276         (make-byte-string len c)
    277         (let* ((size (* len c-len))
    278                (res (make-byte-string size)))
    279           (let loop ((i 0))
    280             (if (>= i size)
    281               res
    282               (begin
    283                 (string-set-at-byte-in-place! res size c-len i c-i)
    284                 (loop (+ i c-len))))))))
    285     (make-byte-string len)))
    286 
    287 (define string->list utf8-string->list)
    288 
    289 (define (list->string ls)
    290   (string-intersperse (map char->string ls) ""))
    291 
    292 (define (string-fill! str c)
    293   (let* ((size (byte-string-length str))
    294          (len (string-length str))
    295          (c-i (char->integer c))
    296          (c-len (ucs-integer->length c-i))
    297          (needed (* c-len len)))
    298     (if (= needed size)
    299       (let ((c-str (char->string c)))
    300         (do ((i 0 (+ i c-len)))
    301             ((= i size) str)
    302           (string-set-at-byte-in-place! str size len i c-i)))
    303       (let ((res (make-string len c)))
    304         (##sys#become! (list (cons str res)))))))
    305 
    306 (define string-ref utf8-string-ref)
    307 
    308 (define string-set! utf8-string-set!)
    309 
    310 (define (string-set s pos val)
    311   (let* ((size (byte-string-length s))
    312          (byte (utf8-index->offset s pos))
    313          (c (string-int-ref s byte))
    314          (c-len (utf8-start-byte->length c)))
    315     (string-set-at-byte s size byte c-len val)))
    316 
    317 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    318 ;; Basic I/O
    319 
    320 (define write-char write-utf8-char)
    321 
    322 (define read-char read-utf8-char)
    323 
    324 (define (display x . opt)
    325   (apply byte-display (if (char? x) (char->string x) x) opt))
    326 
    327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    328 ;; library
    329 
    330 (define (reverse-list->string ls)
    331   (list->string (reverse ls)))
    332 
    333 (define (print . opt)
    334   (apply byte-print (map (lambda (x) (if (char? x) (char->string x) x)) opt)))
    335 
    336 (define (print* . opt)
    337   (apply byte-print* (map (lambda (x) (if (char? x) (char->string x) x)) opt)))
    338 
    339 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    340 ;; I/O extras
    341 
    342 ;; this could be optimized by reading bytes while counting characters,
    343 ;; instead of counting characters
    344 
    345 (define (read-string . opt)
    346   (let-optionals* opt ((num #f) (in (current-input-port)))
    347     (if num
    348       (let loop ((i 0) (acc '()))
    349         (if (>= i num)
    350           (list->string (reverse acc))
    351           (let ((ch (read-char in)))
    352             (if (eof-object? ch)
    353               (loop num acc)
    354               (loop (+ i 1) (cons ch acc))))))
    355       (read-byte-string num in))))
    356 
    357 (define (write-string str . opt)
    358   (let-optionals* opt ((num #f) (out (current-output-port)))
    359     (if (and num (< num (string-length str)))
    360       (byte-display (substring str 0 num) out)
    361       (byte-display str out))))
    362 
    363 (define (read-token pred . opt)
    364   (let ((in (if (pair? opt) (car opt) (current-input-port))))
    365     (let loop ((acc '()))
    366       (let ((ch (read-char in)))
    367         (if (or (eof-object? ch) (not (pred ch)))
    368           (list->string (reverse acc))
    369           (loop (cons ch acc)))))))
    370 
    371 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    372 ;; string extras
    373 
    374 (define (->string x)
    375   (if (char? x) (char->string x) (->byte-string x)))
    376 
    377 (define (conc . args)
    378   (apply string-append (map ->string args)))
    379 
    380 (define (string-chop str len)
    381   (let ((size (byte-string-length str)))
    382     (let loop ((i 0) (from 0) (off 0) (acc '()))
    383       (cond
    384         ((>= off size)
    385          (if (> off from)
    386            (reverse (cons (byte-substring str from off) acc))
    387            (reverse acc)))
    388         ((= i len)
    389          (loop 0 off off (cons (byte-substring str from off) acc)))
    390         (else
    391          (loop (+ i 1)
    392                from
    393                (+ off (utf8-start-byte->length (string-int-ref str off)))
    394                acc))))))
    395 
    396 (define (string-split str . opt)
    397   (let-optionals* opt ((delim #f) (keep-empty? #f))
    398     (if (or (not delim) (ascii-string? delim))
    399       (byte-string-split str (or delim " \t\n") keep-empty?)
    400       (let ((delims (string->list delim))
    401             (join (if keep-empty?
    402                     (lambda (cur acc)
    403                       (cons (list->string (reverse cur)) acc))
    404                     (lambda (cur acc)
    405                       (if (null? cur)
    406                         acc
    407                         (cons (list->string (reverse cur)) acc))))))
    408         (let loop ((ls (string->list str)) (cur '()) (acc '()))
    409           (cond
    410             ((null? ls)
    411              (reverse (join cur acc)))
    412             ((memv (car ls) delims)
    413              (loop (cdr ls) '() (join cur acc)))
    414             (else
    415              (loop (cdr ls) (cons (car ls) cur) acc))))))))
    416 
    417 (define (string->vector str)
    418   (list->vector (string->list str)))
    419 
    420 (define (vector-memv x vec)
    421   (let ((len (vector-length vec)))
    422     (let loop ((i 0))
    423       (cond ((= i len) #f)
    424             ((eqv? x (vector-ref vec i)) i)
    425             (else (loop (+ i 1)))))))
    426 
    427 (define (string-translate str from . opt)
    428   (if (and (ascii-string? from)
    429            (or (null? opt) (ascii-string? (car opt))))
    430     (apply byte-string-translate str from opt)
    431     (let* ((from-vec (string->vector from)))
    432       (let ((to-vec (and (pair? opt) (string->vector (car opt)))))
    433         (with-output-to-string
    434           (lambda ()
    435             (let ((end (sp-last str)))
    436               (let lp ((i (sp-first str)))
    437                 (when (< i end)
    438                   (let ((c (sp-ref str i)))
    439                     (display
    440                      (cond ((vector-memv c from-vec)
    441                             => (lambda (i) (vector-ref to-vec i)))
    442                            (else c)))
    443                     (lp (sp-next str i))))))))))))
    444 
    445 (define (substring=? s1 s2 . opt)
    446   (let ((s1-len (string-length s1)) (s2-len (string-length s2)))
    447     (let-optionals* opt ((start1 0)
    448                          (start2 0)
    449                          (len (min (- s1-len start1) (- s2-len start2))))
    450       (with-substring-offsets
    451        (lambda (s1 s1-start s1-end)
    452          (with-substring-offsets
    453           (lambda (s2 s2-start s2-end)
    454             (byte-substring=? s1 s2 s1-start s2-start (- s1-end s1-start)))
    455           s2 start2 (+ start2 len)))
    456        s1 start1 (+ start1 len)))))
    457 
    458 (define (substring-ci=? s1 s2 . opt)
    459   (let ((s1-len (string-length s1)) (s2-len (string-length s2)))
    460     (let-optionals* opt ((start1 0)
    461                          (start2 0)
    462                          (len (min (- s1-len start1) (- s2-len start2))))
    463       (with-substring-offsets
    464        (lambda (s1 s1-start s1-end)
    465          (with-substring-offsets
    466           (lambda (s2 s2-start s2-end)
    467             (byte-substring-ci=? s1 s2 s1-start s2-start (- s1-end s1-start)))
    468           s2 start2 (+ start2 len)))
    469        s1 start1 (+ start1 len)))))
    470 
    471 (define (substring-index which where . opt)
    472   (let* ((start (if (pair? opt) (utf8-index->offset where (car opt)) 0))
    473          (res (byte-substring-index which where start)))
    474     (and res (utf8-offset->index where res))))
    475 
    476 (define (substring-index-ci which where . opt)
    477   (let* ((start (if (pair? opt) (utf8-index->offset where (car opt)) 0))
    478          (res (byte-substring-index-ci which where start)))
    479     (and res (utf8-offset->index where res))))
    480 
    481 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    482 ;; regexps always enable utf8 unless the 4th arg is explicitly provided
    483 
    484 (define (regexp str . opt)
    485   (let-optionals* opt ((icase? #f) (ispace? #f) (utf8? #t))
    486     (byte-regexp str icase? ispace? utf8?)))
    487 
    488 (define (->rx x)
    489   (if (regexp? x) x (regexp x)))
    490 
    491 (define (opt-off s opt)
    492   (if (pair? opt) (utf8-index->offset s (car opt)) 0))
    493 
    494 (define (grep rx ls)
    495   (byte-grep (->rx rx) ls))
    496 
    497 (define (string-match rx str . opt)
    498   (byte-string-match (->rx rx) str (opt-off str opt)))
    499 
    500 (define (string-match-offsets rx str . opt)
    501   (byte-string-match-positions (->rx rx) str (opt-off str opt)))
    502 
    503 (define (string-search rx str . opt)
    504   (let* ((start (opt-off str opt))
    505          (range (if (and (pair? opt) (pair? (cdr opt)))
    506                     (opt-off str (cdr opt))
    507                     (- (byte-string-length str) start))))
    508     (byte-string-search (->rx str) str start range)))
    509 
    510 (define (string-search-offsets rx str . opt)
    511   (let* ((start (opt-off str opt))
    512          (range (if (and (pair? opt) (pair? (cdr opt)))
    513                     (opt-off str (cdr opt))
    514                     (- (byte-string-length str) start))))
    515     (byte-string-search-positions (->rx str) str start range)))
    516 
    517 (define (string-split-fields rx str . opt)
    518   (let-optionals* opt ((mode #t) o2)
    519     (let ((start (opt-off str o2)))
    520       (byte-string-split-fields (->rx rx) str mode start))))
    521 
    522 (define (string-substitute rx subst str . opt)
    523   (apply byte-string-substitute (->rx rx) subst str opt))
    524 
    525 (define (string-substitute* str smap)
    526   (byte-string-substitute*
    527    str (map (lambda (x) (cons (->rx (car x)) (cdr x))) smap)))
    528 
    529 ;; these could be a lot faster, but you don't want to be working with
    530 ;; positions anyway
    531 
    532 (define (string-match-positions rx str . opt)
    533   (let* ((size (byte-string-length str))
    534          (->pos (lambda (o) (utf8-offset->index str opt))))
    535     (map (lambda (x) (if (pair? x) (map ->pos x) x))
    536          (apply string-match-offsets rx str opt))))
    537 
    538 (define (string-search-positions rx str . opt)
    539   (let* ((size (byte-string-length str))
    540          (->pos (lambda (o) (utf8-offset->index str opt))))
    541     (map (lambda (x) (if (pair? x) (map ->pos x) x))
    542          (apply string-search-offsets rx str opt))))
    543 
    544 ) ; utf8
     6(cond-expand
     7 (compiling
     8  (declare
     9   (not
     10    usual-integrations
     11    ;; core
     12    string-length string-ref string-set!
     13    ;;
     14    char-alphabetic? char-numeric? char-whitespace? char-upper-case?
     15    char-lower-case? char-upcase char-downcase
     16    ;;
     17    make-string string display string-fill!
     18    ;;
     19    string->list list->string write-char read-char substring
     20    ;; library
     21    reverse-list->string print print*
     22    ;; extras
     23    read-token ->string conc string-chop string-split
     24    string-translate write-string
     25    ;;
     26    read-string substring=? substring-ci=?
     27    substring-index substring-index-ci
     28    ;;    ;; regex
     29    ;;    grep regexp string-substitute string-substitute*
     30    ;;    string-split-fields string-match string-match-positions
     31    ;;    string-match-offsets string-search string-search-positions
     32    ;;    string-search-offsets
     33    )))
     34 (else
     35  ))
  • release/3/utf8/utf8.setup

    r9509 r9608  
    66(install-extension 'byte-string
    77 '("byte-string.so")
    8  `((version 1.14)
     8 `((version 2.0)
    99   ,@(if has-exports? `((exports "byte-string.exports")) '()) ) )
    1010
     
    1414(install-extension 'utf8-lolevel
    1515 '("utf8-lolevel.so")
    16  `((version 1.14)
     16 `((version 2.0)
    1717   ,@(if has-exports? `((exports "utf8-lolevel.exports")) '()) ) )
    1818
    19 (compile -s -O2 -d1 -R syntax-case
     19(compile -s -O2 -d1
    2020  ,@(if has-exports? '(-check-imports -emit-exports utf8.exports) '())
    21   utf8.scm)
     21  utf8-support.scm)
    2222(install-extension 'utf8
    2323 '("utf8.scm"
    24    "utf8.so"
     24   "utf8-support.so"
    2525   "utf8.html")
    26  `((version 1.14)
     26 `((version 2.0)
    2727   ,@(if has-exports? `((exports "utf8.exports")) '())
     28   (syntax)
     29   (require-at-runtime utf8-support)
    2830   (documentation "utf8.html") ) )
    2931
    30 (compile -s -O2 -d1 -R syntax-case
     32(compile -s -O2 -d1
    3133  ,@(if has-exports? '(-check-imports -emit-exports utf8-srfi-14.exports) '())
    3234  utf8-srfi-14.scm)
     
    3436 '("utf8-srfi-14.scm"
    3537   "utf8-srfi-14.so")
    36  `((version 1.14)
     38 `((version 2.0)
    3739   ,@(if has-exports? `((exports "utf8-srfi-14.exports")) '())
    3840   (documentation "utf8.html") ) )
     
    4345(install-extension 'byte-string-srfi-13
    4446 '("byte-string-srfi-13.so")
    45  `((version 1.14)
     47 `((version 2.0)
    4648   ,@(if has-exports? `((exports "byte-string-srfi-13.exports")) '())
    4749   (documentation "utf8.html") ) )
    4850
    49 (compile -s -O2 -d1 -R syntax-case
     51(compile -s -O2 -d1
    5052  ,@(if has-exports? '(-check-imports -emit-exports utf8-srfi-13.exports) '())
    5153  utf8-srfi-13.scm)
     
    5355 '("utf8-srfi-13.scm"
    5456   "utf8-srfi-13.so")
    55  `((version 1.14)
     57 `((version 2.0)
    5658   ,@(if has-exports? `((exports "utf8-srfi-13.exports")) '())
    5759   (documentation "utf8.html") ) )
    5860
    5961(compile -s -O2 -d1
    60   ,@(if has-exports? '(-check-imports -emit-exports char-set.exports) '())
    61   char-set.scm)
    62 (install-extension 'char-set
    63  '("char-set.so")
    64  `((version 1.14)
    65    ,@(if has-exports? `((exports "char-set.exports")) '())
     62  ,@(if has-exports? '(-check-imports -emit-exports unicode-char-sets.exports) '())
     63  unicode-char-sets.scm)
     64(install-extension 'unicode-char-sets
     65 '("unicode-char-sets.so")
     66 `((version 2.0)
     67   ,@(if has-exports? `((exports "unicode-char-sets.exports")) '())
    6668   (documentation "utf8.html") ) )
    6769
    68 (compile -s -O2 -d1 -R syntax-case
    69   ,@(if has-exports? '(-check-imports -emit-exports case-map.exports) '())
    70   case-map.scm)
    71 (install-extension 'case-map
    72  '("case-map.so" "case-map-1.dat" "case-map-2.dat")
    73  `((version 1.14)
    74    ,@(if has-exports? `((exports "case-map.exports")) '())
     70(compile -s -O2 -d1
     71  ,@(if has-exports? '(-check-imports -emit-exports utf8-case-map.exports) '())
     72  utf8-case-map.scm)
     73(install-extension 'utf8-case-map
     74 '("utf8-case-map.so" "case-map-1.dat" "case-map-2.dat")
     75 `((version 2.0)
     76   ,@(if has-exports? `((exports "utf8-case-map.exports")) '())
    7577   (documentation "utf8.html") ) )
Note: See TracChangeset for help on using the changeset viewer.