Changeset 14275 in project


Ignore:
Timestamp:
04/17/09 08:38:43 (11 years ago)
Author:
Alex Shinn
Message:

Adding tests, fixing string-{match,search}-offset for real and verifying
with tests.

Adding tests, fixing string-translate, and verifying with tests.

Testing and fixing bug in string-prefix?.

Taking 'display', 'substring=?' and 'substring-ci=?' back out of
utf8-lolevel. utf8-lolevel uses a cursor API. utf8 uses an index API.
The two should not be confused.

More tests still needed for SRFI-13, the number of permutations of
calls with all start/end indexes is huge.

Location:
release/4/utf8/trunk
Files:
1 deleted
4 edited

Legend:

Unmodified
Added
Removed
  • release/4/utf8/trunk/tests/utf8-test.scm

    r14226 r14275  
    11
    2 ; Note that the srfi-13 test uses srfi-14
    32(use test utf8 utf8-srfi-13 utf8-srfi-14)
    4 
    5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    6 ;; tests
    73
    84;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    95;; R5RS
    106
     7(test-begin)
     8
    119(test 2 (string-length "挢字"))
    1210
    1311(test 28450 (char->integer (string-ref "挢字" 0)))
    1412
    15 ;;(define str "挢字")
    1613(define str (string-copy "挢字"))
    1714
     
    8986(test '("a" "bc" "第" "f几") (string-split "a,bc、第,f几" ",、"))
    9087
     88(test "THE QUICK BROWN FOX JUMPED OVER THE LAZY SLEEPING DOG"
     89    (string-translate "the quick brown fox jumped over the lazy sleeping dog"
     90                      "abcdefghijklmnopqrstuvwxyz"
     91                      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
     92(test ":foo:bar:baz" (string-translate "/foo/bar/baz" "/" ":"))
    9193(test "䜠爱我" (string-translate "我爱䜠" "我䜠" "䜠我"))
     94(test "䜠爱我" (string-translate "我爱䜠" '(#\我 #\䜠) '(#\䜠 #\我)))
     95(test "我䜠" (string-translate "我爱䜠" "爱"))
     96(test "我䜠" (string-translate "我爱䜠" #\爱))
    9297
    9398(test-assert (substring=? "日本語" "日本語"))
     
    110115(test '("aいc") (string-match "a[あい0-9えお]c" "aいc"))
    111116(test-assert (not (string-match "a[あい0-9えお]c" "aうc")))
     117
     118(test #f (string-search-positions "a" "b" 0))
     119(test '((0 1)) (string-search-positions "a" "a" 0))
    112120
    113121;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    228236(test-assert (not (string-prefix? "å
    229237ƒéº»åžƒ" "麻垃十番")))
     238(test-assert (not (string-prefix? "麻垃十番" "麻垃")))
    230239(test-assert (string-prefix? "å
    231240ƒéº»åžƒ" "麻垃十番" 1))
     
    235244(test-assert (string-suffix? "十番" "麻垃十番"))
    236245(test-assert (not (string-suffix? "九番" "麻垃十番")))
     246(test-assert (not (string-suffix? "麻垃十番" "十番")))
    237247(test-assert (string-suffix? "å
    238248ƒéº»åžƒ" "東麻垃" 1))
     
    523533(test-assert (char-set-contains? char-set:full #\a))
    524534(test-assert (char-set-contains? char-set:full (string-ref "あ" 0)))
     535
     536(test-end)
  • release/4/utf8/trunk/utf8-lolevel.scm

    r14226 r14275  
    2525  (bound-to-procedure
    2626    ##sys#char->utf8-string ##sys#become!))
     27
     28(require-library data-structures lolevel)
    2729
    2830(module utf8-lolevel
     
    3941   make-utf8-string
    4042   with-substring-offsets with-two-substring-offsets
    41    utf8-substring=? utf8-substring-ci=?
    4243   ;; string-pointers
    4344   make-string-pointer string-pointer? sp-copy
     
    4748   ;; I/O
    4849   read-utf8-char write-utf8-char char->utf8-string
    49    utf8-display)
    50 
    51 (import scheme chicken extras (only data-structures substring=? substring-ci=?) lolevel)
    52 
    53 (require-library data-structures lolevel)
     50   )
     51
     52(import scheme chicken extras lolevel)
    5453
    5554;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    209208                      (lp (+ i c-len))))))))
    210209      (make-string len)))
    211 
    212 (define (utf8-substring=? s1 s2 . opt)
    213   (let ((s1-len (utf8-string-length s1)) (s2-len (utf8-string-length s2)))
    214     (let-optionals* opt ((start1 0)
    215                          (start2 0)
    216                          (len (min (- s1-len start1) (- s2-len start2))))
    217       (let ((opt1 (list start1 (+ start1 len)))
    218             (opt2 (list start2 (+ start2 len))))
    219         (with-substring-offsets
    220          (lambda (s1 s1-start s1-end)
    221            (with-substring-offsets
    222             (lambda (s2 s2-start s2-end)
    223               (substring=? s1 s2 s1-start s2-start (- s1-end s1-start)))
    224             s2 opt2))
    225          s1 opt1)))) )
    226 
    227 (define (utf8-substring-ci=? s1 s2 . opt)
    228   (let ((s1-len (utf8-string-length s1)) (s2-len (utf8-string-length s2)))
    229     (let-optionals* opt ((start1 0)
    230                          (start2 0)
    231                          (len (min (- s1-len start1) (- s2-len start2))))
    232       (let ((opt1 (list start1 (+ start1 len)))
    233             (opt2 (list start2 (+ start2 len))))
    234         (with-substring-offsets
    235          (lambda (s1 s1-start s1-end)
    236            (with-substring-offsets
    237             (lambda (s2 s2-start s2-end)
    238               (substring-ci=? s1 s2 s1-start s2-start (- s1-end s1-start)))
    239             s2 opt2))
    240          s1 opt1)))) )
    241210
    242211;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    386355
    387356(define (write-utf8-char c . opt)
    388   (apply display (char->utf8-string c) opt))
     357  (display (char->utf8-string c)
     358           (if (pair? opt) (car opt) (current-output-port))))
    389359
    390360(define (read-utf8-char . opt)
     
    410380                         (- i 1))))))))))))
    411381
    412 (define (utf8-display x . opt)
    413   (apply display (if (char? x) (char->utf8-string x) x) opt))
    414 
    415382)
  • release/4/utf8/trunk/utf8-srfi-13.scm

    r14226 r14275  
    88  (bound-to-procedure
    99    ##sys#substring ##sys#become!))
     10
     11(require-library utf8-lolevel utf8-srfi-14 iset utf8-case-map)
    1012
    1113(module
     
    5254        utf8-lolevel utf8-srfi-14 iset utf8-case-map)
    5355
    54 (require-library utf8-lolevel utf8-srfi-14 iset utf8-case-map)
    55 
    5656(define (string-null? s) (equal? s ""))
    5757
     
    8080                       (make-final (lambda (x) "")))
    8181    (let ((out (open-output-string)))
    82       (utf8-display base out)
     82      (display base out) ; base must be a string, so normal display is fine
    8383      (let lp ((seed seed))
    8484        (if (p seed)
    85             (utf8-display (make-final seed) out)
     85            (display (make-final seed) out)
    8686            (begin
    8787              (write-utf8-char (f seed) out)
     
    346346      (else (- end1 i 1)))))
    347347
    348 (define string-prefix-length (make-string-fix-length byte-string-prefix-length))
    349 (define string-prefix-length-ci (make-string-fix-length byte-string-prefix-length-ci))
    350 (define string-suffix-length (make-string-fix-length byte-string-suffix-length))
    351 (define string-suffix-length-ci (make-string-fix-length byte-string-suffix-length-ci))
     348(define string-prefix-length
     349  (make-string-fix-length byte-string-prefix-length))
     350(define string-prefix-length-ci
     351  (make-string-fix-length byte-string-prefix-length-ci))
     352(define string-suffix-length
     353  (make-string-fix-length byte-string-suffix-length))
     354(define string-suffix-length-ci
     355  (make-string-fix-length byte-string-suffix-length-ci))
    352356
    353357(define (make-string-prefix-test proc)
    354358  (lambda (s1 s2 . opt)
    355359    (cond
    356       ((null? opt) (proc s1 s2))
    357       ((null? (cdr opt)) (proc s1 s2 (car opt)))
    358       (else
    359        (with-two-substring-offsets
    360            (lambda (s1 s2 start1 end1 start2 end2)
    361              (proc (##sys#substring s1 start1 end1)
    362                    (##sys#substring s2 start2 end2)))
    363            s1 s2 opt)))))
    364 
    365 (define string-prefix? (make-string-prefix-test utf8-substring=?))
    366 (define string-prefix-ci? (make-string-prefix-test utf8-substring-ci=?))
     360     ((null? opt)
     361      (and (<= (string-length s1) (string-length s2)) (proc s1 s2)))
     362     (else
     363      (with-two-substring-offsets
     364       (lambda (s1 s2 start1 end1 start2 end2)
     365         (let ((s1-len (- end1 start1))
     366               (s2-len (- end2 start2)))
     367           (and (<= s1-len s2-len) (proc s1 s2 start1 start2 s1-len))))
     368       s1 s2 opt)))))
     369
     370(define string-prefix? (make-string-prefix-test substring=?))
     371(define string-prefix-ci? (make-string-prefix-test substring-ci=?))
    367372
    368373(define (string-suffix? s1 s2 . opt)
     
    582587      (lambda ()
    583588        (string-for-each
    584          (lambda (c) (if (pred c) (utf8-display c)))
     589         (lambda (c) (if (pred c) (write-utf8-char c)))
    585590         (if (pair? opt)
    586591             (apply utf8-substring s opt)
  • release/4/utf8/trunk/utf8.scm

    r14265 r14275  
    329329(define read-char read-utf8-char)
    330330
    331 (define display utf8-display)
     331(define (display x . o)
     332  (let ((out (if (pair? o) (car o) (current-output-port))))
     333    (if (char? x) (write-utf8-char x out) (byte-display x out))))
     334
    332335
    333336;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    424427(define (string-translate str from . opt)
    425428
    426   ;Until needed elsewhere
     429  ;; Until needed elsewhere
    427430  (define (vector-char-scan vec ch . opts)
    428431    (let-optionals opts ((st 0) (ed (vector-length vec)))
    429432      (let loop ((i st))
    430         (cond ((= i ed)                         #f)
    431               ((char=? ch (vector-ref vec i))   i)
    432               (else
    433                (loop (+ i 1)) ) ) ) ) )
    434 
    435         (##sys#check-string str 'string-translate)
     433        (cond ((= i ed) #f)
     434              ((char=? ch (vector-ref vec i)) i)
     435              (else (loop (+ i 1)))))))
     436
     437  (##sys#check-string str 'string-translate)
     438
    436439  (let ((from (cond ((char? from)   from)
    437440                    ((pair? from)   (list->string from))
     
    445448                         (else
    446449                          (##sys#check-string to 'string-translate)
    447                           to))))) )
    448     (if (and (or (ascii-string? from)
    449                  (and (char? from) (> 128 (char->integer from))))
    450              to
    451              (or (ascii-string? to)
    452                  (and (char? to) (> 128 (char->integer to)))))
    453         (apply byte-string-translate str from opt)
    454         (let ((from-vec (if (char? from) (vector from) (string->vector from)))
    455               (to-vec (and to (if (char? to) (vector to) (string->vector to)))) )
    456           (let ((trans (if to-vec (lambda (i) (display (vector-ref to-vec i))) noop)))
     450                          to))))))
     451    (if (and (if (char? from)
     452                 (> 128 (char->integer from))
     453                 (ascii-string? from))
     454             (if (char? to)
     455                 (> 128 (char->integer to))
     456                 (and to (ascii-string? to))))
     457        (byte-string-translate str from to)
     458        (let ((from-vec
     459               (if (char? from) (vector from) (string->vector from)))
     460              (to-vec
     461               (and to (if (char? to) (vector to) (string->vector to)))))
     462          (let ((trans
     463                 (if to-vec (lambda (i) (display (vector-ref to-vec i))) noop)))
    457464            (with-output-to-string
    458465              (lambda ()
     
    463470                        (cond ((vector-char-scan from-vec c) => trans)
    464471                              (else (display c)))
    465                         (lp (sp-next str i) ) ) ) ) ) ) ) ) ) ) ) )
    466 
    467 (define substring=? utf8-substring=?)
    468 (define substring-ci=? utf8-substring-ci=?)
     472                        (lp (sp-next str i)))))))))))))
     473
     474(define (substring=? s1 s2 . opt)
     475  (let ((s1-len (utf8-string-length s1)) (s2-len (utf8-string-length s2)))
     476    (let-optionals* opt ((start1 0)
     477                         (start2 0)
     478                         (len (min (- s1-len start1) (- s2-len start2))))
     479      (let ((opt1 (list start1 (+ start1 len)))
     480            (opt2 (list start2 (+ start2 len))))
     481        (with-substring-offsets
     482         (lambda (s1 s1-start s1-end)
     483           (with-substring-offsets
     484            (lambda (s2 s2-start s2-end)
     485              (byte-substring=? s1 s2 s1-start s2-start (- s1-end s1-start)))
     486            s2 opt2))
     487         s1 opt1)))) )
     488
     489(define (substring-ci=? s1 s2 . opt)
     490  (let ((s1-len (utf8-string-length s1)) (s2-len (utf8-string-length s2)))
     491    (let-optionals* opt ((start1 0)
     492                         (start2 0)
     493                         (len (min (- s1-len start1) (- s2-len start2))))
     494      (let ((opt1 (list start1 (+ start1 len)))
     495            (opt2 (list start2 (+ start2 len))))
     496        (with-substring-offsets
     497         (lambda (s1 s1-start s1-end)
     498           (with-substring-offsets
     499            (lambda (s2 s2-start s2-end)
     500              (byte-substring-ci=? s1 s2 s1-start s2-start (- s1-end s1-start)))
     501            s2 opt2))
     502         s1 opt1)))) )
    469503
    470504(define (substring-index which where . opt)
     
    531565(define (string-match-positions rx str . opt)
    532566  (let* ((size (byte-string-length str))
    533          (->pos (lambda (o) (utf8-offset->index str opt))))
    534     (map (lambda (x) (if (pair? x) (map ->pos x) x))
    535          (or (apply string-match-offsets rx str opt) '()))))
     567         (->pos (lambda (o) (utf8-offset->index str o))))
     568    (let ((res (apply string-match-offsets rx str opt)))
     569      (and res (map (lambda (x) (if (pair? x) (map ->pos x) x)) res)))))
    536570
    537571(define (string-search-positions rx str . opt)
    538572  (let* ((size (byte-string-length str))
    539          (->pos (lambda (o) (utf8-offset->index str opt))))
    540     (map (lambda (x) (if (pair? x) (map ->pos x) x))
    541          (or (apply string-search-offsets rx str opt) '()))))
     573         (->pos (lambda (o) (utf8-offset->index str o))))
     574    (let ((res (apply string-search-offsets rx str opt)))
     575      (and res (map (lambda (x) (if (pair? x) (map ->pos x) x)) res)))))
    542576
    543577)
Note: See TracChangeset for help on using the changeset viewer.