Changeset 28092 in project


Ignore:
Timestamp:
01/15/13 06:16:10 (8 years ago)
Author:
Ivan Raikov
Message:

uri-generic: some more work on utf8 branch

Location:
release/4/uri-generic/branches/utf8
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • release/4/uri-generic/branches/utf8/tests/run.scm

    r28086 r28092  
    553553  '(;; URI representation of IRIs test cases
    554554    ("http://example.com/삌계탕"
    555      "http://example.com/test")
     555     "http://example.com/%EC%82%BC%EA%B3%84%ED%83%95")
    556556    ))
    557557
    558558(test-group "URI representation of IRIs"
    559559  (for-each (lambda (s)
    560               (test (cadr s) (uri->string (iri/uri-map (car s)))))
     560              (test (cadr s) (uri->string (utf8-string->uri (car s)))))
    561561            utf8-cases))
    562562
  • release/4/uri-generic/branches/utf8/uri-generic.scm

    r28086 r28092  
    4141
    4242(module uri-generic
    43   (uri-reference iri->uri make-uri update-uri update-authority
     43  (uri-reference utf8-string->uri make-uri update-uri update-authority
    4444   uri-reference? uri-auth uri-authority uri-scheme uri-path uri-query
    4545   uri-fragment uri-host uri-port uri-username uri-password
     
    155155(define (alpha-char? c)       (and (char? c) (char-set-contains? char-set:letter c)))
    156156
    157 (define (pct-encoded? c)      (match c ((#\% h1 h2) (and (hexdigit-char? h1) (hexdigit-char? h2)))
     157(define (pct-encoded? c)      (match c ((#\% h1 h2 . rst)
     158                                        (and (hexdigit-char? h1) (hexdigit-char? h2)
     159                                             (or (null? rst) (pct-encoded? rst))))
    158160                                     (else #f)))
    159161   
     
    691693;; IRI to URI mapping (RFC3987, section 3.1)
    692694
    693 (define (iri->uri s)
    694   (let ((s (if (string? s) (uri-string->char-list s) s)))
     695(define (utf8-string->uri s)
     696  (let ((s (if (string? s) (utf8-uri-string->char-list s) s)))
    695697    (print "s = " s)
    696698    (and s (or (uri s) (relative-ref s)))))
     
    889891         (len (vector-ref utf8-start-byte-length bi1)))
    890892    (if (<= len 1)
    891         (list (list b1) (cdr lst))
     893        (list b1 (cdr lst))
    892894        (let loop ((res (list bi1)) (b (cdr lst)) (i (- len 1)))
    893895          (if (zero? i)
    894896              (list ;; converts UTF-8 sequences to pct-encoded char lists
    895                ;; the pct-encoded list is built in reverse order
    896                (fold-right
    897                 (lambda (x ax)
    898                   (let ((h1 (hex-digit (quotient x 16)))
    899                         (h2 (hex-digit (remainder x 16))))
    900                        (cons  (list #\% h1 h2) ax)))
    901                 '() res)
     897               (fold
     898                (lambda (x ax)
     899                  (let ((h1 (hex-digit (quotient x 16)))
     900                        (h2 (hex-digit (remainder x 16))))
     901                    (cons* #\% h1 h2 ax)))
     902                '() res)
    902903               b)
    903904              (let ((bi2 (char->integer (car b))))
     
    913914
    914915   
    915 ;; Convert a string to a URI character list
     916;; Converts a string to a URI character list
    916917
    917918(define (uri-string->char-list s)
     919  (let loop ((cs (list)) (lst (string->list s)))
     920    (if (null? lst) (reverse cs)
     921        (match lst
     922               ((#\% h1 h2 . rst) 
     923                (and (hexdigit-char? h1) (hexdigit-char? h2)
     924                     (loop (cons (list #\% h1 h2) cs) rst)))
     925
     926               (((and c (? char?)) . rst)  (loop (cons c cs) rst))))))
     927
     928   
     929;; Converts a UTF-8 string to a URI character list
     930
     931(define (utf8-uri-string->char-list s)
    918932  (let loop ((cs (list)) (lst (string->list s)))
    919933    (if (null? lst) (reverse cs)
     
    924938               (else
    925939                (match-let (((c rst) (utf8-char-car lst)))
    926                            (loop (append c cs) rst)) )
     940                           (loop (cons c cs) rst)) )
    927941               ))
    928942    ))
Note: See TracChangeset for help on using the changeset viewer.