Changeset 36553 in project


Ignore:
Timestamp:
09/08/18 13:26:47 (2 months ago)
Author:
sjamaan
Message:

uri-generic: Update irregex alternative to CHICKEN 5 (and port a few fixes I forgot about)

Location:
release/5/uri-generic/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • release/5/uri-generic/trunk/alternatives/uri-generic.irregex.scm

    r36551 r36553  
    22
    33;; Needs a chicken with irregex v0.8.1+  (currently in the total-irregex branch)
    4 
    5 (provide 'uri-generic)
    64
    75(module uri-generic
    86  (uri-reference make-uri update-uri update-authority
    97   uri-reference? uri-auth uri-authority uri-scheme uri-path uri-query
    10    uri-fragment uri-host uri-port uri-username uri-password
    11    authority? authority-host authority-port
     8   uri-fragment uri-host uri-ipv6-host? uri-port
     9   uri-username uri-password
     10   authority? authority-host authority-ipv6-host? authority-port
    1211   authority-username authority-password
    1312   
     
    2120   char-set:uri-reserved char-set:uri-unreserved)
    2221
    23 (import chicken scheme)
    24 (use extras data-structures ports irregex
    25      srfi-1 srfi-4 srfi-13 srfi-14)
    26 
     22(import scheme (chicken base) (chicken string) (chicken port)
     23        (chicken format) (chicken irregex) srfi-14)
    2724
    2825(define uri-error error)
     
    5552
    5653(define-record-type <URIAuth>
    57   (make-URIAuth username password host port)
     54  (make-URIAuth username password host ipv6-host? port)
    5855  URIAuth?
    5956  (username URIAuth-username URIAuth-username-set!)
    6057  (password URIAuth-password URIAuth-password-set!)
    6158  (host URIAuth-host URIAuth-host-set!)
     59  (ipv6-host? URIAuth-ipv6-host? URIAuth-ipv6-host?-set!)
    6260  (port URIAuth-port URIAuth-port-set!))
    6361
     
    7472 
    7573  (define-record-printer (<URIAuth> x out)
    76     (fprintf out "#(URIAuth host=~S port=~A)"
     74    (fprintf out "#(URIAuth host=~S~A port=~A)"
    7775             (URIAuth-host x)
     76             (if (URIAuth-ipv6-host? x) "(ipv6)" "")
    7877             (URIAuth-port x))))
    7978 (else))
     
    102101
    103102
     103(define (is-ipv6-host? h) (and (substring-index ":" h) #t))
     104
    104105(define (update-URIAuth uri-auth . args)
    105106  (let loop ((args args)
     
    107108             (new-password (URIAuth-password uri-auth))
    108109             (new-host (URIAuth-host uri-auth))
     110             (new-ipv6-host? (URIAuth-ipv6-host? uri-auth))
    109111             (new-port (URIAuth-port uri-auth)))
    110112    (cond ((null? args)
    111            (make-URIAuth new-username new-password new-host new-port))
     113           (make-URIAuth new-username new-password
     114                         new-host new-ipv6-host? new-port))
    112115          ((null? (cdr args))
    113116           (uri-error "malformed arguments to update-URIAuth"))
     
    119122                   (if (eq? key 'password) value new-password)
    120123                   (if (eq? key 'host) value new-host)
     124                   (if (eq? key 'host)
     125                       (is-ipv6-host? value)
     126                       new-ipv6-host?)
    121127                   (if (eq? key 'port) value new-port)))))))
    122128
     
    135141    (and auth (URIAuth-host auth))))
    136142
     143(define (uri-ipv6-host? x)
     144  (let ((auth (URI-authority x)))
     145    (and auth (URIAuth-ipv6-host? auth))))
     146
    137147(define (uri-port x)
    138148  (let ((auth (URI-authority x)))
     
    149159(define authority? URIAuth?)
    150160(define authority-host URIAuth-host)
     161(define authority-ipv6-host? URIAuth-ipv6-host?)
    151162(define authority-port URIAuth-port)
    152163(define authority-username URIAuth-username)
     
    172183                            ((not (eq? unset authority)) authority)
    173184                            (else (URI-authority uri)))
    174                              (make-URIAuth #f #f #f #f)))
     185                             (make-URIAuth #f #f #f #f #f)))
    175186                 (updated-auth (apply update-authority base-auth args))
    176                  (final-auth (if (uri-auth-equal? (make-URIAuth #f #f #f #f)
    177                                                   updated-auth)
     187                 (final-auth (if (uri-auth-equal?
     188                                  (make-URIAuth #f #f #f #f #f)
     189                                  updated-auth)
    178190                               #f
    179191                               updated-auth)))
     
    274286  (and (uri-reference? u) (not (relative-ref? u)) (not (uri-fragment u))))
    275287
     288;; The SRFI-14 library uses Latin1, and its definition of "letter"
     289;; includes accented letters with high bit. This wreaks havoc with
     290;; UTF-8 URIs.  Besides, the RFC only discusses ASCII letters anyway.
     291(define char-set:ascii-letter
     292  (string->char-set
     293   "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
     294
     295(define char-set:ascii-letter+digit
     296  (char-set-union char-set:ascii-letter char-set:digit))
     297
    276298
    277299;; RFC3986, section 2.2
     
    293315
    294316(define char-set:uri-unreserved
    295   (char-set-union char-set:letter+digit (string->char-set "-_.~")))
     317  (char-set-union char-set:ascii-letter+digit (string->char-set "-_.~")))
    296318
    297319(define unreserved `(,(char-set->string char-set:uri-unreserved)))
     
    311333           ,(and-let* ((auth (URI-authority uri))
    312334                       (host (URIAuth-host auth)))
    313               (let ((username (URIAuth-username auth)))
    314                 (list "//" (and username (list (userinfomap username (URIAuth-password auth)) "@"))
    315                       host (and (URIAuth-port auth)
    316                                 (list ":" (URIAuth-port auth))))))
     335              (let ((username (URIAuth-username auth))
     336                    (password (URIAuth-password auth))
     337                    (ipv6? (URIAuth-ipv6-host? auth))
     338                    (port (URIAuth-port auth)))
     339                (list "//" (and username (list (userinfomap
     340                                                username
     341                                                password) "@"))
     342                      (if ipv6? "[" "") host (if ipv6? "]" "")
     343                      (and port (list ":" port)))))
    317344           ,(path->string (uri-path uri))
    318345           ,(and-let* ((query (uri-query uri))) (list "?" query))
     
    325352;;
    326353
    327 (define pct-encoded `(seq #\% hex-digit hex-digit))
     354(define pct-encoded `(seq #\% (submatch-named num hex-digit hex-digit)))
    328355
    329356(define uri-decode-string
     
    333360       re str
    334361       (lambda (match)
    335          (let* ((encoded (irregex-match-substring match))
    336                 (decoded (integer->char (string->number (string-drop encoded 1) 16))))
    337            (if (char-set-contains? char-set decoded) (string decoded) encoded)))))))
     362         (let* ((encoded (irregex-match-substring match 'num))
     363                (decoded (integer->char (string->number encoded 16))))
     364           (if (char-set-contains? char-set decoded)
     365               (string decoded)
     366               (irregex-match-substring match))))))))
    338367
    339368(define (display-fragments b)
     
    407436     `(#\% ,(hex-digit (quotient i 16)) ,(hex-digit (remainder i 16)))))
    408437  (list->string
    409    (string-fold-right (lambda (c res)
    410                         (if (char-set-contains? char-set c)
    411                             (append (pct-encode c) res)
    412                             (cons c res)))
    413                       '() str)))
     438   (foldr (lambda (c res)
     439            (if (char-set-contains? char-set c)
     440                (append (pct-encode c) res)
     441                (cons c res)))
     442          '() (string->list str))))
    414443
    415444(define normalize-pct-encoding
     
    419448      re str
    420449      (lambda (match)
    421         (let* ((encoded (irregex-match-substring match))
    422                (decoded (integer->char (string->number (string-drop encoded 1) 16))))
     450        (let* ((encoded (irregex-match-substring match 'num))
     451               (decoded (integer->char (string->number encoded 16))))
    423452          (if (char-set-contains? char-set:uri-unreserved decoded)
    424453              (string decoded)
    425               (string-upcase encoded))))))))
     454              (string #\%
     455                      (char-upcase (string-ref encoded 0))
     456                      (char-upcase (string-ref encoded 1))))))))))
    426457
    427458(define path-safe-chars (char-set-union char-set:uri-unreserved (char-set #\/)))
     
    431462         (rp (irregex-match-substring m 'relpath))
    432463         (path (if ap
    433                    (if (string-null? ap)
     464                   (if (string=? "" ap)
    434465                       '()
    435466                       (cons '/ (map (lambda (s)
    436467                                       (uri-decode-string s path-safe-chars))
    437468                                     (cdr (string-split ap "/" #t)))))
    438                    (if (string-null? rp)
     469                   (if (string=? "" rp)
    439470                       '()
    440471                       (map (lambda (s)
     
    446477             (let ((user (irregex-match-substring m 'username))
    447478                   (pass (irregex-match-substring m 'password))
    448                    (host (irregex-match-substring m 'host))
     479                   (host (or (irregex-match-substring m 'reg-name)
     480                             (irregex-match-substring m 'ip-literal)
     481                             (irregex-match-substring m 'ipv4-address)))
    449482                   (port (irregex-match-substring m 'port)))
    450483               (and (or user pass host port)
    451484                    (make-URIAuth
    452                      user pass host (and port (string->number port)))))
     485                     user pass
     486                     host (is-ipv6-host? host)
     487                     (and port (string->number port)))))
    453488             path (irregex-match-substring m 'query)
    454489             (irregex-match-substring m 'fragment))))
     
    474509
    475510;; Should be provided by irregex, but that one is unavailable in Chicken 4.3
    476 (define ipv4-address `(seq ,dec-octet #\.
    477                            ,dec-octet #\.
    478                            ,dec-octet #\. ,dec-octet))
     511(define ipv4-address `(submatch-named ipv4-address
     512                                      ,dec-octet #\.
     513                                      ,dec-octet #\.
     514                                      ,dec-octet #\. ,dec-octet))
    479515
    480516;; IPv6address =                                  6( h16 ":" ) ls32
     
    494530;;                   ; 16 bits of address represented in hexadecimal
    495531
    496 (define  h16 `(= 4 hex-digit))
     532(define  h16 `(** 1 4 hex-digit))
    497533(define ls32 `(or (seq ,h16 #\: ,h16) ,ipv4-address))
    498534
     
    500536;; Chicken 4.3
    501537(define ipv6-address
    502   `(or (seq                              (= 6 ,h16 #\:) ,ls32)
    503        (seq                         "::" (= 5 ,h16 #\:) ,ls32)
    504        (seq (?                ,h16) "::" (= 4 ,h16 #\:) ,ls32)
    505        (seq (? (= 1 ,h16 #\:) ,h16) "::" (= 3 ,h16 #\:) ,ls32)
    506        (seq (? (= 2 ,h16 #\:) ,h16) "::" (= 2 ,h16 #\:) ,ls32)
    507        (seq (? (= 3 ,h16 #\:) ,h16) "::" (= 1 ,h16 #\:) ,ls32)
    508        (seq (? (= 4 ,h16 #\:) ,h16) "::"                ,ls32)
    509        (seq (? (= 5 ,h16 #\:) ,h16) "::"                ,h16)
    510        (seq (? (= 6 ,h16 #\:) ,h16) "::")))
     538  `(or (seq                                 (= 6 ,h16 #\:) ,ls32)
     539       (seq                            "::" (= 5 ,h16 #\:) ,ls32)
     540       (seq (?                   ,h16) "::" (= 4 ,h16 #\:) ,ls32)
     541       (seq (? (** 0 1 ,h16 #\:) ,h16) "::" (= 3 ,h16 #\:) ,ls32)
     542       (seq (? (** 0 2 ,h16 #\:) ,h16) "::" (= 2 ,h16 #\:) ,ls32)
     543       (seq (? (** 0 3 ,h16 #\:) ,h16) "::" (= 1 ,h16 #\:) ,ls32)
     544       (seq (? (** 0 4 ,h16 #\:) ,h16) "::"                ,ls32)
     545       (seq (? (** 0 5 ,h16 #\:) ,h16) "::"                ,h16)
     546       (seq (? (** 0 6 ,h16 #\:) ,h16) "::")))
    511547
    512548;; IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
     
    516552;; IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
    517553(define ip-literal
    518   `(seq #\[ (or ,ipv6-address ,ipv-future) #\]))
    519 
    520 (define reg-name `(* (or ,unreserved ,pct-encoded ,sub-delims)))
    521 
    522 (define host `(submatch-named host (or ,ip-literal ,ipv4-address ,reg-name)))
     554  `(seq #\[ (submatch-named ip-literal (or ,ipv6-address ,ipv-future)) #\]))
     555
     556(define reg-name `
     557  (submatch-named reg-name
     558                  (* (or ,unreserved ,pct-encoded ,sub-delims))))
     559
     560(define host `(or ,ip-literal ,ipv4-address ,reg-name))
    523561
    524562(define port `(submatch-named port (* numeric)))
  • release/5/uri-generic/trunk/tests/run.scm

    r36548 r36553  
    1 (load "../uri-generic.scm")
     1                                        ;(load "../uri-generic.scm")
     2(load "../alternatives/uri-generic.irregex.scm")
    23(import uri-generic (chicken format) (chicken string) srfi-1 test)
    34
Note: See TracChangeset for help on using the changeset viewer.