Changeset 33264 in project


Ignore:
Timestamp:
03/10/16 21:32:18 (5 years ago)
Author:
sjamaan
Message:

Uri-generic: Fix for SRFI-14 char-sets being secretly Latin-1 based, which broke UTF-8 URIs. Pointed out by Adrien Ramos.

Location:
release/4/uri-generic/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • release/4/uri-generic/trunk/tests/run.scm

    r30089 r33264  
    202202    ("foo&bar" "foo%26bar")
    203203    ("foo%20bar" "foo%2520bar")
    204     ("foo\x00bar\n" "foo%00bar%0A")))
     204    ("foo\x00bar\n" "foo%00bar%0A")
     205    ;; UTF-8 breakage, reported by Adrien Ramos
     206    ("D&D - Création persos.html"
     207     "D%26D%20-%20Cr%C3%A9ation%20persos.html")))
    205208
    206209(test-group "uri-encode-string test"
     
    290293     ("/?foo" (/ ""))
    291294     ("/#foo" (/ ""))
    292      ("/foo:bar" (/ "foo:bar")))
     295     ("/foo:bar" (/ "foo:bar"))
     296
     297     ;; UTF-8 breakage, reported by Adrien Ramos
     298     ("/D&D%20-%20Cr%C3%A9ation%20persos.html"
     299      (/ "D&D%20-%20Cr%C3%A9ation%20persos.html")))
    293300    ("query ",uri-query
    294301     ("//" #f)
  • release/4/uri-generic/trunk/uri-generic.scm

    r33061 r33264  
    316316           (char=? c #\~))))
    317317
     318;; The SRFI-14 library uses Latin-1, and its definition of "letter"
     319;; includes accented letters with the high bit set. This wreaks havoc
     320;; with UTF-8 URIs.  Besides, the RFC only discusses ASCII letters anyway.
     321(define char-set:ascii-letter
     322  (string->char-set
     323   "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
     324
     325(define char-set:ascii-letter+digit
     326  (char-set-union char-set:ascii-letter char-set:digit))
     327
    318328(define (scheme-char? c)      (and (char? c) (char-set-contains? char-set:scheme c)))
    319329
    320330(define (ipv-future-char? c)  (and (char? c) (char-set-contains? char-set:ipv-future c)))
    321331
    322 (define (alpha-char? c)       (and (char? c) (char-set-contains? char-set:letter c)))
     332(define (alpha-char? c)       (and (char? c) (char-set-contains? char-set:ascii-letter c)))
    323333
    324334(define (pct-encoded? c)      (match c ((#\% h1 h2) (and (hexdigit-char? h1) (hexdigit-char? h2)))
     
    448458
    449459(define char-set:uri-unreserved
    450   (char-set-union char-set:letter+digit (string->char-set "-_.~")))
     460  (char-set-union char-set:ascii-letter+digit (string->char-set "-_.~")))
    451461
    452462
     
    516526
    517527(define char-set:scheme
    518   (char-set-union char-set:letter+digit (string->char-set "+-.")))
     528  (char-set-union char-set:ascii-letter+digit (string->char-set "+-.")))
    519529
    520530
Note: See TracChangeset for help on using the changeset viewer.