Ignore:
Timestamp:
03/10/16 21:32:18 (5 years ago)
Author:
sjamaan
Message:

Uri-generic: Fix for SRFI-14 being secretly Latin1-encoded. Pointed out by Adrien Ramos.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • release/4/uri-generic/trunk/uri-generic.scm

    r33061 r33264  
    316316           (char=? c #\~))))
    317317
     318;; The SRFI-14 library uses Latin1, and its definition of "letter"
     319;; includes accented letters with the high bit set. This wreaks havoc with
     320;; UTF-8 URIs.  Besides, the RFC only discusses ASCII letters anyway.
     321(define char-set:ascii-letter
     322  (string->char-set
     323   "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
     324
     325(define char-set:ascii-letter+digit
     326  (char-set-union char-set:ascii-letter char-set:digit))
     327
    318328(define (scheme-char? c)      (and (char? c) (char-set-contains? char-set:scheme c)))
    319329
    320330(define (ipv-future-char? c)  (and (char? c) (char-set-contains? char-set:ipv-future c)))
    321331
    322 (define (alpha-char? c)       (and (char? c) (char-set-contains? char-set:letter c)))
     332(define (alpha-char? c)       (and (char? c) (char-set-contains? char-set:ascii-letter c)))
    323333
    324334(define (pct-encoded? c)      (match c ((#\% h1 h2) (and (hexdigit-char? h1) (hexdigit-char? h2)))
     
    448458
    449459(define char-set:uri-unreserved
    450   (char-set-union char-set:letter+digit (string->char-set "-_.~")))
     460  (char-set-union char-set:ascii-letter+digit (string->char-set "-_.~")))
    451461
    452462
     
    516526
    517527(define char-set:scheme
    518   (char-set-union char-set:letter+digit (string->char-set "+-.")))
     528  (char-set-union char-set:ascii-letter+digit (string->char-set "+-.")))
    519529
    520530
Note: See TracChangeset for help on using the changeset viewer.