source: project/release/5/uri-common/tags/2.0/uri-common.scm @ 35625

Last change on this file since 35625 was 35625, checked in by sjamaan, 2 years ago

Release uri-common 2.0 for CHICKEN 5

File size: 15.1 KB
Line 
1;;
2;; URI-common provides URI handling procedures for common URI schemes
3;; that are based on the generic syntax such as http, https, file, ftp.
4;; It also provides automatic form-urlencoded query argument
5;; encoding/decoding
6;;
7; Copyright (c) 2008-2018, Peter Bex
8; All rights reserved.
9;
10; Redistribution and use in source and binary forms, with or without
11; modification, are permitted provided that the following conditions
12; are met:
13;
14; 1. Redistributions of source code must retain the above copyright
15;    notice, this list of conditions and the following disclaimer.
16; 2. Redistributions in binary form must reproduce the above copyright
17;    notice, this list of conditions and the following disclaimer in the
18;    documentation and/or other materials provided with the distribution.
19; 3. Neither the name of the author nor the names of its
20;    contributors may be used to endorse or promote products derived
21;    from this software without specific prior written permission.
22;
23; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
28; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
34; OF THE POSSIBILITY OF SUCH DAMAGE.
35;
36; Please report bugs, suggestions and ideas to the Chicken Trac
37; ticket tracking system (assign tickets to user 'sjamaan'):
38; http://trac.callcc.org
39
40(module uri-common
41  (uri-reference uri-reference? absolute-uri absolute-uri? relative-ref?
42   uri->uri-generic uri-generic->uri uri->list
43   make-uri update-uri uri? uri-scheme uri-username uri-password
44   uri-host uri-port uri-path uri-query uri-fragment
45   uri->string form-urlencode form-urldecode form-urlencoded-separator
46   uri-relative-to uri-relative-from
47   uri-normalize-path-segments uri-normalize-case
48   uri-path-relative? uri-path-absolute?
49   uri-default-port? uri-encode-string uri-decode-string
50   
51   char-set:gen-delims char-set:sub-delims
52   char-set:uri-reserved char-set:uri-unreserved)
53
54(import scheme (chicken base) (chicken string) (chicken format))
55(import srfi-1 srfi-13 srfi-14 defstruct matchable)
56(import (prefix uri-generic generic:))
57
58;; We could use the hostinfo egg for this, but that would be yet another
59;; dependency. Besides, not all service names have a matching URI scheme
60;; nor do all URI schemes have a matching service name.
61(define default-ports
62  '((http . 80)              ; RFC 2616
63    (https . 443)            ; RFC 2818
64    (shttp . 80)             ; RFC 2660
65    (ftp . 21)               ; RFC 959; no official URI scheme defined
66    ;; nonstandard, but could be useful
67    (svn+ssh . 22)
68    (svn . 3690)
69    ))
70
71;; A common URI is a generic URI plus stored decoded versions of most components
72(defstruct URI-common
73  generic username password host port path query fragment)
74
75(define-record-printer (URI-common x out)
76  (fprintf out "#<URI-common: scheme=~S port=~S host=~S path=~S query=~S fragment=~S>"
77           (uri-scheme x) (uri-port x) (uri-host x)
78           (uri-path x) (uri-query x) (uri-fragment x)))
79
80;;; Conversion procedures
81(define (uri->uri-generic uri)
82  (URI-common-generic uri))
83
84(define (uri-reference u)
85  (let ((u1 (generic:uri-reference u)))
86    (and u1 (uri-generic->uri u1))))
87
88(define (absolute-uri u)
89  (let ((u1 (generic:absolute-uri u)))
90    (and u1 (uri-generic->uri u1))))
91
92(define (uri-generic->uri uri)
93  (make-URI-common generic: uri
94                   username: (decode-string* (generic:uri-username uri))
95                   password: (decode-string* (generic:uri-password uri))
96                   host:     (decode-string* (generic:uri-host uri))
97                   port:     (generic:uri-port uri)
98                   path:     (decode-path (generic:uri-path uri))
99                   query:    (form-urldecode (generic:uri-query uri))
100                   fragment: (decode-string* (generic:uri-fragment uri))))
101
102(define (decode-string* s)
103  (and s (uri-decode-string s)))
104
105(define (uri->list uri . rest)
106  (let-optionals rest ((userinfomap (lambda (u pw) (string-append u ":******" ))))
107    (list (uri-scheme uri)
108          (list (uri-auth->list uri userinfomap) (uri-path uri) (uri-query uri))
109          (uri-fragment uri))))
110
111(define (uri-auth->list uri userinfomap)
112  (let ((username (uri-username uri))
113        (password (uri-password uri)))
114    (list (if (and username password) (userinfomap username password) #f)
115          (uri-host uri)
116          (uri-port uri))))
117
118;;; Accessors and predicates
119(define uri-reference? URI-common?)
120(define (uri? u)
121  (and (URI-common? u) (generic:uri? (URI-common-generic u))))
122(define (absolute-uri? u)
123  (and (URI-common? u) (generic:absolute-uri? (URI-common-generic u))))
124(define (relative-ref? u)
125  (and (URI-common? u) (generic:relative-ref? (URI-common-generic u))))
126(define uri-scheme   (compose generic:uri-scheme URI-common-generic))
127(define uri-username URI-common-username)
128(define uri-password URI-common-password)
129(define uri-host     URI-common-host)
130(define uri-query    URI-common-query)
131(define uri-fragment URI-common-fragment)
132(define (uri-default-port? uri)
133  (default-port? (uri-port uri) (uri-scheme uri)))
134
135;; Uri-path variant which performs Scheme-Based Normalization for well-known
136;; "common" schemes.  See per RFC 3986, section 6.2.3
137(define (uri-path uc)
138  (let ((path (URI-common-path uc)))
139    (if (and (not (relative-ref? uc)) ; For real URIs (not relative-refs),
140             (or (null? path)         ; an empty path equals a path of "/"
141                 (eq? path #f)))
142        '(/ "")
143        path)))
144
145(define (uri-port uc)
146  (let ((u (URI-common-generic uc)))
147    (or (generic:uri-port u)
148        (alist-ref (generic:uri-scheme u) default-ports))))
149
150;;; Constructor
151(define (make-uri . key/values)
152  (apply update-uri (make-URI-common generic: (generic:make-uri)) key/values))
153
154;;; Updaters
155(define update-uri
156  (let ((unset (list 'unset)))
157    (lambda (uc #!key
158                (scheme unset) (username unset) (password unset)
159                (host unset) (port unset)
160                (path unset) (query unset) (fragment unset))
161      (let* ((uc (update-URI-common uc)) ; new copy
162             (actual-scheme (if (eq? scheme unset)
163                                (generic:uri-scheme (URI-common-generic uc))
164                                scheme))
165             (path (if (and actual-scheme (or (eq? path #f) (eq? path '())))
166                       '(/ "") ; normalize path
167                       path))
168             (actual-port (if (eq? port unset)
169                              (URI-common-port uc)
170                              port)))
171        (unless (and (eq? port unset) (eq? scheme unset))
172          ;; Clear port if it's the default for this scheme
173          (URI-common-generic-set!
174           uc (generic:update-uri
175               (URI-common-generic uc)
176               port: (if (default-port? actual-port actual-scheme)
177                         #f
178                         actual-port))))
179        ;; This code is ugly!
180        (unless (eq? scheme unset)
181          (URI-common-generic-set!
182           uc (generic:update-uri (URI-common-generic uc) scheme: scheme)))
183        (unless (eq? username unset)
184          (URI-common-generic-set!
185           uc (generic:update-uri (URI-common-generic uc)
186                                  username: (encode-string* username)))
187          (URI-common-username-set! uc username))
188        (unless (eq? password unset)
189          (URI-common-generic-set!
190           uc (generic:update-uri (URI-common-generic uc)
191                                  password: (encode-string* password)))
192          (URI-common-password-set! uc password))
193        (unless (eq? host unset)
194          (URI-common-generic-set!
195           uc (generic:update-uri (URI-common-generic uc)
196                                  host: (encode-string* host)))
197          (URI-common-host-set! uc host))
198        (unless (eq? port unset)
199          ;; Generic port set above - it depends on the scheme too
200          (URI-common-port-set! uc port))
201        (unless (eq? path unset)
202          (URI-common-generic-set!
203           uc (generic:update-uri (URI-common-generic uc)
204                                  path: (encode-path path)))
205          (URI-common-path-set! uc path))
206        (unless (eq? query unset)
207          (URI-common-generic-set!
208           uc (generic:update-uri (URI-common-generic uc)
209                                  query: (form-urlencode query)))
210          (URI-common-query-set! uc query))
211        (unless (eq? fragment unset)
212          (URI-common-generic-set!
213           uc (generic:update-uri (URI-common-generic uc)
214                                  fragment: (encode-string*
215                                             fragment)))
216          (URI-common-fragment-set! uc fragment))
217        uc))))
218
219(define uri-encode-string generic:uri-encode-string)
220(define uri-decode-string generic:uri-decode-string)
221(define char-set:gen-delims generic:char-set:gen-delims)
222(define char-set:sub-delims generic:char-set:sub-delims)
223(define char-set:uri-reserved generic:char-set:uri-reserved)
224(define char-set:uri-unreserved generic:char-set:uri-unreserved)
225
226(define (encode-string* s . rest)
227  (and s (apply uri-encode-string s rest)))
228
229(define (default-port? port scheme)
230  (eqv? port (alist-ref scheme default-ports)))
231
232(define (encode-path p)
233  (and p (match p
234                (('/ . rst) (cons '/ (map uri-encode-string rst)))
235                (else (map uri-encode-string p)))))
236
237;;; Handling of application/x-www-form-urlencoded data
238;;
239;; This implements both HTML 4's specification
240;; (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1)
241;; and XHTML XForms' specification
242;; (http://www.w3.org/TR/xforms/#serialize-urlencode)
243;;
244;; The latter is a more generalised form of the former, as it allows
245;; the user to specify a custom separator character.  The HTML 4
246;; spec also contains a recommendation
247;; (http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2)
248;; that semicolons should be used instead of ampersands as a separator.
249;; However, it provides no mechanism to select the separator to use
250;; when submitting a form, which makes it a pretty useless recommendation.
251;; This recommendation also complicates matters on the server because one
252;; would need to handle both form-generated GET query parameters and
253;; hardcoded GET query parameters as specified in anchors.
254;;
255;; There's also a 2006 Internet-Draft by Bjoern Hoehrmann that was
256;; intended to standardize this, but it was allowed to expire in 2007:
257;; http://ietfreport.isoc.org/idref/draft-hoehrmann-urlencoded
258;; It was different in a few ways from the x-www-form-urlencoded type.
259;; For example, www-form-urlencoded only pct-encoded the chars that
260;; are not allowed in a query string, whereas x-www-form-urlencoded
261;; pct-encodes *all* reserved chars, regardless of whether it is
262;; necessary. There are servers which do not accept input that isn't
263;; fully pct-encoded so we use strictly x-www-form-urlencoded.
264
265(define form-urlencoded-separator (make-parameter ";&"))
266
267(define (form-urlencode alist #!key (separator (form-urlencoded-separator)))
268  (and alist (not (null? alist))
269      (let* ((separator-chars (->char-set separator))
270             (separator-string (string-take
271                                ;; Can't use separator-chars here because
272                                ;; charsets have no ordering and conversion
273                                ;; to string and back reorders the chars
274                                 (if (string? separator)
275                                     separator
276                                     (char-set->string separator)) 1))
277              (enc (lambda (s)
278                     (string-translate*
279                      (uri-encode-string
280                       s
281                       (char-set-union
282                        separator-chars
283                        (char-set #\= #\+)
284                        (char-set-delete
285                         (char-set-complement char-set:uri-unreserved)
286                         #\space)))
287                       '((" " . "+")))))
288              (encoded-components
289               (reverse (fold
290                         (lambda (arg query)
291                           (match arg
292                             ((a . #f) query)
293                             ((a . #t) (cons (enc (->string a)) query))
294                             ((a . b) (cons
295                                       (sprintf "~A=~A"
296                                                (enc (->string a))
297                                                (enc (->string b)))
298                                       query))))
299                         '() alist))))
300         (and (not (null? encoded-components))
301              (string-join encoded-components separator-string)))))
302
303(define (form-urldecode query #!key (separator (form-urlencoded-separator)))
304  (if query
305      (map (lambda (part)
306             (let ((idx (string-index part #\=))
307                   (decode (lambda (s)
308                             (uri-decode-string
309                              (string-translate* s '(("+" . "%20")))))))
310               (if idx
311                   (cons (string->symbol (decode (string-take part idx)))
312                         (decode (string-drop part (add1 idx))))
313                   (cons (string->symbol (decode part))
314                         #t))))
315           (string-split query (char-set->string (->char-set separator)) #t))
316      '())) ; _always_ provide a list interface for the query, even if not there
317
318(define (decode-path p)
319  (and p (match p
320                (('/ . rst) (cons '/ (map uri-decode-string rst)))
321                (else (map uri-decode-string p)))))
322
323;;; Miscellaneous procedures
324
325;; Simple convenience procedures
326(define (uri->string uri . args)
327  (apply generic:uri->string (URI-common-generic uri) args))
328
329(define (wrap proc)
330  (lambda args
331    (uri-generic->uri (apply proc (map URI-common-generic args)))))
332
333;; TODO: What about normalization issues here? Right now uri-relative-from
334;; gives a nonempty reference when uri1 has path=() and uri2 has path=(/ "")
335;; This could be considered a bug.  Same for uri->string and with port-nrs
336;; However, URIs with paths updated by this egg do not have that problem.
337(define uri-relative-to             (wrap generic:uri-relative-to))
338(define uri-relative-from           (wrap generic:uri-relative-from))
339(define uri-normalize-case          (wrap generic:uri-normalize-case))
340(define uri-normalize-path-segments (wrap generic:uri-normalize-path-segments))
341
342;; Copied from uri-generic, because we need to use our modified uri-path
343;; procedure.  Alternatively, we could update the path with our own
344;; path, but that's even sillier.
345(define (uri-path-absolute? uri)
346  (let ((path (uri-path uri)))
347    (and (pair? path) (eq? '/ (car path)))))
348
349(define (uri-path-relative? uri)
350  (not (uri-path-absolute? uri)))
351)
Note: See TracBrowser for help on using the repository browser.