source: project/release/4/uri-common/tags/0.71/uri-common.scm @ 15455

Last change on this file since 15455 was 15455, checked in by Ivan Raikov, 11 years ago

release 0.71 of uri-common

File size: 14.0 KB
Line 
1;;
2;; URI-common provides URI handling procedures for common URI schemes
3;; that are based on the generic syntax such as http, https, file, ftp.
4;; It also provides automatic form-urlencoded query argument
5;; encoding/decoding
6;;
7; Copyright (c) 2008-2009, Peter Bex
8; All rights reserved.
9;
10; Redistribution and use in source and binary forms, with or without
11; modification, are permitted provided that the following conditions
12; are met:
13;
14; 1. Redistributions of source code must retain the above copyright
15;    notice, this list of conditions and the following disclaimer.
16; 2. Redistributions in binary form must reproduce the above copyright
17;    notice, this list of conditions and the following disclaimer in the
18;    documentation and/or other materials provided with the distribution.
19; 3. Neither the name of the author nor the names of its
20;    contributors may be used to endorse or promote products derived
21;    from this software without specific prior written permission.
22;
23; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
28; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
34; OF THE POSSIBILITY OF SUCH DAMAGE.
35;
36; Please report bugs, suggestions and ideas to the Chicken Trac
37; ticket tracking system (assign tickets to user 'sjamaan'):
38; http://trac.callcc.org
39
40(provide 'uri-common)
41
42(module uri-common
43  (uri-reference uri-reference? absolute-uri absolute-uri? relative-ref?
44   uri->uri-generic uri-generic->uri uri->list
45   update-uri uri? uri-scheme uri-username uri-password
46   uri-host uri-port uri-path uri-query uri-fragment
47   uri->string form-urlencode form-urldecode
48   uri-relative-to uri-relative-from
49   uri-normalize-path-segments uri-normalize-case
50   uri-path-relative? uri-path-absolute?
51   char-set:query/fragment uri-default-port?)
52
53(import chicken scheme)
54(require-extension srfi-1 srfi-13 srfi-14 extras data-structures
55                   defstruct matchable)
56(require-library uri-generic)
57(import (prefix uri-generic generic:))
58
59;; We could use the hostinfo egg for this, but that would be yet another
60;; dependency. Besides, not all service names have a matching URI scheme
61;; nor do all URI schemes have a matching service name.
62(define default-ports
63  '((http . 80)              ; RFC 2616
64    (https . 443)            ; RFC 2818
65    (shttp . 80)             ; RFC 2660
66    (ftp . 21)               ; RFC 959; no official URI scheme defined
67    ;; nonstandard, but could be useful
68    (svn+ssh . 22)
69    (svn . 3690)
70    ))
71
72;; A common URI is a generic URI plus stored decoded versions of most components
73(defstruct URI-common
74  generic username password host path query fragment)
75
76(define-record-printer (URI-common x out)
77  (fprintf out "#<URI-common: scheme=~S port=~S host=~S path=~S query=~S fragment=~S>"
78           (generic:uri-scheme (URI-common-generic x))
79           (generic:uri-port (URI-common-generic x))
80           (URI-common-host x)
81           (URI-common-path x)
82           (URI-common-query x)
83           (URI-common-fragment x)))
84
85;;; Conversion procedures
86(define (uri->uri-generic uri)
87  (URI-common-generic uri))
88
89(define (uri-reference u)
90  (let ((u1 (generic:uri-reference u)))
91    (and u1 (uri-generic->uri u1))))
92
93(define (absolute-uri u)
94  (let ((u1 (generic:absolute-uri u)))
95    (and u1 (uri-generic->uri u1))))
96
97(define (uri-generic->uri uri)
98  (make-URI-common generic: uri
99                   username: (decode-string* (generic:uri-username uri))
100                   password: (decode-string* (generic:uri-password uri))
101                   host:     (decode-string* (generic:uri-host uri))
102                   path:     (decode-path (generic:uri-path uri))
103                   query:    (form-urldecode (generic:uri-query uri))
104                   fragment: (decode-string* (generic:uri-fragment uri))))
105
106(define (decode-string* s)
107  (and s (generic:uri-decode-string s)))
108
109(define (uri->list uri . rest)
110  (let-optionals rest ((userinfomap (lambda (u pw) (string-append u ":******" ))))
111    (list (uri-scheme uri)
112          (list (uri-auth->list uri userinfomap) (uri-path uri) (uri-query uri))
113          (uri-fragment uri))))
114
115(define (uri-auth->list uri userinfomap)
116  (let ((username (uri-username uri))
117        (password (uri-password uri)))
118    (list (if (and username password) (userinfomap username password) #f)
119          (uri-host uri)
120          (uri-port uri))))
121
122;;; Accessors and predicates
123(define uri-reference? URI-common?)
124(define (uri? u)
125  (and (URI-common? u) (generic:uri? (URI-common-generic u))))
126(define (absolute-uri? u)
127  (and (URI-common? u) (generic:absolute-uri? (URI-common-generic u))))
128(define (relative-ref? u)
129  (and (URI-common? u) (generic:relative-ref? (URI-common-generic u))))
130(define uri-scheme   (compose generic:uri-scheme URI-common-generic))
131(define uri-username URI-common-username)
132(define uri-password URI-common-password)
133(define uri-host     URI-common-host)
134(define uri-query    URI-common-query)
135(define uri-fragment URI-common-fragment)
136(define (uri-default-port? uri)
137  (default-port? (uri-port uri) (uri-scheme uri)))
138
139(define (uri-path uc)
140  (let ((path (URI-common-path uc)))
141    ;; XXX change (not (relative-ref? uc)) to a real-URI? (bad name) predicate
142    (if (and (not (relative-ref? uc)) ; For real URIs (not relative-refs),
143             (or (null? path)         ; an empty path equals a path of "/"
144                 (eq? path #f)))      ; as per RFC 3986, section 6.2.3
145        '(/ "")
146        path)))
147
148(define (uri-port uc)
149  (let ((u (URI-common-generic uc)))
150    (or (generic:uri-port u)
151        (alist-ref (generic:uri-scheme u) default-ports))))
152
153;;; Updaters
154(define update-uri
155  (let ((unset (list 'unset)))
156    (lambda (uc #!key
157                (scheme unset) (username unset) (password unset)
158                (host unset) (port unset)
159                (path unset) (query unset) (fragment unset))
160      (let* ((uc (update-URI-common uc)) ; new copy
161             (actual-scheme (if (eq? scheme unset)
162                                (generic:uri-scheme (URI-common-generic uc))
163                                scheme))
164             (path (if (and actual-scheme (or (eq? path #f) (eq? path '())))
165                       '(/ "") ; normalize path
166                       path))
167             ;; XXX is this really the desired behaviour?
168             ;; maybe simpler is better: do not reset to #f on default port?
169             (port (if (or
170                        (and (not (eq? scheme unset)) ; scheme specified...
171                             (eq? port unset)) ; ...and no explicit port?
172                        (default-port? port actual-scheme)) ; or default port?
173                       #f               ; then clear port
174                       port)))
175        ;; This code is ugly!
176        (unless (eq? scheme unset)
177          (URI-common-generic-set!
178           uc (generic:update-uri (URI-common-generic uc) scheme: scheme)))
179        (unless (eq? username unset)
180          (URI-common-generic-set!
181           uc (generic:update-uri (URI-common-generic uc)
182                                  username: (encode-string* username)))
183          (URI-common-username-set! uc username))
184        (unless (eq? password unset)
185          (URI-common-generic-set!
186           uc (generic:update-uri (URI-common-generic uc)
187                                  password: (encode-string* password)))
188          (URI-common-password-set! uc password))
189        (unless (eq? host unset)
190          (URI-common-generic-set!
191           uc (generic:update-uri (URI-common-generic uc)
192                                  host: (encode-string* host)))
193          (URI-common-host-set! uc host))
194        (unless (eq? port unset)
195          (URI-common-generic-set!
196           uc (generic:update-uri (URI-common-generic uc) port: port)))
197        (unless (eq? path unset)
198          (URI-common-generic-set!
199           uc (generic:update-uri (URI-common-generic uc)
200                                  path: (encode-path path)))
201          (URI-common-path-set! uc path))
202        (unless (eq? query unset)
203          (URI-common-generic-set!
204           uc (generic:update-uri (URI-common-generic uc)
205                                  query: (form-urlencode query)))
206          (URI-common-query-set! uc query))
207        (unless (eq? fragment unset)
208          (URI-common-generic-set!
209           uc (generic:update-uri (URI-common-generic uc)
210                                  fragment: (encode-string*
211                                             fragment
212                                             char-set:query/fragment)))
213          (URI-common-fragment-set! uc fragment))
214        uc))))
215
216(define (encode-string* s . rest)
217  (and s (apply generic:uri-encode-string s rest)))
218
219(define (default-port? port scheme)
220  (eqv? port (alist-ref scheme default-ports)))
221
222(define (encode-path p)
223  (and p (match p
224                (('/ . rst) (cons '/ (map generic:uri-encode-string rst)))
225                (else (map generic:uri-encode-string p)))))
226
227;; Characters allowed in queries and fragments
228(define char-set:query/fragment
229  (char-set-difference
230   (char-set-complement generic:char-set:uri-unreserved)
231   (string->char-set ":@?/")
232   generic:char-set:sub-delims))
233
234;;; Handling of application/x-www-form-urlencoded data
235;;
236;; This implements both HTML 4's specification
237;; (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1)
238;; and XHTML XForms' specification
239;; (http://www.w3.org/TR/xforms/#structure-model-submission)
240;;
241;; The latter is a more generalised form of the former, as it allows
242;; the user to specify a custom separator character.  The HTML 4
243;; spec also contains a recommendation
244;; (http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2)
245;; that semicolons should be used instead of ampersands as a separator.
246;; However, it provides no mechanism to select the separator to use
247;; when submitting a form, which makes it a pretty useless recommendation.
248;; This recommendation also complicates matters on the server because one
249;; would need to handle both form-generated GET query parameters and
250;; hardcoded GET query parameters as specified in anchors.
251;;
252;; There's also a 2006 Internet-Draft by Bjoern Hoehrmann that was
253;; intended to standardize this, but it was allowed to expire in 2007:
254;; http://ietfreport.isoc.org/idref/draft-hoehrmann-urlencoded
255
256(define form-urlencoded-separator (make-parameter ";&"))
257
258(define (form-urlencode alist #!key (separator (form-urlencoded-separator)))
259  (and alist (not (null? alist))
260       (let* ((separator-chars (->char-set separator))
261              (join-string (string-take
262                            (if (string? separator-chars)
263                                separator-chars
264                                (char-set->string separator-chars)) 1))
265              (enc (lambda (s)
266                     (string-translate*
267                      (generic:uri-encode-string
268                       s
269                       (char-set-union separator-chars
270                                       (char-set #\= #\+)
271                                       (char-set-delete char-set:query/fragment
272                                                        #\space)))
273                      '((" " . "+"))))))
274         (string-join
275          (reverse (fold
276                    (lambda (arg query)
277                      (match arg
278                             ((a . #f) query)
279                             ((a . #t) (cons (enc (->string a)) query))
280                             ((a . b) (cons
281                                       (sprintf "~A=~A"
282                                                (enc (->string a))
283                                                (enc b))
284                                       query))))
285                    '() alist))
286          join-string))))
287
288(define (form-urldecode query #!key (separator (form-urlencoded-separator)))
289  (if query
290      (map (lambda (part)
291             (let ((idx (string-index part #\=))
292                   (decode (lambda (s)
293                             (generic:uri-decode-string
294                              (string-translate* s '(("+" . "%20")))))))
295               (if idx
296                   (cons (string->symbol (decode (string-take part idx)))
297                         (decode (string-drop part (add1 idx))))
298                   (cons (string->symbol (decode part))
299                         #t))))
300           (string-split query (char-set->string (->char-set separator)) #t))
301      '())) ; _always_ provide a list interface for the query, even if not there
302
303(define (decode-path p)
304  (and p (match p
305                (('/ . rst) (cons '/ (map generic:uri-decode-string rst)))
306                (else (map generic:uri-decode-string p)))))
307
308;;; Miscellaneous procedures
309
310;; Simple convenience procedures
311(define (uri->string uri . args)
312  (apply generic:uri->string (URI-common-generic uri) args))
313
314(define (wrap proc)
315  (lambda args
316    (uri-generic->uri (apply proc (map URI-common-generic args)))))
317
318;; TODO: What about normalization issues here? Right now uri-relative-from
319;; gives a nonempty reference when uri1 has path=() and uri2 has path=(/ "")
320;; This could be considered a bug.  Same for uri->string and with port-nrs
321;; However, URIs with paths updated by this egg do not have that problem.
322(define uri-relative-to             (wrap generic:uri-relative-to))
323(define uri-relative-from           (wrap generic:uri-relative-from))
324(define uri-normalize-case          (wrap generic:uri-normalize-case))
325(define uri-normalize-path-segments (wrap generic:uri-normalize-path-segments))
326(define (uri-path-absolute? uri)
327  (generic:uri-path-absolute? (URI-common-generic uri)))
328(define (uri-path-relative? uri)
329  (generic:uri-path-relative? (URI-common-generic uri)))
330
331)
Note: See TracBrowser for help on using the repository browser.