source: project/release/4/uri-common/trunk/uri-common.scm @ 13216

Last change on this file since 13216 was 13216, checked in by sjamaan, 11 years ago

Add new uri-generic predicates to uri-common too

File size: 12.9 KB
Line 
1;;
2;; URI-common provides URI handling procedures for common URI schemes
3;; that are based on the generic syntax such as http, https, file, ftp.
4;; It also provides automatic form-urlencoded query argument
5;; encoding/decoding
6;;
7; Copyright (c) 2008-2009, Peter Bex
8; All rights reserved.
9;
10; Redistribution and use in source and binary forms, with or without
11; modification, are permitted provided that the following conditions
12; are met:
13;
14; 1. Redistributions of source code must retain the above copyright
15;    notice, this list of conditions and the following disclaimer.
16; 2. Redistributions in binary form must reproduce the above copyright
17;    notice, this list of conditions and the following disclaimer in the
18;    documentation and/or other materials provided with the distribution.
19; 3. Neither the name of the author nor the names of its
20;    contributors may be used to endorse or promote products derived
21;    from this software without specific prior written permission.
22;
23; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
28; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
34; OF THE POSSIBILITY OF SUCH DAMAGE.
35;
36; Please report bugs, suggestions and ideas to the Chicken Trac
37; ticket tracking system (assign tickets to user 'sjamaan'):
38; http://trac.callcc.org
39
40(provide 'uri-common)
41
42(module uri-common
43  (uri-reference absolute-uri absolute-uri? relative-ref?
44   uri->uri-generic uri-generic->uri
45   update-uri uri? uri-scheme uri-username uri-password
46   uri-host uri-port uri-path uri-query uri-fragment
47   uri->string form-urlencode form-urldecode
48   uri-relative-to uri-relative-from
49   uri-normalize-path-segments uri-normalize-case
50   char-set:query/fragment)
51
52(import chicken scheme)
53(require-extension srfi-1 srfi-13 srfi-14 extras data-structures
54                   defstruct matchable)
55(require-library uri-generic)
56(import (prefix uri-generic generic:))
57
58;; We could use the hostinfo egg for this, but that would be yet another
59;; dependency. Besides, not all service names have a matching URI scheme
60;; nor do all URI schemes have a matching service name.
61(define default-ports
62  '((http . 80)              ; RFC 2616
63    (https . 443)            ; RFC 2818
64    (shttp . 80)             ; RFC 2660
65    (ftp . 21)               ; RFC 959; no official URI scheme defined
66    ;; nonstandard, but could be useful
67    (svn+ssh . 22)
68    (svn . 3690)
69    ))
70
71;; A common URI is a generic URI plus stored decoded versions of most components
72(defstruct URI-common
73  generic username password host path query fragment)
74
75(define-record-printer (URI-common x out)
76  (fprintf out "#(URI-common scheme=~S port=~S host=~S path=~S query=~S fragment=~S"
77           (generic:uri-scheme (URI-common-generic x))
78           (generic:uri-port (URI-common-generic x))
79           (URI-common-host x)
80           (URI-common-path x)
81           (URI-common-query x)
82           (URI-common-fragment x)))
83
84;;; Conversion procedures
85(define (uri->uri-generic uri)
86  (URI-common-generic uri))
87
88(define (uri-reference u)
89  (uri-generic->uri (generic:uri-reference u)))
90
91(define (absolute-uri u)
92  (uri-generic->uri (generic:absolute-uri u)))
93
94(define (uri-generic->uri uri)
95  (make-URI-common generic: uri
96                   username: (decode-string* (generic:uri-username uri))
97                   password: (decode-string* (generic:uri-password uri))
98                   host:     (decode-string* (generic:uri-host uri))
99                   path:     (decode-path (generic:uri-path uri))
100                   query:    (form-urldecode (generic:uri-query uri))
101                   fragment: (decode-string* (generic:uri-fragment uri))))
102
103(define (decode-string* s)
104  (and s (generic:uri-decode-string s)))
105
106;;; Accessors and predicates
107(define uri?         URI-common?)
108(define (absolute-uri? u)
109  (and (URI-common? u) (generic:absolute-uri? (URI-common-generic u))))
110(define (relative-ref? u)
111  (and (URI-common? u) (generic:relative-ref? (URI-common-generic u))))
112(define uri-scheme   (compose generic:uri-scheme URI-common-generic))
113(define uri-username URI-common-username)
114(define uri-password URI-common-password)
115(define uri-host     URI-common-host)
116(define uri-query    URI-common-query)
117(define uri-fragment URI-common-fragment)
118
119(define (uri-path uc)
120  (let ((path (URI-common-path uc)))
121    ;; does not apply for relative references
122    ;; XXX: Make a "relative-ref?" predicate (or something like that)
123    (if (and (uri-host uc)
124             (or (eq? path '()) (eq? path #f)))
125        '(/ "")
126        path)))
127
128(define (uri-port uc)
129  (let ((u (URI-common-generic uc)))
130    (or (generic:uri-port u)
131        (alist-ref (generic:uri-scheme u) default-ports))))
132
133;;; Updaters
134(define update-uri
135  (let ((unset (list 'unset)))
136    (lambda (uc #!key
137                (scheme unset) (username unset) (password unset)
138                (host unset) (port unset)
139                (path unset) (query unset) (fragment unset))
140      (let* ((uc (update-URI-common uc)) ; new copy
141             (actual-scheme (if (eq? scheme unset)
142                                (generic:uri-scheme (URI-common-generic uc))
143                                scheme))
144             (path (if (and actual-scheme (or (eq? path #f) (eq? path '())))
145                       '(/ "") ; normalize path
146                       path))
147             ;; XXX is this really the desired behaviour?
148             ;; maybe simpler is better: do not reset to #f on default port?
149             (port (if (or
150                        (and (not (eq? scheme unset)) ; scheme specified...
151                             (eq? port unset)) ; ...and no explicit port?
152                        (default-port? port actual-scheme)) ; or default port?
153                       #f               ; then clear port
154                       port)))
155        ;; This code is ugly!
156        (unless (eq? scheme unset)
157          (URI-common-generic-set!
158           uc (generic:update-uri (URI-common-generic uc) scheme: scheme)))
159        (unless (eq? username unset)
160          (URI-common-generic-set!
161           uc (generic:update-uri (URI-common-generic uc)
162                                  username: (encode-string* username)))
163          (URI-common-username-set! uc username))
164        (unless (eq? password unset)
165          (URI-common-generic-set!
166           uc (generic:update-uri (URI-common-generic uc)
167                                  password: (encode-string* password)))
168          (URI-common-password-set! uc password))
169        (unless (eq? host unset)
170          (URI-common-generic-set!
171           uc (generic:update-uri (URI-common-generic uc)
172                                  host: (encode-string* host)))
173          (URI-common-host-set! uc host))
174        (unless (eq? port unset)
175          (URI-common-generic-set!
176           uc (generic:update-uri (URI-common-generic uc) port: port)))
177        (unless (eq? path unset)
178          (URI-common-generic-set!
179           uc (generic:update-uri (URI-common-generic uc)
180                                  path: (encode-path path)))
181          (URI-common-path-set! uc path))
182        (unless (eq? query unset)
183          (URI-common-generic-set!
184           uc (generic:update-uri (URI-common-generic uc)
185                                  query: (form-urlencode query)))
186          (URI-common-query-set! uc query))
187        (unless (eq? fragment unset)
188          (URI-common-generic-set!
189           uc (generic:update-uri (URI-common-generic uc)
190                                  fragment: (encode-string*
191                                             fragment
192                                             char-set:query/fragment)))
193          (URI-common-fragment-set! uc fragment))
194        uc))))
195
196(define (encode-string* s . rest)
197  (and s (apply generic:uri-encode-string s rest)))
198
199(define (default-port? port scheme)
200  (eqv? port (alist-ref scheme default-ports)))
201
202(define (encode-path p)
203  (and p (match p
204                (('/ . rst) (cons '/ (map generic:uri-encode-string rst)))
205                (else (map generic:uri-encode-string p)))))
206
207;; Characters allowed in queries and fragments
208(define char-set:query/fragment
209  (char-set-difference
210   (char-set-complement generic:char-set:uri-unreserved)
211   (string->char-set ":@?/")
212   generic:char-set:sub-delims))
213
214;;; Handling of application/x-www-form-urlencoded data
215;;
216;; This implements both HTML 4's specification
217;; (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1)
218;; and XHTML XForms' specification
219;; (http://www.w3.org/TR/xforms/#structure-model-submission)
220;;
221;; The latter is a more generalised form of the former, as it allows
222;; the user to specify a custom separator character.  The HTML 4
223;; spec also contains a recommendation
224;; (http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2)
225;; that semicolons should be used instead of ampersands as a separator.
226;; However, it provides no mechanism to select the separator to use
227;; when submitting a form, which makes it a pretty useless recommendation.
228;; This recommendation also complicates matters on the server because one
229;; would need to handle both form-generated GET query parameters and
230;; hardcoded GET query parameters as specified in anchors.
231;;
232;; There's also a 2006 Internet-Draft by Bjoern Hoehrmann that was
233;; intended to standardize this, but it was allowed to expire in 2007:
234;; http://ietfreport.isoc.org/idref/draft-hoehrmann-urlencoded
235
236(define form-urlencoded-separator (make-parameter ";&"))
237
238(define (form-urlencode alist #!key (separator (form-urlencoded-separator)))
239  (and alist (not (null? alist))
240       (let* ((separator-chars (->char-set separator))
241              (join-string (string-take
242                            (if (string? separator-chars)
243                                separator-chars
244                                (char-set->string separator-chars)) 1))
245              (enc (lambda (s)
246                     (string-translate*
247                      (generic:uri-encode-string
248                       s
249                       (char-set-union separator-chars
250                                       (char-set #\= #\+)
251                                       (char-set-delete char-set:query/fragment
252                                                        #\space)))
253                      '((" " . "+"))))))
254         (string-join
255          (reverse (fold
256                    (lambda (arg query)
257                      (match arg
258                             ((a . #f) query)
259                             ((a . #t) (cons (enc (->string a)) query))
260                             ((a . b) (cons
261                                       (sprintf "~A=~A"
262                                                (enc (->string a))
263                                                (enc b))
264                                       query))))
265                    '() alist))
266          join-string))))
267
268(define (form-urldecode query #!key (separator (form-urlencoded-separator)))
269  (if query
270      (map (lambda (part)
271             (let ((idx (string-index part #\=))
272                   (decode (lambda (s)
273                             (generic:uri-decode-string
274                              (string-translate* s '(("+" . "%20")))))))
275               (if idx
276                   (cons (string->symbol (decode (string-take part idx)))
277                         (decode (string-drop part (add1 idx))))
278                   (cons (string->symbol (decode part))
279                         #t))))
280           (string-split query (char-set->string (->char-set separator)) #t))
281      '())) ; _always_ provide a list interface for the query, even if not there
282
283(define (decode-path p)
284  (and p (match p
285                (('/ . rst) (cons '/ (map generic:uri-decode-string rst)))
286                (else (map generic:uri-decode-string p)))))
287
288;;; Miscellaneous procedures
289
290;; Simple convenience procedures
291(define (uri->string uri . args)
292  (apply generic:uri->string (URI-common-generic uri) args))
293
294(define (wrap proc)
295  (lambda args
296    (uri-generic->uri (apply proc (map URI-common-generic args)))))
297
298;; TODO: What about normalization issues here? Right now uri-relative-from
299;; gives a nonempty reference when uri1 has path=() and uri2 has path=(/ "")
300;; This could be considered a bug.  Same for uri->string and with port-nrs
301;; However, URIs with paths updated by this egg do not have that problem.
302(define uri-relative-to             (wrap generic:uri-relative-to))
303(define uri-relative-from           (wrap generic:uri-relative-from))
304(define uri-normalize-case          (wrap generic:uri-normalize-case))
305(define uri-normalize-path-segments (wrap generic:uri-normalize-path-segments))
306
307)
Note: See TracBrowser for help on using the repository browser.