source: project/release/4/uri-common/trunk/uri-common.scm @ 31010

Last change on this file since 31010 was 31010, checked in by sjamaan, 6 years ago

uri-common: Keep around the port but reset it in the generic object when it's the default for the supplied scheme (and when switching schemes)

File size: 15.1 KB
Line 
1;;
2;; URI-common provides URI handling procedures for common URI schemes
3;; that are based on the generic syntax such as http, https, file, ftp.
4;; It also provides automatic form-urlencoded query argument
5;; encoding/decoding
6;;
7; Copyright (c) 2008-2014, Peter Bex
8; All rights reserved.
9;
10; Redistribution and use in source and binary forms, with or without
11; modification, are permitted provided that the following conditions
12; are met:
13;
14; 1. Redistributions of source code must retain the above copyright
15;    notice, this list of conditions and the following disclaimer.
16; 2. Redistributions in binary form must reproduce the above copyright
17;    notice, this list of conditions and the following disclaimer in the
18;    documentation and/or other materials provided with the distribution.
19; 3. Neither the name of the author nor the names of its
20;    contributors may be used to endorse or promote products derived
21;    from this software without specific prior written permission.
22;
23; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27; COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
28; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
34; OF THE POSSIBILITY OF SUCH DAMAGE.
35;
36; Please report bugs, suggestions and ideas to the Chicken Trac
37; ticket tracking system (assign tickets to user 'sjamaan'):
38; http://trac.callcc.org
39
40(provide 'uri-common)
41
42(module uri-common
43  (uri-reference uri-reference? absolute-uri absolute-uri? relative-ref?
44   uri->uri-generic uri-generic->uri uri->list
45   make-uri update-uri uri? uri-scheme uri-username uri-password
46   uri-host uri-port uri-path uri-query uri-fragment
47   uri->string form-urlencode form-urldecode form-urlencoded-separator
48   uri-relative-to uri-relative-from
49   uri-normalize-path-segments uri-normalize-case
50   uri-path-relative? uri-path-absolute?
51   uri-default-port? uri-encode-string uri-decode-string
52   
53   char-set:gen-delims char-set:sub-delims
54   char-set:uri-reserved char-set:uri-unreserved)
55
56(import chicken scheme)
57(require-extension srfi-1 srfi-13 srfi-14 extras data-structures
58                   defstruct matchable)
59(require-library uri-generic)
60(import (prefix uri-generic generic:))
61
62;; We could use the hostinfo egg for this, but that would be yet another
63;; dependency. Besides, not all service names have a matching URI scheme
64;; nor do all URI schemes have a matching service name.
65(define default-ports
66  '((http . 80)              ; RFC 2616
67    (https . 443)            ; RFC 2818
68    (shttp . 80)             ; RFC 2660
69    (ftp . 21)               ; RFC 959; no official URI scheme defined
70    ;; nonstandard, but could be useful
71    (svn+ssh . 22)
72    (svn . 3690)
73    ))
74
75;; A common URI is a generic URI plus stored decoded versions of most components
76(defstruct URI-common
77  generic username password host port path query fragment)
78
79(define-record-printer (URI-common x out)
80  (fprintf out "#<URI-common: scheme=~S port=~S host=~S path=~S query=~S fragment=~S>"
81           (uri-scheme x) (uri-port x) (uri-host x)
82           (uri-path x) (uri-query x) (uri-fragment x)))
83
84;;; Conversion procedures
85(define (uri->uri-generic uri)
86  (URI-common-generic uri))
87
88(define (uri-reference u)
89  (let ((u1 (generic:uri-reference u)))
90    (and u1 (uri-generic->uri u1))))
91
92(define (absolute-uri u)
93  (let ((u1 (generic:absolute-uri u)))
94    (and u1 (uri-generic->uri u1))))
95
96(define (uri-generic->uri uri)
97  (make-URI-common generic: uri
98                   username: (decode-string* (generic:uri-username uri))
99                   password: (decode-string* (generic:uri-password uri))
100                   host:     (decode-string* (generic:uri-host uri))
101                   port:     (generic:uri-port uri)
102                   path:     (decode-path (generic:uri-path uri))
103                   query:    (form-urldecode (generic:uri-query uri))
104                   fragment: (decode-string* (generic:uri-fragment uri))))
105
106(define (decode-string* s)
107  (and s (uri-decode-string s)))
108
109(define (uri->list uri . rest)
110  (let-optionals rest ((userinfomap (lambda (u pw) (string-append u ":******" ))))
111    (list (uri-scheme uri)
112          (list (uri-auth->list uri userinfomap) (uri-path uri) (uri-query uri))
113          (uri-fragment uri))))
114
115(define (uri-auth->list uri userinfomap)
116  (let ((username (uri-username uri))
117        (password (uri-password uri)))
118    (list (if (and username password) (userinfomap username password) #f)
119          (uri-host uri)
120          (uri-port uri))))
121
122;;; Accessors and predicates
123(define uri-reference? URI-common?)
124(define (uri? u)
125  (and (URI-common? u) (generic:uri? (URI-common-generic u))))
126(define (absolute-uri? u)
127  (and (URI-common? u) (generic:absolute-uri? (URI-common-generic u))))
128(define (relative-ref? u)
129  (and (URI-common? u) (generic:relative-ref? (URI-common-generic u))))
130(define uri-scheme   (compose generic:uri-scheme URI-common-generic))
131(define uri-username URI-common-username)
132(define uri-password URI-common-password)
133(define uri-host     URI-common-host)
134(define uri-query    URI-common-query)
135(define uri-fragment URI-common-fragment)
136(define (uri-default-port? uri)
137  (default-port? (uri-port uri) (uri-scheme uri)))
138
139;; Uri-path variant which performs Scheme-Based Normalization for well-known
140;; "common" schemes.  See per RFC 3986, section 6.2.3
141(define (uri-path uc)
142  (let ((path (URI-common-path uc)))
143    (if (and (not (relative-ref? uc)) ; For real URIs (not relative-refs),
144             (or (null? path)         ; an empty path equals a path of "/"
145                 (eq? path #f)))
146        '(/ "")
147        path)))
148
149(define (uri-port uc)
150  (let ((u (URI-common-generic uc)))
151    (or (generic:uri-port u)
152        (alist-ref (generic:uri-scheme u) default-ports))))
153
154;;; Constructor
155(define (make-uri . key/values)
156  (apply update-uri (make-URI-common generic: (generic:make-uri)) key/values))
157
158;;; Updaters
159(define update-uri
160  (let ((unset (list 'unset)))
161    (lambda (uc #!key
162                (scheme unset) (username unset) (password unset)
163                (host unset) (port unset)
164                (path unset) (query unset) (fragment unset))
165      (let* ((uc (update-URI-common uc)) ; new copy
166             (actual-scheme (if (eq? scheme unset)
167                                (generic:uri-scheme (URI-common-generic uc))
168                                scheme))
169             (path (if (and actual-scheme (or (eq? path #f) (eq? path '())))
170                       '(/ "") ; normalize path
171                       path))
172             (actual-port (if (eq? port unset)
173                              (URI-common-port uc)
174                              port)))
175        (unless (and (eq? port unset) (eq? scheme unset))
176          ;; Clear port if it's the default for this scheme
177          (URI-common-generic-set!
178           uc (generic:update-uri
179               (URI-common-generic uc)
180               port: (if (default-port? actual-port actual-scheme)
181                         #f
182                         actual-port))))
183        ;; This code is ugly!
184        (unless (eq? scheme unset)
185          (URI-common-generic-set!
186           uc (generic:update-uri (URI-common-generic uc) scheme: scheme)))
187        (unless (eq? username unset)
188          (URI-common-generic-set!
189           uc (generic:update-uri (URI-common-generic uc)
190                                  username: (encode-string* username)))
191          (URI-common-username-set! uc username))
192        (unless (eq? password unset)
193          (URI-common-generic-set!
194           uc (generic:update-uri (URI-common-generic uc)
195                                  password: (encode-string* password)))
196          (URI-common-password-set! uc password))
197        (unless (eq? host unset)
198          (URI-common-generic-set!
199           uc (generic:update-uri (URI-common-generic uc)
200                                  host: (encode-string* host)))
201          (URI-common-host-set! uc host))
202        (unless (eq? port unset)
203          ;; Generic port set above - it depends on the scheme too
204          (URI-common-port-set! uc port))
205        (unless (eq? path unset)
206          (URI-common-generic-set!
207           uc (generic:update-uri (URI-common-generic uc)
208                                  path: (encode-path path)))
209          (URI-common-path-set! uc path))
210        (unless (eq? query unset)
211          (URI-common-generic-set!
212           uc (generic:update-uri (URI-common-generic uc)
213                                  query: (form-urlencode query)))
214          (URI-common-query-set! uc query))
215        (unless (eq? fragment unset)
216          (URI-common-generic-set!
217           uc (generic:update-uri (URI-common-generic uc)
218                                  fragment: (encode-string*
219                                             fragment)))
220          (URI-common-fragment-set! uc fragment))
221        uc))))
222
223(define uri-encode-string generic:uri-encode-string)
224(define uri-decode-string generic:uri-decode-string)
225(define char-set:gen-delims generic:char-set:gen-delims)
226(define char-set:sub-delims generic:char-set:sub-delims)
227(define char-set:uri-reserved generic:char-set:uri-reserved)
228(define char-set:uri-unreserved generic:char-set:uri-unreserved)
229
230(define (encode-string* s . rest)
231  (and s (apply uri-encode-string s rest)))
232
233(define (default-port? port scheme)
234  (eqv? port (alist-ref scheme default-ports)))
235
236(define (encode-path p)
237  (and p (match p
238                (('/ . rst) (cons '/ (map uri-encode-string rst)))
239                (else (map uri-encode-string p)))))
240
241;;; Handling of application/x-www-form-urlencoded data
242;;
243;; This implements both HTML 4's specification
244;; (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1)
245;; and XHTML XForms' specification
246;; (http://www.w3.org/TR/xforms/#serialize-urlencode)
247;;
248;; The latter is a more generalised form of the former, as it allows
249;; the user to specify a custom separator character.  The HTML 4
250;; spec also contains a recommendation
251;; (http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2)
252;; that semicolons should be used instead of ampersands as a separator.
253;; However, it provides no mechanism to select the separator to use
254;; when submitting a form, which makes it a pretty useless recommendation.
255;; This recommendation also complicates matters on the server because one
256;; would need to handle both form-generated GET query parameters and
257;; hardcoded GET query parameters as specified in anchors.
258;;
259;; There's also a 2006 Internet-Draft by Bjoern Hoehrmann that was
260;; intended to standardize this, but it was allowed to expire in 2007:
261;; http://ietfreport.isoc.org/idref/draft-hoehrmann-urlencoded
262;; It was different in a few ways from the x-www-form-urlencoded type.
263;; For example, www-form-urlencoded only pct-encoded the chars that
264;; are not allowed in a query string, whereas x-www-form-urlencoded
265;; pct-encodes *all* reserved chars, regardless of whether it is
266;; necessary. There are servers which do not accept input that isn't
267;; fully pct-encoded so we use strictly x-www-form-urlencoded.
268
269(define form-urlencoded-separator (make-parameter ";&"))
270
271(define (form-urlencode alist #!key (separator (form-urlencoded-separator)))
272  (and alist (not (null? alist))
273      (let* ((separator-chars (->char-set separator))
274             (separator-string (string-take
275                                ;; Can't use separator-chars here because
276                                ;; charsets have no ordering and conversion
277                                ;; to string and back reorders the chars
278                                 (if (string? separator)
279                                     separator
280                                     (char-set->string separator)) 1))
281              (enc (lambda (s)
282                     (string-translate*
283                      (uri-encode-string
284                       s
285                       (char-set-union
286                        separator-chars
287                        (char-set #\= #\+)
288                        (char-set-delete
289                         (char-set-complement char-set:uri-unreserved)
290                         #\space)))
291                       '((" " . "+")))))
292              (encoded-components
293               (reverse (fold
294                         (lambda (arg query)
295                           (match arg
296                             ((a . #f) query)
297                             ((a . #t) (cons (enc (->string a)) query))
298                             ((a . b) (cons
299                                       (sprintf "~A=~A"
300                                                (enc (->string a))
301                                                (enc (->string b)))
302                                       query))))
303                         '() alist))))
304         (and (not (null? encoded-components))
305              (string-join encoded-components separator-string)))))
306
307(define (form-urldecode query #!key (separator (form-urlencoded-separator)))
308  (if query
309      (map (lambda (part)
310             (let ((idx (string-index part #\=))
311                   (decode (lambda (s)
312                             (uri-decode-string
313                              (string-translate* s '(("+" . "%20")))))))
314               (if idx
315                   (cons (string->symbol (decode (string-take part idx)))
316                         (decode (string-drop part (add1 idx))))
317                   (cons (string->symbol (decode part))
318                         #t))))
319           (string-split query (char-set->string (->char-set separator)) #t))
320      '())) ; _always_ provide a list interface for the query, even if not there
321
322(define (decode-path p)
323  (and p (match p
324                (('/ . rst) (cons '/ (map uri-decode-string rst)))
325                (else (map uri-decode-string p)))))
326
327;;; Miscellaneous procedures
328
329;; Simple convenience procedures
330(define (uri->string uri . args)
331  (apply generic:uri->string (URI-common-generic uri) args))
332
333(define (wrap proc)
334  (lambda args
335    (uri-generic->uri (apply proc (map URI-common-generic args)))))
336
337;; TODO: What about normalization issues here? Right now uri-relative-from
338;; gives a nonempty reference when uri1 has path=() and uri2 has path=(/ "")
339;; This could be considered a bug.  Same for uri->string and with port-nrs
340;; However, URIs with paths updated by this egg do not have that problem.
341(define uri-relative-to             (wrap generic:uri-relative-to))
342(define uri-relative-from           (wrap generic:uri-relative-from))
343(define uri-normalize-case          (wrap generic:uri-normalize-case))
344(define uri-normalize-path-segments (wrap generic:uri-normalize-path-segments))
345
346;; Copied from uri-generic, because we need to use our modified uri-path
347;; procedure.  Alternatively, we could update the path with our own
348;; path, but that's even sillier.
349(define (uri-path-absolute? uri)
350  (let ((path (uri-path uri)))
351    (and (pair? path) (eq? '/ (car path)))))
352
353(define (uri-path-relative? uri)
354  (not (uri-path-absolute? uri)))
355)
Note: See TracBrowser for help on using the repository browser.