1 | ;; Change the accuracy of a number to 'digits' number of digits to the |
---|
2 | ;; right of the decimal point. |
---|
3 | (define (chop-number num digits) |
---|
4 | (let ((factor (expt 10 digits))) |
---|
5 | (/ (round (* num factor)) factor))) |
---|
6 | |
---|
7 | (define (quality-parser str) |
---|
8 | ;; Anything that's not a number is seen as if the value is missing, hence 1.0 |
---|
9 | (let* ((num (or (string->number str) 1.0)) |
---|
10 | (imprecise (chop-number num 3))) |
---|
11 | (max 0.0 (min 1.0 imprecise)))) |
---|
12 | |
---|
13 | ;; TODO: Make this use SRFI-19 |
---|
14 | (define (rfc822-time-parser str) |
---|
15 | 0) |
---|
16 | |
---|
17 | ;; Get the raw contents of a header |
---|
18 | (define (header-contents name headers #!optional (default #f)) |
---|
19 | (alist-ref name (headers-v headers) eq? default)) |
---|
20 | |
---|
21 | ;; Get all values of a header |
---|
22 | (define (header-values header-name headers) |
---|
23 | (map (cut vector-ref <> 0) (header-contents header-name headers '()))) |
---|
24 | |
---|
25 | ;; Get the value of a header which is an alist |
---|
26 | ;; Example: header = ((foo . bar) (qux . mooh)), header-list-ref foo |
---|
27 | ;; will return bar. (the header itself is encoded in a param+value |
---|
28 | (define (header-list-ref value headers #!optional (equal? eq?)) |
---|
29 | (find (lambda (h) (equal? (car (get-value h)) value)) headers)) |
---|
30 | |
---|
31 | ;; Get the value from one header entry |
---|
32 | (define get-value (cut vector-ref <> 0)) |
---|
33 | |
---|
34 | ;; Get all params from one header entry |
---|
35 | (define get-params (cut vector-ref <> 1)) |
---|
36 | |
---|
37 | ;; Get one specific parameter from one header entry |
---|
38 | (define (get-param param contents) |
---|
39 | (alist-ref param (vector-ref contents 1) eq?)) |
---|
40 | |
---|
41 | ;; Get-param, except if no quality is present return 1 |
---|
42 | (define (get-quality header-contents) |
---|
43 | (or (get-param 'q header-contents) 1.0)) |
---|
44 | |
---|
45 | ;;;; Header parsers |
---|
46 | |
---|
47 | ;; Find a matching endpoint for a token, ignoring escaped copies of the token |
---|
48 | (define (escaped-string-end str start stop-char-set) |
---|
49 | (let ((len (string-length str))) |
---|
50 | (let loop ((start start)) |
---|
51 | (let ((pos (string-index str (char-set-adjoin stop-char-set #\\) start))) |
---|
52 | (if pos |
---|
53 | (if (char=? #\\ (string-ref str pos)) |
---|
54 | ;; Escaped matching closing symbol |
---|
55 | (if (= len (add1 pos)) |
---|
56 | pos |
---|
57 | (loop (+ pos 2))) |
---|
58 | ;; Reached the matching closing symbol |
---|
59 | pos) |
---|
60 | len))))) ; No matching closing symbol? "Insert" it at the end |
---|
61 | |
---|
62 | ;; RFC 2616 4.2 says multi-headers are a comma-separated list of tokens |
---|
63 | (define (split-multi-header value) |
---|
64 | (let ((len (string-length value))) |
---|
65 | (let loop ((result '()) |
---|
66 | (start-pos 0) ; Where the current header value starts |
---|
67 | (search-pos 0)) ; Where the searching starts |
---|
68 | (or (and-let* (((< search-pos len)) |
---|
69 | (pos (string-index value (char-set #\, #\") search-pos))) |
---|
70 | (if (char=? #\, (string-ref value pos)) |
---|
71 | (loop (cons (string-copy value start-pos pos) result) (add1 pos) (add1 pos)) |
---|
72 | (let ((end-pos (escaped-string-end value (add1 pos) (char-set #\")))) |
---|
73 | (loop result start-pos (add1 end-pos))))) |
---|
74 | (reverse (cons (string-drop value start-pos) result)))))) |
---|
75 | |
---|
76 | ;; Remove all escape characters from the input, recognising "escaped escapes" |
---|
77 | (define (unescape str) |
---|
78 | (let ((last-char (sub1 (string-length str)))) |
---|
79 | (let loop ((result "") |
---|
80 | (start-pos 0)) |
---|
81 | (or (and-let* ((pos (string-index str #\\ start-pos))) |
---|
82 | (if (= pos last-char) |
---|
83 | (string-append result (string-copy str start-pos)) |
---|
84 | (loop (string-append result (string-copy str start-pos pos) |
---|
85 | (string-copy str (add1 pos) (+ pos 2))) |
---|
86 | (+ pos 2)))) |
---|
87 | (string-append result (string-copy str start-pos)))))) |
---|
88 | |
---|
89 | ;; Try to parse a token, starting at the provided offset, up until the |
---|
90 | ;; char-set where we should stop. Returns two values: the token or #f if |
---|
91 | ;; there is no token left, and the position on which the token ends. |
---|
92 | (define (parse-token value start-pos stop-char-set) |
---|
93 | (if (>= start-pos (string-length value)) |
---|
94 | (values #f start-pos) |
---|
95 | (let ((stop (char-set-adjoin stop-char-set #\"))) |
---|
96 | (let ((pos (string-index value stop start-pos))) |
---|
97 | (if pos |
---|
98 | (if (not (char=? #\" (string-ref value pos))) |
---|
99 | (values (string-trim-both (string-copy value start-pos pos)) |
---|
100 | pos) ; Stop-char found, but no quoting |
---|
101 | (let ((end-pos (escaped-string-end value (add1 pos) |
---|
102 | (char-set #\")))) |
---|
103 | ;; Found the double quote? Recurse on the remainder |
---|
104 | (receive (rest final-pos) |
---|
105 | (parse-token value (add1 end-pos) stop-char-set) |
---|
106 | (values (string-append |
---|
107 | (string-trim-both |
---|
108 | (string-copy value start-pos pos)) |
---|
109 | (if (= pos end-pos) |
---|
110 | (unescape (string-copy value (add1 pos))) |
---|
111 | (unescape (string-copy value (add1 pos) end-pos))) |
---|
112 | (or rest "")) |
---|
113 | final-pos)))) |
---|
114 | ;; Nothing found? Then the remainder of the string is the token |
---|
115 | (values (string-trim-both (string-copy value start-pos)) |
---|
116 | (string-length value))))))) |
---|
117 | |
---|
118 | ;; Just put all header strings in a list, so we can pass it on |
---|
119 | ;; Make no assumptions about the contents (only value, don't try to parse params) |
---|
120 | ;; This is different from (multiple (without-params generic-header-parser)) |
---|
121 | ;; because this does not assume it can split up comma-separated values |
---|
122 | (define (unknown-header-parser name contents headers) |
---|
123 | (update-header-contents! name (list (vector contents '())) headers)) |
---|
124 | |
---|
125 | (define (parse-parameters string start-pos param-parsers) |
---|
126 | (let loop ((start-pos start-pos) |
---|
127 | (params '())) |
---|
128 | (receive (key pos) |
---|
129 | (parse-token string start-pos (char-set #\; #\=)) |
---|
130 | (if key |
---|
131 | (if (and (< pos (string-length string)) (char=? (string-ref string pos) #\=)) |
---|
132 | (receive (value pos) |
---|
133 | (parse-token string (add1 pos) (char-set #\;)) |
---|
134 | (let ((key (string->symbol (string-downcase key)))) |
---|
135 | ;; In case of no value ("foo="), use the empty string as value |
---|
136 | (let ((value ((alist-ref key param-parsers eq? identity) (or value "")))) |
---|
137 | (loop (add1 pos) (cons (cons key value) params))))) |
---|
138 | ;; Missing value is interpreted as "present", |
---|
139 | ;; so #t. If not present, it's #f when looking it up |
---|
140 | (loop (add1 pos) (cons (cons (string->symbol key) #t) params))) |
---|
141 | (values (reverse params) pos))))) |
---|
142 | |
---|
143 | (define (parse-value+parameters string start-pos value-parser param-parsers) |
---|
144 | (receive (value pos) |
---|
145 | (parse-token string start-pos (char-set #\;)) |
---|
146 | (if (not value) |
---|
147 | (values #f pos) ;; XXX this is wrong and not expected by the caller! |
---|
148 | (receive (params pos) |
---|
149 | (parse-parameters string (add1 pos) param-parsers) |
---|
150 | (values (vector (value-parser value) params) pos))))) |
---|
151 | |
---|
152 | (define (with-params value-parser parameter-parsers) |
---|
153 | (lambda (entry) |
---|
154 | (receive (type+params pos) |
---|
155 | (parse-value+parameters entry 0 value-parser parameter-parsers) |
---|
156 | type+params))) |
---|
157 | |
---|
158 | (define (multiple other-parser #!optional (parameter-parsers '())) |
---|
159 | (lambda (name entries headers) |
---|
160 | (fold (lambda (entry headers) |
---|
161 | (update-header-contents! |
---|
162 | name |
---|
163 | (list ((with-params other-parser parameter-parsers) entry)) |
---|
164 | headers)) |
---|
165 | headers |
---|
166 | (split-multi-header entries)))) |
---|
167 | |
---|
168 | (define (single other-parser #!optional (parameter-parsers '())) |
---|
169 | (lambda (name contents headers) |
---|
170 | (replace-header-contents! |
---|
171 | name |
---|
172 | (list ((with-params other-parser parameter-parsers) contents)) |
---|
173 | headers))) |
---|
174 | |
---|
175 | (define (key/values key/value-parsers) |
---|
176 | (lambda (k/v) |
---|
177 | ;; We're abusing parse-parameters here to read value |
---|
178 | ;; instead of params. This is weird, but it works :) |
---|
179 | (receive (key+value pos) |
---|
180 | (parse-parameters k/v 0 key/value-parsers) |
---|
181 | (car key+value)))) |
---|
182 | |
---|
183 | (define symbol-parser-ci |
---|
184 | (compose string->symbol string-trim-both string-downcase)) |
---|
185 | |
---|
186 | (define symbol-parser |
---|
187 | (compose string->symbol string-trim-both)) |
---|
188 | |
---|
189 | (define (natnum-parser contents) |
---|
190 | (let ((num (string->number contents))) |
---|
191 | (if num (inexact->exact (max 0 (round num))) 0))) |
---|
192 | |
---|
193 | (define (host-parser contents) |
---|
194 | (let ((idx (string-index-right contents #\:))) |
---|
195 | (if idx |
---|
196 | (cons (substring/shared contents 0 idx) |
---|
197 | (inexact->exact |
---|
198 | (round (or (string->number (substring/shared contents (add1 idx))) |
---|
199 | 80)))) |
---|
200 | (cons contents 80)))) |
---|
201 | |
---|
202 | ; base64 of 128 bit hex digest as per RFC1864 |
---|
203 | (define md5-parser base64:decode) |
---|
204 | |
---|
205 | ;; bytes <start>-<end>/<total> |
---|
206 | (define (range-parser s) |
---|
207 | (and-let* ((parts (string-match "bytes[ \t]+([0-9]+)-([0-9]+)/([0-9]+)" s))) |
---|
208 | (map string->number (drop parts 1)))) |
---|
209 | |
---|
210 | ;; rfc1123-date | rfc850-date | asctime-date |
---|
211 | (define http-time-parser rfc822-time-parser) |
---|
212 | |
---|
213 | ;; (W/)<string> |
---|
214 | (define (entity-tag-parser name contents headers) |
---|
215 | (let ((contents (string-trim-both contents))) |
---|
216 | (replace-header-contents! |
---|
217 | name |
---|
218 | (list (if (string-prefix? "W/" contents) |
---|
219 | (vector `(weak . ,(parse-token contents 2 (char-set))) '()) |
---|
220 | (vector `(strong . ,(parse-token contents 0 (char-set))) '()))) |
---|
221 | headers))) |
---|
222 | |
---|
223 | ;;;; MAJOR TODOs |
---|
224 | ;; RFC822/1123 mailbox parser - just strings for now |
---|
225 | (define mailbox-parser identity) |
---|
226 | |
---|
227 | ;; Either an entity-tag or a http-date |
---|
228 | (define if-range-parser identity) |
---|
229 | |
---|
230 | ;; Either delta-seconds or RFC822 timestamp |
---|
231 | (define (retry-after-parser contents) |
---|
232 | (if (string-match "[ \t]*[0-9]+[ \t]*" contents) |
---|
233 | (natnum-parser contents) |
---|
234 | (rfc822-time-parser contents))) |
---|
235 | |
---|
236 | ;; Tricky - see 2616 14.45 |
---|
237 | ;; We probably shouldn't try to do too much parsing here |
---|
238 | (define via-parser (multiple identity)) |
---|
239 | |
---|
240 | ;; Tricky - see 2616 14.46 |
---|
241 | (define warning-parser (multiple identity)) |
---|
242 | ;;;; END MAJOR TODOs |
---|
243 | |
---|
244 | (define (key/value-parser str) |
---|
245 | (let ((idx (string-index str #\=))) |
---|
246 | (cons (string-take str idx) (string-drop str (add1 idx))))) |
---|
247 | |
---|
248 | ;; The 'expires' header defined by the Netscape cookie spec contains |
---|
249 | ;; an embedded comma. RFC 2109 cookies use Max-Age instead. |
---|
250 | (define (old-style-cookie? cookie) |
---|
251 | (not (not (string-search (regexp "^[^=\"]+=[^;]*;.*expires[[:space:]]*=" #f) cookie)))) |
---|
252 | |
---|
253 | (define set-cookie-parser |
---|
254 | (let ((param-parsers `((expires . ,rfc822-time-parser) |
---|
255 | (max-age . ,string->number) |
---|
256 | (version . ,string->number)))) |
---|
257 | (lambda (name contents headers) |
---|
258 | (if (old-style-cookie? contents) |
---|
259 | (update-header-contents! |
---|
260 | name |
---|
261 | (list ((with-params key/value-parser param-parsers) contents)) |
---|
262 | headers) |
---|
263 | ((multiple key/value-parser param-parsers) name contents headers))))) |
---|
264 | |
---|
265 | (define cache-control-parser |
---|
266 | (let ((splitter (lambda (str) ;; Is this correct? |
---|
267 | (map (compose string->symbol string-trim-both) |
---|
268 | (string-split str ","))))) |
---|
269 | (multiple |
---|
270 | (key/values `((max-age . ,natnum-parser) |
---|
271 | (s-maxage . ,natnum-parser) |
---|
272 | (max-stale . ,natnum-parser) |
---|
273 | (min-fresh . ,natnum-parser) |
---|
274 | (private . ,splitter) |
---|
275 | (no-cache . ,splitter)))))) |
---|
276 | |
---|
277 | ;; This too |
---|
278 | (define (pragma-parser name value headers) |
---|
279 | (update-header-contents! name (parse-parameters value 0 `()) headers)) |
---|
280 | |
---|
281 | ;; This one includes q parser |
---|
282 | (define (te-parser name value headers) |
---|
283 | (update-header-contents! name (parse-parameters value 0 `((q . ,quality-parser))) headers)) |
---|
284 | |
---|
285 | ;; Cookie headers are also braindead: there can be several cookies in one header, |
---|
286 | ;; separated by either commas or semicolons. The only way to distinguish a |
---|
287 | ;; new cookie from a parameter of the current cookie is the dollar in front |
---|
288 | ;; of all parameter names. |
---|
289 | ;; Also, there's a $Version attribute that prepends all cookies, which is |
---|
290 | ;; considered to apply to all cookies that follow. |
---|
291 | ;; |
---|
292 | ;; This code is a bit of a hack in the way it abuses parse-parameters |
---|
293 | (define (cookie-parser name value headers) |
---|
294 | ;; Get all attributes (dollar-prefixed params) and remaining cookies & params |
---|
295 | (define (split-attribs pairs) |
---|
296 | (span (lambda (pair) |
---|
297 | (string-prefix? "$" (symbol->string (car pair)))) |
---|
298 | pairs)) |
---|
299 | (receive (params pos) |
---|
300 | (parse-parameters value 0 `(($version . ,string->number) |
---|
301 | ($port . ,string->number))) |
---|
302 | (receive (global-attrs remaining) |
---|
303 | (split-attribs params) |
---|
304 | (let loop ((remaining remaining) |
---|
305 | (cookies '())) |
---|
306 | (if (null? remaining) |
---|
307 | (update-header-contents! name (reverse cookies) headers) |
---|
308 | (let ((cookie (cons (symbol->string (caar remaining)) |
---|
309 | (cdar remaining))) |
---|
310 | (params (cdr remaining))) |
---|
311 | (receive (local-attrs rest) |
---|
312 | (split-attribs params) |
---|
313 | (let* ((all-attrs (append global-attrs local-attrs)) |
---|
314 | (result (vector cookie all-attrs))) |
---|
315 | (loop rest (cons result cookies)))))))))) |
---|
316 | |
---|
317 | ;;; Unparsers ;;; |
---|
318 | (define (unparse-params params unparsers) |
---|
319 | (let loop ((params params) |
---|
320 | (results '())) |
---|
321 | (if (null? params) |
---|
322 | (string-join (reverse results) "; " 'prefix) |
---|
323 | (let* ((unparser (alist-ref unparsers (caar params) eq? |
---|
324 | (lambda (name value) |
---|
325 | (case value |
---|
326 | ;; #t means param is present (no value) |
---|
327 | ((#t) (->string name)) |
---|
328 | ;; #f means param is missing |
---|
329 | ((#f) #f) |
---|
330 | (else (conc name "=" value)))))) |
---|
331 | (str (unparser (caar params) (cdar params)))) |
---|
332 | (loop (cdr params) (if str (cons str results) results)))))) |
---|
333 | |
---|
334 | (define must-be-quoted-chars (char-set-adjoin char-set:iso-control #\")) |
---|
335 | |
---|
336 | (define quote-string |
---|
337 | (let ((smap (map (lambda (c) |
---|
338 | (cons (string c) |
---|
339 | (string-append "\\" (string c)))) |
---|
340 | (char-set->list must-be-quoted-chars)))) |
---|
341 | (lambda (string) |
---|
342 | (let ((error-chars (char-set #\newline))) |
---|
343 | (if (string-any error-chars string) |
---|
344 | (signal-http-condition "Unencoded newline in header contents! Please encode the newline in a way appropriate for this header" |
---|
345 | 'unencoded-header 'value string) |
---|
346 | (string-append "\"" (string-translate* string smap) "\"")))))) |
---|
347 | |
---|
348 | (define unparse-token |
---|
349 | (let* ((trigger-quoting-chars (char-set-union |
---|
350 | (char-set-adjoin must-be-quoted-chars #\, #\; #\=) |
---|
351 | char-set:blank))) |
---|
352 | (lambda (token) |
---|
353 | (if (string-any trigger-quoting-chars token) |
---|
354 | (quote-string token) |
---|
355 | token)))) |
---|
356 | |
---|
357 | ;; There's no need to make a specific header unparser for every header type. |
---|
358 | ;; Usually, the Scheme value representing a header can unambiguously be |
---|
359 | ;; unparsed into a header just by checking its type. |
---|
360 | (define (default-header-unparser header-name header-contents) |
---|
361 | (let loop ((headers (reverse header-contents)) |
---|
362 | (result '())) |
---|
363 | (if (null? headers) |
---|
364 | (sprintf "~A: ~A\r\n" |
---|
365 | (header-name->string header-name) |
---|
366 | (string-join result ", ")) |
---|
367 | (let* ((contents (get-value (car headers))) |
---|
368 | (value (cond |
---|
369 | ((pair? contents) ; alist? |
---|
370 | (if (eq? (cdr contents) #t) |
---|
371 | (unparse-token (->string (car contents))) |
---|
372 | (conc (unparse-token (->string (car contents))) "=" |
---|
373 | (unparse-token (->string (cdr contents)))))) |
---|
374 | ((uri? contents) (unparse-token (uri->string contents))) |
---|
375 | (else (unparse-token (->string contents))))) |
---|
376 | (parameter-unparsers '())) ; Maybe we want to make this a param |
---|
377 | (loop (cdr headers) |
---|
378 | (cons |
---|
379 | (string-append |
---|
380 | value |
---|
381 | (unparse-params (get-params (car headers)) |
---|
382 | parameter-unparsers)) |
---|
383 | result)))))) |
---|
384 | |
---|
385 | (define (etag-unparser header-name header-contents) |
---|
386 | (let ((contents (get-value (car header-contents)))) |
---|
387 | (sprintf "~A: ~A\r\n" |
---|
388 | (header-name->string header-name) |
---|
389 | (string-append |
---|
390 | (if (eq? 'weak (car contents)) "W/" "") |
---|
391 | (if (string-prefix? "W/" (cdr contents)) |
---|
392 | (quote-string (cdr contents)) |
---|
393 | (unparse-token (cdr contents))))))) |
---|
394 | |
---|
395 | (define (host-unparser header-name header-contents) |
---|
396 | (let ((contents (get-value (car header-contents)))) |
---|
397 | ;; XXX: urlencode? |
---|
398 | (sprintf "~A: ~A\r\n" |
---|
399 | (header-name->string header-name) |
---|
400 | (if (= (cdr contents) 80) |
---|
401 | (car contents) |
---|
402 | (conc (car contents) ":" (cdr contents)))))) |
---|