Changeset 12499 in project


Ignore:
Timestamp:
11/14/08 21:53:28 (11 years ago)
Author:
sjamaan
Message:

Implement product-parser with comments

Location:
release/4/intarweb/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • release/4/intarweb/trunk/header-parsers.scm

    r12456 r12499  
    123123                      (string-length value)))))))
    124124
     125;; Comments are a bit like tokens, except they can be nested
     126(define (parse-comment value start-pos)
     127  (let* ((len (string-length value))
     128         (nospace-pos (and (< start-pos len)
     129                           (string-skip value char-set:whitespace start-pos))))
     130    (if (and nospace-pos (char=? (string-ref value nospace-pos) #\())
     131        (let loop ((result "")
     132                   (start-pos (add1 nospace-pos)))
     133          (if (>= start-pos len)
     134              (values result len)
     135              (let ((pos (string-index value (char-set #\( #\)) start-pos)))
     136                (if pos
     137                    (if (char=? #\( (string-ref value pos)) ; Nested comment
     138                        (receive (nested end-pos)
     139                          (parse-comment value pos)
     140                          (loop (sprintf "~A~A(~A)"
     141                                         result
     142                                         (string-copy value start-pos pos)
     143                                         nested)
     144                                (add1 end-pos)))
     145                        ;; Else it's a )
     146                        (values (conc result (string-copy value start-pos pos)) (add1 pos)))
     147                    ;; Nothing found?  Then the remainder of the string is the token
     148                    (values (conc result (string-copy value start-pos))
     149                            (string-length value))))))
     150        ;; No (? Then fail to match the 'comment'
     151        (values #f start-pos))))
     152
    125153;; Just put all header strings in a list, so we can pass it on
    126154;; Make no assumptions about the contents (only value, don't try to parse params)
     
    218246(define http-time-parser rfc822-time-parser)
    219247
    220 ;; (W/)<string>
     248;; [W/]<string>
     249;; This is a full parser, because it needs to be able to distinguish
     250;; between W/"foo" and "W/foo". If it's preprocessed by the tokenizer
     251;; both get "normalised" to the same thing: W/foo
     252;;
     253;; XXX It could be a good idea if the single/multiple token parsers
     254;; did not do anything to their contents.  If the consuming parsers
     255;; want tokens, they know how to have it. OTOH, it would mean much
     256;; more code for all the parsers as they need to tokenize more...
    221257(define (entity-tag-parser name contents headers)
    222258  (let ((contents (string-trim-both contents)))
     
    227263               (vector `(strong . ,(parse-token contents 0 (char-set))) '())))
    228264     headers)))
     265
     266;; ( <product>[/<version>] [<comment>] )+
     267;; This parser is a full parser because parse-token cannot handle
     268;; comments yet... (if a ; is in a comment, it breaks down)
     269(define (product-parser name contents headers)
     270  (replace-header-contents!
     271   name
     272   (let loop ((start-pos 0)
     273              (products '()))
     274     (let*-values (((product pos) (parse-token contents start-pos (char-set #\/)))
     275                   ((version pos2) (parse-token contents (add1 pos)
     276                                                (char-set-union (char-set #\()
     277                                                                char-set:whitespace)))
     278                   ((comment pos3) (parse-comment contents pos2)))
     279       (if product
     280           (loop pos3 (cons (vector (list product version comment) '()) products))
     281           (reverse products))))
     282   headers))
    229283
    230284;;;; MAJOR TODOs
     
    388442                 result))))))
    389443
    390 (define (etag-unparser header-name header-contents)
     444(define (entity-tag-unparser header-name header-contents)
    391445  (let ((contents (get-value (car header-contents))))
    392446    (string-append
  • release/4/intarweb/trunk/intarweb.scm

    r12455 r12499  
    6060
    6161   ;; http-header-parsers
    62    split-multi-header unknown-header-parser single multiple parse-token
     62   split-multi-header unknown-header-parser single multiple
     63   parse-token parse-comment
    6364   header-contents header-values header-value header-list-ref
    6465   get-quality get-value get-params get-param
    65    natnum-parser symbol-parser-ci symbol-parser
    66    quote-string unparse-token default-header-unparser etag-unparser
     66   natnum-parser symbol-parser-ci symbol-parser product-parser
     67   quote-string unparse-token default-header-unparser entity-tag-unparser
    6768   )
    6869
     
    247248     (referer . ,(single uri-reference))
    248249     (retry-after . ,retry-after-parser)
    249      (server . ,(single identity))
     250     (server . ,product-parser)
    250251     (te . ,te-parser)
    251252     (trailer . ,(multiple symbol-parser-ci))
    252253     (transfer-encoding . ,(single symbol-parser-ci))
    253254     (upgrade . ,(multiple update-header-contents!))
    254      (user-agent . ,(single identity))
     255     (user-agent . ,product-parser)
    255256     (vary . ,(multiple symbol-parser-ci))
    256257     (via . ,via-parser)
     
    357358(define header-unparsers
    358359  (make-parameter
    359    `((etag . ,etag-unparser)
     360   `((etag . ,entity-tag-unparser)
    360361     (host . ,host-unparser))))
    361362
  • release/4/intarweb/trunk/tests/run.scm

    r11983 r12499  
    198198              '(strong . "W/bar")
    199199              (header-value 'etag headers))))
     200
     201  (test-group "Product parser"
     202    (test "Simple product"
     203          '(("Mozilla" "5.0" #f))
     204          (header-values 'user-agent (test-read-headers "User-Agent: Mozilla/5.0\r\n")))
     205    (test "Realistic product (comments, semicolons)"
     206          '(("Mozilla" "5.0" "X11; U; NetBSD amd64; en-US; rv:1.9.0.3") ("Gecko" "2008110501" #f) ("Minefield" "3.0.3" #f))
     207          (header-values 'user-agent (test-read-headers "User-Agent: Mozilla/5.0 (X11; U; NetBSD amd64; en-US; rv:1.9.0.3) Gecko/2008110501 Minefield/3.0.3\r\n"))))
    200208
    201209  (test-group "Set-Cookie-parser"
     
    576584
    577585;; TODO:
    578 ;; - Implement comments parsing (better: a sane parsing system!)
     586;; - Fix the parsing system so it's not so broken (more comfortable combinators)
    579587;; - Test malformed headers
    580588;; - When headers are malformed, what to do? Return #f for value and let
Note: See TracChangeset for help on using the changeset viewer.