source: project/release/4/abnf/abnf.scm @ 14886

Last change on this file since 14886 was 14886, checked in by Ivan Raikov, 11 years ago

abnf bug fixes

File size: 7.3 KB
Line 
1;;
2;;  Parser for the grammar defined in RFC4234, "Augmented BNF for
3;;  Syntax Specifications: ABNF".
4;;
5;;  Based on the Haskell Rfc2234 module by Peter Simons.
6;;
7;;  Redistribution and use in source and binary forms, with or without
8;;  modification, are permitted provided that the following conditions
9;;  are met:
10;;
11;;  - Redistributions of source code must retain the above copyright
12;;  notice, this list of conditions and the following disclaimer.
13;;
14;;  - Redistributions in binary form must reproduce the above
15;;  copyright notice, this list of conditions and the following
16;;  disclaimer in the documentation and/or other materials provided
17;;  with the distribution.
18;;
19;;  - Neither name of the copyright holders nor the names of its
20;;  contributors may be used to endorse or promote products derived
21;;  from this software without specific prior written permission.
22;;
23;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
24;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
28;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
31;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35;;  POSSIBILITY OF SUCH DAMAGE.
36;;
37
38
39(module abnf 
40
41        (
42         (concatenation lex:seq) (alternatives lex:bar) 
43         repetition repetition1 repetition-n
44         optional-sequence range set set-from-string
45
46         char lit alpha 
47         binary decimal hexadecimal ascii-char cr lf crlf ctl 
48         dquote htab lwsp octet sp vchar wsp 
49         quoted-pair quoted-string
50
51         bind drop-consumed collect-chars longest memo
52         )
53
54   (import scheme chicken data-structures )
55
56   (require-extension matchable srfi-1 srfi-14 srfi-69)
57
58   (require-library lexgen)
59   (import (prefix lexgen lex:))
60
61(define (longest p)
62  (lambda (cont s) 
63        (p (lambda (s1) 
64             (if (or (null? s1) (null? (cdr s1))) (p cont s1)
65                 (cont (list (lex:longest s1))))) s)))
66
67(define (bind f p)
68    (lambda (cont s)
69      (let ((cont1 (lambda (ss) 
70                     (let ((ss1 (map (lambda (s) 
71                                       (match s ((eaten food) 
72                                                 (let ((x (f eaten)))
73                                                   (if x (list x food) s)))
74                                              (else #f))) ss)))
75                       (cont ss1)))))
76        (p (if (null? s) cont cont1) s))))
77
78
79(define-record-type box (make-box contents)
80  box? (contents box-contents box-contents-set! ))
81
82(define box make-box)
83(define unbox box-contents)
84(define set-box! box-contents-set!)
85
86(define (drop-consumed p)
87  (lambda (cont ss)
88    (let* ((ss1 (map (lambda (s)
89                       (match s
90                              ((eaten food)  (list (list (make-box eaten)) food))
91                              (else s)))
92                     ss))
93          (cont1  (lambda (ss)
94                    (let ((ss1 (map (lambda (s)
95                                      (match s
96                                             ((eaten food) (let* ((eaten1 (unbox (last eaten)))
97                                                                  (s1     (list eaten1 food)))
98                                                             s1))
99                                             (else s)))
100                                    ss)))
101                      (cont ss1)))))
102      (p cont1 ss1))))
103
104(define (collect-chars . rest)
105  (define (consumed-chars cs) 
106    (and (pair? cs)
107         (let loop ((cs cs) (ax (list)))
108           (cond ((null? cs)         (list ax))
109                 ((char? (car cs))   (loop (cdr cs) (cons (car cs) ax)))
110                 (else               (cons ax cs))))))
111  (let-optionals rest ((kons #f))
112    (let ((make (cond ((symbol? kons)     (lambda (x) `(,kons ,(car x))))
113                      ((procedure? kons)  (lambda (x) (kons (car x))))
114                      (else car))))
115      (lambda (x)
116        (let* ((x1   (consumed-chars x))
117               (res  (and x1 (not (null? (car x1))) (cons (make x1) (cdr x1)))))
118          res)))))
119
120(define (memo p . rest)
121  (let-optionals rest ((reduce (lex:try <)))
122    (lex:cps-table p reduce)))
123
124;;;; Terminal values (RFC 4234, Section 2.3)
125
126;; Matches a single character
127(define char lex:char)
128
129;; Concatenation (RFC 4234, Section 3.1)
130(define-syntax concatenation
131  (syntax-rules () 
132    ((_)     lex:pass)
133    ((_ a)    a)
134    ((_ a b)  (lex:seq a b))
135    ((concatenation a b ...) 
136     (lex:seq a (concatenation b ...)))
137    ))
138   
139;; Alternatives (RFC 4234, Section 3.2)
140(define-syntax alternatives
141  (syntax-rules () 
142    ((_)      lex:pass)
143    ((_ a)    a)
144    ((_ a b)  (lex:bar a b))
145    ((alternatives a b ...) 
146     (lex:bar a (alternatives b ...)))
147    ))
148
149;; Value range alternatives (RFC 4234, Section 3.4)
150(define range lex:range)
151
152;; Repetition (RFC 4234, Section 3.6)
153(define (repetition p)  (lex:star p))
154
155;; Convenience function for positive closure
156(define repetition1 lex:pos)
157
158;;  Specific repetition (RFC 4234, Section 3.7)
159(define (repetition-n n p)
160  (let ((ps (list-tabulate n (lambda (i) p))))
161    (lex:lst ps)))
162
163(define optional-sequence lex:opt)
164
165;; Matches a literal string (case-insensitive)
166
167(define (lit s)
168  (let* ((f  (lambda (t) (lex:tok t (lex:try char-ci=?))))
169        (ps (map f (if (string? s) (string->list s) s))))
170    (lex:lst ps)))
171
172
173;;;; Core Rules (RFC 4234, Section 6.1)
174
175;; Match any character of the alphabet.
176(define alpha  (lex:set char-set:letter))
177
178;; Match [0..1]
179(define binary    (lex:range #\0 #\1))
180
181;; Match [0..9]
182(define decimal   (lex:range #\0 #\9))
183
184;; Match [0..9] and [A..F,a..f]
185(define hexadecimal (lex:set char-set:hex-digit))
186
187;; Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is).
188
189(define ascii-char (lex:set (ucs-range->char-set 1 127)))
190
191;; Match the carriage return character \r.
192
193(define cr (char (integer->char 13)))
194
195;; Match the linefeed character \n.
196
197(define lf (char (integer->char 10)))
198
199;; Match the Internet newline \r\n.
200
201(define crlf (lex:seq cr lf))
202
203;; Match any US-ASCII control character. That is any character with a
204;; decimal value in the range of [0..31,127].
205
206(define ctl   (lex:set char-set:iso-control))
207
208;; Match the double quote character "
209
210(define dquote (char #\"))
211
212;; Match the tab \t character
213
214(define htab    (char (integer->char 9)))
215
216;; Match either 'sp' or 'htab'.
217
218(define wsp (lex:set (char-set #\space #\tab)))
219
220;; Match linear white space: *(WSP / CRLF WSP)
221
222(define lwsp (lex:star (lex:bar wsp (lex:seq (drop-consumed crlf) wsp))))
223
224
225;; Match /any/ character.
226(define octet  (lex:set char-set:full))
227
228;; Match the space character
229
230(define sp (char #\space))
231
232;; Match any printable ASCII character. (The "v" stands for
233;; "visible".) That is any character in the decimal range of
234;; [33..126].
235
236(define vchar  (lex:set char-set:graphic))
237
238;; Match a "quoted pair". Any characters (excluding CR and LF) may be
239;; quoted.
240
241(define quoted-pair (lex:seq (char #\\) (lex:bar vchar wsp)))
242
243;; Match a quoted string. The specials \ and " must be escaped inside
244;; a quoted string; CR and LF are not allowed at all.
245
246(define char-set:quoted (char-set-complement (string->char-set "\\\"\r\n")))
247(define qtext (lex:set char-set:quoted))
248(define qcont (lex:bar (lex:pos qtext) quoted-pair))
249
250(define quoted-string  (lex:seq dquote (lex:seq (lex:star qcont) dquote)))
251
252;;;; Additional convenience procedures and parser combinators
253
254;; match any character from an SRFI-14 character set
255(define set lex:set)
256
257;; match any character from a set defined as a string
258(define (set-from-string s)
259  (lex:set (string->char-set s)))
260
261)
Note: See TracBrowser for help on using the repository browser.