source: project/release/4/abnf/abnf.scm @ 14876

Last change on this file since 14876 was 14876, checked in by Ivan Raikov, 10 years ago

abnf adapted to cps lexgen interface

File size: 6.7 KB
Line 
1;;
2;;  Parser for the grammar defined in RFC4234, "Augmented BNF for
3;;  Syntax Specifications: ABNF".
4;;
5;;  Based on the Haskell Rfc2234 module by Peter Simons.
6;;
7;;  Redistribution and use in source and binary forms, with or without
8;;  modification, are permitted provided that the following conditions
9;;  are met:
10;;
11;;  - Redistributions of source code must retain the above copyright
12;;  notice, this list of conditions and the following disclaimer.
13;;
14;;  - Redistributions in binary form must reproduce the above
15;;  copyright notice, this list of conditions and the following
16;;  disclaimer in the documentation and/or other materials provided
17;;  with the distribution.
18;;
19;;  - Neither name of the copyright holders nor the names of its
20;;  contributors may be used to endorse or promote products derived
21;;  from this software without specific prior written permission.
22;;
23;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
24;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
28;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
31;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35;;  POSSIBILITY OF SUCH DAMAGE.
36;;
37
38
39(module abnf 
40
41        (
42         (concatenation lex:seq) (alternatives lex:bar) 
43         repetition repetition1 repetition-n
44         optional-sequence range set set-from-string
45
46         char lit alpha 
47         binary decimal hexadecimal ascii-char cr lf crlf ctl 
48         dquote htab lwsp octet sp vchar wsp 
49         quoted-pair quoted-string
50
51         bind drop-consumed collect-chars
52         )
53
54   (import scheme chicken data-structures )
55
56   (require-extension matchable srfi-1 srfi-14)
57
58   (require-library lexgen)
59   (import (prefix lexgen lex:))
60
61;;;; Terminal values (RFC 4234, Section 2.3)
62
63;; Matches a single character
64(define char lex:char)
65
66;; Concatenation (RFC 4234, Section 3.1)
67(define-syntax concatenation
68  (syntax-rules () 
69    ((_)     lex:pass)
70    ((_ a)    a)
71    ((_ a b)  (lex:seq a b))
72    ((concatenation a b ...) 
73     (lex:seq a (concatenation b ...)))
74    ))
75   
76;; Alternatives (RFC 4234, Section 3.2)
77(define-syntax alternatives
78  (syntax-rules () 
79    ((_)      lex:pass)
80    ((_ a)    a)
81    ((_ a b)  (lex:bar a b))
82    ((alternatives a b ...) 
83     (lex:bar a (alternatives b ...)))
84    ))
85
86;; Value range alternatives (RFC 4234, Section 3.4)
87(define range lex:range)
88
89;; Repetition (RFC 4234, Section 3.6)
90(define repetition lex:star)
91
92;; Convenience function for positive closure
93(define repetition1 lex:pos)
94
95;;  Specific repetition (RFC 4234, Section 3.7)
96(define (repetition-n n p)
97  (let ((ps (list-tabulate n (lambda (i) p))))
98    (lex:lst ps)))
99
100(define optional-sequence lex:opt)
101
102;; Matches a literal string (case-insensitive)
103
104(define (lit s)
105  (let* ((f  (lambda (t) (lex:tok t (lex:try char-ci=?))))
106        (ps (map f (if (string? s) (string->list s) s))))
107    (lex:lst ps)))
108
109
110;;;; Core Rules (RFC 4234, Section 6.1)
111
112;; Match any character of the alphabet.
113(define alpha  (lex:set char-set:letter))
114
115;; Match [0..1]
116(define binary    (lex:range #\0 #\1))
117
118;; Match [0..9]
119(define decimal   (lex:range #\0 #\9))
120
121;; Match [0..9] and [A..F,a..f]
122(define hexadecimal (lex:set char-set:hex-digit))
123
124;; Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is).
125
126(define ascii-char (lex:set (ucs-range->char-set 1 127)))
127
128;; Match the carriage return character \r.
129
130(define cr (char (integer->char 13)))
131
132;; Match the linefeed character \n.
133
134(define lf (char (integer->char 10)))
135
136;; Match the Internet newline \r\n.
137
138(define crlf (lex:seq cr lf))
139
140;; Match any US-ASCII control character. That is any character with a
141;; decimal value in the range of [0..31,127].
142
143(define ctl   (lex:set char-set:iso-control))
144
145;; Match the double quote character "
146
147(define dquote (char #\"))
148
149;; Match the tab \t character
150
151(define htab    (char (integer->char 9)))
152
153;; Match either 'sp' or 'htab'.
154
155(define wsp (lex:set (char-set #\space #\tab)))
156
157;; Match linear white space: *(WSP / CRLF WSP)
158
159(define lwsp (lex:star (lex:bar wsp (lex:seq crlf wsp))))
160
161
162;; Match /any/ character.
163(define octet  (lex:set char-set:full))
164
165;; Match the space character
166
167(define sp (char #\space))
168
169;; Match any printable ASCII character. (The "v" stands for
170;; "visible".) That is any character in the decimal range of
171;; [33..126].
172
173(define vchar  (lex:set char-set:graphic))
174
175;; Match a "quoted pair". Any characters (excluding CR and LF) may be
176;; quoted.
177
178(define quoted-pair (lex:seq (char #\\) (lex:bar vchar wsp)))
179
180;; Match a quoted string. The specials \ and " must be escaped inside
181;; a quoted string; CR and LF are not allowed at all.
182
183(define char-set:quoted (char-set-complement (string->char-set "\\\"\r\n")))
184(define qtext (lex:set char-set:quoted))
185(define qcont (lex:bar (lex:pos qtext) quoted-pair))
186
187(define quoted-string  (lex:seq dquote (lex:seq (lex:star qcont) dquote)))
188
189;;;; Additional convenience procedures and parser combinators
190
191;; match any character from an SRFI-14 character set
192(define set lex:set)
193
194;; match any character from a set defined as a string
195(define (set-from-string s)
196  (lex:set (string->char-set s)))
197
198(define (bind f p)
199  (lambda (a r s)
200    (let ((a1 (lambda (s1) 
201                (match (lex:longest s1)
202                       ((eaten food) 
203                        (let ((x (f eaten)))
204                          (a `((,x ,food)))))
205                       (else #f)))))
206      (p a1 r s))))
207
208
209(define-record-type box (make-box contents)
210  box? (contents box-contents ))
211
212(define box make-box)
213(define unbox box-contents)
214
215(define (drop-consumed p)
216  (lambda (a r ss)
217    (if (null? ss) (a ss)
218        (let ((ss1 (map (lambda (s)
219                          (match s ((eaten food)  (list (list (make-box eaten)) food))
220                                 (else s)))
221                        ss)))
222          (match (lex:longest (p a r ss1))
223                 ((eaten food) (let ((eaten1 (unbox (last eaten))))
224                                 (a (list (list eaten1 food)))))
225                 (else (r ss1)))))))
226
227(define (collect-chars . rest)
228  (define (consumed-chars cs) 
229    (and (pair? cs)
230         (let loop ((cs cs) (ax (list)))
231           (cond ((null? cs)         `(,ax))
232                 ((char? (car cs))   (loop (cdr cs) (cons (car cs) ax)))
233                 (else               (cons ax cs))))))
234  (let-optionals rest ((kons #f))
235    (let ((make (cond ((symbol? kons) (lambda (x) `(,kons ,(car x))))
236                      ((procedure? kons) (lambda (x) (kons (car x))))
237                      (else car))))
238      (lambda (x)
239        (let ((x1 (consumed-chars x)))
240          (and x1 (cons (make x1) (cdr x1))))))))
241
242)
Note: See TracBrowser for help on using the repository browser.