source: project/release/4/abnf/abnf.scm @ 14812

Last change on this file since 14812 was 14812, checked in by Ivan Raikov, 11 years ago

numerous fixes to lexgen function invocations

File size: 6.3 KB
Line 
1;;
2;;  Parser for the grammar defined in RFC4234, "Augmented BNF for
3;;  Syntax Specifications: ABNF".
4;;
5;;  Based on the Haskell Rfc2234 module by Peter Simons.
6;;
7;;  Redistribution and use in source and binary forms, with or without
8;;  modification, are permitted provided that the following conditions
9;;  are met:
10;;
11;;  - Redistributions of source code must retain the above copyright
12;;  notice, this list of conditions and the following disclaimer.
13;;
14;;  - Redistributions in binary form must reproduce the above
15;;  copyright notice, this list of conditions and the following
16;;  disclaimer in the documentation and/or other materials provided
17;;  with the distribution.
18;;
19;;  - Neither name of the copyright holders nor the names of its
20;;  contributors may be used to endorse or promote products derived
21;;  from this software without specific prior written permission.
22;;
23;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
24;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
28;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
31;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35;;  POSSIBILITY OF SUCH DAMAGE.
36;;
37
38
39(module abnf 
40
41   (alpha char 
42          binary decimal hexadecimal ascii-char lit cr lf crlf ctl 
43          dquote htab lwsp octet sp vchar wsp 
44          quoted-pair quoted-string
45          concatenation alternatives range 
46          repetition repetition1 repetition-n
47          optional-sequence
48          set set-from-string
49          bind drop consume collect 
50          )
51
52   (import scheme chicken data-structures srfi-1 srfi-14)
53
54   (require-extension matchable )
55
56   (require-library lexgen)
57   (import (prefix lexgen lex:))
58
59;;;; Terminal values (RFC 4234, Section 2.3)
60
61;; Matches a single character
62(define char lex:char)
63
64;; Matches a literal string (case-insensitive)
65(define (lit s)
66  (let ((f (lambda (t) (lex:tok t (lex:try char-ci=?)))))
67    (lex:seq (map f (if (string? s) (string->list s) s)))))
68
69
70;; Concatenation (RFC 4234, Section 3.1)
71(define concatenation   lex:seq)
72
73;; Alternatives (RFC 4234, Section 3.2)
74(define alternatives  lex:bar)
75
76;; Value range alternatives (RFC 4234, Section 3.4)
77(define range lex:range)
78
79;; Repetition (RFC 4234, Section 3.6)
80(define repetition lex:star)
81
82;; convenience function for positive closure
83(define repetition1 lex:pos)
84
85;;  Specific repetition (RFC 4234, Section 3.7)
86(define (repetition-n n p)
87  (and (integer? n) (positive? n)
88       (lex:bar (list-tabulate n (lambda (i) p)))))
89
90(define optional-sequence lex:opt)
91
92
93;;;; Core Rules (RFC 4234, Section 6.1)
94
95;; Match any character of the alphabet.
96(define alpha  (lex:set char-set:letter))
97
98;; Match [0..1]
99(define binary    (lex:range #\0 #\1))
100
101;; Match [0..9]
102(define decimal   (lex:range #\0 #\9))
103
104;; Match [0..9] and [A..F,a..f]
105(define hexadecimal (lex:set char-set:hex-digit))
106
107;; Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is).
108
109(define ascii-char (lex:set (ucs-range->char-set 1 127)))
110
111;; Match the carriage return character \r.
112
113(define cr (char (integer->char 13)))
114
115;; Match the linefeed character \n.
116
117(define lf (char (integer->char 10)))
118
119
120;; Match the Internet newline \r\n.
121
122(define crlf (lex:seq (list cr lf)))
123
124;; Match any US-ASCII control character. That is any character with a
125;; decimal value in the range of [0..31,127].
126
127(define ctl   (lex:set char-set:iso-control))
128
129;; Match the double quote character "
130
131(define dquote (char #\"))
132
133;; Match the tab \t character
134
135(define htab    (char (integer->char 9)))
136
137;; Match either 'sp' or 'htab'.
138
139(define wsp (lex:set char-set:whitespace))
140
141;; Match "linear white-space". That is any number of consecutive wsp,
142;; optionally followed by a 'crlf' and (at least) one more wsp.
143
144(define lwsp (lex:seq (list (lex:pos wsp) (lex:opt crlf) (lex:pos wsp))))
145
146;; Match /any/ character.
147(define octet  (lex:set char-set:full))
148
149;; Match the space character
150
151(define sp (char #\space))
152
153;; Match any printable ASCII character. (The "v" stands for
154;; "visible".) That is any character in the decimal range of
155;; [33..126].
156
157(define vchar  (lex:set char-set:graphic))
158
159
160;; Match a "quoted pair". Any characters (excluding CR and LF) may be
161;; quoted.
162
163(define char-set:not-crlf (char-set-complement (string->char-set "\r\n")))
164(define quoted-pair (lex:seq (list (char #\\) (lex:set char-set:not-crlf))))
165
166;; Match a quoted string. The specials \ and " must be escaped inside
167;; a quoted string; CR and LF are not allowed at all.
168
169(define char-set:quoted (char-set-complement (string->char-set "\\\"\r\n")))
170(define qtext (lex:set char-set:quoted))
171(define qcont (lex:bar (list (lex:pos qtext) quoted-pair)))
172
173(define quoted-string  (lex:seq (list dquote (lex:star qcont) dquote)))
174
175;;;; Additional convenience procedures and parser combinators
176
177;; match any character from an SRFI-14 character set
178(define set lex:set)
179
180;; match any character from a set defined as a string
181(define (set-from-string s)
182  (lex:set (string->char-set s)))
183
184(define (bind f p)
185  (lambda (s)
186    (let ((s1 (p s)))
187      (match (lex:longest (p s))
188             ((eaten food) 
189              (let ((x (f eaten)))
190                (and x `((,x ,food)))))
191             (else #f)))))
192
193(define (drop p) 
194  (bind (lambda (x) (drop-while atom? x)) p))
195
196(define (consume p) 
197  (lambda (s)
198    (let loop ((lst (list)) (rst s))
199      (match (p rst)
200             ((a rst)  (loop (cons a lst) rst))
201             (else  (list (reverse lst) rst))))))
202
203
204(define (collect . rest)
205  (define (consumed-atoms cs) 
206    (and (pair? cs)
207         (let loop ((cs cs) (ax (list)))
208           (cond ((null? cs)         `(,ax))
209                 ((atom? (car cs))   (loop (cdr cs) (cons (car cs) ax)))
210                 (else               (cons ax cs))))))
211  (let-optionals rest ((kons #f))
212    (let ((make (cond ((symbol? kons) (lambda (x) `(,kons ,(car x))))
213                      ((procedure? kons) (lambda (x) (kons (car x))))
214                      (else car))))
215      (lambda (x)
216        (let ((x1 (consumed-atoms x)))
217          (and x1 (cons (make x1) (cdr x1))))))))
218
219)
Note: See TracBrowser for help on using the repository browser.