source: project/release/4/abnf/abnf.scm @ 14756

Last change on this file since 14756 was 14756, checked in by Ivan Raikov, 11 years ago

some small code reorganization in abnf

File size: 6.3 KB
Line 
1;;
2;;  Parser for the grammar defined in RFC4234, "Augmented BNF for
3;;  Syntax Specifications: ABNF".
4;;
5;;  Based on the Haskell Rfc2234 module by Peter Simons.
6;;
7;;  Redistribution and use in source and binary forms, with or without
8;;  modification, are permitted provided that the following conditions
9;;  are met:
10;;
11;;  - Redistributions of source code must retain the above copyright
12;;  notice, this list of conditions and the following disclaimer.
13;;
14;;  - Redistributions in binary form must reproduce the above
15;;  copyright notice, this list of conditions and the following
16;;  disclaimer in the documentation and/or other materials provided
17;;  with the distribution.
18;;
19;;  - Neither name of the copyright holders nor the names of its
20;;  contributors may be used to endorse or promote products derived
21;;  from this software without specific prior written permission.
22;;
23;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
24;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
28;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
31;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35;;  POSSIBILITY OF SUCH DAMAGE.
36;;
37
38
39(module abnf
40
41   (alpha binary decimal hexadecimal char lit cr lf crlf ctl 
42          dquote htab lwsp octet sp vchar wsp 
43          quoted-pair quoted-string
44          concatenation alternatives range 
45          repetition repetition1 repetition-n
46          optional-sequence
47          set set-from-string
48          bind drop consume collect 
49          )
50
51   (import scheme chicken data-structures srfi-1 srfi-14)
52
53   (require-extension matchable )
54
55   (require-library lexgen)
56   (import (prefix lexgen lex:))
57
58;;;; Terminal values (RFC 4234, Section 2.3)
59
60;; Matches a single character
61(define char lex:char)
62
63;; Matches a literal string (case-insensitive)
64(define (lit s)
65  (let ((f (lambda (t) (lex:tok t (lex:try char-ci=?)))))
66    (lex:seq (map f (if (string? s) (string->list s) s)))))
67
68
69;; Concatenation (RFC 4234, Section 3.1)
70(define concatenation   lex:seq)
71
72;; Alternatives (RFC 4234, Section 3.2)
73(define alternatives  lex:bar)
74
75;; Value range alternatives (RFC 4234, Section 3.4)
76(define range lex:range)
77
78;; Repetition (RFC 4234, Section 3.6)
79(define repetition lex:star)
80
81;; convenience function for positive closure
82(define repetition1 lex:pos)
83
84;;  Specific repetition (RFC 4234, Section 3.7)
85(define (repetition-n n p)
86  (and (integer? n) (positive? n)
87       (lex:bar (list-tabulate n (lambda (i) p)))))
88
89(define optional-sequence lex:opt)
90
91
92;;;; Core Rules (RFC 4234, Section 6.1)
93
94;; Match any character of the alphabet.
95(define alpha  (lex:set char-set:letter))
96
97;; Match [0..1]
98(define binary    (lex:range #\0 #\1))
99
100;; Match [0..9]
101(define decimal   (lex:range #\0 #\9))
102
103;; Match [0..9] and [A..F,a..f]
104(define hexadecimal (lex:set char-set:hex-digit))
105
106;; Match any 7-bit US-ASCII character except for NUL (ASCII value 0, that is).
107
108(define char (lex:set (ucs-range->char-set 1 127)))
109
110;; Match the carriage return character \r.
111
112(define cr (char (integer->char 13)))
113
114;; Match the linefeed character \n.
115
116(define lf (char (integer->char 10)))
117
118
119;; Match the Internet newline \r\n.
120
121(define crlf (lex:seq (list cr lf)))
122
123;; Match any US-ASCII control character. That is any character with a
124;; decimal value in the range of [0..31,127].
125
126(define ctl   (lex:set char-set:iso-control))
127
128;; Match the double quote character "
129
130(define dquote (char #\"))
131
132;; Match the tab \t character
133
134(define htab    (char (integer->char 9)))
135
136;; Match "linear white-space". That is any number of consecutive wsp,
137;; optionally followed by a 'crlf' and (at least) one more wsp.
138
139(define lwsp (lex:seq (list (lex:pos wsp) (lex:opt crlf) (lex:pos wsp))))
140
141;; Match /any/ character.
142(define octet  (lex:set char-set:full))
143
144;; Match the space character
145
146(define sp (char #\space))
147
148
149;; Match any printable ASCII character. (The "v" stands for
150;; "visible".) That is any character in the decimal range of
151;; [33..126].
152
153(define vchar  (lex:set char-set:graphic))
154
155;; Match either 'sp' or 'htab'.
156
157(define wsp (lex:set char-set:whitespace))
158
159
160;; Match a "quoted pair". Any characters (excluding CR and LF) may be
161;; quoted.
162
163(define quoted-pair (lex:seq (char #\\) (lex:set char-set:not-crlf)))
164(define char-set:not-crlf (char-set-complement (string->char-set "\r\n")))
165
166;; Match a quoted string. The specials \ and " must be escaped inside
167;; a quoted string; CR and LF are not allowed at all.
168
169(define quoted-string
170  (lex:seq dquote (lex:star qcont) dquote))
171
172(define qcont (lex:bar (lex:pos qtext) quoted-pair))
173(define qtext (lex:set char-set:quoted))
174(define char-set:quoted (char-set-complement (string->char-set "\\\"\r\n")))
175
176;;;; Additional convenience procedures and parser combinators
177
178;; match any character from an SRFI-14 character set
179(define set lex:set)
180
181;; match any character from a set defined as a string
182(define (set-from-string s)
183  (lex:set (string->char-set s)))
184
185(define (bind f p)
186  (lambda (s)
187    (let ((s1 (p s)))
188      (match (longest (p s))
189             ((eaten food) 
190              (let ((x (f eaten)))
191                (and x `((,x ,food)))))
192             (else #f)))))
193
194(define (drop p) 
195  (bind (lambda (x) (drop-while atom? x)) p))
196
197(define (consume p) 
198  (lambda (s)
199    (let loop ((lst (list)) (rst s))
200      (match (p rst)
201             ((a rst)  (loop (cons a lst) rst))
202             (else  (list (reverse lst) rst))))))
203
204
205(define (collect . rest)
206  (define (consumed-atoms cs) 
207    (and (pair? cs)
208         (let loop ((cs cs) (ax (list)))
209           (cond ((null? cs)         `(,ax))
210                 ((atom? (car cs))   (loop (cdr cs) (cons (car cs) ax)))
211                 (else               (cons ax cs))))))
212  (let-optionals rest ((kons #f))
213    (let ((make (cond ((symbol? kons) (lambda (x) `(,kons ,(car x))))
214                      ((procedure? kons) (lambda (x) (kons (car x))))
215                      (else car))))
216      (lambda (x)
217        (let ((x1 (consumed-atoms x)))
218          (and x1 (cons (make x1) (cdr x1))))))))
219
220
221)
Note: See TracBrowser for help on using the repository browser.