source: project/release/4/lexgen/trunk/lexgen.scm @ 15779

Last change on this file since 15779 was 15779, checked in by Ivan Raikov, 10 years ago

lexgen: the seq combinator must check if the first parser in the
sequence has failed.

File size: 7.5 KB
Line 
1;;
2;;  Lexer combinator library.
3;;
4;;  Based on the SML lexer generator by Thant Tessman.
5;;
6;;  Ported to Chicken Scheme by Ivan Raikov.
7;;  Copyright 2009 Ivan Raikov.
8;;
9;;
10;;  Redistribution and use in source and binary forms, with or without
11;;  modification, are permitted provided that the following conditions
12;;  are met:
13;;
14;;  - Redistributions of source code must retain the above copyright
15;;  notice, this list of conditions and the following disclaimer.
16;;
17;;  - Redistributions in binary form must reproduce the above
18;;  copyright notice, this list of conditions and the following
19;;  disclaimer in the documentation and/or other materials provided
20;;  with the distribution.
21;;
22;;  - Neither name of the copyright holders nor the names of its
23;;  contributors may be used to endorse or promote products derived
24;;  from this software without specific prior written permission.
25;;
26;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
27;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
28;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
31;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
34;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
35;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38;;  POSSIBILITY OF SUCH DAMAGE.
39;;
40
41(module lexgen
42
43  ( tok seq star bar 
44    try pass pos opt char 
45    set range lst lit 
46    cps-table longest lex )
47
48
49  (import scheme chicken data-structures srfi-14 srfi-69)
50  (require-library srfi-1)
51  (import (only srfi-1 first second filter-map fold concatenate lset<= ))
52  (require-extension matchable)
53
54;;
55;;   This is a lexer generator comprised in its core of four small
56;;   functions. The programmer assembles these functions into regular
57;;   expression pattern-matching functions.
58;;
59;;   The idea is that a pattern matcher function takes a list of
60;;   streams, and returns a new list of streams advanced by every
61;;   combination allowed by the pattern matcher function. In this
62;;   implementation, a stream is simply a tuple containing a list of
63;;   elements consumed by the pattern matcher, and a list of
64;;   characters not yet consumed.
65;;
66;;   Note that the number of streams returned by the function
67;;   typically won't match the number of streams passed in. If the
68;;   pattern doesn't match at all, the empty list is returned.
69;;
70
71;; 'tok' builds a pattern matcher function that applies procedure p to
72;; a given token and an input character. If the procedure returns a
73;; true value, that value is prepended to the list of consumed
74;; elements, and the input character is removed from the list of input
75;; elements.
76
77(define (tok t p)
78  (let ((f (lambda (s) 
79             (match s
80                    ((c (h . r)) 
81                     (let ((ans (p t h)))
82                        (and ans (list (cons ans c) r))))
83                    (else #f)))))
84    (lambda (cont streams)
85      (let ((streams1 (filter-map f streams)))
86        (cont streams1)))))
87   
88
89;; This matches a sequence of patterns.
90
91(define (seq p1 p2)
92  (lambda (cont streams)
93    (p1 (lambda (streams1) 
94          (if (null? streams1) 
95              (cont streams1) (p2 cont streams1)))
96        streams)))
97
98;; This matches either one of two patterns. It's analogous to patterns
99;; separated by the '|' in regular expressions.
100
101(define (bar p1 p2)
102  (lambda (cont streams)
103    (let ((cont1 (lambda (streams1) 
104                   (if (lset<= equal? streams1 streams) (p2 cont streams) (cont streams1)))))
105      (p1 cont1 streams))))
106
107;; Kleene closure. Analogous to '*'
108
109(define (star p)
110  (lambda (cont streams)
111    (if (null? streams) (cont streams)
112        (let ((cont1 (lambda (streams1) (cont (concatenate (list streams streams1))))))
113          (p (lambda (streams1) 
114               (cond ((lset<= equal? streams1 streams) (cont streams))
115                     (else  ((star p) cont1 streams1))))
116             streams)))))
117
118;; The rest of these are built from the previous four and are provided
119;; for convenience.
120
121;; this parser always succeeds
122(define (pass cont s) (cont s))
123 
124;; Positive closure. Analogous to '+'
125
126(define (pos pat) (seq pat (star pat)))
127
128;; Optional pattern. Analogous to '?'
129
130(define (opt pat) (bar pat pass))
131
132;; Converts a binary predicate procedure to a binary procedure that
133;; returns its right argument when the predicate is true, and false
134;; otherwise.
135
136(define (try p) (lambda (x y) (let ((res (p x y))) (and res y))))
137
138;; Matches a single character
139
140(define (char c) (tok c (try char=?)))
141 
142;; Matches any of a SRFI-14 set of characters.
143
144(define (set s)
145  (let ((cs (if (char-set? s) s (list->char-set (if (string? s) (string->list s) s)))))
146    (tok cs (try char-set-contains?))))
147
148;; Range of characters. Analogous to character class '[]'
149
150(define (range a b)
151  (if (char<? b a) (range b a)
152      (set (ucs-range->char-set (char->integer a) (+ 1 (char->integer b))))))
153
154;; Matches a consecutive list of patterns
155
156(define (lst ps)
157  (let ((ps (reverse ps)))
158    (let loop ((ps (cdr ps)) (p1 (car ps)))
159      (cond ((null? ps) p1)
160            (else (loop (cdr ps) (seq (car ps) p1)))))))
161 
162;; Matches a literal string s
163
164(define (lit s)
165  (let ((f (lambda (t) (tok t (try char=?)))))
166    (lst (map f (if (string? s) (string->list s) s)))))
167
168
169;; Takes the resulting streams produced by the application of a
170;; pattern on a stream (or streams) and selects the longest match if
171;; one exists.
172
173(define (longest streams)
174  (match-let (((count stream)
175               (fold (lambda (stream max)
176                       (match (list stream max)
177                              (((eaten food) (max-count max-stream)) 
178                               (if (< max-count (length eaten))
179                                   (list (length eaten) stream) max))
180                              (else (error 'longest "invalid stream" stream))))
181                     (list 0 `(() ()))
182                     streams)))
183             (and (positive? count) stream)))
184
185
186;; This takes a pattern and a string, turns the string into a list of
187;; streams (containing one stream), applies the pattern, and returns
188;; the longest match.
189
190(define (->char-list s)
191  (if (string? s) (string->list s) s))
192
193(define (lex pat error s)
194  (let* ((stream (->char-list s))
195         (res    (longest (pat (lambda (s1) (if (null? s1) (error s) s1)) `((() ,stream))))))
196    (and res (list (reverse (first res)) (second res)))))
197
198
199(define make-table      make-hash-table)
200(define table-ref       hash-table-ref/default)
201(define table-put!      hash-table-set!)
202(define table-for-each  hash-table-for-each)
203
204
205;; tabled execution wrapper
206;; from _Tabled Execution in Scheme_, by Willcock, et al.
207
208(define (cps-table f combine)
209  (let ((memo    (make-table equal?))
210        (k-list  (make-table equal?)))
211    (lambda (k arg)
212      (table-put! k-list arg (cons k (table-ref k-list arg (list))))
213      (if (not (table-ref memo arg  #f))
214          (let ((memo-table (make-table)))
215            (table-put! memo arg memo-table)
216            (f (lambda (result)
217                 (if (null? result) (k result)
218                     (let* ((len     (length result))
219                            (old-len (table-ref memo-table result #f))
220                            (new-len (combine (or old-len 0) len)))
221                       (if (not (equal? old-len new-len))
222                           (begin
223                             (table-put! memo-table result new-len)
224                             (for-each (lambda (saved-k) (saved-k result))
225                                       (table-ref k-list arg (list))))))))
226               
227               arg))
228          (let ((memo-table (table-ref memo arg #f) ))
229            (table-for-each memo-table (lambda (result len) (k result)))
230            )))))
231
232)
Note: See TracBrowser for help on using the repository browser.