source: project/release/4/lexgen/trunk/lexgen.scm @ 14808

Last change on this file since 14808 was 14808, checked in by Ivan Raikov, 11 years ago

now using require-extension srfi-1

File size: 5.7 KB
Line 
1;;
2;;  Lexer combinator library.
3;;
4;;  Based on the SML lexer generator by Thant Tessman.
5;;
6;;  Ported to Chicken Scheme by Ivan Raikov.
7;;  Copyright 2009 Ivan Raikov.
8;;
9;;
10;;  Redistribution and use in source and binary forms, with or without
11;;  modification, are permitted provided that the following conditions
12;;  are met:
13;;
14;;  - Redistributions of source code must retain the above copyright
15;;  notice, this list of conditions and the following disclaimer.
16;;
17;;  - Redistributions in binary form must reproduce the above
18;;  copyright notice, this list of conditions and the following
19;;  disclaimer in the documentation and/or other materials provided
20;;  with the distribution.
21;;
22;;  - Neither name of the copyright holders nor the names of its
23;;  contributors may be used to endorse or promote products derived
24;;  from this software without specific prior written permission.
25;;
26;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
27;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
28;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
31;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
34;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
35;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38;;  POSSIBILITY OF SUCH DAMAGE.
39;;
40
41(module lexgen
42
43  ( tok seq star bar 
44    try pos opt char 
45    set range lit 
46    longest lex )
47
48
49   (import scheme chicken data-structures srfi-14)
50   (require-extension srfi-1 matchable)
51
52;;
53;;   This is a lexer generator comprised in its core of four small
54;;   functions. The programmer assembles these functions into regular
55;;   expression pattern-matching functions.
56;;
57;;   The idea is that a pattern matcher function takes a list of
58;;   streams, and returns a new list of streams advanced by every
59;;   combination allowed by the pattern matcher function. In this
60;;   implementation, a stream is simply a tuple containing a list of
61;;   elements consumed by the pattern matcher, and a list of
62;;   characters not yet consumed.
63;;
64;;   Note that the number of streams returned by the function
65;;   typically won't match the number of streams passed in. If the
66;;   pattern doesn't match at all, the empty list is returned.
67;;
68
69;; 'tok' builds a pattern matcher function that applies procedure p to
70;; a given token and an input character. If the procedure returns a
71;; true value, that value is prepended to the list of consumed
72;; elements, and the input character is removed from the list of input
73;; elements.
74
75(define (tok t p)
76  (let ((f (lambda (s) 
77             (match s ((s (h . r)) 
78                       (let ((ans (p t h)))
79                         (and ans (list (cons ans s) r))))
80                    (else #f)))))
81    (lambda (streams)
82      (filter-map f streams))))
83   
84
85;; This matches a sequence of patterns.
86
87(define (seq pats)
88  (lambda (streams)
89    (fold (lambda (f s) (f s)) streams pats)))
90
91
92;; This matches any of a list of patterns. It's analogous to a series
93;; of patterns separated by the '|' in traditional regular
94;; expressions.
95
96(define (bar pats)
97  (lambda (streams)
98    (concatenate (map (lambda (f) (f streams)) pats))))
99
100;; Kleene closure. Analogous to '*'
101
102(define (star pat)
103  (define (f streams)
104    (let ((res (pat streams)))
105      (if (null? res) (list)
106          (cons res (f res)))))
107  (lambda (streams)
108    (concatenate (cons streams (f streams)))))
109
110
111;; The rest of these are built from the previous four and are provided
112;; for convenience.
113
114;; Positive closure. Analogous to '+'
115
116(define (pos pat)
117  (seq (list pat (star pat))))
118
119;; Optional pattern. Analogous to '?'
120
121(define (opt pat)
122  (bar (list pat identity)))
123
124;; Converts a binary predicate procedure to a binary procedure that
125;; returns its right argument when the predicate is true, and false
126;; otherwise.
127
128(define (try p) (lambda (x y) (let ((res (p x y))) (and res y))))
129
130;; Matches a single character
131
132(define (char c) (tok c (try char=?)))
133 
134;; Matches any of a SRFI-14 set of characters.
135
136(define (set s)
137  (let ((cs (if (char-set? s) s (list->char-set (if (string? s) (string->list s) s)))))
138    (tok cs (try char-set-contains?))))
139
140;; Range of characters. Analogous to character class '[]'
141
142(define (range a b)
143  (if (char<? b a) (range b a)
144      (set (ucs-range->char-set 
145            (char->integer a) (char->integer b)))))
146
147;; Matches a literal string s
148
149(define (lit s)
150  (let ((f (lambda (t) (tok t (try char=?)))))
151    (seq (map f (if (string? s) (string->list s) s)))))
152
153
154;; Takes the resulting streams produced by the application of a
155;; pattern on a stream (or streams) and selects the longest match if
156;; one exists.
157
158(define (longest streams)
159  (match-let (((count stream)
160               (fold (lambda (stream max)
161                       (match (list stream max)
162                              (((eaten food) (max-count max-stream)) 
163                               (if (< max-count (length eaten))
164                                   (list (length eaten) stream) max))
165                              (else (error 'longest "invalid stream" stream))))
166                     (list 0 `(() ()))
167                     streams)))
168             (and (positive? count) stream)))
169
170
171;; This takes a pattern and a string, turns the string into a list of
172;; streams (containing one stream), applies the pattern, and returns
173;; the longest match.
174
175(define (->char-list s)
176  (if (string? s) (string->list s) s))
177
178(define (lex pat s)
179  (let* ((stream (->char-list s))
180         (res    (longest (pat `((() ,stream))))))
181    (and res (list (reverse (first res)) (second res)))))
182
183)
Note: See TracBrowser for help on using the repository browser.