source: project/release/3/lexgen/trunk/lexgen.scm @ 13284

Last change on this file since 13284 was 13284, checked in by Ivan Raikov, 11 years ago

Procedure longest rewritten to use fold.

File size: 5.4 KB
Line 
1;;
2;;  Lexer combinator library.
3;;
4;;  Based on the SML lexer generator by Thant Tessman.
5;;
6;;  Ported to Chicken Scheme by Ivan Raikov.
7;;  Copyright 2009 Ivan Raikov.
8;;
9;;
10;;  Redistribution and use in source and binary forms, with or without
11;;  modification, are permitted provided that the following conditions
12;;  are met:
13;;
14;;  - Redistributions of source code must retain the above copyright
15;;  notice, this list of conditions and the following disclaimer.
16;;
17;;  - Redistributions in binary form must reproduce the above
18;;  copyright notice, this list of conditions and the following
19;;  disclaimer in the documentation and/or other materials provided
20;;  with the distribution.
21;;
22;;  - Neither name of the copyright holders nor the names of its
23;;  contributors may be used to endorse or promote products derived
24;;  from this software without specific prior written permission.
25;;
26;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
27;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
28;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
31;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
34;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
35;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38;;  POSSIBILITY OF SUCH DAMAGE.
39;;
40
41(define-extension lexgen)
42
43(declare
44 (not usual-integrations)
45 (fixnum)
46 (inline)
47 (lambda-lift)
48 (export pred seq star bar 
49         pos opt char set
50         set range lit longest 
51         lex))
52
53
54(require-extension syntax-case)
55(require-extension matchable)
56(require-extension srfi-1)
57(require-extension srfi-14)
58
59;;
60;;   This is a lexer generator comprised in its core of only four
61;;   small functions. The programmer assembles these functions into
62;;   regular expression pattern-matching functions.
63;;
64;;   The idea is that a pattern matcher function takes a list of
65;;   streams, and returns a new list of streams advanced by every
66;;   combination allowed by the pattern matcher function. In this
67;;   implementation, a stream is simply a tuple containing a list of
68;;   characters consumed by the pattern matcher, and a list of
69;;   characters not yet consumed.
70;;
71;;   Note that the number of streams returned by the function
72;;   typically won't match the number of streams passed in. If the
73;;   pattern doesn't match at all, the empty list is returned.
74;;
75
76;; The first function 'pred' builds a pattern matcher function that
77;; applies a predicate to a given token and an input character.
78
79(define (pred t p)
80  (let ((f (lambda (s) 
81             (match s ((s (h . r)) (if (p t h) (list (cons h s) r) #f))
82                    (else #f)))))
83    (lambda (streams)
84      (filter-map f streams))))
85   
86
87;; This matches a sequence of patterns.
88
89(define (seq pats)
90  (lambda (streams)
91    (fold (lambda (f s) (f s)) streams pats)))
92
93
94;; This matches any of a list of patterns. It's analogous to a series
95;; of patterns separated by the '|' in traditional regular
96;; expressions.
97
98(define (bar pats)
99  (lambda (streams)
100    (concatenate (map (lambda (f) (f streams)) pats))))
101
102;; Kleene closure. Analogous to '*'
103
104(define (star pat)
105  (define (f streams)
106    (let ((res (pat streams)))
107      (if (null? res) (list)
108          (cons res (f res)))))
109  (lambda (streams)
110    (concatenate (cons streams (f streams)))))
111
112
113;; The rest of these are built from the previous four and are provided
114;; for convenience.
115
116;; Positive closure. Analogous to '+'
117
118(define (pos pat)
119  (seq (list pat (star pat))))
120
121;; Optional pattern. Analogous to '?'
122
123(define (opt pat)
124  (bar (list pat identity)))
125
126;; Matches a single character
127
128(define (char c)
129  (pred c char=?))
130 
131;; Matches any of a SRFI-14 set of characters.
132
133(define (set s)
134  (let ((cs (if (char-set? s) s (list->char-set (if (string? s) (string->list s) s)))))
135    (pred cs char-set-contains?)))
136
137;; Range of characters. Analogous to character class '[]'
138
139(define (range a b)
140  (if (char<? b a) (range b a)
141      (set (ucs-range->char-set 
142            (char->integer a) (char->integer b)))))
143
144;; Matches a literal string s
145
146(define (lit s)
147  (seq (map (lambda (t) (pred t char=?)) (if (string? s) (string->list s) s))))
148
149
150;; Takes the resulting streams produced by the application of a
151;; pattern on a stream (or streams) and selects the longest match if
152;; one exists.
153
154(define (longest streams)
155  (match-let (((count stream)
156               (fold (lambda (stream max)
157                       (match (list stream max)
158                              (((eaten food) (max-count max-stream)) 
159                               (if (< max-count (length eaten))
160                                   (list (length eaten) stream) max))
161                              (else (error 'longest "invalid stream" stream))))
162                     (list 0 `(() ()))
163                     streams)))
164             (and (positive? count) stream)))
165
166
167;; This takes a pattern and a string, turns the string into a list of
168;; streams (containing one stream), applies the pattern, and returns
169;; the longest match.
170
171(define (->char-list s)
172  (if (string? s) (string->list s) s))
173
174(define (lex pat s)
175  (let* ((stream (->char-list s))
176         (res    (longest (pat `((() ,stream))))))
177    (and res (list (reverse (first res)) (second res)))))
Note: See TracBrowser for help on using the repository browser.