Ignore:
Timestamp:
07/03/10 07:04:54 (10 years ago)
Author:
Ivan Raikov
Message:

yet another improved approach to parametric lexgen

Location:
release/4/lexgen/branches/lexgen-typeclass
Files:
1 edited
1 copied

Legend:

Unmodified
Added
Removed
  • release/4/lexgen/branches/lexgen-typeclass/lexgen.scm

    r18280 r18684  
    2323(module lexgen
    2424
    25   ( tok seq star star* bar redo
    26     try pass pos opt char
    27     set range lst lit
     25  ( seq star star* bar redo
     26    try pass pos opt lst
    2827    bind drop rebind
    29     longest lex stream-unfold )
     28
     29    longest lex
     30
     31    make-<Input> <Input>
     32    (Input->Token tok) <Token>
     33    (Token->CharLex char range set lit) <CharLex>
     34    )
    3035
    3136
     
    3338  (require-library srfi-1)
    3439  (import (only srfi-1 first second filter-map fold concatenate every lset<= ))
     40  (require-extension typeclass)
    3541
    3642;;
     
    7379;;
    7480
    75 ;; input stream comparison procedures
     81
     82;; input stream type class
     83
     84(define-class <Input>
     85  empty?
     86  head
     87  tail)
     88
     89
     90;; consumed stream comparison procedures
    7691
    7792(define (lst<= pred xx yy)
     
    93108(define (safe-car x) (and (pair? x) (car x)))
    94109
    95 ;; 'tok' builds a pattern matcher function that applies procedure p to
    96 ;; a given token and an input character. If the procedure returns a
    97 ;; true value, that value is prepended to the list of consumed
    98 ;; elements, and the input character is removed from the list of input
    99 ;; elements.
    100 
    101 (define (tok t p )
    102    (let ((f (lambda (s)
    103               (print "tok: s = " s)
    104               (let ((l (length s)))
    105                 (cond ((fx= l 2)
    106                        (let ((c (car s))
    107                              (u (cadr s)))
    108                          (and (pair? u)
    109                               (let ((ans (p t (car u))))
    110                                 (and ans (list (cons ans c) (cdr u)))))))
    111 
    112                        ((fx= l 3)
    113                         (let ((c (car s))
    114                               (u (cadr s))
    115                               (succ (caddr s)))
    116                           (print "u = " u)
    117                           (and (pair? u)
    118                                (let ((ans (p t (car u))))
    119                                  (and ans
    120                                       (let* ((u1   (cdr u))
    121                                              (dd   (print "u1 = " u1))
    122                                              (rest (or (and (pair? u1) u1)
    123                                                        (succ u1))))
    124                                         (cons (cons ans c) rest)))
    125                                  ))))
    126 
    127                      (else #f))))))
    128      (lambda (cont streams)
    129        (let ((streams1 (filter-map f streams)))
    130          (cont streams1)))))
    131    
    132110
    133111;; This matches a sequence of patterns.
     
    192170   
    193171
    194 ;; The rest of these are built from the previous five and are provided
    195 ;; for convenience.
    196 
    197172;; this parser always succeeds
    198173(define (pass cont s) (cont s))
     
    212187(define (try p) (lambda (x y) (let ((res (p x y))) (and res y))))
    213188
    214 ;; Matches a single character
    215 
    216 (define (char c) (tok c (try char=?)))
    217  
    218 ;; Matches any of a SRFI-14 set of characters.
    219 
    220 (define (set s)
    221   (let ((cs (if (char-set? s) s (list->char-set (if (string? s) (string->list s) s)))))
    222     (tok cs (try char-set-contains?))))
    223 
    224 ;; Range of characters. Analogous to character class '[]'
    225 
    226 (define (range a b)
    227   (if (char<? b a) (range b a)
    228       (set (ucs-range->char-set (char->integer a) (+ 1 (char->integer b))))))
    229189
    230190;; Matches a consecutive list of patterns
     
    236196            (else (loop (cdr ps) (seq (car ps) p1)))))))
    237197 
    238 ;; Matches a literal string s
    239 
    240 (define (lit s)
    241   (let ((f (lambda (t) (tok t (try char=?)))))
    242     (lst (map f (if (string? s) (string->list s) s)))))
    243198
    244199;; datatype used by bind and drop
     
    368323    (and res (list (reverse (first res)) (second res)))))
    369324
    370 
    371 
    372 ;; A helper procedure to transform streams from one format to
    373 ;; another. Procedure F must be a procedure of two arguments, a state
    374 ;; and a list of unconsumed characters. Procedure G is applied to an
    375 ;; unconsumed element, and is expected to return the original element
    376 ;; representation, before F was applied to the unconsumed stream.
    377 
    378 (define (stream-unfold init f g)
    379   (lambda (strm)
    380     (let ((l (length strm)))
    381       (cond ((fx= l 2)
    382              (let ((c (car strm))
    383                    (u (cadr strm)))
    384                (and (pair? u)
    385                     (let ((h (car u))
    386                           (r (cdr u)))
    387                       (letrec ((succ0 (lambda (r)
    388                                         (if (null? r) strm
    389                                             (let ((h1 (g (car r)))
    390                                                   (r1 (f (car r) (cdr r))))
    391                                               (list (cons h1 r1) succ0))))))
    392                         (list c (cons h (f init r)) succ0))))))
    393            
    394             ((fx= l 3)
    395              (let ((c (car strm))
    396                    (u (cadr strm))
    397                    (succ (caddr strm)))
    398                (and (pair? u)
    399                     (let ((h (car u))
    400                           (r (cdr u)))
    401                       (letrec ((succ0 (lambda (r)
    402                                         (if (null? r) strm
    403                                             (let ((h1 (g (car r)))
    404                                                   (r1 (f (car r) (succ (cdr r)))))
    405                                               (list (cons h1 r1) succ0))))))
    406                         (list c (cons h (f init r)) succ0))))))
    407            
    408            (else #f)))))
    409  
     325;; The following procedures are specific to the input class type
     326
     327(define-class <Token> (<Input> input)  tok)
     328
     329 
     330;; 'tok' builds a pattern matcher function that applies procedure p to
     331;; a given token and an input character. If the procedure returns a
     332;; true value, that value is prepended to the list of consumed
     333;; elements, and the input character is removed from the list of input
     334;; elements.
     335
     336(define=> (tok <Input>)
     337  (lambda (t p )
     338    (let ((f (lambda (s)
     339               (let ((c (car s))
     340                     (u (cadr s)))
     341                 (and (not (empty? u))
     342                      (let ((ans (p t (head u))))
     343                        (and ans (list (cons ans c) (tail u)))))
     344                 ))))
     345      (lambda (cont streams)
     346        (let ((streams1 (filter-map f streams)))
     347          (cont streams1))))))
     348 
     349(define (Input->Token I) (make-<Token> I (tok I)))
     350
     351(define-class <CharLex> (<Token> T)  char set range lit)
     352
     353
     354;; Matches a single character
     355
     356(define=> (char <Token>)
     357  (lambda (c) (tok c (try char=?))))
     358 
     359;; Matches any of a SRFI-14 set of characters.
     360
     361(define=> (set <Token>)
     362  (lambda (s)
     363    (let ((cs (if (char-set? s) s (list->char-set (if (string? s) (string->list s) s)))))
     364      (tok cs (try char-set-contains?)))))
     365
     366;; Range of characters. Analogous to character class '[]'
     367
     368(define=> (range <Token>)
     369  (letrec ((range0
     370            (lambda (a b)
     371              (if (char<? b a) (range0 b a)
     372                  (tok (ucs-range->char-set (char->integer a) (+ 1 (char->integer b)))
     373                       (try char-set-contains?))))))
     374    range0))
     375
     376;; Matches a literal string s
     377
     378(define=> (lit <Token>)
     379  (lambda (s)
     380    (let ((f (lambda (t) (tok t (try char=?)))))
     381      (lst (map f (if (string? s) (string->list s) s))))))
     382
     383
     384(define (Token->CharLex T)
     385  (make-<CharLex> T
     386                  (char T)
     387                  (set T)
     388                  (range T)
     389                  (lit T)
     390                  ))
    410391
    411392
Note: See TracChangeset for help on using the changeset viewer.