Ticket #281: match-index-valid.diff

File match-index-valid.diff, 11.8 KB (added by sjamaan, 14 years ago)

Export match-index-valid? and make it and some other procedures accept named submatches too

  • irregex-core.scm

    diff --git a/irregex-core.scm b/irregex-core.scm
    index 7cd57d8..086d2d1 100644
    a b  
    199199     (define (%irregex-match-fail m) (vector-ref m (- (vector-length m) 1)))
    200200     (define (%irregex-match-fail-set! m x) (vector-set! m (- (vector-length m) 1) x)))))
    201201
     202(cond-expand
     203  (building-chicken
     204   (define-inline (irregex-match-valid-numeric-index? m n)
     205     (let ((v (internal "##sys#slot" m 1)))
     206       (and (< (* n 4) (internal "##sys#size" v))
     207            (internal "##sys#slot" v (+ 1 (* n 4)))))))
     208  (else
     209   (define (irregex-match-valid-numeric-index? m n)
     210     (and (< (+ 3 (* n 4)) (vector-length m))
     211          (vector-ref m (+ 4 (* n 4)))))))
     212
    202213;; public interface with error checking
    203 (define (irregex-match-start-chunk m n)
    204   (if (not (irregex-match-valid-index? m n))
    205       (error "irregex-match-start-chunk: not a valid index" m n))
    206   (%irregex-match-start-chunk m n))
    207 (define (irregex-match-start-index m n)
    208   (if (not (irregex-match-valid-index? m n))
    209       (error "irregex-match-start-index: not a valid index" m n))
    210   (%irregex-match-start-index m n))
    211 (define (irregex-match-end-chunk m n)
    212   (if (not (irregex-match-valid-index? m n))
    213       (error "irregex-match-end-chunk: not a valid index" m n))
    214   (%irregex-match-end-chunk m n))
    215 (define (irregex-match-end-index m n)
    216   (if (not (irregex-match-valid-index? m n))
    217       (error "irregex-match-end-index: not a valid index" m n))
    218   (%irregex-match-end-index m n))
     214(define (irregex-match-start-chunk m . opt)
     215  (let ((n (irregex-match-numeric-index m opt)))
     216    (if (not (irregex-match-valid-numeric-index? m n))
     217        (error "irregex-match-start-chunk: not a valid index" m n)
     218        (%irregex-match-start-chunk m n))))
     219(define (irregex-match-start-index m . opt)
     220  (let ((n (irregex-match-numeric-index m opt)))
     221    (if (not (irregex-match-valid-numeric-index? m n))
     222        (error "irregex-match-start-index: not a valid index" m n)
     223        (%irregex-match-start-index m n))))
     224(define (irregex-match-end-chunk m . opt)
     225  (let ((n (irregex-match-numeric-index m opt)))
     226    (if (not (irregex-match-valid-numeric-index? m n))
     227        (error "irregex-match-end-chunk: not a valid index" m n)
     228        (%irregex-match-end-chunk m n))))
     229(define (irregex-match-end-index m . opt)
     230  (let ((n (irregex-match-numeric-index m opt)))
     231    (if (not (irregex-match-valid-numeric-index? m n))
     232        (error "irregex-match-end-index: not a valid index" m n)
     233        (%irregex-match-end-index m n))))
    219234
    220235(define (irregex-match-start-chunk-set! m n start)
    221236  (vector-set! m (+ 3 (* n 4)) start))
     
    226241(define (irregex-match-end-index-set! m n end)
    227242  (vector-set! m (+ 6 (* n 4)) end))
    228243
    229 (define (irregex-match-index m opt)
     244(define (irregex-match-numeric-index m opt)
    230245  (if (pair? opt)
    231246      (if (number? (car opt))
    232247          (car opt)
     
    241256                  (else (lp (cdr ls) exists)))))
    242257      0))
    243258
    244 (cond-expand
    245   (building-chicken
    246    (define-inline (%irregex-match-valid-index? m n)
    247      (let ((v (internal "##sys#slot" m 1)))
    248        (and (< (* n 4) (internal "##sys#size" v))
    249             (internal "##sys#slot" v (+ 1 (* n 4)))))))
    250   (else
    251    (define (%irregex-match-valid-index? m n)
    252      (and (< (+ 3 (* n 4)) (vector-length m))
    253           (vector-ref m (+ 4 (* n 4)))))))
     259(define (irregex-match-valid-named-index? m n)
     260  (and (assq n (irregex-match-names m)) #t))
    254261
    255262(define (irregex-match-valid-index? m n)
    256263  (if (not (irregex-match-data? m))
    257264      (error "irregex-match-valid-index?: not match data" m))
    258   (if (not (integer? n))
    259       (error "irregex-match-valid-index?: not an integer" n))
    260   (%irregex-match-valid-index? m n))
     265  (if (integer? n)
     266      (irregex-match-valid-numeric-index? m n)
     267      (irregex-match-valid-named-index? m n)))
    261268
    262269(define (irregex-match-substring m . opt)
    263270  (if (not (irregex-match-data? m))
    264271      (error "irregex-match-substring: not match data" m))
    265272  (let* ((cnk (irregex-match-chunker m))
    266          (n (irregex-match-index m opt)))
    267     (and (%irregex-match-valid-index? m n)
     273         (n (irregex-match-numeric-index m opt)))
     274    (and (irregex-match-valid-numeric-index? m n)
    268275         ((chunker-get-substring cnk)
    269276          (%irregex-match-start-chunk m n)
    270277          (%irregex-match-start-index m n)
     
    275282  (if (not (irregex-match-data? m))
    276283      (error "irregex-match-subchunk: not match data" m))
    277284  (let* ((cnk (irregex-match-chunker m))
    278          (n (irregex-match-index m opt))
     285         (n (irregex-match-numeric-index m opt))
    279286         (get-subchunk (chunker-get-subchunk cnk)))
    280287    (if (not get-subchunk)
    281288        (error "this chunk type does not support match subchunks")
    282289        (and n
    283              (%irregex-match-valid-index? m n)
     290             (irregex-match-valid-numeric-index? m n)
    284291             (get-subchunk
    285292              (%irregex-match-start-chunk m n)
    286293              (%irregex-match-start-index m n)
  • irregex.import.scm

    diff --git a/irregex.import.scm b/irregex.import.scm
    index 63bd132..4f2a81a 100644
    a b  
    5050   irregex-match-string
    5151   irregex-match-subchunk
    5252   irregex-match-substring
     53   irregex-match-valid-index?
    5354   irregex-match/chunked
    5455   irregex-names
    5556   irregex-new-matches
  • irregex.scm

    diff --git a/irregex.scm b/irregex.scm
    index e2fba1f..4b18d34 100644
    a b  
    5656   irregex-match-string
    5757   irregex-match-subchunk
    5858   irregex-match-substring
     59   irregex-match-valid-index?
    5960   irregex-match/chunked
    6061   irregex-names
    6162   irregex-new-matches
  • manual/Unit

    diff --git a/manual/Unit irregex b/manual/Unit irregex
    index 51073ca..387b842 100644
    a b submatch corresponding to this name. If a named submatch occurs 
    154154multiple times in the irregex, it will also occur multiple times in
    155155this list.
    156156
     157===== irregex-match-valid-index?
     158
     159<procedure>(irregex-match-valid-index? <match> <index-or-name>)</procedure><br>
     160
     161Returns {{#t}} iff the {{index-or-name}} named submatch or index is
     162defined in the {{match}} object.
     163
    157164===== irregex-match-substring
    158165===== irregex-match-start-index
    159166===== irregex-match-end-index
    160167
    161168<procedure>(irregex-match-substring <match> [<index-or-name>])</procedure><br>
    162 <procedure>(irregex-match-start-index <match> <index-or-name>)</procedure><br>
    163 <procedure>(irregex-match-end-index <match> <index-or-name>)</procedure>
     169<procedure>(irregex-match-start-index <match> [<index-or-name>])</procedure><br>
     170<procedure>(irregex-match-end-index <match> [<index-or-name>])</procedure>
    164171
    165172Fetches the matched substring (or its start or end offset) at the
    166173given submatch index, or named submatch.  The entire match is index 0,
  • tests/test-irregex.scm

    diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
    index 5fdc034..662e98f 100644
    a b  
    265265  (test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
    266266  (test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
    267267  (test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a")))
     268  (test-assert
     269   (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0))
     270  (test-assert
     271   (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1)))
     272  (test-assert
     273   (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 0))
     274  (test-assert
     275   (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 1))
     276  (test-assert
     277   (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 2))
     278  (test-assert
     279   (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3)))
     280  (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)(b)" "axxxb") 1))
     281  (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)(b)" "axxxb") 1))
    268282  )
    269283
    270284;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    293307
    294308(define (extract name irx str)
    295309  (irregex-match-substring (irregex-match irx str) name))
     310(define (valid? name irx str)
     311  (irregex-match-valid-index? (irregex-match irx str) name))
     312(define (start-idx name irx str)
     313  (irregex-match-start-index (irregex-match irx str) name))
     314(define (end-idx name irx str)
     315  (irregex-match-end-index (irregex-match irx str) name))
    296316
    297317(test-group "named submatches"
    298318  (test-equal "matching submatch is seen and extracted"
    299         "first" (extract 'first `(or (submatch-named first "first")
    300                                      (submatch-named second "second"))
    301                          "first"))
     319              "first" (extract 'first `(or (submatch-named first "first")
     320                                           (submatch-named second "second"))
     321                               "first"))
     322  (test-assert "matching submatch index is valid"
     323               (valid? 'first `(or (submatch-named first "first")
     324                                   (submatch-named second "second"))
     325                       "first"))
    302326  (test-equal "nonmatching submatch is known but returns false"
    303         #f (extract 'second `(or (submatch-named first "first")
    304                                  (submatch-named second "second"))
    305                     "first"))
     327              #f
     328              (extract 'second `(or (submatch-named first "first")
     329                                    (submatch-named second "second"))
     330                       "first"))
     331  (test-assert "nonmatching submatch index is valid"
     332               (valid? 'second `(or (submatch-named first "first")
     333                                    (submatch-named second "second"))
     334                       "first"))
    306335  (test-error "nonexisting submatch is unknown and raises an error"
    307336              (extract 'third `(or (submatch-named first "first")
    308337                                   (submatch-named second "second"))
    309338                       "first"))
     339  (test-assert "nonexisting submatch index is invalid"
     340               (not (valid? 'third `(or (submatch-named first "first")
     341                                         (submatch-named second "second"))
     342                            "first")))
    310343  (test-equal "matching alternative is used"
    311         "first" (extract 'sub `(or (submatch-named sub "first")
    312                                    (submatch-named sub "second"))
    313                          "first"))
     344              "first" (extract 'sub `(or (submatch-named sub "first")
     345                                         (submatch-named sub "second"))
     346                               "first"))
    314347  (test-equal "matching alternative is used (second match)"
    315         "second" (extract 'sub `(or (submatch-named sub "first")
    316                                     (submatch-named sub "second"))
    317                          "second"))
     348              "second" (extract 'sub `(or (submatch-named sub "first")
     349                                          (submatch-named sub "second"))
     350                                "second"))
    318351  (test-equal "last match is used with multiple matches for a name"
    319         "second" (extract 'sub `(seq (submatch-named sub "first")
    320                                      space
    321                                      (submatch-named sub "second"))
    322                          "first second")))
     352              "second" (extract 'sub `(seq (submatch-named sub "first")
     353                                           space
     354                                           (submatch-named sub "second"))
     355                                "first second"))
     356  (test-equal "submatch start"
     357              1
     358              (start-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
     359  (test-error "unknown submatch start"
     360              (start-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb"))
     361  (test-equal "submatch end"
     362              4 (end-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
     363  (test-error "unknown submatch start"
     364              (end-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb")))
    323365
    324366(test-end)
    325367