diff --git a/irregex-core.scm b/irregex-core.scm
index 7cd57d8..086d2d1 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -199,23 +199,38 @@
(define (%irregex-match-fail m) (vector-ref m (- (vector-length m) 1)))
(define (%irregex-match-fail-set! m x) (vector-set! m (- (vector-length m) 1) x)))))
+(cond-expand
+ (building-chicken
+ (define-inline (irregex-match-valid-numeric-index? m n)
+ (let ((v (internal "##sys#slot" m 1)))
+ (and (< (* n 4) (internal "##sys#size" v))
+ (internal "##sys#slot" v (+ 1 (* n 4)))))))
+ (else
+ (define (irregex-match-valid-numeric-index? m n)
+ (and (< (+ 3 (* n 4)) (vector-length m))
+ (vector-ref m (+ 4 (* n 4)))))))
+
;; public interface with error checking
-(define (irregex-match-start-chunk m n)
- (if (not (irregex-match-valid-index? m n))
- (error "irregex-match-start-chunk: not a valid index" m n))
- (%irregex-match-start-chunk m n))
-(define (irregex-match-start-index m n)
- (if (not (irregex-match-valid-index? m n))
- (error "irregex-match-start-index: not a valid index" m n))
- (%irregex-match-start-index m n))
-(define (irregex-match-end-chunk m n)
- (if (not (irregex-match-valid-index? m n))
- (error "irregex-match-end-chunk: not a valid index" m n))
- (%irregex-match-end-chunk m n))
-(define (irregex-match-end-index m n)
- (if (not (irregex-match-valid-index? m n))
- (error "irregex-match-end-index: not a valid index" m n))
- (%irregex-match-end-index m n))
+(define (irregex-match-start-chunk m . opt)
+ (let ((n (irregex-match-numeric-index m opt)))
+ (if (not (irregex-match-valid-numeric-index? m n))
+ (error "irregex-match-start-chunk: not a valid index" m n)
+ (%irregex-match-start-chunk m n))))
+(define (irregex-match-start-index m . opt)
+ (let ((n (irregex-match-numeric-index m opt)))
+ (if (not (irregex-match-valid-numeric-index? m n))
+ (error "irregex-match-start-index: not a valid index" m n)
+ (%irregex-match-start-index m n))))
+(define (irregex-match-end-chunk m . opt)
+ (let ((n (irregex-match-numeric-index m opt)))
+ (if (not (irregex-match-valid-numeric-index? m n))
+ (error "irregex-match-end-chunk: not a valid index" m n)
+ (%irregex-match-end-chunk m n))))
+(define (irregex-match-end-index m . opt)
+ (let ((n (irregex-match-numeric-index m opt)))
+ (if (not (irregex-match-valid-numeric-index? m n))
+ (error "irregex-match-end-index: not a valid index" m n)
+ (%irregex-match-end-index m n))))
(define (irregex-match-start-chunk-set! m n start)
(vector-set! m (+ 3 (* n 4)) start))
@@ -226,7 +241,7 @@
(define (irregex-match-end-index-set! m n end)
(vector-set! m (+ 6 (* n 4)) end))
-(define (irregex-match-index m opt)
+(define (irregex-match-numeric-index m opt)
(if (pair? opt)
(if (number? (car opt))
(car opt)
@@ -241,30 +256,22 @@
(else (lp (cdr ls) exists)))))
0))
-(cond-expand
- (building-chicken
- (define-inline (%irregex-match-valid-index? m n)
- (let ((v (internal "##sys#slot" m 1)))
- (and (< (* n 4) (internal "##sys#size" v))
- (internal "##sys#slot" v (+ 1 (* n 4)))))))
- (else
- (define (%irregex-match-valid-index? m n)
- (and (< (+ 3 (* n 4)) (vector-length m))
- (vector-ref m (+ 4 (* n 4)))))))
+(define (irregex-match-valid-named-index? m n)
+ (and (assq n (irregex-match-names m)) #t))
(define (irregex-match-valid-index? m n)
(if (not (irregex-match-data? m))
(error "irregex-match-valid-index?: not match data" m))
- (if (not (integer? n))
- (error "irregex-match-valid-index?: not an integer" n))
- (%irregex-match-valid-index? m n))
+ (if (integer? n)
+ (irregex-match-valid-numeric-index? m n)
+ (irregex-match-valid-named-index? m n)))
(define (irregex-match-substring m . opt)
(if (not (irregex-match-data? m))
(error "irregex-match-substring: not match data" m))
(let* ((cnk (irregex-match-chunker m))
- (n (irregex-match-index m opt)))
- (and (%irregex-match-valid-index? m n)
+ (n (irregex-match-numeric-index m opt)))
+ (and (irregex-match-valid-numeric-index? m n)
((chunker-get-substring cnk)
(%irregex-match-start-chunk m n)
(%irregex-match-start-index m n)
@@ -275,12 +282,12 @@
(if (not (irregex-match-data? m))
(error "irregex-match-subchunk: not match data" m))
(let* ((cnk (irregex-match-chunker m))
- (n (irregex-match-index m opt))
+ (n (irregex-match-numeric-index m opt))
(get-subchunk (chunker-get-subchunk cnk)))
(if (not get-subchunk)
(error "this chunk type does not support match subchunks")
(and n
- (%irregex-match-valid-index? m n)
+ (irregex-match-valid-numeric-index? m n)
(get-subchunk
(%irregex-match-start-chunk m n)
(%irregex-match-start-index m n)
diff --git a/irregex.import.scm b/irregex.import.scm
index 63bd132..4f2a81a 100644
--- a/irregex.import.scm
+++ b/irregex.import.scm
@@ -50,6 +50,7 @@
irregex-match-string
irregex-match-subchunk
irregex-match-substring
+ irregex-match-valid-index?
irregex-match/chunked
irregex-names
irregex-new-matches
diff --git a/irregex.scm b/irregex.scm
index e2fba1f..4b18d34 100644
--- a/irregex.scm
+++ b/irregex.scm
@@ -56,6 +56,7 @@
irregex-match-string
irregex-match-subchunk
irregex-match-substring
+ irregex-match-valid-index?
irregex-match/chunked
irregex-names
irregex-new-matches
diff --git a/manual/Unit irregex b/manual/Unit irregex
index 51073ca..387b842 100644
--- a/manual/Unit irregex
+++ b/manual/Unit irregex
@@ -154,13 +154,20 @@ submatch corresponding to this name. If a named submatch occurs
multiple times in the irregex, it will also occur multiple times in
this list.
+===== irregex-match-valid-index?
+
+(irregex-match-valid-index? )
+
+Returns {{#t}} iff the {{index-or-name}} named submatch or index is
+defined in the {{match}} object.
+
===== irregex-match-substring
===== irregex-match-start-index
===== irregex-match-end-index
(irregex-match-substring [])
-(irregex-match-start-index )
-(irregex-match-end-index )
+(irregex-match-start-index [])
+(irregex-match-end-index [])
Fetches the matched substring (or its start or end offset) at the
given submatch index, or named submatch. The entire match is index 0,
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 5fdc034..662e98f 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -265,6 +265,20 @@
(test-equal 2 (irregex-match-num-submatches (irregex-search "(a(.*))b" "axxxb")))
(test-equal 2 (irregex-match-num-submatches (irregex-search "a(.*)(b)" "axxxb")))
(test-equal 10 (irregex-match-num-submatches (irregex-search "((((((((((a))))))))))" "a")))
+ (test-assert
+ (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 0))
+ (test-assert
+ (not (irregex-match-valid-index? (irregex-search "a.*b" "axxxb") 1)))
+ (test-assert
+ (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 0))
+ (test-assert
+ (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 1))
+ (test-assert
+ (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 2))
+ (test-assert
+ (not (irregex-match-valid-index? (irregex-search "a(.*)(b)" "axxxb") 3)))
+ (test-equal 1 (irregex-match-start-index (irregex-search "a(.*)(b)" "axxxb") 1))
+ (test-equal 4 (irregex-match-end-index (irregex-search "a(.*)(b)" "axxxb") 1))
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -293,33 +307,61 @@
(define (extract name irx str)
(irregex-match-substring (irregex-match irx str) name))
+(define (valid? name irx str)
+ (irregex-match-valid-index? (irregex-match irx str) name))
+(define (start-idx name irx str)
+ (irregex-match-start-index (irregex-match irx str) name))
+(define (end-idx name irx str)
+ (irregex-match-end-index (irregex-match irx str) name))
(test-group "named submatches"
(test-equal "matching submatch is seen and extracted"
- "first" (extract 'first `(or (submatch-named first "first")
- (submatch-named second "second"))
- "first"))
+ "first" (extract 'first `(or (submatch-named first "first")
+ (submatch-named second "second"))
+ "first"))
+ (test-assert "matching submatch index is valid"
+ (valid? 'first `(or (submatch-named first "first")
+ (submatch-named second "second"))
+ "first"))
(test-equal "nonmatching submatch is known but returns false"
- #f (extract 'second `(or (submatch-named first "first")
- (submatch-named second "second"))
- "first"))
+ #f
+ (extract 'second `(or (submatch-named first "first")
+ (submatch-named second "second"))
+ "first"))
+ (test-assert "nonmatching submatch index is valid"
+ (valid? 'second `(or (submatch-named first "first")
+ (submatch-named second "second"))
+ "first"))
(test-error "nonexisting submatch is unknown and raises an error"
(extract 'third `(or (submatch-named first "first")
(submatch-named second "second"))
"first"))
+ (test-assert "nonexisting submatch index is invalid"
+ (not (valid? 'third `(or (submatch-named first "first")
+ (submatch-named second "second"))
+ "first")))
(test-equal "matching alternative is used"
- "first" (extract 'sub `(or (submatch-named sub "first")
- (submatch-named sub "second"))
- "first"))
+ "first" (extract 'sub `(or (submatch-named sub "first")
+ (submatch-named sub "second"))
+ "first"))
(test-equal "matching alternative is used (second match)"
- "second" (extract 'sub `(or (submatch-named sub "first")
- (submatch-named sub "second"))
- "second"))
+ "second" (extract 'sub `(or (submatch-named sub "first")
+ (submatch-named sub "second"))
+ "second"))
(test-equal "last match is used with multiple matches for a name"
- "second" (extract 'sub `(seq (submatch-named sub "first")
- space
- (submatch-named sub "second"))
- "first second")))
+ "second" (extract 'sub `(seq (submatch-named sub "first")
+ space
+ (submatch-named sub "second"))
+ "first second"))
+ (test-equal "submatch start"
+ 1
+ (start-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
+ (test-error "unknown submatch start"
+ (start-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb"))
+ (test-equal "submatch end"
+ 4 (end-idx 'xs `(seq "a" (submatch-named xs (+ "x")) "b") "axxxb"))
+ (test-error "unknown submatch start"
+ (end-idx 'xs `(seq "a" (submatch-named ys (+ "x")) "b") "axxxb")))
(test-end)