Changeset 6175 in project


Ignore:
Timestamp:
09/27/07 20:12:03 (12 years ago)
Author:
Kon Lovett
Message:

Changes for PCRE 7.4, use of compiled regexp in posix & utils units.

Location:
chicken/trunk
Files:
4 added
5 deleted
51 edited

Legend:

Unmodified
Added
Removed
  • chicken/trunk/Makefile.bsd

    r6083 r6175  
    6565
    6666chicken-config.h: chicken-defaults.h
    67         echo "#define C_STACK_GROWS_DOWNWARD 1" >$@
    68         echo "#define HAVE_ALLOCA 1" >>$@
    6967        echo "#define HAVE_DIRENT_H 1" >>$@
    7068        echo "#define HAVE_DLFCN_H 1" >>$@
     69        echo "#define HAVE_INTTYPES_H 1" >>$@
     70        echo "#define HAVE_LIMITS_H 1" >>$@
     71        echo "#define HAVE_LONG_LONG 1" >>$@
     72        echo "#define HAVE_MEMMOVE 1" >>$@
     73        echo "#define HAVE_MEMORY_H 1" >>$@
     74        echo "#define HAVE_STDINT_H 1" >>$@
     75        echo "#define HAVE_STDLIB_H 1" >>$@
     76        echo "#define HAVE_STRERROR 1" >>$@
     77        echo "#define HAVE_STRINGS_H 1" >>$@
     78        echo "#define HAVE_STRING_H 1" >>$@
     79        echo "#define HAVE_STRTOLL 1" >>$@
     80        echo "#define HAVE_STRTOQ 1" >>$@
     81        echo "#define HAVE_SYS_STAT_H 1" >>$@
     82        echo "#define HAVE_SYS_TYPES_H 1" >>$@
     83        echo "#define HAVE_UNISTD_H 1" >>$@
     84        echo "#define HAVE_UNSIGNED_LONG_LONG 1" >>$@
     85        echo "#define STDC_HEADERS 1" >>$@
     86        echo "#define HAVE_ALLOCA 1" >>$@
     87        echo "#define HAVE_ALLOCA_H 1" >>$@
    7188        echo "#define HAVE_GRP_H 1" >>$@
    72         echo "#define HAVE_ALLOCA_H 1" >>$@
    73         echo "#define HAVE_STRERROR 1" >>$@
    7489        echo "#define HAVE_ERRNO_H 1" >>$@
     90        echo "#define HAVE_SYSEXITS_H 1" >>$@
     91        echo "#define C_STACK_GROWS_DOWNWARD 1" >>$@
    7592ifdef GCHOOKS
    7693        echo "#define C_GC_HOOKS" >>$@
     
    88105        echo "#define C_HACKED_APPLY" >>$@
    89106endif
    90         echo "#define HAVE_LIMITS_H 1" >>$@
    91         echo "#define HAVE_SYSEXITS_H 1" >>$@
    92         echo "#define HAVE_MEMMOVE 1" >>$@
    93107        cat chicken-defaults.h >>$@
    94108
  • chicken/trunk/Makefile.cross-linux-mingw

    r6083 r6175  
    9595
    9696chicken-config.h: chicken-defaults.h
    97         echo "#define C_STACK_GROWS_DOWNWARD 1" >$@
     97        echo "#define HAVE_DIRENT_H 1" >>$@
     98        echo "#define HAVE_DLFCN_H 1" >>$@
     99        echo "#define HAVE_INTTYPES_H 1" >>$@
     100        echo "#define HAVE_LIMITS_H 1" >>$@
     101        echo "#define HAVE_LONG_LONG 1" >>$@
     102        echo "#define HAVE_MEMMOVE 1" >>$@
     103        echo "#define HAVE_MEMORY_H 1" >>$@
     104        echo "#define HAVE_STDINT_H 1" >>$@
     105        echo "#define HAVE_STDLIB_H 1" >>$@
     106        echo "#define HAVE_STRERROR 1" >>$@
     107        echo "#define HAVE_STRINGS_H 1" >>$@
     108        echo "#define HAVE_STRING_H 1" >>$@
     109        echo "#define HAVE_STRTOLL 1" >>$@
     110        echo "#define HAVE_SYS_STAT_H 1" >>$@
     111        echo "#define HAVE_SYS_TYPES_H 1" >>$@
     112        echo "#define HAVE_UNISTD_H 1" >>$@
     113        echo "#define HAVE_UNSIGNED_LONG_LONG 1" >>$@
     114        echo "#define HAVE_WINDOWS_H 1" >>$@
     115        echo "#define HAVE__STRTOI64 1" >>$@
     116        echo "#define STDC_HEADERS 1" >>$@
     117        echo "#define HAVE_ALLOCA_H 1" >>$@
    98118        echo "#define HAVE_DIRECT_H 1" >>$@
    99         echo "#define HAVE_ALLOCA_H 1" >>$@
     119        echo "#define HAVE_ERRNO_H 1" >>$@
    100120        echo "#define HAVE_GCVT 1" >>$@
    101         echo "#define HAVE_STDINT_H 1" >>$@
    102         echo "#define HAVE_WINDOWS_H 1" >>$@
    103121        echo "#define HAVE_LOADLIBRARY 1" >>$@
    104122        echo "#define HAVE_GETPROCADDRESS 1" >>$@
    105123        echo "#define HAVE_WINSOCK2_H 1" >>$@
    106124        echo "#define HAVE_WS2TCPIP_H 1" >>$@
     125        echo "#define C_STACK_GROWS_DOWNWARD 1" >>$@
    107126ifdef GCHOOKS
    108127        echo "#define C_GC_HOOKS" >>$@
     
    118137endif
    119138        echo "#define C_HACKED_APPLY" >>$@
    120         echo "#define HAVE_MEMMOVE 1" >>$@
    121139        cat chicken-defaults.h >>$@
    122140
  • chicken/trunk/Makefile.linux

    r6083 r6175  
    6969
    7070chicken-config.h: chicken-defaults.h
    71         echo "#define C_STACK_GROWS_DOWNWARD 1" >$@
    72         echo "#define HAVE_ALLOCA 1" >>$@
    7371        echo "#define HAVE_DIRENT_H 1" >>$@
    7472        echo "#define HAVE_DLFCN_H 1" >>$@
     73        echo "#define HAVE_INTTYPES_H 1" >>$@
     74        echo "#define HAVE_LIMITS_H 1" >>$@
     75        echo "#define HAVE_LONG_LONG 1" >>$@
     76        echo "#define HAVE_MEMMOVE 1" >>$@
     77        echo "#define HAVE_MEMORY_H 1" >>$@
     78        echo "#define HAVE_STDINT_H 1" >>$@
     79        echo "#define HAVE_STDLIB_H 1" >>$@
     80        echo "#define HAVE_STRERROR 1" >>$@
     81        echo "#define HAVE_STRINGS_H 1" >>$@
     82        echo "#define HAVE_STRING_H 1" >>$@
     83        echo "#define HAVE_STRTOLL 1" >>$@
     84        echo "#define HAVE_STRTOQ 1" >>$@
     85        echo "#define HAVE_SYS_STAT_H 1" >>$@
     86        echo "#define HAVE_SYS_TYPES_H 1" >>$@
     87        echo "#define HAVE_UNISTD_H 1" >>$@
     88        echo "#define HAVE_UNSIGNED_LONG_LONG 1" >>$@
     89        echo "#define STDC_HEADERS 1" >>$@
     90        echo "#define HAVE_ALLOCA 1" >>$@
     91        echo "#define HAVE_ALLOCA_H 1" >>$@
    7592        echo "#define HAVE_GRP_H 1" >>$@
    76         echo "#define HAVE_ALLOCA_H 1" >>$@
    77         echo "#define HAVE_STRERROR 1" >>$@
    7893        echo "#define HAVE_ERRNO_H 1" >>$@
     94        echo "#define HAVE_GCVT 1" >>$@
     95        echo "#define HAVE_SYSEXITS_H 1" >>$@
     96        echo "#define HAVE_MEMMOVE 1" >>$@
     97        echo "#define C_STACK_GROWS_DOWNWARD 1" >>$@
    7998ifdef GCHOOKS
    8099        echo "#define C_GC_HOOKS" >>$@
     
    92111        echo "#define C_HACKED_APPLY" >>$@
    93112endif
    94         echo "#define HAVE_GCVT 1" >>$@
    95         echo "#define HAVE_LIMITS_H 1" >>$@
    96         echo "#define HAVE_SYSEXITS_H 1" >>$@
    97         echo "#define HAVE_MEMMOVE 1" >>$@
    98113        cat chicken-defaults.h >>$@
    99114
  • chicken/trunk/Makefile.macosx

    r6083 r6175  
    7171
    7272chicken-config.h: chicken-defaults.h
    73         echo "#define C_STACK_GROWS_DOWNWARD 1" >$@
    74         echo "#define HAVE_ALLOCA 1" >>$@
    7573        echo "#define HAVE_DIRENT_H 1" >>$@
    7674        echo "#define HAVE_DLFCN_H 1" >>$@
     75        echo "#define HAVE_INTTYPES_H 1" >>$@
     76        echo "#define HAVE_LIMITS_H 1" >>$@
     77        echo "#define HAVE_LONG_LONG 1" >>$@
     78        echo "#define HAVE_MEMMOVE 1" >>$@
     79        echo "#define HAVE_MEMORY_H 1" >>$@
     80        echo "#define HAVE_STDINT_H 1" >>$@
     81        echo "#define HAVE_STDLIB_H 1" >>$@
     82        echo "#define HAVE_STRERROR 1" >>$@
     83        echo "#define HAVE_STRINGS_H 1" >>$@
     84        echo "#define HAVE_STRING_H 1" >>$@
     85        echo "#define HAVE_STRTOLL 1" >>$@
     86        echo "#define HAVE_STRTOQ 1" >>$@
     87        echo "#define HAVE_SYS_STAT_H 1" >>$@
     88        echo "#define HAVE_SYS_TYPES_H 1" >>$@
     89        echo "#define HAVE_UNISTD_H 1" >>$@
     90        echo "#define HAVE_UNSIGNED_LONG_LONG 1" >>$@
     91        echo "#define STDC_HEADERS 1" >>$@
     92        echo "#define HAVE_ALLOCA 1" >>$@
     93        echo "#define HAVE_ALLOCA_H 1" >>$@
    7794        echo "#define HAVE_GRP_H 1" >>$@
    78         echo "#define HAVE_ALLOCA_H 1" >>$@
    79         echo "#define HAVE_STRERROR 1" >>$@
    8095        echo "#define HAVE_CRT_EXTERNS_H 1" >>$@
    8196        echo "#define HAVE_ERRNO_H 1" >>$@
     97        echo "#define HAVE_SYSEXITS_H 1" >>$@
     98        echo "#define C_STACK_GROWS_DOWNWARD 1" >>$@
    8299ifdef GCHOOKS
    83100        echo "#define C_GC_HOOKS" >>$@
     
    93110endif
    94111        echo "#define C_HACKED_APPLY" >>$@
    95         echo "#define HAVE_LIMITS_H 1" >>$@
    96         echo "#define HAVE_SYSEXITS_H 1" >>$@
    97         echo "#define HAVE_MEMMOVE 1" >>$@
    98112        cat chicken-defaults.h >>$@
    99113
  • chicken/trunk/Makefile.mingw

    r6112 r6175  
    9191
    9292chicken-config.h: chicken-defaults.h
    93         echo "#define C_STACK_GROWS_DOWNWARD 1" >$@
     93        echo "#define HAVE_DIRENT_H 1" >>$@
     94        echo "#define HAVE_DLFCN_H 1" >>$@
     95        echo "#define HAVE_INTTYPES_H 1" >>$@
     96        echo "#define HAVE_LIMITS_H 1" >>$@
     97        echo "#define HAVE_LONG_LONG 1" >>$@
     98        echo "#define HAVE_MEMMOVE 1" >>$@
     99        echo "#define HAVE_MEMORY_H 1" >>$@
     100        echo "#define HAVE_STDINT_H 1" >>$@
     101        echo "#define HAVE_STDLIB_H 1" >>$@
     102        echo "#define HAVE_STRERROR 1" >>$@
     103        echo "#define HAVE_STRINGS_H 1" >>$@
     104        echo "#define HAVE_STRING_H 1" >>$@
     105        echo "#define HAVE_STRTOLL 1" >>$@
     106        echo "#define HAVE_SYS_STAT_H 1" >>$@
     107        echo "#define HAVE_SYS_TYPES_H 1" >>$@
     108        echo "#define HAVE_UNISTD_H 1" >>$@
     109        echo "#define HAVE_UNSIGNED_LONG_LONG 1" >>$@
     110        echo "#define HAVE_WINDOWS_H 1" >>$@
     111        echo "#define HAVE__STRTOI64 1" >>$@
     112        echo "#define STDC_HEADERS 1" >>$@
     113        echo "#define HAVE_ALLOCA_H 1" >>$@
    94114        echo "#define HAVE_DIRECT_H 1" >>$@
    95         echo "#define HAVE_ALLOCA_H 1" >>$@
     115        echo "#define HAVE_ERRNO_H 1" >>$@
    96116        echo "#define HAVE_GCVT 1" >>$@
    97         echo "#define HAVE_STDINT_H 1" >>$@
    98         echo "#define HAVE_WINDOWS_H 1" >>$@
    99117        echo "#define HAVE_LOADLIBRARY 1" >>$@
    100118        echo "#define HAVE_GETPROCADDRESS 1" >>$@
    101119        echo "#define HAVE_WINSOCK2_H 1" >>$@
    102120        echo "#define HAVE_WS2TCPIP_H 1" >>$@
     121        echo "#define C_STACK_GROWS_DOWNWARD 1" >>$@
    103122ifdef GCHOOKS
    104123        echo "#define C_GC_HOOKS" >>$@
     
    114133endif
    115134        echo "#define C_HACKED_APPLY" >>$@
    116         echo "#define HAVE_MEMMOVE 1" >>$@
    117135        cat chicken-defaults.h >>$@
    118136
  • chicken/trunk/Makefile.solaris

    r6083 r6175  
    6565
    6666chicken-config.h: chicken-defaults.h
    67         echo "#define C_STACK_GROWS_DOWNWARD 1" >$@
    68         echo "#define HAVE_ALLOCA 1" >>$@
    6967        echo "#define HAVE_DIRENT_H 1" >>$@
    7068        echo "#define HAVE_DLFCN_H 1" >>$@
     69        echo "#define HAVE_INTTYPES_H 1" >>$@
     70        echo "#define HAVE_LIMITS_H 1" >>$@
     71        echo "#define HAVE_LONG_LONG 1" >>$@
     72        echo "#define HAVE_MEMMOVE 1" >>$@
     73        echo "#define HAVE_MEMORY_H 1" >>$@
     74        echo "#define HAVE_STDINT_H 1" >>$@
     75        echo "#define HAVE_STDLIB_H 1" >>$@
     76        echo "#define HAVE_STRERROR 1" >>$@
     77        echo "#define HAVE_STRINGS_H 1" >>$@
     78        echo "#define HAVE_STRING_H 1" >>$@
     79        echo "#define HAVE_STRTOLL 1" >>$@
     80        echo "#define HAVE_SYS_STAT_H 1" >>$@
     81        echo "#define HAVE_SYS_TYPES_H 1" >>$@
     82        echo "#define HAVE_UNISTD_H 1" >>$@
     83        echo "#define HAVE_UNSIGNED_LONG_LONG 1" >>$@
     84        echo "#define STDC_HEADERS 1" >>$@
     85        echo "#define HAVE_ALLOCA_H 1" >>$@
     86        echo "#define HAVE_ALLOCA 1" >>$@
    7187        echo "#define HAVE_GRP_H 1" >>$@
    72         echo "#define HAVE_ALLOCA_H 1" >>$@
    73         echo "#define HAVE_STRERROR 1" >>$@
    7488        echo "#define HAVE_ERRNO_H 1" >>$@
    75 ifeq ($(ARCH),x86-64)
    76         echo "#define HAVE_STDINT_H 1" >>$@
    77 endif
     89        echo "#define HAVE_GCVT 1" >>$@
     90        echo "#define HAVE_SYSEXITS_H 1" >>$@
     91        echo "#define C_STACK_GROWS_DOWNWARD 1" >>$@
    7892ifdef GCHOOKS
    7993        echo "#define C_GC_HOOKS" >>$@
     
    91105        echo "#define C_HACKED_APPLY" >>$@
    92106endif
    93         echo "#define HAVE_GCVT 1" >>$@
    94         echo "#define HAVE_LIMITS_H 1" >>$@
    95         echo "#define HAVE_SYSEXITS_H 1" >>$@
    96         echo "#define HAVE_MEMMOVE 1" >>$@
    97107        cat chicken-defaults.h >>$@
    98108
  • chicken/trunk/NEWS

    r5853 r6175  
     12.712
     2
     3- regex unit:
     4  Uses PCRE 7.4
     5
     6- utils unit:
     7  Uses compiled regular expressions
     8
     9- unit posix:
     10  Uses compiled regular expressions
     11
    1122.701
    213
  • chicken/trunk/README

    r6105 r6175  
    33  (c)2000-2007 Felix L. Winkelmann
    44
    5   Version 2.711
     5  Version 2.712
    66
    77
  • chicken/trunk/buildversion

    r6105 r6175  
    1 2.711
     12.712
  • chicken/trunk/chicken-ffi-macros.scm

    r5639 r6175  
    88;
    99;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
    10 ;     disclaimer. 
     10;     disclaimer.
    1111;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
    12 ;     disclaimer in the documentation and/or other materials provided with the distribution. 
     12;     disclaimer in the documentation and/or other materials provided with the distribution.
    1313;   Neither the name of the author nor the names of its contributors may be used to endorse or promote
    14 ;     products derived from this software without specific prior written permission. 
     14;     products derived from this software without specific prior written permission.
    1515;
    1616; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
     
    2424; POSSIBILITY OF SUCH DAMAGE.
    2525;
    26 ; Send bugs, suggestions and ideas to: 
     26; Send bugs, suggestions and ideas to:
    2727;
    2828; felix@call-with-current-continuation.org
     
    6262
    6363(define-macro (foreign-primitive . xs)
    64   (##sys#check-syntax 'foreign-primitive xs '#(_ 1)) 
     64  (##sys#check-syntax 'foreign-primitive xs '#(_ 1))
    6565  `(##core#foreign-primitive ,@(map (lambda (x) (list 'quote x)) xs)) )
    6666
     
    122122       ,@(if (pair? init)
    123123             `((##core#set! ,var ,(car init)))
    124              '() ) ) ) ) 
     124             '() ) ) ) )
    125125
    126126(define-macro (let-location bindings . body)
     
    153153  (let ([tmp (gensym 'code_)])
    154154    `(begin
    155        (declare 
     155       (declare
    156156         (foreign-declare
    157157          ,(sprintf "static C_word ~A() { ~A\n; return C_SCHEME_UNDEFINED; }\n" tmp (string-intersperse strs "\n")) ) )
     
    169169(define-macro (define-foreign-record name . slots)
    170170  (let ([fname (if (pair? name) (->string (cadr name)) (sprintf "struct ~A" name))]
    171         [tname (if (pair? name) (car name) name)] 
    172         [var (gensym)] 
     171        [tname (if (pair? name) (car name) name)]
     172        [var (gensym)]
    173173        [renamer identity]
    174174        [ctor #f]
     
    198198    `(begin
    199199       ,@(if (pair? name)
    200              '() 
     200             '()
    201201             `((declare
    202202                 (foreign-declare
    203203                  ,(string-intersperse
    204204                    (append
    205                      (cons 
     205                     (cons
    206206                      (string-append "struct " (->string name) " { ")
    207207                      (map (lambda (slot)
    208208                             (case (length slot)
    209                                [(3) 
     209                               [(3)
    210210                                (sprintf "~A[~A];"
    211211                                         (##compiler#foreign-type-declaration
     
    215215                               [(2)
    216216                                (sprintf "~A;"
    217                                          (##compiler#foreign-type-declaration 
     217                                         (##compiler#foreign-type-declaration
    218218                                          (car slot)
    219219                                          (->string (cadr slot)) ) ) ]
     
    238238                     `(begin
    239239                        (define ,(string->symbol (renamer (sprintf "~A-~A" tname sname)))
    240                           (let ([,cvar 
     240                          (let ([,cvar
    241241                                 (foreign-lambda* ,type2 ([,tname ,var] [int ,svar])
    242                                    ,(sprintf "return(~A~A->~A[~A]);" 
     242                                   ,(sprintf "return(~A~A->~A[~A]);"
    243243                                             (if (not (strtype type)) "" "&")
    244244                                             var sname svar) ) ] )
     
    248248                                  ;; this should signal a range exn...
    249249                                  (syntax-error 'define-foreign-record "array access out of range" ',tname ',svar ,size) ) ) ) )
    250                         ,@(if (and (pair? type) (eq? 'const (car type))) 
     250                        ,@(if (and (pair? type) (eq? 'const (car type)))
    251251                              '()
    252252                              (if (eq? type type2)
     
    258258                                          (if (##core#check (and (fx>= ,svar 0) (fx< ,svar ,size)))
    259259                                              (,cvar ,var ,svar ,xvar)
    260                                               (syntax-error 
     260                                              (syntax-error
    261261                                               'define-foreign-record
    262262                                               "array access out of range" ',tname ',svar ,size) ) ) ) ) )
     
    264264                  [(2)
    265265                   (let* ([type (car slot)]
    266                           [sname (cadr slot)] 
     266                          [sname (cadr slot)]
    267267                          [type2 (stype type)] )
    268268                     `(begin
    269269                        (define ,(string->symbol (renamer (sprintf "~A-~A" tname sname)))
    270270                          (foreign-lambda* ,type2 ([,tname ,var])
    271                             ,(sprintf "return(~A~A->~A);" 
     271                            ,(sprintf "return(~A~A->~A);"
    272272                                      (if (not (strtype type)) "" "&")
    273273                                      var sname) ) )
     
    291291;;; Foreign enumerations (or enum-like constants)
    292292
    293 (define-macro (define-foreign-enum typename . enums)
    294   (let ((name typename)
    295         (type (->string typename))
    296         (defsymval ''())
    297         (symbols (map (lambda (e) (if (pair? e) (car e) e)) enums))
    298         (extvals (map (lambda (e)
    299                         (if (pair? e)
    300                             (if (pair? (cdr e))
    301                                 (cadr e)
    302                                 (syntax-error 'define-foreign-enum
    303                                               "invalid enum specification" e) )
    304                             e ) )
    305                       enums))
    306         (symvals (map (lambda (e)
    307                         (if (pair? e)
    308                             (if (pair? (cddr e))
    309                                 (caddr e)
    310                                 `(quote ,(car e)))
    311                             `(quote ,e)))
    312                       enums)) )
    313     (when (list? typename)
    314       (let ([len (length typename)])
    315         (unless (<= 2 len 3)
    316           (syntax-error 'define-foreign-enum "invalid typename specification" typename) )
    317         (set! name (car typename))
    318         (set! type (cadr typename))
    319         (when (= 3 len)
    320           (set! defsymval (caddr typename)) ) ) )
    321     (let ((aliases (map gensym symbols))
    322           (s->e (string->symbol (conc name "->number")))
    323           (e->s (string->symbol (conc "number->" name)) ) )
    324       `(begin
    325          ,@(map (lambda (a v) `(define-foreign-variable ,a integer ,(->string v))) aliases extvals)
    326          (define (,s->e syms)
    327            (let loop ((syms (if (symbol? syms) (list syms) syms)) (sum 0))
    328              (if (null? syms)
    329                  sum
    330                  (loop (cdr syms)
    331                        (bitwise-ior
    332                         sum
    333                         (let ((val (car syms)))
    334                           (case val
    335                             ,@(map (lambda (a s) `((,s) ,a)) aliases symbols)
    336                             (else (error "not a member of enum" val ',name)) ) ) ) ) ) ) )
    337          (define (,e->s val)
    338            (cond
    339             ,@(map (lambda (a sv) `((= val ,a) ,sv)) aliases symvals)
    340             (else ,defsymval) ) )
    341          (define-foreign-type ,name ,type ,s->e ,e->s) ) ) ) )
     293;; (define-foreign-enum TYPE [USE-ALIASES] ENUM ...)
     294;; TYPE : TYPENAME or (SCHEMENAME REALTYPE [DEFAULT-SCHEME-VALUE])
     295;; USE-ALIASES : boolean, default #t
     296;; ENUM : TYPENAME or (SCHEMENAME REALTYPE [SCHEME-VALUE])
     297
     298(define-macro (define-foreign-enum typespec . enums)
     299  (let ([use-aliases (if (pair? enums)
     300                         (let ([flag (car enums)])
     301                           (if (boolean? flag)
     302                               (begin (set! enums (cdr enums)) flag)
     303                               #t ) )
     304                         #t ) ] )
     305    (let ((name typespec)
     306          (type (->string typespec))
     307          (defsymval ''())
     308
     309          (symbols (map (lambda (e) (if (pair? e) (car e) e)) enums))
     310          (extvals (map (lambda (e)
     311                          (if (pair? e)
     312                              (if (pair? (cdr e))
     313                                  (cadr e)
     314                                  (syntax-error 'define-foreign-enum
     315                                                "invalid enum specification" e) )
     316                              e ) )
     317                        enums))
     318          (symvals (map (lambda (e)
     319                          (if (pair? e)
     320                              (if (pair? (cddr e))
     321                                  (caddr e)
     322                                  `(quote ,(car e)))
     323                              `(quote ,e)))
     324                        enums)) )
     325      (when (list? typespec)
     326        (let ([len (length typespec)])
     327          (unless (<= 2 len 3)
     328            (syntax-error 'define-foreign-enum "invalid type specification" typespec) )
     329          (set! name (car typespec))
     330          (set! type (cadr typespec))
     331          (when (= 3 len)
     332            (set! defsymval (caddr typespec)) ) ) )
     333      (let ((aliases (if use-aliases (map gensym symbols) symbols))
     334            (s->e (string->symbol (conc name "->number")))
     335            (e->s (string->symbol (conc "number->" name)) ) )
     336        `(begin
     337           ,@(map (lambda (a v) `(define-foreign-variable ,a ,type ,(->string v))) aliases extvals)
     338           (define (,s->e syms)
     339             (let loop ((syms (if (symbol? syms) (list syms) syms)) (sum 0))
     340               (if (null? syms)
     341                   sum
     342                   (loop (cdr syms)
     343                         (bitwise-ior
     344                          sum
     345                          (let ((val (car syms)))
     346                            (case val
     347                              ,@(map (lambda (a s) `((,s) ,a)) aliases symbols)
     348                              (else (error "not a member of enum" val ',name)) ) ) ) ) ) ) )
     349           (define (,e->s val)
     350             (cond
     351              ,@(map (lambda (a sv) `((= val ,a) ,sv)) aliases symvals)
     352              (else ,defsymval) ) )
     353           (define-foreign-type ,name ,type ,s->e ,e->s) ) ) ) ) )
    342354
    343355
     
    362374  (if (and (pair? head) (symbol? (car head)))
    363375      (cond ((memq 'compiling ##sys#features)
    364              (warning "compile macros are not available in interpreted code" 
     376             (warning "compile macros are not available in interpreted code"
    365377                      (car head) ) )
    366378            ((not (##compiler#register-compiler-macro (car head) (cdr head) body))
  • chicken/trunk/chicken-more-macros.scm

    r5358 r6175  
    453453                   (let ((clause (##sys#slot clauses 0))
    454454                         (rclauses (##sys#slot clauses 1)) )
    455                      (##sys#check-syntax 'switch clause '#(_ 1))
     455                     (##sys#check-syntax 'select clause '#(_ 1))
    456456                     (if (eq? 'else (car clause))
    457457                         `(begin ,@(cdr clause))
  • chicken/trunk/chicken.h

    r6123 r6175  
    4040#define ___CHICKEN
    4141
     42/*
     43 * N.B. This file MUST not rely upon "chicken-config.h"
     44 */
    4245#if defined(HAVE_CONFIG_H) || defined(HAVE_CHICKEN_CONFIG_H)
    4346# include "chicken-config.h"
     
    159162#endif
    160163
    161 #if defined (__alpha__) || defined (__sparc_v9__) || defined (__sparcv9) || defined(__ia64__) || defined(__x86_64__) || defined(__LP64__)
    162 # define C_SIXTY_FOUR
    163 #elif defined(__mips64) && (!defined(__GNUC__) || _MIPS_SZPTR == 64)
    164 # define C_SIXTY_FOUR
     164#ifndef C_SIXTY_FOUR
     165# if defined (__alpha__) || defined (__sparc_v9__) || defined (__sparcv9) || defined(__ia64__) || defined(__x86_64__) || defined(__LP64__)
     166#   define C_SIXTY_FOUR
     167# elif defined(__mips64) && (!defined(__GNUC__) || _MIPS_SZPTR == 64)
     168#   define C_SIXTY_FOUR
     169# endif
    165170#endif
    166171
     
    199204#endif
    200205
    201 #if defined(__linux__)
     206#if defined(__MINGW32__)
     207# include <sys/param.h>
     208#elif defined(__linux__)
    202209# include <endian.h>
    203210#elif defined(C_MACOSX) || defined(C_XXXBSD)
     
    238245#endif
    239246
    240 #ifdef __WATCOMC__
     247#ifdef __MINGW32__
    241248# include <malloc.h>
    242249#endif
     
    259266#endif
    260267
    261 #ifdef __MINGW32__
     268#ifdef __WATCOMC__
    262269# include <malloc.h>
    263270#endif
  • chicken/trunk/csc.scm

    r5861 r6175  
    3636(declare
    3737  (block)
    38   (uses extras srfi-1 srfi-13 regex utils))
     38  (uses extras srfi-1 srfi-13 utils))
    3939
    4040#>
  • chicken/trunk/cscbench.scm

    r5878 r6175  
    33; - Usage: cscbench [-debug] [-cc=<path>] OPTION ...
    44
    5 (require-extension srfi-1 utils posix)
     5(require-extension srfi-1 utils posix regex)
    66
    77(define plist-files '("boyer" "browse" "dderiv"))
     
    1313(define (abort-run) #f)
    1414
    15 (define (run)
    16   (system* "./tmpfile >tmpfile.out")
    17   (with-input-from-file "tmpfile.out"
     15(define run
     16  (let ([secrx (regexp "^ *([-.+e0-9]*(\\.[0-9]*)?) seconds elapsed$")])
    1817    (lambda ()
    19       (let loop ([line (read-line)])
    20         (if (eof-object? line)
    21             (abort-run)
    22             (let ([m (string-match " *([-.+e0-9]*(\\.[0-9]*)?) seconds elapsed" line)])
    23               (if m
    24                   (string->number (second m))
    25                   (loop (read-line)) ) ) ) ) ) ) )
     18      (system* "./tmpfile >tmpfile.out")
     19      (with-input-from-file "tmpfile.out"
     20        (lambda ()
     21          (let loop ([line (read-line)])
     22            (if (eof-object? line)
     23                (abort-run)
     24                (let ([m (string-match secrx line)])
     25                  (if m
     26                      (string->number (second m))
     27                      (loop (read-line)) ) ) ) ) ) ) ) ) )
    2628
    2729(define (display-l str len pad)
     
    3537    (display (substring str 0 (min slen len))) ) )
    3638
    37 (define (display-f-4.3 n)
    38   (let* ([m (string-match "([-+e0-9]*)(\\.([0-9]*))?" (number->string n))]
    39          [is (second m)]
    40          [fs (fourth m)] )
    41     (display-r is 4 #\space)
    42     (display #\.)
    43     (display-r fs 3 #\0) ) )
     39(define display-f-4.3
     40  (let ([florx (regexp "^([-+e0-9]*)(\\.([0-9]*))?$")])
     41    (lambda (n)
     42      (let* ([m (string-match florx (number->string n))]
     43             [is (second m)]
     44             [fs (fourth m)] )
     45        (display-r is 4 #\space)
     46        (display #\.)
     47        (display-r fs 3 #\0) ) ) ) )
    4448
    4549(define (compile-and-run file extras decls options coptions unsafe)
  • chicken/trunk/csi.scm

    r6105 r6175  
    6464
    6565(declare
     66  (always-bound
     67    ##sys#windows-platform)
    6668  (hide parse-option-string bytevector-data member* canonicalize-args do-trace do-untrace
    6769        traced-procedures describer-table
     
    838840           (register-feature! 'script)
    839841           (set-cdr! (cdr script) '())
    840            (when (and (eq? (software-type) 'windows) (not (eq? (build-platform) 'cygwin)) )
     842           (when ##sys#windows-platform
    841843             (and-let* ((sname (lookup-script-file (cadr script))))
    842844               (set-car! (cdr script) sname) ) ) ]
  • chicken/trunk/defaults.make

    r6080 r6175  
    5858else
    5959EGGDIR = $(DESTDIR)/lib/chicken/$(BINARYVERSION)
     60endif
     61
     62ifdef PCRE7
     63PCREDIR = pcre7
     64else
     65PCREDIR = pcre6
    6066endif
    6167
     
    9399endif
    94100INCLUDES ?= -I.
    95 PCRE_INCLUDES ?= $(INCLUDES) -Ipcre
     101PCRE_INCLUDES ?= $(INCLUDES) -I$(PCREDIR)
    96102C_COMPILER_COMPILE_OPTION ?= -c
    97103C_COMPILER_OUTPUT_OPTION ?= -o
     
    146152POSIXFILE ?= posixunix
    147153# CHICKEN_CONFIG_H = chicken-config.h
    148 PCRE_OBJECT_FILES ?= pcre/*.o
     154PCRE_OBJECT_FILES ?= $(PCREDIR)/*.o
    149155
    150156ifneq ($(ARCH),)
  • chicken/trunk/library.scm

    r6108 r6175  
    31563156    (lambda () sym) ) )
    31573157
     3158(define ##sys#windows-platform
     3159  (and (eq? 'windows (software-type))
     3160       ;; Still windows even if 'Linux-like'
     3161       (not (eq? 'cygwin (build-platform)))) )
     3162
    31583163(define (chicken-version #!optional full)
    31593164  (define (get-config)
     
    31633168          [mt (machine-type)] )
    31643169      (define (str x)
    3165         (if (eq? x 'unknown)
     3170        (if (eq? 'unknown x)
    31663171            ""
    31673172            (string-append (symbol->string x) "-") ) )
     
    31693174  (if full
    31703175      (let ((spec (string-append
    3171                    (if (##sys#fudge 3) " 64bit" "")
     3176                   (if (##sys#fudge 3)  " 64bit" "")
    31723177                   (if (##sys#fudge 15) " symbolgc" "")
    31733178                   (if (##sys#fudge 40) " manyargs" "")
     
    31883193
    31893194(define ##sys#pathname-directory-separator
    3190   (let ([st (software-type)])
    3191     (if (and (eq? 'windows st) (not (eq? (build-platform) 'cygwin)))
    3192         #\\
    3193         #\/) ) )
     3195  (if ##sys#windows-platform #\\ #\/) )
    31943196
    31953197
     
    32063208          "") )
    32073209    (lambda (x)
    3208       (cond [(string? x) (string->keyword x)]
     3210      (cond [(string? x)  (string->keyword x)]
    32093211            [(keyword? x) x]
    3210             [(symbol? x) (string->keyword (##sys#symbol->string x))]
    3211             [else (err x)] ) ) ) )
     3212            [(symbol? x)  (string->keyword (##sys#symbol->string x))]
     3213            [else         (err x)] ) ) ) )
    32123214
    32133215(define ##sys#features '(#:chicken #:srfi-23 #:srfi-30 #:srfi-39 #:srfi-62 #:srfi-17 #:srfi-12))
  • chicken/trunk/pcre/AUTHORS

    r2926 r6175  
    77
    88University of Cambridge Computing Service,
    9 Cambridge, England. Phone: +44 1223 334714.
     9Cambridge, England.
    1010
    11 Copyright (c) 1997-2005 University of Cambridge
     11Copyright (c) 1997-2007 University of Cambridge
    1212All rights reserved
     13
     14
     15THE C++ WRAPPER LIBRARY
     16-----------------------
     17
     18Written by:       Google Inc.
     19
     20Copyright (c) 2007 Google Inc
     21All rights reserved
     22
     23####
  • chicken/trunk/pcre/COPYING

    r2926 r6175  
    11PCRE LICENCE
    2 ------------
    32
    4 PCRE is a library of functions to support regular expressions whose syntax
    5 and semantics are as close as possible to those of the Perl 5 language.
    6 
    7 Release 6 of PCRE is distributed under the terms of the "BSD" licence, as
    8 specified below. The documentation for PCRE, supplied in the "doc"
    9 directory, is distributed under the same terms as the software itself.
    10 
    11 The basic library functions are written in C and are freestanding. Also
    12 included in the distribution is a set of C++ wrapper functions.
    13 
    14 
    15 THE BASIC LIBRARY FUNCTIONS
    16 ---------------------------
    17 
    18 Written by:       Philip Hazel
    19 Email local part: ph10
    20 Email domain:     cam.ac.uk
    21 
    22 University of Cambridge Computing Service,
    23 Cambridge, England. Phone: +44 1223 334714.
    24 
    25 Copyright (c) 1997-2005 University of Cambridge
    26 All rights reserved.
    27 
    28 
    29 THE C++ WRAPPER FUNCTIONS
    30 -------------------------
    31 
    32 Contributed by:   Google Inc.
    33 
    34 Copyright (c) 2005, Google Inc.
    35 All rights reserved.
    36 
    37 
    38 THE "BSD" LICENCE
    39 -----------------
    40 
    41 Redistribution and use in source and binary forms, with or without
    42 modification, are permitted provided that the following conditions are met:
    43 
    44     * Redistributions of source code must retain the above copyright notice,
    45       this list of conditions and the following disclaimer.
    46 
    47     * Redistributions in binary form must reproduce the above copyright
    48       notice, this list of conditions and the following disclaimer in the
    49       documentation and/or other materials provided with the distribution.
    50 
    51     * Neither the name of the University of Cambridge nor the name of Google
    52       Inc. nor the names of their contributors may be used to endorse or
    53       promote products derived from this software without specific prior
    54       written permission.
    55 
    56 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    57 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    58 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    59 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    60 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    61 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    62 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    63 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    64 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    65 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    66 POSSIBILITY OF SUCH DAMAGE.
     3Please see the file LICENCE in the PCRE distribution for licensing details.
    674
    685End
  • chicken/trunk/pcre/LICENCE

    r2926 r6175  
    55and semantics are as close as possible to those of the Perl 5 language.
    66
    7 Release 6 of PCRE is distributed under the terms of the "BSD" licence, as
     7Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
    88specified below. The documentation for PCRE, supplied in the "doc"
    99directory, is distributed under the same terms as the software itself.
     
    2121
    2222University of Cambridge Computing Service,
    23 Cambridge, England. Phone: +44 1223 334714.
     23Cambridge, England.
    2424
    25 Copyright (c) 1997-2005 University of Cambridge
     25Copyright (c) 1997-2007 University of Cambridge
    2626All rights reserved.
    2727
     
    3232Contributed by:   Google Inc.
    3333
    34 Copyright (c) 2005, Google Inc.
     34Copyright (c) 2007, Google Inc.
    3535All rights reserved.
    3636
  • chicken/trunk/pcre/NON-UNIX-USE

    r2926 r6175  
    22----------------------------------
    33
    4 See below for comments on Cygwin or MinGW and OpenVMS usage. I (Philip Hazel)
    5 have no knowledge of Windows or VMS sytems and how their libraries work. The
    6 items in the PCRE Makefile that relate to anything other than Unix-like systems
    7 have been contributed by PCRE users. There are some other comments and files in
    8 the Contrib directory on the ftp site that you may find useful. See
     4This document contains the following sections:
     5
     6  General
     7  Generic instructions for the PCRE C library
     8  The C++ wrapper functions
     9  Building for virtual Pascal
     10  Stack size in Windows environments
     11  Comments about Win32 builds
     12  Building PCRE with CMake
     13  Building under Windows with BCC5.5
     14  Building PCRE on OpenVMS
     15
     16
     17GENERAL
     18
     19I (Philip Hazel) have no experience of Windows or VMS systems and how their
     20libraries work. The items in the PCRE distribution and Makefile that relate to
     21anything other than Unix-like systems are untested by me.
     22
     23There are some other comments and files in the Contrib directory on the ftp
     24site that you may find useful. See
    925
    1026  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
    1127
    12 If you want to compile PCRE for a non-Unix system (or perhaps, more strictly,
    13 for a system that does not support "configure" and "make" files), note that
    14 PCRE consists entirely of code written in Standard C, and so should compile
    15 successfully on any system that has a Standard C compiler and library.
    16 
    17 
    18 GENERIC INSTRUCTIONS FOR THE C LIBRARY
    19 
    20 The following are generic comments about building PCRE. The interspersed
    21 indented commands are suggestions from Mark Tetrode as to which commands you
    22 might use on a Windows system to build a static library.
    23 
    24 (1) Copy or rename the file config.in as config.h, and change the macros that
    25 define HAVE_STRERROR and HAVE_MEMMOVE to define them as 1 rather than 0.
    26 Unfortunately, because of the way Unix autoconf works, the default setting has
    27 to be 0. You may also want to make changes to other macros in config.h. In
    28 particular, if you want to force a specific value for newline, you can define
    29 the NEWLINE macro. The default is to use '\n', thereby using whatever value
    30 your compiler gives to '\n'.
    31 
    32   rem Mark Tetrode's commands
    33   copy config.in config.h
    34   rem Use write, because notepad cannot handle UNIX files. Change values.
    35   write config.h
    36 
    37 (2) Copy or rename the file pcre.in as pcre.h, and change the macro definitions
    38 for PCRE_MAJOR, PCRE_MINOR, and PCRE_DATE near its start to the values set in
    39 configure.in.
    40 
    41   rem Mark Tetrode's commands
    42   copy pcre.in pcre.h
    43   rem Read values from configure.in
    44   write configure.in
    45   rem Change values
    46   write pcre.h
    47 
    48 (3) Compile dftables.c as a stand-alone program, and then run it with
    49 the single argument "chartables.c". This generates a set of standard
    50 character tables and writes them to that file.
    51 
    52   rem Mark Tetrode's commands
    53   rem Compile & run
    54   cl -DSUPPORT_UTF8 -DSUPPORT_UCP dftables.c
    55   dftables.exe chartables.c
    56 
    57 (4) Compile the following source files:
    58 
    59   pcre_chartables.c
    60   pcre_compile.c
    61   pcre_config.c
    62   pcre_dfa_exec.c
    63   pcre_exec.c
    64   pcre_fullinfo.c
    65   pcre_get.c
    66   pcre_globals.c
    67   pcre_info.c
    68   pcre_maketables.c
    69   pcre_ord2utf8.c
    70   pcre_printint.c
    71   pcre_refcount.c
    72   pcre_study.c
    73   pcre_tables.c
    74   pcre_try_flipped.c
    75   pcre_ucp_findchar.c
    76   pcre_valid_utf8.c
    77   pcre_version.c
    78   pcre_xclass.c
    79 
    80 and link them all together into an object library in whichever form your system
    81 keeps such libraries. This is the pcre C library. If your system has static and
    82 shared libraries, you may have to do this once for each type.
    83 
    84   rem These comments are out-of-date, referring to a previous release which
    85   rem had fewer source files. Replace with the file names from above.
    86   rem Mark Tetrode's commands, for a static library
    87   rem Compile & lib
    88   cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c maketables.c get.c study.c pcre.c
    89   lib /OUT:pcre.lib maketables.obj get.obj study.obj pcre.obj
    90 
    91 (5) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
    92 library.
    93 
    94   rem Mark Tetrode's commands, for a static library
    95   rem Compile & lib
    96   cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c pcreposix.c
    97   lib /OUT:pcreposix.lib pcreposix.obj
    98 
    99 (6) Compile the test program pcretest.c. This needs the functions in the
    100 pcre and pcreposix libraries when linking.
    101 
    102   rem Mark Tetrode's commands
    103   rem compile & link
    104   cl /F0x400000 pcretest.c pcre.lib pcreposix.lib
    105 
    106 (7) Run pcretest on the testinput files in the testdata directory, and check
    107 that the output matches the corresponding testoutput files. You must use the
    108 -i option when checking testinput2. Note that the supplied files are in Unix
    109 format, with just LF characters as line terminators. You may need to edit them
    110 to change this if your system uses a different convention.
    111 
    112   rem Mark Tetrode's commands
    113   pcretest testdata\testinput1 testdata\myoutput1
    114   windiff testdata\testoutput1 testdata\myoutput1
    115   pcretest -i testdata\testinput2 testdata\myoutput2
    116   windiff testdata\testoutput2 testdata\myoutput2
    117   pcretest testdata\testinput3 testdata\myoutput3
    118   windiff testdata\testoutput3 testdata\myoutput3
    119   pcretest testdata\testinput4 testdata\myoutput4
    120   windiff testdata\testoutput4 testdata\myoutput4
    121   pcretest testdata\testinput5 testdata\myoutput5
    122   windiff testdata\testoutput5 testdata\myoutput5
    123   pcretest testdata\testinput6 testdata\myoutput6
    124   windiff testdata\testoutput6 testdata\myoutput6
    125 
    126 Note that there are now three more tests (7, 8, 9) that did not exist when Mark
    127 wrote those comments. The test the new pcre_dfa_exec() function.
     28If you want to compile PCRE for a non-Unix system (especially for a system that
     29does not support "configure" and "make" files), note that the basic PCRE
     30library consists entirely of code written in Standard C, and so should compile
     31successfully on any system that has a Standard C compiler and library. The C++
     32wrapper functions are a separate issue (see below).
     33
     34The PCRE distribution includes support for CMake. This support is relatively
     35new, but has already been used successfully to build PCRE in multiple build
     36environments on Windows. There are some instructions in the section entitled
     37"Building PCRE with CMake" below.
     38
     39
     40GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
     41
     42The following are generic comments about building the PCRE C library "by hand".
     43
     44 (1) Copy or rename the file config.h.generic as config.h, and edit the macro
     45     settings that it contains to whatever is appropriate for your environment.
     46     In particular, if you want to force a specific value for newline, you can
     47     define the NEWLINE macro. When you compile any of the PCRE modules, you
     48     must specify -DHAVE_CONFIG_H to your compiler so that config.h is included
     49     in the sources.
     50
     51     An alternative approach is not to edit config.h, but to use -D on the
     52     compiler command line to make any changes that you need to the
     53     configuration options. In this case -DHAVE_CONFIG_H must not be set.
     54
     55     NOTE: There have been occasions when the way in which certain parameters
     56     in config.h are used has changed between releases. (In the configure/make
     57     world, this is handled automatically.) When upgrading to a new release,
     58     you are strongly advised to review config.h.generic before re-using what
     59     you had previously.
     60
     61 (2) Copy or rename the file pcre.h.generic as pcre.h.
     62
     63 (3) EITHER:
     64       Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
     65
     66     OR:
     67       Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
     68       you have set up config.h), and then run it with the single argument
     69       "pcre_chartables.c". This generates a set of standard character tables
     70       and writes them to that file. The tables are generated using the default
     71       C locale for your system. If you want to use a locale that is specified
     72       by LC_xxx environment variables, add the -L option to the dftables
     73       command. You must use this method if you are building on a system that
     74       uses EBCDIC code.
     75
     76     The tables in pcre_chartables.c are defaults. The caller of PCRE can
     77     specify alternative tables at run time.
     78
     79 (4) Ensure that you have the following header files:
     80
     81       pcre_internal.h
     82       ucp.h
     83       ucpinternal.h
     84       ucptable.h
     85
     86 (5) Also ensure that you have the following file, which is #included as source
     87     when building a debugging version of PCRE and is also used by pcretest.
     88
     89       pcre_printint.src
     90
     91 (6) Compile the following source files, setting -DHAVE_CONFIG_H as a compiler
     92     option if you have set up config.h with your configuration, or else use
     93     other -D settings to change the configuration as required.
     94
     95       pcre_chartables.c
     96       pcre_compile.c
     97       pcre_config.c
     98       pcre_dfa_exec.c
     99       pcre_exec.c
     100       pcre_fullinfo.c
     101       pcre_get.c
     102       pcre_globals.c
     103       pcre_info.c
     104       pcre_maketables.c
     105       pcre_newline.c
     106       pcre_ord2utf8.c
     107       pcre_refcount.c
     108       pcre_study.c
     109       pcre_tables.c
     110       pcre_try_flipped.c
     111       pcre_ucp_searchfuncs.c
     112       pcre_valid_utf8.c
     113       pcre_version.c
     114       pcre_xclass.c
     115
     116     Make sure that you include -I. in the compiler command (or equivalent for
     117     an unusual compiler) so that all included PCRE header files are first
     118     sought in the current directory. Otherwise you run the risk of picking up
     119     a previously-installed file from somewhere else.
     120
     121 (7) Now link all the compiled code into an object library in whichever form
     122     your system keeps such libraries. This is the basic PCRE C library. If
     123     your system has static and shared libraries, you may have to do this once
     124     for each type.
     125
     126 (8) Similarly, compile pcreposix.c (remembering -DHAVE_CONFIG_H if necessary)
     127     and link the result (on its own) as the pcreposix library.
     128
     129 (9) Compile the test program pcretest.c (again, don't forget -DHAVE_CONFIG_H).
     130     This needs the functions in the pcre and pcreposix libraries when linking.
     131     It also needs the pcre_printint.src source file, which it #includes.
     132
     133(10) Run pcretest on the testinput files in the testdata directory, and check
     134     that the output matches the corresponding testoutput files. Note that the
     135     supplied files are in Unix format, with just LF characters as line
     136     terminators. You may need to edit them to change this if your system uses
     137     a different convention. If you are using Windows, you probably should use
     138     the wintestinput3 file instead of testinput3 (and the corresponding output
     139     file). This is a locale test; wintestinput3 sets the locale to "french"
     140     rather than "fr_FR", and there are some minor output differences.
     141
     142(11) If you want to use the pcregrep command, compile and link pcregrep.c; it
     143     uses only the basic PCRE library (it does not need the pcreposix library).
    128144
    129145
    130146THE C++ WRAPPER FUNCTIONS
    131147
    132 The PCRE distribution now contains some C++ wrapper functions and tests,
     148The PCRE distribution also contains some C++ wrapper functions and tests,
    133149contributed by Google Inc. On a system that can use "configure" and "make",
    134150the functions are automatically built into a library called pcrecpp. It should
     
    138154
    139155
    140 FURTHER REMARKS
    141 
    142 If you have a system without "configure" but where you can use a Makefile, edit
    143 Makefile.in to create Makefile, substituting suitable values for the variables
    144 at the head of the file.
    145 
    146 Some help in building a Win32 DLL of PCRE in GnuWin32 environments was
    147 contributed by Paul Sokolovsky. These environments are Mingw32
    148 (http://www.xraylith.wisc.edu/~khan/software/gnu-win32/) and CygWin
    149 (http://sourceware.cygnus.com/cygwin/). Paul comments:
    150 
    151   For CygWin, set CFLAGS=-mno-cygwin, and do 'make dll'. You'll get
    152   pcre.dll (containing pcreposix also), libpcre.dll.a, and dynamically
    153   linked pgrep and pcretest. If you have /bin/sh, run RunTest (three
    154   main test go ok, locale not supported).
    155 
    156 Changes to do MinGW with autoconf 2.50 were supplied by Fred Cox
    157 <sailorFred@yahoo.com>, who comments as follows:
    158 
    159   If you are using the PCRE DLL, the normal Unix style configure && make &&
    160   make check && make install should just work[*]. If you want to statically
    161   link against the .a file, you must define PCRE_STATIC before including
    162   pcre.h, otherwise the pcre_malloc and pcre_free exported functions will be
    163   declared __declspec(dllimport), with hilarious results.  See the configure.in
    164   and pcretest.c for how it is done for the static test.
    165 
    166   Also, there will only be a libpcre.la, not a libpcreposix.la, as you
    167   would expect from the Unix version. The single DLL includes the pcreposix
    168   interface.
    169 
    170 [*] But note that the supplied test files are in Unix format, with just LF
    171 characters as line terminators. You will have to edit them to change to CR LF
    172 terminators.
     156BUILDING FOR VIRTUAL PASCAL
    173157
    174158A script for building PCRE using Borland's C++ compiler for use with VPASCAL
    175 was contributed by Alexander Tokarev. It is called makevp.bat.
    176 
    177 These are some further comments about Win32 builds from Mark Evans. They
    178 were contributed before Fred Cox's changes were made, so it is possible that
    179 they may no longer be relevant.
    180 
    181 "The documentation for Win32 builds is a bit shy.  Under MSVC6 I
    182 followed their instructions to the letter, but there were still
    183 some things missing.
    184 
    185 (1) Must #define STATIC for entire project if linking statically.
    186     (I see no reason to use DLLs for code this compact.)  This of
    187     course is a project setting in MSVC under Preprocessor.
    188 
    189 (2) Missing some #ifdefs relating to the function pointers
    190     pcre_malloc and pcre_free.  See my solution below.  (The stubs
    191     may not be mandatory but they made me feel better.)"
    192 
    193 =========================
    194 #ifdef _WIN32
    195 #include <malloc.h>
    196 
    197 void* malloc_stub(size_t N)
    198 { return malloc(N); }
    199 void free_stub(void* p)
    200 { free(p); }
    201 void *(*pcre_malloc)(size_t) = &malloc_stub;
    202 void  (*pcre_free)(void *) = &free_stub;
    203 
    204 #else
    205 
    206 void *(*pcre_malloc)(size_t) = malloc;
    207 void  (*pcre_free)(void *) = free;
    208 
    209 #endif
    210 =========================
     159was contributed by Alexander Tokarev. Stefan Weber updated the script and added
     160additional files. The following files in the distribution are for building PCRE
     161for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
     162
     163
     164STACK SIZE IN WINDOWS ENVIRONMENTS
     165
     166The default processor stack size of 1Mb in some Windows environments is too
     167small for matching patterns that need much recursion. In particular, test 2 may
     168fail because of this. Normally, running out of stack causes a crash, but there
     169have been cases where the test program has just died silently. See your linker
     170documentation for how to increase stack size if you experience problems. The
     171Linux default of 8Mb is a reasonable choice for the stack, though even that can
     172be too small for some pattern/subject combinations.
     173
     174PCRE has a compile configuration option to disable the use of stack for
     175recursion so that heap is used instead. However, pattern matching is
     176significantly slower when this is done. There is more about stack usage in the
     177"pcrestack" documentation.
     178
     179
     180COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE WITH CMAKE" below)
     181
     182There are two ways of building PCRE using the "configure, make, make install"
     183paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
     184the same thing; they are completely different from each other. There is also
     185some experimental, undocumented support for building using "cmake", which you
     186might like to try if you are familiar with "cmake". However, at the present
     187time, the "cmake" process builds only a static library (not a dll), and the
     188tests are not automatically run.
     189
     190The MinGW home page (http://www.mingw.org/) says this:
     191
     192  MinGW: A collection of freely available and freely distributable Windows
     193  specific header files and import libraries combined with GNU toolsets that
     194  allow one to produce native Windows programs that do not rely on any
     195  3rd-party C runtime DLLs.
     196
     197The Cygwin home page (http://www.cygwin.com/) says this:
     198
     199  Cygwin is a Linux-like environment for Windows. It consists of two parts:
     200
     201  . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
     202    substantial Linux API functionality
     203
     204  . A collection of tools which provide Linux look and feel.
     205
     206  The Cygwin DLL currently works with all recent, commercially released x86 32
     207  bit and 64 bit versions of Windows, with the exception of Windows CE.
     208
     209On both MinGW and Cygwin, PCRE should build correctly using:
     210
     211  ./configure && make && make install
     212
     213This should create two libraries called libpcre and libpcreposix, and, if you
     214have enabled building the C++ wrapper, a third one called libpcrecpp. These are
     215independent libraries: when you link with libpcreposix or libpcrecpp you must
     216also link with libpcre, which contains the basic functions. (Some earlier
     217releases of PCRE included the basic libpcre functions in libpcreposix. This no
     218longer happens.)
     219
     220If you want to statically link your program against a non-dll .a file, you must
     221define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
     222pcre_free() exported functions will be declared __declspec(dllimport), with
     223unwanted results.
     224
     225Using Cygwin's compiler generates libraries and executables that depend on
     226cygwin1.dll. If a library that is generated this way is distributed,
     227cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
     228licence, this forces not only PCRE to be under the GPL, but also the entire
     229application. A distributor who wants to keep their own code proprietary must
     230purchase an appropriate Cygwin licence.
     231
     232MinGW has no such restrictions. The MinGW compiler generates a library or
     233executable that can run standalone on Windows without any third party dll or
     234licensing issues.
     235
     236But there is more complication:
     237
     238If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
     239to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
     240front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
     241gcc and MinGW's gcc). So, a user can:
     242
     243. Build native binaries by using MinGW or by getting Cygwin and using
     244  -mno-cygwin.
     245
     246. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
     247  compiler flags.
     248
     249The test files that are supplied with PCRE are in Unix format, with LF
     250characters as line terminators. It may be necessary to change the line
     251terminators in order to get some of the tests to work. We hope to improve
     252things in this area in future.
     253
     254
     255BUILDING PCRE WITH CMAKE
     256
     257CMake is an alternative build facility that can be used instead of the
     258traditional Unix "configure". CMake version 2.4.7 supports Borland makefiles,
     259MinGW makefiles, MSYS makefiles, NMake makefiles, UNIX makefiles, Visual Studio
     2606, Visual Studio 7, Visual Studio 8, and Watcom W8. The following instructions
     261were contributed by a PCRE user.
     262
     2631. Download CMake 2.4.7 or above from http://www.cmake.org/, install and ensure
     264   that cmake\bin is on your path.
     265
     2662. Unzip (retaining folder structure) the PCRE source tree into a source
     267   directory such as C:\pcre.
     268
     2693. Create a new, empty build directory: C:\pcre\build\
     270
     2714. Run CMakeSetup from the Shell environment of your build tool, e.g., Msys
     272   for Msys/MinGW or Visual Studio Command Prompt for VC/VC++
     273
     2745. Enter C:\pcre\pcre-xx and C:\pcre\build for the source and build
     275   directories, respectively
     276
     2776. Hit the "Configure" button.
     278
     2797. Select the particular IDE / build tool that you are using (Visual Studio,
     280   MSYS makefiles, MinGW makefiles, etc.)
     281
     2828. The GUI will then list several configuration options. This is where you can
     283   enable UTF-8 support, etc.
     284
     2859. Hit "Configure" again. The adjacent "OK" button should now be active.
     286
     28710. Hit "OK".
     288
     28911. The build directory should now contain a usable build system, be it a
     290    solution file for Visual Studio, makefiles for MinGW, etc.
     291
     292Testing with RunTest.bat
     293
     2941. Copy RunTest.bat into the directory where pcretest.exe has been created.
     295
     2962. Edit RunTest.bat and insert a line that identifies the relative location of
     297   the pcre source, e.g.:
     298
     299   set srcdir=..\pcre-7.4-RC3
     300
     3013. Run RunTest.bat from a command shell environment. Test outputs will
     302   automatically be compared to expected results, and discrepancies will
     303   be identified in the console output.
     304
     3054. To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
     306   pcre_scanner_unittest.exe.
     307
     308
     309BUILDING UNDER WINDOWS WITH BCC5.5
     310
     311Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
     312
     313  Some of the core BCC libraries have a version of PCRE from 1998 built in,
     314  which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
     315  version mismatch. I'm including an easy workaround below, if you'd like to
     316  include it in the non-unix instructions:
     317
     318  When linking a project with BCC5.5, pcre.lib must be included before any of
     319  the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
     320  line.
    211321
    212322
     
    275385=========================
    276386
     387Last Updated: 21 September 2007
    277388****
  • chicken/trunk/pcre/config.h

    r5853 r6175  
     1/* config.h.  From PCRE 7.4 config.h generated from config.h.in by configure.  */
     2
    13#if defined(HAVE_CONFIG_H) || defined(HAVE_CHICKEN_CONFIG_H)
    24# include "chicken-config.h"
    35#endif
    46
    5 /* On Unix systems config.in is converted by configure into config.h. PCRE is
    6 written in Standard C, but there are a few non-standard things it can cope
    7 with, allowing it to run on SunOS4 and other "close to standard" systems.
     7/* On Unix-like systems config.h.in is converted by "configure" into config.h.
     8Some other environments also support the use of "configure". PCRE is written in
     9Standard C, but there are a few non-standard things it can cope with, allowing
     10it to run on SunOS4 and other "close to standard" systems.
    811
    9 On a non-Unix system you should just copy this file into config.h, and set up
    10 the macros the way you need them. You should normally change the definitions of
    11 HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
    12 works, these cannot be made the defaults. If your system has bcopy() and not
    13 memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
    14 system has neither bcopy() nor memmove(), leave them both as 0; an emulation
    15 function will be used. */
     12If you are going to build PCRE "by hand" on a system without "configure" you
     13should copy the distributed config.h.generic to config.h, and then set up the
     14macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
     15all of your compile commands, so that config.h is included at the start of
     16every source.
    1617
    17 /* If you are compiling for a system that needs some magic to be inserted
    18 before the definition of an exported function, define this macro to contain the
    19 relevant magic. It apears at the start of every exported function. */
     18Alternatively, you can avoid editing by using -D on the compiler command line
     19to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
    2020
    21 #define EXPORT
     21PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
     22HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
     23them both to 0; an emulation function will be used. */
    2224
    23 /* Define to empty if the "const" keyword does not work. */
     25/* Define to 1 if you have the `memmove' function. */
     26#ifndef HAVE_MEMMOVE
     27/* hm... there must be a better way */
     28# define HAVE_MEMMOVE 1
     29#endif
    2430
    25 #undef const
     31/* The value of LINK_SIZE determines the number of bytes used to store links
     32   as offsets within the compiled regex. The default is 2, which allows for
     33   compiled patterns up to 64K long. This covers the vast majority of cases.
     34   However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
     35   for longer patterns in extreme cases. On systems that support it,
     36   "configure" can be used to override this default. */
     37#ifndef LINK_SIZE
     38#define LINK_SIZE   2
     39#endif
    2640
    27 /* Define to "unsigned" if <stddef.h> doesn't define size_t. */
     41/* The value of MATCH_LIMIT determines the default number of times the
     42   internal match() function can be called during a single execution of
     43   pcre_exec(). There is a runtime interface for setting a different limit.
     44   The limit exists in order to catch runaway regular expressions that take
     45   for ever to determine that they do not match. The default is set very large
     46   so that it does not accidentally catch legitimate cases. On systems that
     47   support it, "configure" can be used to override this default. */
     48#ifndef MATCH_LIMIT
     49#define MATCH_LIMIT 10000000
     50#endif
    2851
    29 #undef size_t
     52/* The above limit applies to all calls of match(), whether or not they
     53   increase the recursion depth. In some environments it is desirable to limit
     54   the depth of recursive calls of match() more strictly, in order to restrict
     55   the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
     56   used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
     57   match(). To have any useful effect, it must be less than the value of
     58   MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
     59   a runtime method for setting a different limit. On systems that support it,
     60   "configure" can be used to override the default. */
     61#ifndef MATCH_LIMIT_RECURSION
     62#define MATCH_LIMIT_RECURSION MATCH_LIMIT
     63#endif
    3064
    31 #define HAVE_BCOPY    0
     65/* This limit is parameterized just in case anybody ever wants to change it.
     66   Care must be taken if it is increased, because it guards against integer
     67   overflow caused by enormously large patterns. */
     68#ifndef MAX_NAME_COUNT
     69#define MAX_NAME_COUNT 10000
     70#endif
    3271
    33 /* The value of NEWLINE determines the newline character. The default is to
    34 leave it up to the compiler, but some sites want to force a particular value.
    35 On Unix systems, "configure" can be used to override this default. */
     72/* This limit is parameterized just in case anybody ever wants to change it.
     73   Care must be taken if it is increased, because it guards against integer
     74   overflow caused by enormously large patterns. */
     75#ifndef MAX_NAME_SIZE
     76#define MAX_NAME_SIZE 32
     77#endif
    3678
     79/* The value of NEWLINE determines the newline character sequence. On systems
     80   that support it, "configure" can be used to override the default, which is
     81   10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
     82   (ANYCRLF). */
    3783#ifndef NEWLINE
    3884#define NEWLINE '\n'
    3985#endif
    4086
    41 /* The value of LINK_SIZE determines the number of bytes used to store
    42 links as offsets within the compiled regex. The default is 2, which allows for
    43 compiled patterns up to 64K long. This covers the vast majority of cases.
    44 However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
    45 longer patterns in extreme cases. On Unix systems, "configure" can be used to
    46 override this default. */
     87/* Name of package */
     88#define PACKAGE "pcre"
    4789
    48 #ifndef LINK_SIZE
    49 #define LINK_SIZE   2
    50 #endif
     90/* Define to the address where bug reports for this package should be sent. */
     91#define PACKAGE_BUGREPORT ""
    5192
    52 /* The value of MATCH_LIMIT determines the default number of times the match()
    53 function can be called during a single execution of pcre_exec(). (There is a
    54 runtime method of setting a different limit.) The limit exists in order to
    55 catch runaway regular expressions that take for ever to determine that they do
    56 not match. The default is set very large so that it does not accidentally catch
    57 legitimate cases. On Unix systems, "configure" can be used to override this
    58 default default. */
     93/* Define to the full name of this package. */
     94#define PACKAGE_NAME "PCRE"
    5995
    60 #ifndef MATCH_LIMIT
    61 #define MATCH_LIMIT 10000000
    62 #endif
     96/* Define to the full name and version of this package. */
     97#define PACKAGE_STRING "PCRE 7.4"
     98
     99/* Define to the one symbol short name of this package. */
     100#define PACKAGE_TARNAME "pcre"
     101
     102/* Define to the version of this package. */
     103#define PACKAGE_VERSION "7.4"
    63104
    64105/* When calling PCRE via the POSIX interface, additional working storage is
    65 required for holding the pointers to capturing substrings because PCRE requires
    66 three integers per substring, whereas the POSIX interface provides only two. If
    67 the number of expected substrings is small, the wrapper function uses space on
    68 the stack, because this is faster than using malloc() for each call. The
    69 threshold above which the stack is no longer use is defined by POSIX_MALLOC_
    70 THRESHOLD. On Unix systems, "configure" can be used to override this default.
    71 */
    72 
     106   required for holding the pointers to capturing substrings because PCRE
     107   requires three integers per substring, whereas the POSIX interface provides
     108   only two. If the number of expected substrings is small, the wrapper
     109   function uses space on the stack, because this is faster than using
     110   malloc() for each call. The threshold above which the stack is no longer
     111   used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
     112   "configure" can be used to override this default. */
    73113#ifndef POSIX_MALLOC_THRESHOLD
    74114#define POSIX_MALLOC_THRESHOLD 10
     
    82122match() function. On Unix systems, "configure" can be used to set this in the
    83123Makefile (use --disable-stack-for-recursion). */
    84 
    85124/* #define NO_RECURSE */
    86125
    87 #define SUPPORT_UTF8
    88 #define SUPPORT_UCP
     126/* Define to enable support for Unicode properties */
     127#define SUPPORT_UCP 
    89128
    90 #ifndef HAVE_MEMMOVE
    91 /* hm... there must be a better way */
    92 # define HAVE_MEMMOVE 1
    93 #endif
     129/* Define to enable support for the UTF-8 Unicode encoding. */
     130#define SUPPORT_UTF8
    94131
    95 /* End */
     132/* Version number of package */
     133#define VERSION "7.4"
     134
     135/* Define to empty if `const' does not conform to ANSI C. */
     136/* #undef const */
     137
     138/* Define to `unsigned int' if <sys/types.h> does not define. */
     139/* #undef size_t */
  • chicken/trunk/pcre/dftables.c

    r2926 r6175  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2005 University of Cambridge
     9           Copyright (c) 1997-2007 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
     
    3939
    4040
    41 /* This is a freestanding support program to generate a file containing default
    42 character tables for PCRE. The tables are built according to the default C
     41/* This is a freestanding support program to generate a file containing
     42character tables for PCRE. The tables are built according to the current
    4343locale. Now that pcre_maketables is a function visible to the outside world, we
    4444make use of its code from here in order to be consistent. */
     45
     46#ifdef HAVE_CONFIG_H
     47#include "config.h"
     48#endif
    4549
    4650#include <ctype.h>
    4751#include <stdio.h>
    4852#include <string.h>
     53#include <locale.h>
    4954
    5055#include "pcre_internal.h"
     
    5661int main(int argc, char **argv)
    5762{
    58 int i;
    5963FILE *f;
    60 const unsigned char *tables = pcre_maketables();
    61 const unsigned char *base_of_tables = tables;
     64int i = 1;
     65const unsigned char *tables;
     66const unsigned char *base_of_tables;
    6267
    63 if (argc != 2)
     68/* By default, the default C locale is used rather than what the building user
     69happens to have set. However, if the -L option is given, set the locale from
     70the LC_xxx environment variables. */
     71
     72if (argc > 1 && strcmp(argv[1], "-L") == 0)
     73  {
     74  setlocale(LC_ALL, "");        /* Set from environment variables */
     75  i++;
     76  }
     77
     78if (argc < i + 1)
    6479  {
    6580  fprintf(stderr, "dftables: one filename argument is required\n");
     
    6782  }
    6883
    69 f = fopen(argv[1], "wb");
     84tables = pcre_maketables();
     85base_of_tables = tables;
     86
     87f = fopen(argv[i], "wb");
    7088if (f == NULL)
    7189  {
     
    7492  }
    7593
    76 /* There are two fprintf() calls here, because gcc in pedantic mode complains
    77 about the very long string otherwise. */
     94/* There are several fprintf() calls here, because gcc in pedantic mode
     95complains about the very long string otherwise. */
    7896
    7997fprintf(f,
     
    8199  "*      Perl-Compatible Regular Expressions       *\n"
    82100  "*************************************************/\n\n"
    83   "/* This file is automatically written by the dftables auxiliary \n"
    84   "program. If you edit it by hand, you might like to edit the Makefile to \n"
    85   "prevent its ever being regenerated.\n\n");
     101  "/* This file was automatically written by the dftables auxiliary\n"
     102  "program. It contains character tables that are used when no external\n"
     103  "tables are passed to PCRE by the application that calls it. The tables\n"
     104  "are used only for characters whose code values are less than 256.\n\n");
    86105fprintf(f,
    87   "This file contains the default tables for characters with codes less than\n"
    88   "128 (ASCII characters). These tables are used when no external tables are\n"
    89   "passed to PCRE. */\n\n"
     106  "The following #includes are present because without them gcc 4.x may remove\n"
     107  "the array definition from the final binary if PCRE is built into a static\n"
     108  "library and dead code stripping is activated. This leads to link errors.\n"
     109  "Pulling in the header ensures that the array gets flagged as \"someone\n"
     110  "outside this compilation unit might reference this\" and so it will always\n"
     111  "be supplied to the linker. */\n\n"
     112  "#ifdef HAVE_CONFIG_H\n"
     113  "#include \"config.h\"\n"
     114  "#endif\n\n"
     115  "#include \"pcre_internal.h\"\n\n");
     116fprintf(f,
    90117  "const unsigned char _pcre_default_tables[] = {\n\n"
    91118  "/* This table is a lower casing table. */\n\n");
     
    163190if (isprint(i-1)) fprintf(f, " %c ", i-1);
    164191  else fprintf(f, "%3d", i-1);
    165 fprintf(f, " */\n\n/* End of chartables.c */\n");
     192fprintf(f, " */\n\n/* End of pcre_chartables.c */\n");
    166193
    167194fclose(f);
  • chicken/trunk/pcre/pcre.h

    r2926 r6175  
    33*************************************************/
    44
    5 /* In its original form, this is the .in file that is transformed by
    6 "configure" into pcre.h.
    7 
    8            Copyright (c) 1997-2005 University of Cambridge
     5/* This is the public header file for the PCRE library, to be #included by
     6applications that call the PCRE functions.
     7
     8           Copyright (c) 1997-2007 University of Cambridge
    99
    1010-----------------------------------------------------------------------------
     
    4040#define _PCRE_H
    4141
    42 /* The file pcre.h is build by "configure". Do not edit it; instead
    43 make changes to pcre.in. */
    44 
    45 #define PCRE_MAJOR          6
    46 #define PCRE_MINOR          13
    47 #define PCRE_DATE           15-Aug-2005
    48 
    49 /* Win32 uses DLL by default; it needs special stuff for exported functions. */
    50 
    51 #ifdef _WIN32
    52 #  ifdef PCRE_DEFINITION
    53 #    ifdef DLL_EXPORT
    54 #      define PCRE_DATA_SCOPE __declspec(dllexport)
     42/* The current PCRE version information. */
     43
     44#define PCRE_MAJOR          7
     45#define PCRE_MINOR          4
     46#define PCRE_PRERELEASE     
     47#define PCRE_DATE           2007-09-21
     48
     49/* When an application links to a PCRE DLL in Windows, the symbols that are
     50imported have to be identified as such. When building PCRE, the appropriate
     51export setting is defined in pcre_internal.h, which includes this file. So we
     52don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
     53
     54#if defined(_WIN32) && !defined(PCRE_STATIC)
     55#  ifndef PCRE_EXP_DECL
     56#    define PCRE_EXP_DECL  extern __declspec(dllimport)
     57#  endif
     58#  ifdef __cplusplus
     59#    ifndef PCRECPP_EXP_DECL
     60#      define PCRECPP_EXP_DECL  extern __declspec(dllimport)
    5561#    endif
     62#    ifndef PCRECPP_EXP_DEFN
     63#      define PCRECPP_EXP_DEFN  __declspec(dllimport)
     64#    endif
     65#  endif
     66#endif
     67
     68/* By default, we use the standard "extern" declarations. */
     69
     70#ifndef PCRE_EXP_DECL
     71#  ifdef __cplusplus
     72#    define PCRE_EXP_DECL  extern "C"
    5673#  else
    57 #    ifndef PCRE_STATIC
    58 #      define PCRE_DATA_SCOPE extern __declspec(dllimport)
    59 #    endif
    60 #  endif
    61 #endif
    62 
    63 /* For other operating systems, we use the standard "extern". */
    64 
    65 #ifndef PCRE_DATA_SCOPE
    66 #  ifdef __cplusplus
    67 #    define PCRE_DATA_SCOPE     extern "C"
    68 #  else
    69 #    define PCRE_DATA_SCOPE     extern
     74#    define PCRE_EXP_DECL  extern
     75#  endif
     76#endif
     77
     78#ifdef __cplusplus
     79#  ifndef PCRECPP_EXP_DECL
     80#    define PCRECPP_EXP_DECL  extern
     81#  endif
     82#  ifndef PCRECPP_EXP_DEFN
     83#    define PCRECPP_EXP_DEFN
    7084#  endif
    7185#endif
     
    103117#define PCRE_DFA_RESTART        0x00020000
    104118#define PCRE_FIRSTLINE          0x00040000
     119#define PCRE_DUPNAMES           0x00080000
     120#define PCRE_NEWLINE_CR         0x00100000
     121#define PCRE_NEWLINE_LF         0x00200000
     122#define PCRE_NEWLINE_CRLF       0x00300000
     123#define PCRE_NEWLINE_ANY        0x00400000
     124#define PCRE_NEWLINE_ANYCRLF    0x00500000
     125#define PCRE_BSR_ANYCRLF        0x00800000
     126#define PCRE_BSR_UNICODE        0x01000000
    105127
    106128/* Exec-time and get/set-time error codes */
     
    110132#define PCRE_ERROR_BADOPTION       (-3)
    111133#define PCRE_ERROR_BADMAGIC        (-4)
    112 #define PCRE_ERROR_UNKNOWN_NODE    (-5)
     134#define PCRE_ERROR_UNKNOWN_OPCODE  (-5)
     135#define PCRE_ERROR_UNKNOWN_NODE    (-5)  /* For backward compatibility */
    113136#define PCRE_ERROR_NOMEMORY        (-6)
    114137#define PCRE_ERROR_NOSUBSTRING     (-7)
     
    126149#define PCRE_ERROR_DFA_WSSIZE     (-19)
    127150#define PCRE_ERROR_DFA_RECURSE    (-20)
     151#define PCRE_ERROR_RECURSIONLIMIT (-21)
     152#define PCRE_ERROR_NULLWSLIMIT    (-22)  /* No longer actually used */
     153#define PCRE_ERROR_BADNEWLINE     (-23)
    128154
    129155/* Request types for pcre_fullinfo() */
     
    142168#define PCRE_INFO_STUDYSIZE         10
    143169#define PCRE_INFO_DEFAULT_TABLES    11
    144 
    145 /* Request types for pcre_config() */
     170#define PCRE_INFO_OKPARTIAL         12
     171#define PCRE_INFO_JCHANGED          13
     172#define PCRE_INFO_HASCRORLF         14
     173
     174/* Request types for pcre_config(). Do not re-arrange, in order to remain
     175compatible. */
    146176
    147177#define PCRE_CONFIG_UTF8                    0
     
    152182#define PCRE_CONFIG_STACKRECURSE            5
    153183#define PCRE_CONFIG_UNICODE_PROPERTIES      6
    154 
    155 /* Bit flags for the pcre_extra structure */
    156 
    157 #define PCRE_EXTRA_STUDY_DATA          0x0001
    158 #define PCRE_EXTRA_MATCH_LIMIT         0x0002
    159 #define PCRE_EXTRA_CALLOUT_DATA        0x0004
    160 #define PCRE_EXTRA_TABLES              0x0008
     184#define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
     185#define PCRE_CONFIG_BSR                     8
     186
     187/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
     188these bits, just add new ones on the end, in order to remain compatible. */
     189
     190#define PCRE_EXTRA_STUDY_DATA             0x0001
     191#define PCRE_EXTRA_MATCH_LIMIT            0x0002
     192#define PCRE_EXTRA_CALLOUT_DATA           0x0004
     193#define PCRE_EXTRA_TABLES                 0x0008
     194#define PCRE_EXTRA_MATCH_LIMIT_RECURSION  0x0010
    161195
    162196/* Types */
     
    164198struct real_pcre;                 /* declaration; the definition is private  */
    165199typedef struct real_pcre pcre;
     200
     201/* When PCRE is compiled as a C++ library, the subject pointer type can be
     202replaced with a custom type. For conventional use, the public interface is a
     203const char *. */
     204
     205#ifndef PCRE_SPTR
     206#define PCRE_SPTR const char *
     207#endif
    166208
    167209/* The structure for passing additional data to pcre_exec(). This is defined in
     
    175217  void *callout_data;             /* Data passed back in callouts */
    176218  const unsigned char *tables;    /* Pointer to character tables */
     219  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
    177220} pcre_extra;
    178221
     
    187230  int          callout_number;    /* Number compiled into pattern */
    188231  int         *offset_vector;     /* The offset vector */
    189   const char  *subject;           /* The subject being matched */
     232  PCRE_SPTR    subject;           /* The subject being matched */
    190233  int          subject_length;    /* The length of the subject */
    191234  int          start_match;       /* Offset to start of this match attempt */
     
    207250
    208251#ifndef VPCOMPAT
    209 PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
    210 PCRE_DATA_SCOPE void  (*pcre_free)(void *);
    211 PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
    212 PCRE_DATA_SCOPE void  (*pcre_stack_free)(void *);
    213 PCRE_DATA_SCOPE int   (*pcre_callout)(pcre_callout_block *);
     252PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
     253PCRE_EXP_DECL void  (*pcre_free)(void *);
     254PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
     255PCRE_EXP_DECL void  (*pcre_stack_free)(void *);
     256PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
    214257#else   /* VPCOMPAT */
    215 PCRE_DATA_SCOPE void *pcre_malloc(size_t);
    216 PCRE_DATA_SCOPE void  pcre_free(void *);
    217 PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
    218 PCRE_DATA_SCOPE void  pcre_stack_free(void *);
    219 PCRE_DATA_SCOPE int   pcre_callout(pcre_callout_block *);
     258PCRE_EXP_DECL void *pcre_malloc(size_t);
     259PCRE_EXP_DECL void  pcre_free(void *);
     260PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
     261PCRE_EXP_DECL void  pcre_stack_free(void *);
     262PCRE_EXP_DECL int   pcre_callout(pcre_callout_block *);
    220263#endif  /* VPCOMPAT */
    221264
    222265/* Exported PCRE functions */
    223266
    224 PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
     267PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
    225268                  const unsigned char *);
    226 PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
     269PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
    227270                  int *, const unsigned char *);
    228 PCRE_DATA_SCOPE int  pcre_config(int, void *);
    229 PCRE_DATA_SCOPE int  pcre_copy_named_substring(const pcre *, const char *,
     271PCRE_EXP_DECL int  pcre_config(int, void *);
     272PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
    230273                  int *, int, const char *, char *, int);
    231 PCRE_DATA_SCOPE int  pcre_copy_substring(const char *, int *, int, int, char *,
     274PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
    232275                  int);
    233 PCRE_DATA_SCOPE int  pcre_dfa_exec(const pcre *, const pcre_extra *,
     276PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
    234277                  const char *, int, int, int, int *, int , int *, int);
    235 PCRE_DATA_SCOPE int  pcre_exec(const pcre *, const pcre_extra *, const char *,
     278PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
    236279                   int, int, int, int *, int);
    237 PCRE_DATA_SCOPE void pcre_free_substring(const char *);
    238 PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
    239 PCRE_DATA_SCOPE int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
     280PCRE_EXP_DECL void pcre_free_substring(const char *);
     281PCRE_EXP_DECL void pcre_free_substring_list(const char **);
     282PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
    240283                  void *);
    241 PCRE_DATA_SCOPE int  pcre_get_named_substring(const pcre *, const char *,
     284PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
    242285                  int *, int, const char *, const char **);
    243 PCRE_DATA_SCOPE int  pcre_get_stringnumber(const pcre *, const char *);
    244 PCRE_DATA_SCOPE int  pcre_get_substring(const char *, int *, int, int,
     286PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
     287PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
     288                  char **, char **);
     289PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
    245290                  const char **);
    246 PCRE_DATA_SCOPE int  pcre_get_substring_list(const char *, int *, int,
     291PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
    247292                  const char ***);
    248 PCRE_DATA_SCOPE int  pcre_info(const pcre *, int *, int *);
    249 PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
    250 PCRE_DATA_SCOPE int  pcre_refcount(pcre *, int);
    251 PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
    252 PCRE_DATA_SCOPE const char *pcre_version(void);
     293PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *);
     294PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
     295PCRE_EXP_DECL int  pcre_refcount(pcre *, int);
     296PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
     297PCRE_EXP_DECL const char *pcre_version(void);
    253298
    254299#ifdef __cplusplus
  • chicken/trunk/pcre/pcre_compile.c

    r2926 r6175  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2005 University of Cambridge
     9           Copyright (c) 1997-2007 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
     
    4343
    4444
     45#ifdef HAVE_CONFIG_H
     46#include "config.h"
     47#endif
     48
     49#define NLBLOCK cd             /* Block containing newline information */
     50#define PSSTART start_pattern  /* Field containing processed string start */
     51#define PSEND   end_pattern    /* Field containing processed string end */
     52
    4553#include "pcre_internal.h"
     54
     55
     56/* When DEBUG is defined, we need the pcre_printint() function, which is also
     57used by pcretest. DEBUG is not defined when building a production library. */
     58
     59#ifdef DEBUG
     60#include "pcre_printint.src"
     61#endif
     62
     63
     64/* Macro for setting individual bits in class bitmaps. */
     65
     66#define SETBIT(a,b) a[b/8] |= (1 << (b%8))
     67
     68/* Maximum length value to check against when making sure that the integer that
     69holds the compiled pattern length does not overflow. We make it a bit less than
     70INT_MAX to allow for adding in group terminating bytes, so that we don't have
     71to check them every time. */
     72
     73#define OFLOW_MAX (INT_MAX - 20)
    4674
    4775
     
    5078*************************************************/
    5179
    52 /* Maximum number of items on the nested bracket stacks at compile time. This
    53 applies to the nesting of all kinds of parentheses. It does not limit
    54 un-nested, non-capturing parentheses. This number can be made bigger if
    55 necessary - it is used to dimension one int and one unsigned char vector at
    56 compile time. */
    57 
    58 #define BRASTACK_SIZE 200
     80/* This value specifies the size of stack workspace that is used during the
     81first pre-compile phase that determines how much memory is required. The regex
     82is partly compiled into this space, but the compiled parts are discarded as
     83soon as they can be, so that hopefully there will never be an overrun. The code
     84does, however, check for an overrun. The largest amount I've seen used is 218,
     85so this number is very generous.
     86
     87The same workspace is used during the second, actual compile phase for
     88remembering forward references to groups so that they can be filled in at the
     89end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE
     90is 4 there is plenty of room. */
     91
     92#define COMPILE_WORK_SIZE (4096)
    5993
    6094
     
    6498is invalid. */
    6599
    66 #if !EBCDIC   /* This is the "normal" table for ASCII systems */
     100#ifndef EBCDIC  /* This is the "normal" table for ASCII systems */
    67101static const short int escapes[] = {
    68102     0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
    69103     0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
    70104   '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
    71      0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */
    72 -ESC_P, -ESC_Q,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
     105-ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
     106-ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */
    73107-ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
    74108   '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
    75      0,      0,      0,      0,      0,      0,  ESC_n,      0,   /* h - o */
    76 -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0,      0, -ESC_w,   /* p - w */
     109-ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */
     110-ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */
    77111     0,      0, -ESC_z                                            /* x - z */
    78112};
    79113
    80 #else         /* This is the "abnormal" table for EBCDIC systems */
     114#else           /* This is the "abnormal" table for EBCDIC systems */
    81115static const short int escapes[] = {
    82116/*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
     
    88122/*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',
    89123/*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
    90 /*  88 */     0,     0,      0,     '{',      0,     0,      0,      0,
    91 /*  90 */     0,     0,      0,     'l',      0, ESC_n,      0, -ESC_p,
     124/*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
     125/*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,
    92126/*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
    93 /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,     0, -ESC_w,      0,
     127/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
    94128/*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
    95129/*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,
    96130/*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
    97131/*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
    98 /*  C8 */     0,     0,      0,       0,      0,     0,      0,      0,
    99 /*  D0 */   '}',     0,      0,       0,      0,     0,      0, -ESC_P,
    100 /*  D8 */-ESC_Q,     0,      0,       0,      0,     0,      0,      0,
    101 /*  E0 */  '\\',     0, -ESC_S,       0,      0,     0, -ESC_W, -ESC_X,
     132/*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
     133/*  D0 */   '}',     0, -ESC_K,       0,      0,     0,      0, -ESC_P,
     134/*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
     135/*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
    102136/*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
    103137/*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
     
    107141
    108142
    109 /* Tables of names of POSIX character classes and their lengths. The list is
    110 terminated by a zero length entry. The first three must be alpha, upper, lower,
    111 as this is assumed for handling case independence. */
    112 
    113 static const char *const posix_names[] = {
    114   "alpha", "lower", "upper",
    115   "alnum", "ascii", "blank", "cntrl", "digit", "graph",
    116   "print", "punct", "space", "word",  "xdigit" };
     143/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
     144searched linearly. Put all the names into a single string, in order to reduce
     145the number of relocations when a shared library is dynamically linked. */
     146
     147typedef struct verbitem {
     148  int   len;   /* length of the verb's name as stored in verbnames */
     149  int   op;    /* opcode that the verb is associated with */
     150} verbitem;
     151
     152static const char verbnames[] =   /* NUL-separated names, same order as verbs[] */
     153  "ACCEPT\0"
     154  "COMMIT\0"
     155  "F\0"
     156  "FAIL\0"
     157  "PRUNE\0"
     158  "SKIP\0"
     159  "THEN";
     160
     161static verbitem verbs[] = {   /* NOTE(review): not modified in the visible code; could probably be const - confirm */
     162  { 6, OP_ACCEPT },
     163  { 6, OP_COMMIT },
     164  { 1, OP_FAIL },     /* "F" shares its opcode with "FAIL" */
     165  { 4, OP_FAIL },
     166  { 5, OP_PRUNE },
     167  { 4, OP_SKIP  },
     168  { 4, OP_THEN  }
     169};
     170
     171static int verbcount = sizeof(verbs)/sizeof(verbitem);   /* number of entries in verbs[] */
     172
     173
     174/* Tables of names of POSIX character classes and their lengths. The names are
     175now all in a single string, to reduce the number of relocations when a shared
     176library is dynamically loaded. The list of lengths is terminated by a zero
     177length entry. The first three must be alpha, lower, upper, as this is assumed
     178for handling case independence. */
     179
     180static const char posix_names[] =   /* class names packed into one NUL-separated string */
     181  "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"
     182  "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"
     183  "word\0"   "xdigit";
    117184
    118185static const uschar posix_name_lengths[] = {   /* one length per name above, in order; the final 0 ends the list */
    119186  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
    120187
    121 /* Table of class bit maps for each POSIX class; up to three may be combined
    122 to form the class. The table for [:blank:] is dynamically modified to remove
    123 the vertical space characters. */
     188/* Table of class bit maps for each POSIX class. Each class is formed from a
     189base map, with an optional addition or removal of another map. Then, for some
     190classes, there is some additional tweaking: for [:blank:] the vertical space
     191characters are removed, and for [:alpha:] and [:alnum:] the underscore
     192character is removed. The triples in the table consist of the base map offset,
     193second map offset or -1 if no second map, and a non-negative value for map
     194addition or a negative value for map subtraction (if there are two maps). The
     195absolute value of the third field has these meanings: 0 => no tweaking, 1 =>
     196remove vertical space characters, 2 => remove underscore. */
    124197
    125198static const int posix_class_maps[] = {
    126   cbit_lower, cbit_upper, -1,             /* alpha */
    127   cbit_lower, -1,         -1,             /* lower */
    128   cbit_upper, -1,         -1,             /* upper */
    129   cbit_digit, cbit_lower, cbit_upper,     /* alnum */
    130   cbit_print, cbit_cntrl, -1,             /* ascii */
    131   cbit_space, -1,         -1,             /* blank - a GNU extension */
    132   cbit_cntrl, -1,         -1,             /* cntrl */
    133   cbit_digit, -1,         -1,             /* digit */
    134   cbit_graph, -1,         -1,             /* graph */
    135   cbit_print, -1,         -1,             /* print */
    136   cbit_punct, -1,         -1,             /* punct */
    137   cbit_space, -1,         -1,             /* space */
    138   cbit_word,  -1,         -1,             /* word - a Perl extension */
    139   cbit_xdigit,-1,         -1              /* xdigit */
     199  cbit_word,  cbit_digit, -2,             /* alpha */
     200  cbit_lower, -1,          0,             /* lower */
     201  cbit_upper, -1,          0,             /* upper */
     202  cbit_word,  -1,          2,             /* alnum - word without underscore */
     203  cbit_print, cbit_cntrl,  0,             /* ascii */
     204  cbit_space, -1,          1,             /* blank - a GNU extension */
     205  cbit_cntrl, -1,          0,             /* cntrl */
     206  cbit_digit, -1,          0,             /* digit */
     207  cbit_graph, -1,          0,             /* graph */
     208  cbit_print, -1,          0,             /* print */
     209  cbit_punct, -1,          0,             /* punct */
     210  cbit_space, -1,          0,             /* space */
     211  cbit_word,  -1,          0,             /* word - a Perl extension */
     212  cbit_xdigit,-1,          0              /* xdigit */
    140213};
    141214
    142215
     216#define STRING(a)  # a   /* turn the argument, exactly as written, into a string literal */
     217#define XSTRING(s) STRING(s)   /* extra level so that a macro argument is expanded before being stringized */
     218
    143219/* The texts of compile-time error messages. These are "char *" because they
    144 are passed to the outside world. */
    145 
    146 static const char *error_texts[] = {
    147   "no error",
    148   "\\ at end of pattern",
    149   "\\c at end of pattern",
    150   "unrecognized character follows \\",
    151   "numbers out of order in {} quantifier",
     220are passed to the outside world. Do not ever re-use any error number, because
     221they are documented. Always add a new error instead. Messages marked DEAD below
     222are no longer used. This used to be a table of strings, but in order to reduce
     223the number of relocations needed when a shared library is loaded dynamically,
     224it is now one long string. We cannot use a table of offsets, because the
     225lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
     226simply count through to the one we want - this isn't a performance issue
     227because these strings are used only when there is a compilation error. */
     228
     229static const char error_texts[] =
     230  "no error\0"
     231  "\\ at end of pattern\0"
     232  "\\c at end of pattern\0"
     233  "unrecognized character follows \\\0"
     234  "numbers out of order in {} quantifier\0"
    152235  /* 5 */
    153   "number too big in {} quantifier",
    154   "missing terminating ] for character class",
    155   "invalid escape sequence in character class",
    156   "range out of order in character class",
    157   "nothing to repeat",
     236  "number too big in {} quantifier\0"
     237  "missing terminating ] for character class\0"
     238  "invalid escape sequence in character class\0"
     239  "range out of order in character class\0"
     240  "nothing to repeat\0"
    158241  /* 10 */
    159   "operand of unlimited repeat could match the empty string",
    160   "internal error: unexpected repeat",
    161   "unrecognized character after (?",
    162   "POSIX named classes are supported only within a class",
    163   "missing )",
     242  "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
     243  "internal error: unexpected repeat\0"
     244  "unrecognized character after (?\0"
     245  "POSIX named classes are supported only within a class\0"
     246  "missing )\0"
    164247  /* 15 */
    165   "reference to non-existent subpattern",
    166   "erroffset passed as NULL",
    167   "unknown option bit(s) set",
    168   "missing ) after comment",
    169   "parentheses nested too deeply",
     248  "reference to non-existent subpattern\0"
     249  "erroffset passed as NULL\0"
     250  "unknown option bit(s) set\0"
     251  "missing ) after comment\0"
     252  "parentheses nested too deeply\0"  /** DEAD **/
    170253  /* 20 */
    171   "regular expression too large",
    172   "failed to get memory",
    173   "unmatched parentheses",
    174   "internal error: code overflow",
    175   "unrecognized character after (?<",
     254  "regular expression is too large\0"
     255  "failed to get memory\0"
     256  "unmatched parentheses\0"
     257  "internal error: code overflow\0"
     258  "unrecognized character after (?<\0"
    176259  /* 25 */
    177   "lookbehind assertion is not fixed length",
    178   "malformed number after (?(",
    179   "conditional group contains more than two branches",
    180   "assertion expected after (?(",
    181   "(?R or (?digits must be followed by )",
     260  "lookbehind assertion is not fixed length\0"
     261  "malformed number or name after (?(\0"
     262  "conditional group contains more than two branches\0"
     263  "assertion expected after (?(\0"
     264  "(?R or (?[+-]digits must be followed by )\0"
    182265  /* 30 */
    183   "unknown POSIX class name",
    184   "POSIX collating elements are not supported",
    185   "this version of PCRE is not compiled with PCRE_UTF8 support",
    186   "spare error",
    187   "character value in \\x{...} sequence is too large",
     266  "unknown POSIX class name\0"
     267  "POSIX collating elements are not supported\0"
     268  "this version of PCRE is not compiled with PCRE_UTF8 support\0"
     269  "spare error\0"  /** DEAD **/
     270  "character value in \\x{...} sequence is too large\0"
    188271  /* 35 */
    189   "invalid condition (?(0)",
    190   "\\C not allowed in lookbehind assertion",
    191   "PCRE does not support \\L, \\l, \\N, \\U, or \\u",
    192   "number after (?C is > 255",
    193   "closing ) for (?C expected",
     272  "invalid condition (?(0)\0"
     273  "\\C not allowed in lookbehind assertion\0"
     274  "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
     275  "number after (?C is > 255\0"
     276  "closing ) for (?C expected\0"
    194277  /* 40 */
    195   "recursive call could loop indefinitely",
    196   "unrecognized character after (?P",
    197   "syntax error after (?P",
    198   "two named groups have the same name",
    199   "invalid UTF-8 string",
     278  "recursive call could loop indefinitely\0"
     279  "unrecognized character after (?P\0"
     280  "syntax error in subpattern name (missing terminator)\0"
     281  "two named subpatterns have the same name\0"
     282  "invalid UTF-8 string\0"
    200283  /* 45 */
    201   "support for \\P, \\p, and \\X has not been compiled",
    202   "malformed \\P or \\p sequence",
    203   "unknown property name after \\P or \\p"
    204 };
     284  "support for \\P, \\p, and \\X has not been compiled\0"
     285  "malformed \\P or \\p sequence\0"
     286  "unknown property name after \\P or \\p\0"
     287  "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
     288  "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
     289  /* 50 */
     290  "repeated subpattern is too long\0"    /** DEAD **/
     291  "octal value is greater than \\377 (not in UTF-8 mode)\0"
     292  "internal error: overran compiling workspace\0"
     293  "internal error: previously-checked referenced subpattern not found\0"
     294  "DEFINE group contains more than one branch\0"
     295  /* 55 */
     296  "repeating a DEFINE group is not allowed\0"
     297  "inconsistent NEWLINE options\0"
     298  "\\g is not followed by a braced name or an optionally braced non-zero number\0"
     299  "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"
     300  "(*VERB) with an argument is not supported\0"
     301  /* 60 */
     302  "(*VERB) not recognized\0"
     303  "number is too big";
    205304
    206305
     
    221320Then we can use ctype_digit and ctype_xdigit in the code. */
    222321
    223 #if !EBCDIC    /* This is the "normal" case, for ASCII systems */
     322#ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */
    224323static const unsigned char digitab[] =
    225324  {
     
    257356  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
    258357
    259 #else          /* This is the "abnormal" case, for EBCDIC systems */
     358#else           /* This is the "abnormal" case, for EBCDIC systems */
    260359static const unsigned char digitab[] =
    261360  {
     
    271370  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
    272371  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
    273   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- ¬     */
     372  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
    274373  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
    275374  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
     
    305404  0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
    306405  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
    307   0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- ¬ */
     406  0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
    308407  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
    309408  0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
     
    332431
    333432static BOOL
    334   compile_regex(int, int, int *, uschar **, const uschar **, int *, BOOL, int,
    335     int *, int *, branch_chain *, compile_data *);
    336 
     433  compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
     434    int *, int *, branch_chain *, compile_data *, int *);
     435
     436
     437
     438/*************************************************
     439*            Find an error text                  *
     440*************************************************/
     441
     442/* The error texts are now all in one long string, to save on relocations. As
     443some of the text is of unknown length, we can't use a table of offsets.
     444Instead, just count through the strings. This is not a performance issue
     445because it happens only when there has been a compilation error.
     446
     447Argument:   the error number
     448Returns:    pointer to the error string
     449*/
     450
     451static const char *
     452find_error_text(int n)
     453{
     454const char *s = error_texts;   /* start of the NUL-separated message string */
     455for (; n > 0; n--) while (*s++ != 0);   /* step over n complete messages, one terminator each */
     /* NOTE(review): n is not range-checked here; a value larger than the number
        of messages would walk past the end of error_texts. Confirm that callers
        only pass valid error numbers. */
     456return s;   /* first character of message number n */
     457}
    337458
    338459
     
    343464/* This function is called when a \ has been encountered. It either returns a
    344465positive value for a simple escape such as \n, or a negative value which
    345 encodes one of the more complicated things such as \d. When UTF-8 is enabled,
    346 a positive value greater than 255 may be returned. On entry, ptr is pointing at
    347 the \. On exit, it is on the final character of the escape sequence.
     466encodes one of the more complicated things such as \d. A backreference to group
     467n is returned as -(ESC_REF + n); ESC_REF is the highest ESC_xxx macro. When
     468UTF-8 is enabled, a positive value greater than 255 may be returned. On entry,
     469ptr is pointing at the \. On exit, it is on the final character of the escape
     470sequence.
    348471
    349472Arguments:
     
    356479Returns:         zero or positive => a data character
    357480                 negative => a special escape sequence
    358                  on error, errorptr is set
     481                 on error, errorcodeptr is set
    359482*/
    360483
     
    363486  int options, BOOL isclass)
    364487{
    365 const uschar *ptr = *ptrptr;
     488BOOL utf8 = (options & PCRE_UTF8) != 0;
     489const uschar *ptr = *ptrptr + 1;
    366490int c, i;
    367491
     492GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
     493ptr--;                            /* Set pointer back to the last byte */
     494
    368495/* If backslash is at the end of the pattern, it's an error. */
    369496
    370 c = *(++ptr);
    371497if (c == 0) *errorcodeptr = ERR1;
    372498
     
    375501Otherwise further processing may be required. */
    376502
    377 #if !EBCDIC    /* ASCII coding */
     503#ifndef EBCDIC  /* ASCII coding */
    378504else if (c < '0' || c > 'z') {}                           /* Not alphameric */
    379505else if ((i = escapes[c - '0']) != 0) c = i;
    380506
    381 #else          /* EBCDIC coding */
     507#else           /* EBCDIC coding */
    382508else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
    383509else if ((i = escapes[c - 0x48]) != 0)  c = i;
     
    389515  {
    390516  const uschar *oldptr;
     517  BOOL braced, negated;
     518
    391519  switch (c)
    392520    {
     
    402530    break;
    403531
     532    /* \g must be followed by a number, either plain or braced. If positive, it
     533    is an absolute backreference. If negative, it is a relative backreference.
     534    This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
     535    reference to a named group. This is part of Perl's movement towards a
     536    unified syntax for back references. As this is synonymous with \k{name}, we
     537    fudge it up by pretending it really was \k. */
     538
     539    case 'g':
     540    if (ptr[1] == '{')
     541      {
     542      const uschar *p;
     543      for (p = ptr+2; *p != 0 && *p != '}'; p++)
     544        if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
     545      if (*p != 0 && *p != '}')
     546        {
     547        c = -ESC_k;
     548        break;
     549        }
     550      braced = TRUE;
     551      ptr++;
     552      }
     553    else braced = FALSE;
     554
     555    if (ptr[1] == '-')
     556      {
     557      negated = TRUE;
     558      ptr++;
     559      }
     560    else negated = FALSE;
     561
     562    c = 0;
     563    while ((digitab[ptr[1]] & ctype_digit) != 0)
     564      c = c * 10 + *(++ptr) - '0';
     565
     566    if (c < 0)
     567      {
     568      *errorcodeptr = ERR61;
     569      break;
     570      }
     571
     572    if (c == 0 || (braced && *(++ptr) != '}'))
     573      {
     574      *errorcodeptr = ERR57;
     575      break;
     576      }
     577
     578    if (negated)
     579      {
     580      if (c > bracount)
     581        {
     582        *errorcodeptr = ERR15;
     583        break;
     584        }
     585      c = bracount - (c - 1);
     586      }
     587
     588    c = -(ESC_REF + c);
     589    break;
     590
    404591    /* The handling of escape sequences consisting of a string of digits
    405592    starting with one that is not zero is not straightforward. By experiment,
     
    423610      while ((digitab[ptr[1]] & ctype_digit) != 0)
    424611        c = c * 10 + *(++ptr) - '0';
     612      if (c < 0)
     613        {
     614        *errorcodeptr = ERR61;
     615        break;
     616        }
    425617      if (c < 10 || c <= bracount)
    426618        {
     
    443635
    444636    /* \0 always starts an octal number, but we may drop through to here with a
    445     larger first octal digit. */
     637    larger first octal digit. The original code used just to take the least
     638    significant 8 bits of octal numbers (I think this is what early Perls used
     639    to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
     640    than 3 octal digits. */
    446641
    447642    case '0':
     
    449644    while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
    450645        c = c * 8 + *(++ptr) - '0';
    451     c &= 255;     /* Take least significant 8 bits */
     646    if (!utf8 && c > 255) *errorcodeptr = ERR51;
    452647    break;
    453648
    454     /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number
    455     which can be greater than 0xff, but only if the ddd are hex digits. */
     649    /* \x is complicated. \x{ddd} is a character number which can be greater
     650    than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
     651    treated as a data character. */
    456652
    457653    case 'x':
    458 #ifdef SUPPORT_UTF8
    459     if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)
     654    if (ptr[1] == '{')
    460655      {
    461656      const uschar *pt = ptr + 2;
    462       register int count = 0;
     657      int count = 0;
     658
    463659      c = 0;
    464660      while ((digitab[*pt] & ctype_xdigit) != 0)
    465661        {
    466         int cc = *pt++;
     662        register int cc = *pt++;
     663        if (c == 0 && cc == '0') continue;     /* Leading zeroes */
    467664        count++;
    468 #if !EBCDIC    /* ASCII coding */
     665
     666#ifndef EBCDIC  /* ASCII coding */
    469667        if (cc >= 'a') cc -= 32;               /* Convert to upper case */
    470         c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
    471 #else          /* EBCDIC coding */
     668        c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
     669#else           /* EBCDIC coding */
    472670        if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */
    473         c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
     671        c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
    474672#endif
    475673        }
     674
    476675      if (*pt == '}')
    477676        {
    478         if (c < 0 || count > 8) *errorcodeptr = ERR34;
     677        if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
    479678        ptr = pt;
    480679        break;
    481680        }
     681
    482682      /* If the sequence of hex digits does not end with '}', then we don't
    483683      recognize this construct; fall through to the normal \x handling. */
    484684      }
    485 #endif
    486 
    487     /* Read just a single hex char */
     685
     686    /* Read just a single-byte hex-defined char */
    488687
    489688    c = 0;
     
    492691      int cc;                               /* Some compilers don't like ++ */
    493692      cc = *(++ptr);                        /* in initializers */
    494 #if !EBCDIC    /* ASCII coding */
     693#ifndef EBCDIC  /* ASCII coding */
    495694      if (cc >= 'a') cc -= 32;              /* Convert to upper case */
    496695      c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
    497 #else          /* EBCDIC coding */
     696#else           /* EBCDIC coding */
    498697      if (cc <= 'z') cc += 64;              /* Convert to upper case */
    499698      c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
     
    502701    break;
    503702
    504     /* Other special escapes not starting with a digit are straightforward */
     703    /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
     704    This coding is ASCII-specific, but then the whole concept of \cx is
     705    ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
    505706
    506707    case 'c':
     
    509710      {
    510711      *errorcodeptr = ERR2;
    511       return 0;
    512       }
    513 
    514     /* A letter is upper-cased; then the 0x40 bit is flipped. This coding
    515     is ASCII-specific, but then the whole concept of \cx is ASCII-specific.
    516     (However, an EBCDIC equivalent has now been added.) */
    517 
    518 #if !EBCDIC    /* ASCII coding */
     712      break;
     713      }
     714
     715#ifndef EBCDIC  /* ASCII coding */
    519716    if (c >= 'a' && c <= 'z') c -= 32;
    520717    c ^= 0x40;
    521 #else          /* EBCDIC coding */
     718#else           /* EBCDIC coding */
    522719    if (c >= 'a' && c <= 'z') c += 64;
    523720    c ^= 0xC0;
     
    561758  ptrptr         points to the pattern position pointer
    562759  negptr         points to a boolean that is set TRUE for negation else FALSE
     760  dptr           points to an int that is set to the detailed property value
    563761  errorcodeptr   points to the error code variable
    564762
    565 Returns:     value from ucp_type_table, or -1 for an invalid type
     763Returns:         type value from ucp_type_table, or -1 for an invalid type
    566764*/
    567765
    568766static int
    569 get_ucp(const uschar **ptrptr, BOOL *negptr, int *errorcodeptr)
     767get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
    570768{
    571769int c, i, bot, top;
    572770const uschar *ptr = *ptrptr;
    573 char name[4];
     771char name[32];
    574772
    575773c = *(++ptr);
     
    578776*negptr = FALSE;
    579777
    580 /* \P or \p can be followed by a one- or two-character name in {}, optionally
    581 preceded by ^ for negation. */
     778/* \P or \p can be followed by a name in {}, optionally preceded by ^ for
     779negation. */
    582780
    583781if (c == '{')
     
    588786    ptr++;
    589787    }
    590   for (i = 0; i <= 2; i++)
     788  for (i = 0; i < (int)sizeof(name) - 1; i++)
    591789    {
    592790    c = *(++ptr);
     
    595793    name[i] = c;
    596794    }
    597   if (c !='}')   /* Try to distinguish error cases */
    598     {
    599     while (*(++ptr) != 0 && *ptr != '}');
    600     if (*ptr == '}') goto UNKNOWN_RETURN; else goto ERROR_RETURN;
    601     }
     795  if (c !='}') goto ERROR_RETURN;
    602796  name[i] = 0;
    603797  }
     
    620814while (bot < top)
    621815  {
    622   i = (bot + top)/2;
    623   c = strcmp(name, _pcre_utt[i].name);
    624   if (c == 0) return _pcre_utt[i].value;
     816  i = (bot + top) >> 1;
     817  c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
     818  if (c == 0)
     819    {
     820    *dptr = _pcre_utt[i].value;
     821    return _pcre_utt[i].type;
     822    }
    625823  if (c > 0) bot = i + 1; else top = i;
    626824  }
    627825
    628 UNKNOWN_RETURN:
    629826*errorcodeptr = ERR47;
    630827*ptrptr = ptr;
     
    737934*maxp = max;
    738935return p;
     936}
     937
     938
     939
     940/*************************************************
     941*       Find forward referenced subpattern       *
     942*************************************************/
     943
     944/* This function scans along a pattern's text looking for capturing
     945subpatterns, and counting them. If it finds a named pattern that matches the
     946name it is given, it returns its number. Alternatively, if the name is NULL, it
     947returns when it reaches a given numbered subpattern. This is used for forward
     948references to subpatterns. We know that if (?P< is encountered, the name will
     949be terminated by '>' because that is checked in the first pass.
     950
     951Arguments:
     952  ptr          current position in the pattern
     953  count        current count of capturing parens so far encountered
     954  name         name to seek, or NULL if seeking a numbered subpattern
     955  lorn         name length, or subpattern number if name is NULL
     956  xmode        TRUE if we are in /x mode
     957
     958Returns:       the number of the subpattern sought (by name or number), or -1 if not found
     959*/
     960
     961static int
     962find_parens(const uschar *ptr, int count, const uschar *name, int lorn,
     963  BOOL xmode)
     964{
     965const uschar *thisname;   /* start of the name of the group currently being examined */
     966
     967for (; *ptr != 0; ptr++)
     968  {
     969  int term;   /* character that terminates the current group name */
     970
     971  /* Skip over backslashed characters and also entire \Q...\E */
     972
     973  if (*ptr == '\\')
     974    {
     975    if (*(++ptr) == 0) return -1;   /* pattern ends right after the backslash */
     976    if (*ptr == 'Q') for (;;)
     977      {
     978      while (*(++ptr) != 0 && *ptr != '\\');   /* advance to the next backslash or the end */
     979      if (*ptr == 0) return -1;
     /* NOTE(review): if the byte after this backslash is the terminating zero,
        the next pass of the loop pre-increments beyond it. Confirm that a
        pattern cannot reach here ending in "\Q...\". */
     980      if (*(++ptr) == 'E') break;
     981      }
     982    continue;
     983    }
     984
     985  /* Skip over character classes */
     986
     987  if (*ptr == '[')
     988    {
     /* NOTE(review): a ']' immediately after '[' ends the skip here; confirm this
        agrees with how the main compile treats a leading ']' in a class. */
     989    while (*(++ptr) != ']')
     990      {
     991      if (*ptr == 0) return -1;   /* unterminated class */
     992      if (*ptr == '\\')
     993        {
     994        if (*(++ptr) == 0) return -1;
     995        if (*ptr == 'Q') for (;;)   /* same \Q...\E skipping as outside a class */
     996          {
     997          while (*(++ptr) != 0 && *ptr != '\\');
     998          if (*ptr == 0) return -1;
     999          if (*(++ptr) == 'E') break;
     1000          }
     1001        continue;
     1002        }
     1003      }
     1004    continue;
     1005    }
     1006
     1007  /* Skip comments in /x mode */
     1008
     1009  if (xmode && *ptr == '#')
     1010    {
     1011    while (*(++ptr) != 0 && *ptr != '\n');   /* only '\n' is treated as ending the comment here */
     1012    if (*ptr == 0) return -1;
     1013    continue;
     1014    }
     1015
     1016  /* An opening parens must now be a real metacharacter */
     1017
     1018  if (*ptr != '(') continue;
     1019  if (ptr[1] != '?' && ptr[1] != '*')   /* plain "(": an unnamed capturing group */
     1020    {
     1021    count++;
     1022    if (name == NULL && count == lorn) return count;   /* reached the numbered group being sought */
     1023    continue;
     1024    }
     1025
     1026  ptr += 2;   /* step over "(?" or "(*" */
     1027  if (*ptr == 'P') ptr++;                      /* Allow optional P */
     1028
     1029  /* We have to disambiguate (?<! and (?<= from (?<name> */
     1030
     1031  if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&
     1032       *ptr != '\'')
     1033    continue;   /* not a named group, so it is not counted */
     1034
     1035  count++;
     1036
     1037  if (name == NULL && count == lorn) return count;
     1038  term = *ptr++;   /* opening delimiter: '<' or a single quote */
     1039  if (term == '<') term = '>';   /* a quote closes with itself, '<' closes with '>' */
     1040  thisname = ptr;
     /* No end-of-pattern test in this scan: it depends on the terminator being
        present, which the header comment says is checked in the first pass. */
     1041  while (*ptr != term) ptr++;
     1042  if (name != NULL && lorn == ptr - thisname &&   /* compare lengths first, then the bytes */
     1043      strncmp((const char *)name, (const char *)thisname, lorn) == 0)
     1044    return count;
     1045  }
     1046
     1047return -1;   /* reached the end of the pattern without a match */
    7391048}
    7401049
     
    7901099    case OP_CALLOUT:
    7911100    case OP_CREF:
    792     case OP_BRANUMBER:
     1101    case OP_RREF:
     1102    case OP_DEF:
    7931103    code += _pcre_OP_lengths[*code];
    7941104    break;
     
    8351145  int d;
    8361146  register int op = *cc;
    837   if (op >= OP_BRA) op = OP_BRA;
    838 
    8391147  switch (op)
    8401148    {
     1149    case OP_CBRA:
    8411150    case OP_BRA:
    8421151    case OP_ONCE:
    8431152    case OP_COND:
    844     d = find_fixedlength(cc, options);
     1153    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);
    8451154    if (d < 0) return d;
    8461155    branchlength += d;
     
    8771186
    8781187    case OP_REVERSE:
    879     case OP_BRANUMBER:
    8801188    case OP_CREF:
     1189    case OP_RREF:
     1190    case OP_DEF:
    8811191    case OP_OPT:
    8821192    case OP_CALLOUT:
     
    8961206    case OP_CHAR:
    8971207    case OP_CHARNC:
     1208    case OP_NOT:
    8981209    branchlength++;
    8991210    cc += 2;
     
    9221233    case OP_TYPEEXACT:
    9231234    branchlength += GET2(cc,1);
     1235    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
    9241236    cc += 4;
    9251237    break;
     
    9291241    case OP_PROP:
    9301242    case OP_NOTPROP:
    931     cc++;
     1243    cc += 2;
    9321244    /* Fall through */
    9331245
     
    10101322find_bracket(const uschar *code, BOOL utf8, int number)
    10111323{
    1012 #ifndef SUPPORT_UTF8
    1013 utf8 = utf8;               /* Stop pedantic compilers complaining */
    1014 #endif
    1015 
    10161324for (;;)
    10171325  {
    10181326  register int c = *code;
    10191327  if (c == OP_END) return NULL;
    1020   else if (c > OP_BRA)
     1328
     1329  /* XCLASS is used for classes that cannot be represented just by a bit
     1330  map. This includes negated single high-valued characters. The length in
     1331  the table is zero; the actual length is stored in the compiled code. */
     1332
     1333  if (c == OP_XCLASS) code += GET(code, 1);
     1334
     1335  /* Handle capturing bracket */
     1336
     1337  else if (c == OP_CBRA)
    10211338    {
    1022     int n = c - OP_BRA;
    1023     if (n > EXTRACT_BASIC_MAX) n = GET2(code, 2+LINK_SIZE);
     1339    int n = GET2(code, 1+LINK_SIZE);
    10241340    if (n == number) return (uschar *)code;
    1025     code += _pcre_OP_lengths[OP_BRA];
     1341    code += _pcre_OP_lengths[c];
    10261342    }
     1343
     1344  /* Otherwise, we can get the item's length from the table, except that for
     1345  repeated character types, we have to test for \p and \P, which have an extra
     1346  two bytes of parameters. */
     1347
    10271348  else
    10281349    {
     1350    switch(c)
     1351      {
     1352      case OP_TYPESTAR:
     1353      case OP_TYPEMINSTAR:
     1354      case OP_TYPEPLUS:
     1355      case OP_TYPEMINPLUS:
     1356      case OP_TYPEQUERY:
     1357      case OP_TYPEMINQUERY:
     1358      case OP_TYPEPOSSTAR:
     1359      case OP_TYPEPOSPLUS:
     1360      case OP_TYPEPOSQUERY:
     1361      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
     1362      break;
     1363
     1364      case OP_TYPEUPTO:
     1365      case OP_TYPEMINUPTO:
     1366      case OP_TYPEEXACT:
     1367      case OP_TYPEPOSUPTO:
     1368      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
     1369      break;
     1370      }
     1371
     1372    /* Add in the fixed length from the table */
     1373
    10291374    code += _pcre_OP_lengths[c];
    10301375
     1376  /* In UTF-8 mode, opcodes that are followed by a character may be followed by
     1377  a multi-byte character. The length in the table is a minimum, so we have to
     1378  arrange to skip the extra bytes. */
     1379
    10311380#ifdef SUPPORT_UTF8
    1032 
    1033     /* In UTF-8 mode, opcodes that are followed by a character may be followed
    1034     by a multi-byte character. The length in the table is a minimum, so we have
    1035     to scan along to skip the extra bytes. All opcodes are less than 128, so we
    1036     can use relatively efficient code. */
    1037 
    10381381    if (utf8) switch(c)
    10391382      {
     
    10431386      case OP_UPTO:
    10441387      case OP_MINUPTO:
     1388      case OP_POSUPTO:
    10451389      case OP_STAR:
    10461390      case OP_MINSTAR:
     1391      case OP_POSSTAR:
    10471392      case OP_PLUS:
    10481393      case OP_MINPLUS:
     1394      case OP_POSPLUS:
    10491395      case OP_QUERY:
    10501396      case OP_MINQUERY:
    1051       while ((*code & 0xc0) == 0x80) code++;
    1052       break;
    1053 
    1054       /* XCLASS is used for classes that cannot be represented just by a bit
    1055       map. This includes negated single high-valued characters. The length in
    1056       the table is zero; the actual length is stored in the compiled code. */
    1057 
    1058       case OP_XCLASS:
    1059       code += GET(code, 1) + 1;
     1397      case OP_POSQUERY:
     1398      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
    10601399      break;
    10611400      }
     
    10841423find_recurse(const uschar *code, BOOL utf8)
    10851424{
    1086 #ifndef SUPPORT_UTF8
    1087 utf8 = utf8;               /* Stop pedantic compilers complaining */
    1088 #endif
    1089 
    10901425for (;;)
    10911426  {
    10921427  register int c = *code;
    10931428  if (c == OP_END) return NULL;
    1094   else if (c == OP_RECURSE) return code;
    1095   else if (c > OP_BRA)
    1096     {
    1097     code += _pcre_OP_lengths[OP_BRA];
    1098     }
     1429  if (c == OP_RECURSE) return code;
     1430
     1431  /* XCLASS is used for classes that cannot be represented just by a bit
     1432  map. This includes negated single high-valued characters. The length in
     1433  the table is zero; the actual length is stored in the compiled code. */
     1434
     1435  if (c == OP_XCLASS) code += GET(code, 1);
     1436
     1437  /* Otherwise, we can get the item's length from the table, except that for
     1438  repeated character types, we have to test for \p and \P, which have an extra
     1439  two bytes of parameters. */
     1440
    10991441  else
    11001442    {
     1443    switch(c)
     1444      {
     1445      case OP_TYPESTAR:
     1446      case OP_TYPEMINSTAR:
     1447      case OP_TYPEPLUS:
     1448      case OP_TYPEMINPLUS:
     1449      case OP_TYPEQUERY:
     1450      case OP_TYPEMINQUERY:
     1451      case OP_TYPEPOSSTAR:
     1452      case OP_TYPEPOSPLUS:
     1453      case OP_TYPEPOSQUERY:
     1454      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
     1455      break;
     1456
     1457      case OP_TYPEPOSUPTO:
     1458      case OP_TYPEUPTO:
     1459      case OP_TYPEMINUPTO:
     1460      case OP_TYPEEXACT:
     1461      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
     1462      break;
     1463      }
     1464
     1465    /* Add in the fixed length from the table */
     1466
    11011467    code += _pcre_OP_lengths[c];
    1102 
    1103 #ifdef SUPPORT_UTF8
    11041468
    11051469    /* In UTF-8 mode, opcodes that are followed by a character may be followed
    11061470    by a multi-byte character. The length in the table is a minimum, so we have
    1107     to scan along to skip the extra bytes. All opcodes are less than 128, so we
    1108     can use relatively efficient code. */
    1109 
     1471    to arrange to skip the extra bytes. */
     1472
     1473#ifdef SUPPORT_UTF8
    11101474    if (utf8) switch(c)
    11111475      {
     
    11151479      case OP_UPTO:
    11161480      case OP_MINUPTO:
     1481      case OP_POSUPTO:
    11171482      case OP_STAR:
    11181483      case OP_MINSTAR:
     1484      case OP_POSSTAR:
    11191485      case OP_PLUS:
    11201486      case OP_MINPLUS:
     1487      case OP_POSPLUS:
    11211488      case OP_QUERY:
    11221489      case OP_MINQUERY:
    1123       while ((*code & 0xc0) == 0x80) code++;
    1124       break;
    1125 
    1126       /* XCLASS is used for classes that cannot be represented just by a bit
    1127       map. This includes negated single high-valued characters. The length in
    1128       the table is zero; the actual length is stored in the compiled code. */
    1129 
    1130       case OP_XCLASS:
    1131       code += GET(code, 1) + 1;
     1490      case OP_POSQUERY:
     1491      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
    11321492      break;
    11331493      }
     
    11441504
    11451505/* This function scans through a branch of a compiled pattern to see whether it
    1146 can match the empty string or not. It is called only from could_be_empty()
    1147 below. Note that first_significant_code() skips over assertions. If we hit an
    1148 unclosed bracket, we return "empty" - this means we've struck an inner bracket
    1149 whose current branch will already have been scanned.
     1506can match the empty string or not. It is called from could_be_empty()
     1507below and from compile_branch() when checking for an unlimited repeat of a
     1508group that can match nothing. Note that first_significant_code() skips over
     1509assertions. If we hit an unclosed bracket, we return "empty" - this means we've
     1510struck an inner bracket whose current branch will already have been scanned.
    11501511
    11511512Arguments:
     
    11611522{
    11621523register int c;
    1163 for (code = first_significant_code(code + 1 + LINK_SIZE, NULL, 0, TRUE);
     1524for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
    11641525     code < endcode;
    11651526     code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE))
     
    11691530  c = *code;
    11701531
    1171   if (c >= OP_BRA)
     1532  /* Groups with zero repeats can of course be empty; skip them. */
     1533
     1534  if (c == OP_BRAZERO || c == OP_BRAMINZERO)
     1535    {
     1536    code += _pcre_OP_lengths[c];
     1537    do code += GET(code, 1); while (*code == OP_ALT);
     1538    c = *code;
     1539    continue;
     1540    }
     1541
     1542  /* For other groups, scan the branches. */
     1543
     1544  if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
    11721545    {
    11731546    BOOL empty_branch;
     
    11851558    while (*code == OP_ALT);
    11861559    if (!empty_branch) return FALSE;   /* All branches are non-empty */
    1187     code += 1 + LINK_SIZE;
    11881560    c = *code;
     1561    continue;
    11891562    }
    11901563
    1191   else switch (c)
     1564  /* Handle the other opcodes */
     1565
     1566  switch (c)
    11921567    {
    1193     /* Check for quantifiers after a class */
     1568    /* Check for quantifiers after a class. XCLASS is used for classes that
     1569    cannot be represented just by a bit map. This includes negated single
     1570    high-valued characters. The length in _pcre_OP_lengths[] is zero; the
     1571    actual length is stored in the compiled code, so we must update "code"
     1572    here. */
    11941573
    11951574#ifdef SUPPORT_UTF8
    11961575    case OP_XCLASS:
    1197     ccode = code + GET(code, 1);
     1576    ccode = code += GET(code, 1);
    11981577    goto CHECK_CLASS_REPEAT;
    11991578#endif
     
    12451624    case OP_PLUS:
    12461625    case OP_MINPLUS:
     1626    case OP_POSPLUS:
    12471627    case OP_EXACT:
    12481628    case OP_NOTPLUS:
    12491629    case OP_NOTMINPLUS:
     1630    case OP_NOTPOSPLUS:
    12501631    case OP_NOTEXACT:
    12511632    case OP_TYPEPLUS:
    12521633    case OP_TYPEMINPLUS:
     1634    case OP_TYPEPOSPLUS:
    12531635    case OP_TYPEEXACT:
    12541636    return FALSE;
     1637
     1638    /* These are going to continue, as they may be empty, but we have to
     1639    fudge the length for the \p and \P cases. */
     1640
     1641    case OP_TYPESTAR:
     1642    case OP_TYPEMINSTAR:
     1643    case OP_TYPEPOSSTAR:
     1644    case OP_TYPEQUERY:
     1645    case OP_TYPEMINQUERY:
     1646    case OP_TYPEPOSQUERY:
     1647    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
     1648    break;
     1649
     1650    /* Same for these */
     1651
     1652    case OP_TYPEUPTO:
     1653    case OP_TYPEMINUPTO:
     1654    case OP_TYPEPOSUPTO:
     1655    if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
     1656    break;
    12551657
    12561658    /* End of branch */
     
    12621664    return TRUE;
    12631665
    1264     /* In UTF-8 mode, STAR, MINSTAR, QUERY, MINQUERY, UPTO, and MINUPTO  may be
    1265     followed by a multibyte character */
     1666    /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
     1667    MINUPTO, and POSUPTO may be followed by a multibyte character */
    12661668
    12671669#ifdef SUPPORT_UTF8
    12681670    case OP_STAR:
    12691671    case OP_MINSTAR:
     1672    case OP_POSSTAR:
    12701673    case OP_QUERY:
    12711674    case OP_MINQUERY:
     1675    case OP_POSQUERY:
    12721676    case OP_UPTO:
    12731677    case OP_MINUPTO:
     1678    case OP_POSUPTO:
    12741679    if (utf8) while ((code[2] & 0xc0) == 0x80) code++;
    12751680    break;
     
    13671772check_posix_name(const uschar *ptr, int len)
    13681773{
     1774const char *pn = posix_names;
    13691775register int yield = 0;
    13701776while (posix_name_lengths[yield] != 0)
    13711777  {
    13721778  if (len == posix_name_lengths[yield] &&
    1373     strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
     1779    strncmp((const char *)ptr, pn, len) == 0) return yield;
     1780  pn += posix_name_lengths[yield] + 1;
    13741781  yield++;
    13751782  }
     
    13891796it, after it has been compiled. This means that any OP_RECURSE items within it
    13901797that refer to the group itself or any contained groups have to have their
    1391 offsets adjusted. That is the job of this function. Before it is called, the
    1392 partially compiled regex must be temporarily terminated with OP_END.
     1798offsets adjusted. That is one of the jobs of this function. Before it is called,
     1799the partially compiled regex must be temporarily terminated with OP_END.
     1800
     1801This function has been extended with the possibility of forward references for
     1802recursions and subroutine calls. It must also check the list of such references
     1803for the group we are dealing with. If it finds that one of the recursions in
     1804the current group is on this list, it adjusts the offset in the list, not the
     1805value in the reference (which is a group number).
    13931806
    13941807Arguments:
     
    13971810  utf8       TRUE in UTF-8 mode
    13981811  cd         contains pointers to tables etc.
     1812  save_hwm   the hwm forward reference pointer at the start of the group
    13991813
    14001814Returns:     nothing
     
    14021816
    14031817static void
    1404 adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd)
     1818adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,
     1819  uschar *save_hwm)
    14051820{
    14061821uschar *ptr = group;
     1822
    14071823while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)
    14081824  {
    1409   int offset = GET(ptr, 1);
    1410   if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
     1825  int offset;
     1826  uschar *hc;
     1827
     1828  /* See if this recursion is on the forward reference list. If so, adjust the
     1829  reference. */
     1830
     1831  for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
     1832    {
     1833    offset = GET(hc, 0);
     1834    if (cd->start_code + offset == ptr + 1)
     1835      {
     1836      PUT(hc, 0, offset + adjust);
     1837      break;
     1838      }
     1839    }
     1840
     1841  /* Otherwise, adjust the recursion offset if it's after the start of this
     1842  group. */
     1843
     1844  if (hc >= cd->hwm)
     1845    {
     1846    offset = GET(ptr, 1);
     1847    if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
     1848    }
     1849
    14111850  ptr += 1 + LINK_SIZE;
    14121851  }
     
    14871926
    14881927static BOOL
    1489 get_othercase_range(int *cptr, int d, int *ocptr, int *odptr)
     1928get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
     1929  unsigned int *odptr)
    14901930{
    1491 int c, chartype, othercase, next;
     1931unsigned int c, othercase, next;
    14921932
    14931933for (c = *cptr; c <= d; c++)
    1494   {
    1495   if (_pcre_ucp_findchar(c, &chartype, &othercase) == ucp_L && othercase != 0)
    1496     break;
    1497   }
     1934  { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
    14981935
    14991936if (c > d) return FALSE;
     
    15041941for (++c; c <= d; c++)
    15051942  {
    1506   if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_L ||
    1507         othercase != next)
    1508     break;
     1943  if (_pcre_ucp_othercase(c) != next) break;
    15091944  next++;
    15101945  }
     
    15181953
    15191954
     1955
     1956/*************************************************
     1957*     Check if auto-possessifying is possible    *
     1958*************************************************/
     1959
     1960/* This function is called for unlimited repeats of certain items, to see
     1961whether the next thing could possibly match the repeated item. If not, it makes
     1962sense to automatically possessify the repeated item.
     1963
     1964Arguments:
     1965  op_code       the repeated op code
     1966  this          data for this item, depends on the opcode
     1967  utf8          TRUE in UTF-8 mode
     1968  utf8_char     used for utf8 character bytes, NULL if not relevant
     1969  ptr           next character in pattern
     1970  options       options bits
     1971  cd            contains pointers to tables etc.
     1972
     1973Returns:        TRUE if possessifying is wanted
     1974*/
     1975
     1976static BOOL
     1977check_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char,
     1978  const uschar *ptr, int options, compile_data *cd)
     1979{
     1980int next;
     1981
     1982/* Skip whitespace and comments in extended mode */
     1983
     1984if ((options & PCRE_EXTENDED) != 0)
     1985  {
     1986  for (;;)
     1987    {
     1988    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
     1989    if (*ptr == '#')
     1990      {
     1991      while (*(++ptr) != 0)
     1992        if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
     1993      }
     1994    else break;
     1995    }
     1996  }
     1997
     1998/* If the next item is one that we can handle, get its value. A non-negative
     1999value is a character, a negative value is an escape value. */
     2000
     2001if (*ptr == '\\')
     2002  {
     2003  int temperrorcode = 0;
     2004  next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
     2005  if (temperrorcode != 0) return FALSE;
     2006  ptr++;    /* Point after the escape sequence */
     2007  }
     2008
     2009else if ((cd->ctypes[*ptr] & ctype_meta) == 0)
     2010  {
     2011#ifdef SUPPORT_UTF8
     2012  if (utf8) { GETCHARINC(next, ptr); } else
     2013#endif
     2014  next = *ptr++;
     2015  }
     2016
     2017else return FALSE;
     2018
     2019/* Skip whitespace and comments in extended mode */
     2020
     2021if ((options & PCRE_EXTENDED) != 0)
     2022  {
     2023  for (;;)
     2024    {
     2025    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
     2026    if (*ptr == '#')
     2027      {
     2028      while (*(++ptr) != 0)
     2029        if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
     2030      }
     2031    else break;
     2032    }
     2033  }
     2034
     2035/* If the next thing is itself optional, we have to give up. */
     2036
     2037if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)
     2038  return FALSE;
     2039
     2040/* Now compare the next item with the previous opcode. If the previous is a
     2041positive single character match, "item" either contains the character or, if
     2042"item" is greater than 127 in utf8 mode, the character's bytes are in
     2043utf8_char. */
     2044
     2045
     2046/* Handle cases when the next item is a character. */
     2047
     2048if (next >= 0) switch(op_code)
     2049  {
     2050  case OP_CHAR:
     2051#ifdef SUPPORT_UTF8
     2052  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
     2053#endif
     2054  return item != next;
     2055
     2056  /* For CHARNC (caseless character) we must check the other case. If we have
     2057  Unicode property support, we can use it to test the other case of
     2058  high-valued characters. */
     2059
     2060  case OP_CHARNC:
     2061#ifdef SUPPORT_UTF8
     2062  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
     2063#endif
     2064  if (item == next) return FALSE;
     2065#ifdef SUPPORT_UTF8
     2066  if (utf8)
     2067    {
     2068    unsigned int othercase;
     2069    if (next < 128) othercase = cd->fcc[next]; else
     2070#ifdef SUPPORT_UCP
     2071    othercase = _pcre_ucp_othercase((unsigned int)next);
     2072#else
     2073    othercase = NOTACHAR;
     2074#endif
     2075    return (unsigned int)item != othercase;
     2076    }
     2077  else
     2078#endif  /* SUPPORT_UTF8 */
     2079  return (item != cd->fcc[next]);  /* Non-UTF-8 mode */
     2080
     2081  /* For OP_NOT, "item" must be a single-byte character. */
     2082
     2083  case OP_NOT:
     2084  if (next < 0) return FALSE;  /* Not a character */
     2085  if (item == next) return TRUE;
     2086  if ((options & PCRE_CASELESS) == 0) return FALSE;
     2087#ifdef SUPPORT_UTF8
     2088  if (utf8)
     2089    {
     2090    unsigned int othercase;
     2091    if (next < 128) othercase = cd->fcc[next]; else
     2092#ifdef SUPPORT_UCP
     2093    othercase = _pcre_ucp_othercase(next);
     2094#else
     2095    othercase = NOTACHAR;
     2096#endif
     2097    return (unsigned int)item == othercase;
     2098    }
     2099  else
     2100#endif  /* SUPPORT_UTF8 */
     2101  return (item == cd->fcc[next]);  /* Non-UTF-8 mode */
     2102
     2103  case OP_DIGIT:
     2104  return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
     2105
     2106  case OP_NOT_DIGIT:
     2107  return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;
     2108
     2109  case OP_WHITESPACE:
     2110  return next > 127 || (cd->ctypes[next] & ctype_space) == 0;
     2111
     2112  case OP_NOT_WHITESPACE:
     2113  return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;
     2114
     2115  case OP_WORDCHAR:
     2116  return next > 127 || (cd->ctypes[next] & ctype_word) == 0;
     2117
     2118  case OP_NOT_WORDCHAR:
     2119  return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
     2120
     2121  case OP_HSPACE:
     2122  case OP_NOT_HSPACE:
     2123  switch(next)
     2124    {
     2125    case 0x09:
     2126    case 0x20:
     2127    case 0xa0:
     2128    case 0x1680:
     2129    case 0x180e:
     2130    case 0x2000:
     2131    case 0x2001:
     2132    case 0x2002:
     2133    case 0x2003:
     2134    case 0x2004:
     2135    case 0x2005:
     2136    case 0x2006:
     2137    case 0x2007:
     2138    case 0x2008:
     2139    case 0x2009:
     2140    case 0x200A:
     2141    case 0x202f:
     2142    case 0x205f:
     2143    case 0x3000:
     2144    return op_code != OP_HSPACE;
     2145    default:
     2146    return op_code == OP_HSPACE;
     2147    }
     2148
     2149  case OP_VSPACE:
     2150  case OP_NOT_VSPACE:
     2151  switch(next)
     2152    {
     2153    case 0x0a:
     2154    case 0x0b:
     2155    case 0x0c:
     2156    case 0x0d:
     2157    case 0x85:
     2158    case 0x2028:
     2159    case 0x2029:
     2160    return op_code != OP_VSPACE;
     2161    default:
     2162    return op_code == OP_VSPACE;
     2163    }
     2164
     2165  default:
     2166  return FALSE;
     2167  }
     2168
     2169
     2170/* Handle the case when the next item is \d, \s, etc. */
     2171
     2172switch(op_code)
     2173  {
     2174  case OP_CHAR:
     2175  case OP_CHARNC:
     2176#ifdef SUPPORT_UTF8
     2177  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
     2178#endif
     2179  switch(-next)
     2180    {
     2181    case ESC_d:
     2182    return item > 127 || (cd->ctypes[item] & ctype_digit) == 0;
     2183
     2184    case ESC_D:
     2185    return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0;
     2186
     2187    case ESC_s:
     2188    return item > 127 || (cd->ctypes[item] & ctype_space) == 0;
     2189
     2190    case ESC_S:
     2191    return item <= 127 && (cd->ctypes[item] & ctype_space) != 0;
     2192
     2193    case ESC_w:
     2194    return item > 127 || (cd->ctypes[item] & ctype_word) == 0;
     2195
     2196    case ESC_W:
     2197    return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
     2198
     2199    case ESC_h:
     2200    case ESC_H:
     2201    switch(item)
     2202      {
     2203      case 0x09:
     2204      case 0x20:
     2205      case 0xa0:
     2206      case 0x1680:
     2207      case 0x180e:
     2208      case 0x2000:
     2209      case 0x2001:
     2210      case 0x2002:
     2211      case 0x2003:
     2212      case 0x2004:
     2213      case 0x2005:
     2214      case 0x2006:
     2215      case 0x2007:
     2216      case 0x2008:
     2217      case 0x2009:
     2218      case 0x200A:
     2219      case 0x202f:
     2220      case 0x205f:
     2221      case 0x3000:
     2222      return -next != ESC_h;
     2223      default:
     2224      return -next == ESC_h;
     2225      }
     2226
     2227    case ESC_v:
     2228    case ESC_V:
     2229    switch(item)
     2230      {
     2231      case 0x0a:
     2232      case 0x0b:
     2233      case 0x0c:
     2234      case 0x0d:
     2235      case 0x85:
     2236      case 0x2028:
     2237      case 0x2029:
     2238      return -next != ESC_v;
     2239      default:
     2240      return -next == ESC_v;
     2241      }
     2242
     2243    default:
     2244    return FALSE;
     2245    }
     2246
     2247  case OP_DIGIT:
     2248  return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
     2249         next == -ESC_h || next == -ESC_v;
     2250
     2251  case OP_NOT_DIGIT:
     2252  return next == -ESC_d;
     2253
     2254  case OP_WHITESPACE:
     2255  return next == -ESC_S || next == -ESC_d || next == -ESC_w;
     2256
     2257  case OP_NOT_WHITESPACE:
     2258  return next == -ESC_s || next == -ESC_h || next == -ESC_v;
     2259
     2260  case OP_HSPACE:
     2261  return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
     2262
     2263  case OP_NOT_HSPACE:
     2264  return next == -ESC_h;
     2265
     2266  /* Can't have \S in here because VT matches \S (Perl anomaly) */
     2267  case OP_VSPACE:
     2268  return next == -ESC_V || next == -ESC_d || next == -ESC_w;
     2269
     2270  case OP_NOT_VSPACE:
     2271  return next == -ESC_v;
     2272
     2273  case OP_WORDCHAR:
     2274  return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
     2275
     2276  case OP_NOT_WORDCHAR:
     2277  return next == -ESC_w || next == -ESC_d;
     2278
     2279  default:
     2280  return FALSE;
     2281  }
     2282
     2283/* Control does not reach here */
     2284}
     2285
     2286
     2287
    15202288/*************************************************
    15212289*           Compile one branch                   *
    15222290*************************************************/
    15232291
    1524 /* Scan the pattern, compiling it into the code vector. If the options are
     2292/* Scan the pattern, compiling it into a vector. If the options are
    15252293changed during the branch, the pointer is used to change the external options
    1526 bits.
     2294bits. This function is used during the pre-compile phase when we are trying
     2295to find out the amount of memory needed, as well as during the real compile
     2296phase. The value of lengthptr distinguishes the two phases.
    15272297
    15282298Arguments:
    15292299  optionsptr     pointer to the option bits
    1530   brackets       points to number of extracting brackets used
    15312300  codeptr        points to the pointer to the current code point
    15322301  ptrptr         points to the current pattern pointer
     
    15362305  bcptr          points to current branch chain
    15372306  cd             contains pointers to tables etc.
     2307  lengthptr      NULL during the real compile phase
     2308                 points to length accumulator during pre-compile phase
    15382309
    15392310Returns:         TRUE on success
     
    15422313
    15432314static BOOL
    1544 compile_branch(int *optionsptr, int *brackets, uschar **codeptr,
    1545   const uschar **ptrptr, int *errorcodeptr, int *firstbyteptr,
    1546   int *reqbyteptr, branch_chain *bcptr, compile_data *cd)
     2315compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,
     2316  int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
     2317  compile_data *cd, int *lengthptr)
    15472318{
    15482319int repeat_type, op_type;
     
    15532324int zeroreqbyte, zerofirstbyte;
    15542325int req_caseopt, reqvary, tempreqvary;
    1555 int condcount = 0;
    15562326int options = *optionsptr;
    15572327int after_manual_callout = 0;
     2328int length_prevgroup = 0;
    15582329register int c;
    15592330register uschar *code = *codeptr;
     2331uschar *last_code = code;
     2332uschar *orig_code = code;
    15602333uschar *tempcode;
    15612334BOOL inescq = FALSE;
     
    15652338uschar *previous = NULL;
    15662339uschar *previous_callout = NULL;
     2340uschar *save_hwm = NULL;
    15672341uschar classbits[32];
    15682342
     
    15742348#else
    15752349BOOL utf8 = FALSE;
     2350uschar *utf8_char = NULL;
     2351#endif
     2352
     2353#ifdef DEBUG
     2354if (lengthptr != NULL) DPRINTF((">> start branch\n"));
    15762355#endif
    15772356
     
    16072386  BOOL possessive_quantifier;
    16082387  BOOL is_quantifier;
     2388  BOOL is_recurse;
     2389  BOOL reset_bracount;
    16092390  int class_charcount;
    16102391  int class_lastchar;
    16112392  int newoptions;
    16122393  int recno;
     2394  int refsign;
    16132395  int skipbytes;
    16142396  int subreqbyte;
    16152397  int subfirstbyte;
     2398  int terminator;
    16162399  int mclength;
    16172400  uschar mcbuffer[8];
    16182401
    1619   /* Next byte in the pattern */
     2402  /* Get next byte in the pattern */
    16202403
    16212404  c = *ptr;
     2405
     2406  /* If we are in the pre-compile phase, accumulate the length used for the
     2407  previous cycle of this loop. */
     2408
     2409  if (lengthptr != NULL)
     2410    {
     2411#ifdef DEBUG
     2412    if (code > cd->hwm) cd->hwm = code;                 /* High water info */
     2413#endif
     2414    if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
     2415      {
     2416      *errorcodeptr = ERR52;
     2417      goto FAILED;
     2418      }
     2419
     2420    /* There is at least one situation where code goes backwards: this is the
     2421    case of a zero quantifier after a class (e.g. [ab]{0}). At compile time,
     2422    the class is simply eliminated. However, it is created first, so we have to
     2423    allow memory for it. Therefore, don't ever reduce the length at this point.
     2424    */
     2425
     2426    if (code < last_code) code = last_code;
     2427
     2428    /* Paranoid check for integer overflow */
     2429
     2430    if (OFLOW_MAX - *lengthptr < code - last_code)
     2431      {
     2432      *errorcodeptr = ERR20;
     2433      goto FAILED;
     2434      }
     2435
     2436    *lengthptr += code - last_code;
     2437    DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
     2438
     2439    /* If "previous" is set and it is not at the start of the work space, move
     2440    it back to there, in order to avoid filling up the work space. Otherwise,
     2441    if "previous" is NULL, reset the current code pointer to the start. */
     2442
     2443    if (previous != NULL)
     2444      {
     2445      if (previous > orig_code)
     2446        {
     2447        memmove(orig_code, previous, code - previous);
     2448        code -= previous - orig_code;
     2449        previous = orig_code;
     2450        }
     2451      }
     2452    else code = orig_code;
     2453
     2454    /* Remember where this code item starts so we can pick up the length
     2455    next time round. */
     2456
     2457    last_code = code;
     2458    }
     2459
     2460  /* In the real compile phase, just check the workspace used by the forward
     2461  reference list. */
     2462
     2463  else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE)
     2464    {
     2465    *errorcodeptr = ERR52;
     2466    goto FAILED;
     2467    }
    16222468
    16232469  /* If in \Q...\E, check for the end; if not, we have a literal */
     
    16352481      if (previous_callout != NULL)
    16362482        {
    1637         complete_callout(previous_callout, ptr, cd);
     2483        if (lengthptr == NULL)  /* Don't attempt in pre-compile phase */
     2484          complete_callout(previous_callout, ptr, cd);
    16382485        previous_callout = NULL;
    16392486        }
     
    16562503       after_manual_callout-- <= 0)
    16572504    {
    1658     complete_callout(previous_callout, ptr, cd);
     2505    if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */
     2506      complete_callout(previous_callout, ptr, cd);
    16592507    previous_callout = NULL;
    16602508    }
     
    16672515    if (c == '#')
    16682516      {
    1669       /* The space before the ; is to avoid a warning on a silly compiler
    1670       on the Macintosh. */
    1671       while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
    1672       if (c != 0) continue;   /* Else fall through to handle end of string */
     2517      while (*(++ptr) != 0)
     2518        {
     2519        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
     2520        }
     2521      if (*ptr != 0) continue;
     2522
     2523      /* Else fall through to handle end of string */
     2524      c = 0;
    16732525      }
    16742526    }
     
    16842536  switch(c)
    16852537    {
    1686     /* The branch terminates at end of string, |, or ). */
    1687 
    1688     case 0:
    1689     case '|':
     2538    /* ===================================================================*/
     2539    case 0:                        /* The branch terminates at string end */
     2540    case '|':                      /* or | or ) */
    16902541    case ')':
    16912542    *firstbyteptr = firstbyte;
     
    16932544    *codeptr = code;
    16942545    *ptrptr = ptr;
     2546    if (lengthptr != NULL)
     2547      {
     2548      if (OFLOW_MAX - *lengthptr < code - last_code)
     2549        {
     2550        *errorcodeptr = ERR20;
     2551        goto FAILED;
     2552        }
     2553      *lengthptr += code - last_code;   /* To include callout length */
     2554      DPRINTF((">> end branch\n"));
     2555      }
    16952556    return TRUE;
    16962557
     2558
     2559    /* ===================================================================*/
    16972560    /* Handle single-character metacharacters. In multiline mode, ^ disables
    16982561    the setting of any following char as a first character. */
     
    17232586    break;
    17242587
    1725     /* Character classes. If the included characters are all < 255 in value, we
    1726     build a 32-byte bitmap of the permitted characters, except in the special
    1727     case where there is only one such character. For negated classes, we build
    1728     the map as usual, then invert it at the end. However, we use a different
    1729     opcode so that data characters > 255 can be handled correctly.
     2588
     2589    /* ===================================================================*/
     2590    /* Character classes. If the included characters are all < 256, we build a
     2591    32-byte bitmap of the permitted characters, except in the special case
     2592    where there is only one such character. For negated classes, we build the
     2593    map as usual, then invert it at the end. However, we use a different opcode
     2594    so that data characters > 255 can be handled correctly.
    17302595
    17312596    If the class contains characters outside the 0-255 range, a different
     
    17482613      }
    17492614
    1750     /* If the first character is '^', set the negation flag and skip it. */
    1751 
    1752     if ((c = *(++ptr)) == '^')
    1753       {
    1754       negate_class = TRUE;
     2615    /* If the first character is '^', set the negation flag and skip it. Also,
     2616    if the first few characters (either before or after ^) are \Q\E or \E we
     2617    skip them too. This makes for compatibility with Perl. */
     2618
     2619    negate_class = FALSE;
     2620    for (;;)
     2621      {
    17552622      c = *(++ptr);
    1756       }
    1757     else
    1758       {
    1759       negate_class = FALSE;
     2623      if (c == '\\')
     2624        {
     2625        if (ptr[1] == 'E') ptr++;
     2626          else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
     2627            else break;
     2628        }
     2629      else if (!negate_class && c == '^')
     2630        negate_class = TRUE;
     2631      else break;
    17602632      }
    17612633
    17622634    /* Keep a count of chars with values < 256 so that we can optimize the case
    1763     of just a single character (as long as it's < 256). For higher valued UTF-8
    1764     characters, we don't yet do any optimization. */
     2635    of just a single character (as long as it's < 256). However, for higher
     2636    valued UTF-8 characters, we don't yet do any optimization. */
    17652637
    17662638    class_charcount = 0;
    17672639    class_lastchar = -1;
    17682640
     2641    /* Initialize the 32-char bit map to all zeros. We build the map in a
     2642    temporary bit of memory, in case the class contains only 1 character (less
     2643    than 256), because in that case the compiled code doesn't use the bit map.
     2644    */
     2645
     2646    memset(classbits, 0, 32 * sizeof(uschar));
     2647
    17692648#ifdef SUPPORT_UTF8
    17702649    class_utf8 = FALSE;                       /* No chars >= 256 */
    1771     class_utf8data = code + LINK_SIZE + 34;   /* For UTF-8 items */
     2650    class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
    17722651#endif
    17732652
    1774     /* Initialize the 32-char bit map to all zeros. We have to build the
    1775     map in a temporary bit of store, in case the class contains only 1
    1776     character (< 256), because in that case the compiled code doesn't use the
    1777     bit map. */
    1778 
    1779     memset(classbits, 0, 32 * sizeof(uschar));
    1780 
    17812653    /* Process characters until ] is reached. By writing this as a "do" it
    1782     means that an initial ] is taken as a data character. The first pass
    1783     through the regex checked the overall syntax, so we don't need to be very
    1784     strict here. At the start of the loop, c contains the first byte of the
    1785     character. */
    1786 
    1787     do
    1788       {
     2654    means that an initial ] is taken as a data character. At the start of the
     2655    loop, c contains the first byte of the character. */
     2656
     2657    if (c != 0) do
     2658      {
     2659      const uschar *oldptr;
     2660
    17892661#ifdef SUPPORT_UTF8
    17902662      if (utf8 && c > 127)
     
    17982670      if (inescq)
    17992671        {
    1800         if (c == '\\' && ptr[1] == 'E')
     2672        if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */
    18012673          {
    1802           inescq = FALSE;
    1803           ptr++;
    1804           continue;
     2674          inescq = FALSE;                   /* Reset literal state */
     2675          ptr++;                            /* Skip the 'E' */
     2676          continue;                         /* Carry on with next */
    18052677          }
    1806         else goto LONE_SINGLE_CHARACTER;
     2678        goto CHECK_RANGE;                   /* Could be range if \E follows */
    18072679        }
    18082680
     
    18182690        {
    18192691        BOOL local_negate = FALSE;
    1820         int posix_class, i;
     2692        int posix_class, taboffset, tabopt;
    18212693        register const uschar *cbits = cd->cbits;
     2694        uschar pbits[32];
    18222695
    18232696        if (ptr[1] != ':')
     
    18482721          posix_class = 0;
    18492722
    1850         /* Or into the map we are building up to 3 of the static class
    1851         tables, or their negations. The [:blank:] class sets up the same
    1852         chars as the [:space:] class (all white space). We remove the vertical
    1853         white space chars afterwards. */
     2723        /* We build the bit map for the POSIX class in a chunk of local store
     2724        because we may be adding and subtracting from it, and we don't want to
     2725        subtract bits that may be in the main map already. At the end we or the
     2726        result into the bit map that is being built. */
    18542727
    18552728        posix_class *= 3;
    1856         for (i = 0; i < 3; i++)
     2729
     2730        /* Copy in the first table (always present) */
     2731
     2732        memcpy(pbits, cbits + posix_class_maps[posix_class],
     2733          32 * sizeof(uschar));
     2734
     2735        /* If there is a second table, add or remove it as required. */
     2736
     2737        taboffset = posix_class_maps[posix_class + 1];
     2738        tabopt = posix_class_maps[posix_class + 2];
     2739
     2740        if (taboffset >= 0)
    18572741          {
    1858           BOOL blankclass = strncmp((char *)ptr, "blank", 5) == 0;
    1859           int taboffset = posix_class_maps[posix_class + i];
    1860           if (taboffset < 0) break;
    1861           if (local_negate)
    1862             {
    1863             if (i == 0)
    1864               for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+taboffset];
    1865             else
    1866               for (c = 0; c < 32; c++) classbits[c] &= ~cbits[c+taboffset];
    1867             if (blankclass) classbits[1] |= 0x3c;
    1868             }
     2742          if (tabopt >= 0)
     2743            for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
    18692744          else
    1870             {
    1871             for (c = 0; c < 32; c++) classbits[c] |= cbits[c+taboffset];
    1872             if (blankclass) classbits[1] &= ~0x3c;
    1873             }
     2745            for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
    18742746          }
     2747
     2748        /* Now see if we need to remove any special characters. An option
     2749        value of 1 removes vertical space and 2 removes underscore. */
     2750
     2751        if (tabopt < 0) tabopt = -tabopt;
     2752        if (tabopt == 1) pbits[1] &= ~0x3c;
     2753          else if (tabopt == 2) pbits[11] &= 0x7f;
     2754
     2755        /* Add the POSIX table or its complement into the main table that is
     2756        being built and we are done. */
     2757
     2758        if (local_negate)
     2759          for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
     2760        else
     2761          for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
    18752762
    18762763        ptr = tempptr + 1;
     
    18802767
    18812768      /* Backslash may introduce a single character, or it may introduce one
    1882       of the specials, which just set a flag. Escaped items are checked for
    1883       validity in the pre-compiling pass. The sequence \b is a special case.
    1884       Inside a class (and only there) it is treated as backspace. Elsewhere
    1885       it marks a word boundary. Other escapes have preset maps ready to
    1886       or into the one we are building. We assume they have more than one
     2769      of the specials, which just set a flag. The sequence \b is a special
     2770      case. Inside a class (and only there) it is treated as backspace.
     2771      Elsewhere it marks a word boundary. Other escapes have preset maps ready
     2772      to 'or' into the one we are building. We assume they have more than one
    18872773      character in them, so set class_charcount bigger than one. */
    18882774
    18892775      if (c == '\\')
    18902776        {
    1891         c = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE);
     2777        c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
     2778        if (*errorcodeptr != 0) goto FAILED;
    18922779
    18932780        if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
    18942781        else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
     2782        else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
    18952783        else if (-c == ESC_Q)            /* Handle start of quoted string */
    18962784          {
     
    19022790          continue;
    19032791          }
     2792        else if (-c == ESC_E) continue;  /* Ignore orphan \E */
    19042793
    19052794        if (c < 0)
     
    19072796          register const uschar *cbits = cd->cbits;
    19082797          class_charcount += 2;     /* Greater than 1 is what matters */
    1909           switch (-c)
     2798
     2799          /* Save time by not doing this in the pre-compile phase. */
     2800
     2801          if (lengthptr == NULL) switch (-c)
    19102802            {
    19112803            case ESC_d:
     
    19352827            continue;
    19362828
     2829            case ESC_E: /* Perl ignores an orphan \E */
     2830            continue;
     2831
     2832            default:    /* Not recognized; fall through */
     2833            break;      /* Need "default" setting to stop compiler warning. */
     2834            }
     2835
     2836          /* In the pre-compile phase, just do the recognition. */
     2837
     2838          else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
     2839                   c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
     2840
     2841          /* We need to deal with \H, \h, \V, and \v in both phases because
     2842          they use extra memory. */
     2843
     2844          if (-c == ESC_h)
     2845            {
     2846            SETBIT(classbits, 0x09); /* HT */
     2847            SETBIT(classbits, 0x20); /* SPACE */
     2848            SETBIT(classbits, 0xa0); /* NBSP */
     2849#ifdef SUPPORT_UTF8
     2850            if (utf8)
     2851              {
     2852              class_utf8 = TRUE;
     2853              *class_utf8data++ = XCL_SINGLE;
     2854              class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
     2855              *class_utf8data++ = XCL_SINGLE;
     2856              class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
     2857              *class_utf8data++ = XCL_RANGE;
     2858              class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
     2859              class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
     2860              *class_utf8data++ = XCL_SINGLE;
     2861              class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
     2862              *class_utf8data++ = XCL_SINGLE;
     2863              class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
     2864              *class_utf8data++ = XCL_SINGLE;
     2865              class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
     2866              }
     2867#endif
     2868            continue;
     2869            }
     2870
     2871          if (-c == ESC_H)
     2872            {
     2873            for (c = 0; c < 32; c++)
     2874              {
     2875              int x = 0xff;
     2876              switch (c)
     2877                {
     2878                case 0x09/8: x ^= 1 << (0x09%8); break;
     2879                case 0x20/8: x ^= 1 << (0x20%8); break;
     2880                case 0xa0/8: x ^= 1 << (0xa0%8); break;
     2881                default: break;
     2882                }
     2883              classbits[c] |= x;
     2884              }
     2885
     2886#ifdef SUPPORT_UTF8
     2887            if (utf8)
     2888              {
     2889              class_utf8 = TRUE;
     2890              *class_utf8data++ = XCL_RANGE;
     2891              class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
     2892              class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
     2893              *class_utf8data++ = XCL_RANGE;
     2894              class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
     2895              class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
     2896              *class_utf8data++ = XCL_RANGE;
     2897              class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
     2898              class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
     2899              *class_utf8data++ = XCL_RANGE;
     2900              class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
     2901              class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
     2902              *class_utf8data++ = XCL_RANGE;
     2903              class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
     2904              class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
     2905              *class_utf8data++ = XCL_RANGE;
     2906              class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
     2907              class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
     2908              *class_utf8data++ = XCL_RANGE;
     2909              class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
     2910              class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
     2911              }
     2912#endif
     2913            continue;
     2914            }
     2915
     2916          if (-c == ESC_v)
     2917            {
     2918            SETBIT(classbits, 0x0a); /* LF */
     2919            SETBIT(classbits, 0x0b); /* VT */
     2920            SETBIT(classbits, 0x0c); /* FF */
     2921            SETBIT(classbits, 0x0d); /* CR */
     2922            SETBIT(classbits, 0x85); /* NEL */
     2923#ifdef SUPPORT_UTF8
     2924            if (utf8)
     2925              {
     2926              class_utf8 = TRUE;
     2927              *class_utf8data++ = XCL_RANGE;
     2928              class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
     2929              class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
     2930              }
     2931#endif
     2932            continue;
     2933            }
     2934
     2935          if (-c == ESC_V)
     2936            {
     2937            for (c = 0; c < 32; c++)
     2938              {
     2939              int x = 0xff;
     2940              switch (c)
     2941                {
     2942                case 0x0a/8: x ^= 1 << (0x0a%8);
     2943                             x ^= 1 << (0x0b%8);
     2944                             x ^= 1 << (0x0c%8);
     2945                             x ^= 1 << (0x0d%8);
     2946                             break;
     2947                case 0x85/8: x ^= 1 << (0x85%8); break;
     2948                default: break;
     2949                }
     2950              classbits[c] |= x;
     2951              }
     2952
     2953#ifdef SUPPORT_UTF8
     2954            if (utf8)
     2955              {
     2956              class_utf8 = TRUE;
     2957              *class_utf8data++ = XCL_RANGE;
     2958              class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
     2959              class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
     2960              *class_utf8data++ = XCL_RANGE;
     2961              class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
     2962              class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
     2963              }
     2964#endif
     2965            continue;
     2966            }
     2967
     2968          /* We need to deal with \P and \p in both phases. */
     2969
    19372970#ifdef SUPPORT_UCP
    1938             case ESC_p:
    1939             case ESC_P:
    1940               {
    1941               BOOL negated;
    1942               int property = get_ucp(&ptr, &negated, errorcodeptr);
    1943               if (property < 0) goto FAILED;
    1944               class_utf8 = TRUE;
    1945               *class_utf8data++ = ((-c == ESC_p) != negated)?
    1946                 XCL_PROP : XCL_NOTPROP;
    1947               *class_utf8data++ = property;
    1948               class_charcount -= 2;   /* Not a < 256 character */
    1949               }
     2971          if (-c == ESC_p || -c == ESC_P)
     2972            {
     2973            BOOL negated;
     2974            int pdata;
     2975            int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
     2976            if (ptype < 0) goto FAILED;
     2977            class_utf8 = TRUE;
     2978            *class_utf8data++ = ((-c == ESC_p) != negated)?
     2979              XCL_PROP : XCL_NOTPROP;
     2980            *class_utf8data++ = ptype;
     2981            *class_utf8data++ = pdata;
     2982            class_charcount -= 2;   /* Not a < 256 character */
    19502983            continue;
     2984            }
    19512985#endif
    1952 
    1953             /* Unrecognized escapes are faulted if PCRE is running in its
    1954             strict mode. By default, for compatibility with Perl, they are
    1955             treated as literals. */
    1956 
    1957             default:
    1958             if ((options & PCRE_EXTRA) != 0)
    1959               {
    1960               *errorcodeptr = ERR7;
    1961               goto FAILED;
    1962               }
    1963             c = *ptr;              /* The final character */
    1964             class_charcount -= 2;  /* Undo the default count from above */
     2986          /* Unrecognized escapes are faulted if PCRE is running in its
     2987          strict mode. By default, for compatibility with Perl, they are
     2988          treated as literals. */
     2989
     2990          if ((options & PCRE_EXTRA) != 0)
     2991            {
     2992            *errorcodeptr = ERR7;
     2993            goto FAILED;
    19652994            }
     2995
     2996          class_charcount -= 2;  /* Undo the default count from above */
     2997          c = *ptr;              /* Get the final character and fall through */
    19662998          }
    19672999
    19683000        /* Fall through if we have a single character (c >= 0). This may be
    1969         > 256 in UTF-8 mode. */
     3001        greater than 256 in UTF-8 mode. */
    19703002
    19713003        }   /* End of backslash handling */
     
    19733005      /* A single character may be followed by '-' to form a range. However,
    19743006      Perl does not permit ']' to be the end of the range. A '-' character
    1975       here is treated as a literal. */
    1976 
    1977       if (ptr[1] == '-' && ptr[2] != ']')
     3007      at the end is treated as a literal. Perl ignores orphaned \E sequences
     3008      entirely. The code for handling \Q and \E is messy. */
     3009
     3010      CHECK_RANGE:
     3011      while (ptr[1] == '\\' && ptr[2] == 'E')
     3012        {
     3013        inescq = FALSE;
     3014        ptr += 2;
     3015        }
     3016
     3017      oldptr = ptr;
     3018
     3019      /* Remember \r or \n */
     3020
     3021      if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;
     3022
     3023      /* Check for range */
     3024
     3025      if (!inescq && ptr[1] == '-')
    19783026        {
    19793027        int d;
    19803028        ptr += 2;
     3029        while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;
     3030
     3031        /* If we hit \Q (not followed by \E) at this point, go into escaped
     3032        mode. */
     3033
     3034        while (*ptr == '\\' && ptr[1] == 'Q')
     3035          {
     3036          ptr += 2;
     3037          if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }
     3038          inescq = TRUE;
     3039          break;
     3040          }
     3041
     3042        if (*ptr == 0 || (!inescq && *ptr == ']'))
     3043          {
     3044          ptr = oldptr;
     3045          goto LONE_SINGLE_CHARACTER;
     3046          }
    19813047
    19823048#ifdef SUPPORT_UTF8
     
    19933059        in such circumstances. */
    19943060
    1995         if (d == '\\')
     3061        if (!inescq && d == '\\')
    19963062          {
    1997           const uschar *oldptr = ptr;
    1998           d = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE);
    1999 
    2000           /* \b is backslash; \X is literal X; any other special means the '-'
    2001           was literal */
     3063          d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
     3064          if (*errorcodeptr != 0) goto FAILED;
     3065
     3066          /* \b is backspace; \X is literal X; \R is literal R; any other
     3067          special means the '-' was literal */
    20023068
    20033069          if (d < 0)
    20043070            {
    20053071            if (d == -ESC_b) d = '\b';
    2006             else if (d == -ESC_X) d = 'X'; else
     3072            else if (d == -ESC_X) d = 'X';
     3073            else if (d == -ESC_R) d = 'R'; else
    20073074              {
    2008               ptr = oldptr - 2;
     3075              ptr = oldptr;
    20093076              goto LONE_SINGLE_CHARACTER;  /* A few lines below */
    20103077              }
     
    20123079          }
    20133080
    2014         /* The check that the two values are in the correct order happens in
    2015         the pre-pass. Optimize one-character ranges */
     3081        /* Check that the two values are in the correct order. Optimize
     3082        one-character ranges */
     3083
     3084        if (d < c)
     3085          {
     3086          *errorcodeptr = ERR8;
     3087          goto FAILED;
     3088          }
    20163089
    20173090        if (d == c) goto LONE_SINGLE_CHARACTER;  /* A few lines below */
     3091
     3092        /* Remember \r or \n */
     3093
     3094        if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;
    20183095
    20193096        /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
     
    20343111          if ((options & PCRE_CASELESS) != 0)
    20353112            {
    2036             int occ, ocd;
    2037             int cc = c;
    2038             int origd = d;
     3113            unsigned int occ, ocd;
     3114            unsigned int cc = c;
     3115            unsigned int origd = d;
    20393116            while (get_othercase_range(&cc, origd, &occ, &ocd))
    20403117              {
    2041               if (occ >= c && ocd <= d) continue;  /* Skip embedded ranges */
    2042 
    2043               if (occ < c  && ocd >= c - 1)        /* Extend the basic range */
     3118              if (occ >= (unsigned int)c &&
     3119                  ocd <= (unsigned int)d)
     3120                continue;                          /* Skip embedded ranges */
     3121
     3122              if (occ < (unsigned int)c  &&
     3123                  ocd >= (unsigned int)c - 1)      /* Extend the basic range */
    20443124                {                                  /* if there is overlap,   */
    20453125                c = occ;                           /* noting that if occ < c */
    20463126                continue;                          /* we can't have ocd > d  */
    20473127                }                                  /* because a subrange is  */
    2048               if (ocd > d && occ <= d + 1)         /* always shorter than    */
     3128              if (ocd > (unsigned int)d &&
     3129                  occ <= (unsigned int)d + 1)      /* always shorter than    */
    20493130                {                                  /* the basic range.       */
    20503131                d = ocd;
     
    20943175        for partial ranges without UCP support. */
    20953176
    2096         for (; c <= d; c++)
     3177        class_charcount += d - c + 1;
     3178        class_lastchar = d;
     3179
     3180        /* We can save a bit of time by skipping this in the pre-compile. */
     3181
     3182        if (lengthptr == NULL) for (; c <= d; c++)
    20973183          {
    20983184          classbits[c/8] |= (1 << (c&7));
     
    21023188            classbits[uc/8] |= (1 << (uc&7));
    21033189            }
    2104           class_charcount++;                /* in case a one-char range */
    2105           class_lastchar = c;
    21063190          }
    21073191
     
    21273211        if ((options & PCRE_CASELESS) != 0)
    21283212          {
    2129           int chartype;
    2130           int othercase;
    2131           if (_pcre_ucp_findchar(c, &chartype, &othercase) >= 0 &&
    2132                othercase > 0)
     3213          unsigned int othercase;
     3214          if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
    21333215            {
    21343216            *class_utf8data++ = XCL_SINGLE;
     
    21553237      }
    21563238
    2157     /* Loop until ']' reached; the check for end of string happens inside the
    2158     loop. This "while" is the end of the "do" above. */
    2159 
    2160     while ((c = *(++ptr)) != ']' || inescq);
     3239    /* Loop until ']' reached. This "while" is the end of the "do" above. */
     3240
     3241    while ((c = *(++ptr)) != 0 && (c != ']' || inescq));
     3242
     3243    if (c == 0)                          /* Missing terminating ']' */
     3244      {
     3245      *errorcodeptr = ERR6;
     3246      goto FAILED;
     3247      }
     3248
     3249
     3250/* This code has been disabled because it would mean that \s counts as
     3251an explicit \r or \n reference, and that's not really what is wanted. Now
     3252we set the flag only if there is a literal "\r" or "\n" in the class. */
     3253
     3254#if 0
     3255    /* Remember whether \r or \n are in this class */
     3256
     3257    if (negate_class)
     3258      {
     3259      if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;
     3260      }
     3261    else
     3262      {
     3263      if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;
     3264      }
     3265#endif
     3266
    21613267
    21623268    /* If class_charcount is 1, we saw precisely one character whose value is
    2163     less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we
    2164     can optimize the negative case only if there were no characters >= 128
    2165     because OP_NOT and the related opcodes like OP_NOTSTAR operate on
    2166     single-bytes only. This is an historical hangover. Maybe one day we can
    2167     tidy these opcodes to handle multi-byte characters.
     3269    less than 256. As long as there were no characters >= 128 and there was no
     3270    use of \p or \P, in other words, no use of any XCLASS features, we can
     3271    optimize.
     3272
     3273    In UTF-8 mode, we can optimize the negative case only if there were no
     3274    characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
     3275    operate on single-bytes only. This is an historical hangover. Maybe one day
     3276    we can tidy these opcodes to handle multi-byte characters.
    21683277
    21693278    The optimization throws away the bit map. We turn the item into a
     
    21753284
    21763285#ifdef SUPPORT_UTF8
    2177     if (class_charcount == 1 &&
    2178           (!utf8 ||
    2179           (!class_utf8 && (!negate_class || class_lastchar < 128))))
    2180 
     3286    if (class_charcount == 1 && !class_utf8 &&
     3287      (!utf8 || !negate_class || class_lastchar < 128))
    21813288#else
    21823289    if (class_charcount == 1)
     
    22223329    /* If there are characters with values > 255, we have to compile an
    22233330    extended class, with its own opcode. If there are no characters < 256,
    2224     we can omit the bitmap. */
     3331    we can omit the bitmap in the actual compiled code. */
    22253332
    22263333#ifdef SUPPORT_UTF8
     
    22323339      *code = negate_class? XCL_NOT : 0;
    22333340
    2234       /* If the map is required, install it, and move on to the end of
    2235       the extra data */
     3341      /* If the map is required, move up the extra data to make room for it;
     3342      otherwise just move the code pointer to the end of the extra data. */
    22363343
    22373344      if (class_charcount > 0)
    22383345        {
    22393346        *code++ |= XCL_MAP;
     3347        memmove(code + 32, code, class_utf8data - code);
    22403348        memcpy(code, classbits, 32);
    2241         code = class_utf8data;
     3349        code = class_utf8data + 32;
    22423350        }
    2243 
    2244       /* If the map is not required, slide down the extra data. */
    2245 
    2246       else
    2247         {
    2248         int len = class_utf8data - (code + 33);
    2249         memmove(code + 1, code + 33, len);
    2250         code += len + 1;
    2251         }
     3351      else code = class_utf8data;
    22523352
    22533353      /* Now fill in the complete length of the item */
     
    22663366      {
    22673367      *code++ = OP_NCLASS;
    2268       for (c = 0; c < 32; c++) code[c] = ~classbits[c];
     3368      if (lengthptr == NULL)    /* Save time in the pre-compile phase */
     3369        for (c = 0; c < 32; c++) code[c] = ~classbits[c];
    22693370      }
    22703371    else
     
    22763377    break;
    22773378
     3379
     3380    /* ===================================================================*/
    22783381    /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
    22793382    has been tested above. */
     
    23423445      }
    23433446    else repeat_type = greedy_default;
    2344 
    2345     /* If previous was a recursion, we need to wrap it inside brackets so that
    2346     it can be replicated if necessary. */
    2347 
    2348     if (*previous == OP_RECURSE)
    2349       {
    2350       memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
    2351       code += 1 + LINK_SIZE;
    2352       *previous = OP_BRA;
    2353       PUT(previous, 1, code - previous);
    2354       *code = OP_KET;
    2355       PUT(code, 1, code - previous);
    2356       code += 1 + LINK_SIZE;
    2357       }
    23583447
    23593448    /* If previous was a character match, abolish the item and generate a
     
    23903479        }
    23913480
     3481      /* If the repetition is unlimited, it pays to see if the next thing on
     3482      the line is something that cannot possibly match this character. If so,
     3483      automatically possessifying this item gains some performance in the case
     3484      where the match fails. */
     3485
     3486      if (!possessive_quantifier &&
     3487          repeat_max < 0 &&
     3488          check_auto_possessive(*previous, c, utf8, utf8_char, ptr + 1,
     3489            options, cd))
     3490        {
     3491        repeat_type = 0;    /* Force greedy */
     3492        possessive_quantifier = TRUE;
     3493        }
     3494
    23923495      goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
    23933496      }
     
    23963499    one of the special opcodes, replacing it. The code is shared with single-
    23973500    character repeats by setting opt_type to add a suitable offset into
    2398     repeat_type. OP_NOT is currently used only for single-byte chars. */
     3501    repeat_type. We can also test for auto-possessification. OP_NOT is
     3502    currently used only for single-byte chars. */
    23993503
    24003504    else if (*previous == OP_NOT)
     
    24023506      op_type = OP_NOTSTAR - OP_STAR;  /* Use "not" opcodes */
    24033507      c = previous[1];
     3508      if (!possessive_quantifier &&
     3509          repeat_max < 0 &&
     3510          check_auto_possessive(OP_NOT, c, utf8, NULL, ptr + 1, options, cd))
     3511        {
     3512        repeat_type = 0;    /* Force greedy */
     3513        possessive_quantifier = TRUE;
     3514        }
    24043515      goto OUTPUT_SINGLE_REPEAT;
    24053516      }
     
    24153526      {
    24163527      uschar *oldcode;
    2417       int prop_type;
     3528      int prop_type, prop_value;
    24183529      op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
    24193530      c = *previous;
    24203531
     3532      if (!possessive_quantifier &&
     3533          repeat_max < 0 &&
     3534          check_auto_possessive(c, 0, utf8, NULL, ptr + 1, options, cd))
     3535        {
     3536        repeat_type = 0;    /* Force greedy */
     3537        possessive_quantifier = TRUE;
     3538        }
     3539
    24213540      OUTPUT_SINGLE_REPEAT:
    2422       prop_type = (*previous == OP_PROP || *previous == OP_NOTPROP)?
    2423         previous[1] : -1;
     3541      if (*previous == OP_PROP || *previous == OP_NOTPROP)
     3542        {
     3543        prop_type = previous[1];
     3544        prop_value = previous[2];
     3545        }
     3546      else prop_type = prop_value = -1;
    24243547
    24253548      oldcode = code;
     
    24343557      one day we will be able to remove this restriction). */
    24353558
    2436       if (repeat_max != 1) cd->nopartial = TRUE;
     3559      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
    24373560
    24383561      /* Combine the op_type with the repeat_type */
     
    24553578
    24563579      /* A repeat minimum of 1 is optimized into some special cases. If the
    2457       maximum is unlimited, we use OP_PLUS. Otherwise, the original item it
     3580      maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
    24583581      left in place and, if the maximum is greater than 1, we use OP_UPTO with
    24593582      one less than the maximum. */
     
    24823605        /* If the maximum is unlimited, insert an OP_STAR. Before doing so,
    24833606        we have to insert the character for the previous code. For a repeated
    2484         Unicode property match, there is an extra byte that defines the
     3607        Unicode property match, there are two extra bytes that define the
    24853608        required property. In UTF-8 mode, long characters have their length in
    24863609        c, with the 0x80 bit as a flag. */
     
    24983621            {
    24993622            *code++ = c;
    2500             if (prop_type >= 0) *code++ = prop_type;
     3623            if (prop_type >= 0)
     3624              {
     3625              *code++ = prop_type;
     3626              *code++ = prop_value;
     3627              }
    25013628            }
    25023629          *code++ = OP_STAR + repeat_type;
     
    25043631
    25053632        /* Else insert an UPTO if the max is greater than the min, again
    2506         preceded by the character, for the previously inserted code. */
     3633        preceded by the character, for the previously inserted code. If the
     3634        UPTO is just for 1 instance, we can use QUERY instead. */
    25073635
    25083636        else if (repeat_max != repeat_min)
     
    25173645#endif
    25183646          *code++ = c;
    2519           if (prop_type >= 0) *code++ = prop_type;
     3647          if (prop_type >= 0)
     3648            {
     3649            *code++ = prop_type;
     3650            *code++ = prop_value;
     3651            }
    25203652          repeat_max -= repeat_min;
    2521           *code++ = OP_UPTO + repeat_type;
    2522           PUT2INC(code, 0, repeat_max);
     3653
     3654          if (repeat_max == 1)
     3655            {
     3656            *code++ = OP_QUERY + repeat_type;
     3657            }
     3658          else
     3659            {
     3660            *code++ = OP_UPTO + repeat_type;
     3661            PUT2INC(code, 0, repeat_max);
     3662            }
    25233663          }
    25243664        }
     
    25363676      *code++ = c;
    25373677
    2538       /* For a repeated Unicode property match, there is an extra byte that
    2539       defines the required property. */
     3678      /* For a repeated Unicode property match, there are two extra bytes that
     3679      define the required property. */
    25403680
    25413681#ifdef SUPPORT_UCP
    2542       if (prop_type >= 0) *code++ = prop_type;
     3682      if (prop_type >= 0)
     3683        {
     3684        *code++ = prop_type;
     3685        *code++ = prop_value;
     3686        }
    25433687#endif
    25443688      }
     
    25633707      one day we will be able to remove this restriction). */
    25643708
    2565       if (repeat_max != 1) cd->nopartial = TRUE;
     3709      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
    25663710
    25673711      if (repeat_min == 0 && repeat_max == -1)
     
    25833727    cases. */
    25843728
    2585     else if (*previous >= OP_BRA || *previous == OP_ONCE ||
    2586              *previous == OP_COND)
     3729    else if (*previous == OP_BRA  || *previous == OP_CBRA ||
     3730             *previous == OP_ONCE || *previous == OP_COND)
    25873731      {
    25883732      register int i;
     
    25903734      int len = code - previous;
    25913735      uschar *bralink = NULL;
     3736
     3737      /* Repeating a DEFINE group is pointless */
     3738
     3739      if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
     3740        {
     3741        *errorcodeptr = ERR55;
     3742        goto FAILED;
     3743        }
    25923744
    25933745      /* If the maximum repeat count is unlimited, find the end of the bracket
     
    26253777        BRAZERO and do no more at this point. However, we do need to adjust
    26263778        any OP_RECURSE calls inside the group that refer to the group itself or
    2627         any internal group, because the offset is from the start of the whole
    2628         regex. Temporarily terminate the pattern while doing this. */
     3779        any internal or forward referenced group, because the offset is from
     3780        the start of the whole regex. Temporarily terminate the pattern while
     3781        doing this. */
    26293782
    26303783        if (repeat_max <= 1)
    26313784          {
    26323785          *code = OP_END;
    2633           adjust_recurse(previous, 1, utf8, cd);
     3786          adjust_recurse(previous, 1, utf8, cd, save_hwm);
    26343787          memmove(previous+1, previous, len);
    26353788          code++;
     
    26493802          int offset;
    26503803          *code = OP_END;
    2651           adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd);
     3804          adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
    26523805          memmove(previous + 2 + LINK_SIZE, previous, len);
    26533806          code += 2 + LINK_SIZE;
     
    26693822      times as necessary, and adjust the maximum to the number of subsequent
    26703823      copies that we need. If we set a first char from the group, and didn't
    2671       set a required char, copy the latter from the former. */
     3824      set a required char, copy the latter from the former. If there are any
     3825      forward reference subroutine calls in the group, there will be entries on
     3826      the workspace list; replicate these with an appropriate increment. */
    26723827
    26733828      else
     
    26753830        if (repeat_min > 1)
    26763831          {
    2677           if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
    2678           for (i = 1; i < repeat_min; i++)
     3832          /* In the pre-compile phase, we don't actually do the replication. We
     3833          just adjust the length as if we had. Do some paranoid checks for
     3834          potential integer overflow. */
     3835
     3836          if (lengthptr != NULL)
    26793837            {
    2680             memcpy(code, previous, len);
    2681             code += len;
     3838            int delta = (repeat_min - 1)*length_prevgroup;
     3839            if ((double)(repeat_min - 1)*(double)length_prevgroup >
     3840                                                            (double)INT_MAX ||
     3841                OFLOW_MAX - *lengthptr < delta)
     3842              {
     3843              *errorcodeptr = ERR20;
     3844              goto FAILED;
     3845              }
     3846            *lengthptr += delta;
     3847            }
     3848
     3849          /* This is compiling for real */
     3850
     3851          else
     3852            {
     3853            if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
     3854            for (i = 1; i < repeat_min; i++)
     3855              {
     3856              uschar *hc;
     3857              uschar *this_hwm = cd->hwm;
     3858              memcpy(code, previous, len);
     3859              for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
     3860                {
     3861                PUT(cd->hwm, 0, GET(hc, 0) + len);
     3862                cd->hwm += LINK_SIZE;
     3863                }
     3864              save_hwm = this_hwm;
     3865              code += len;
     3866              }
    26823867            }
    26833868          }
     3869
    26843870        if (repeat_max > 0) repeat_max -= repeat_min;
    26853871        }
     
    26893875      remembering the bracket starts on a stack. In the case of a zero minimum,
    26903876      the first one was set up above. In all cases the repeat_max now specifies
    2691       the number of additional copies needed. */
     3877      the number of additional copies needed. Again, we must remember to
     3878      replicate entries on the forward reference list. */
    26923879
    26933880      if (repeat_max >= 0)
    26943881        {
    2695         for (i = repeat_max - 1; i >= 0; i--)
     3882        /* In the pre-compile phase, we don't actually do the replication. We
     3883        just adjust the length as if we had. For each repetition we must add 1
     3884        to the length for BRAZERO and for all but the last repetition we must
     3885        add 2 + 2*LINK_SIZE to allow for the nesting that occurs. Do some
     3886        paranoid checks to avoid integer overflow. */
     3887
     3888        if (lengthptr != NULL && repeat_max > 0)
    26963889          {
     3890          int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
     3891                      2 - 2*LINK_SIZE;   /* Last one doesn't nest */
     3892          if ((double)repeat_max *
     3893                (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
     3894                  > (double)INT_MAX ||
     3895              OFLOW_MAX - *lengthptr < delta)
     3896            {
     3897            *errorcodeptr = ERR20;
     3898            goto FAILED;
     3899            }
     3900          *lengthptr += delta;
     3901          }
     3902
     3903        /* This is compiling for real */
     3904
     3905        else for (i = repeat_max - 1; i >= 0; i--)
     3906          {
     3907          uschar *hc;
     3908          uschar *this_hwm = cd->hwm;
     3909
    26973910          *code++ = OP_BRAZERO + repeat_type;
    26983911
     
    27103923
    27113924          memcpy(code, previous, len);
     3925          for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
     3926            {
     3927            PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
     3928            cd->hwm += LINK_SIZE;
     3929            }
     3930          save_hwm = this_hwm;
    27123931          code += len;
    27133932          }
     
    27323951      can't just offset backwards from the current code point, because we
    27333952      don't know if there's been an options resetting after the ket. The
    2734       correct offset was computed above. */
    2735 
    2736       else code[-ketoffset] = OP_KETRMAX + repeat_type;
     3953      correct offset was computed above.
     3954
     3955      Then, when we are doing the actual compile phase, check to see whether
     3956      this group is a non-atomic one that could match an empty string. If so,
     3957      convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
     3958      that runtime checking can be done. [This check is also applied to
     3959      atomic groups at runtime, but in a different way.] */
     3960
     3961      else
     3962        {
     3963        uschar *ketcode = code - ketoffset;
     3964        uschar *bracode = ketcode - GET(ketcode, 1);
     3965        *ketcode = OP_KETRMAX + repeat_type;
     3966        if (lengthptr == NULL && *bracode != OP_ONCE)
     3967          {
     3968          uschar *scode = bracode;
     3969          do
     3970            {
     3971            if (could_be_empty_branch(scode, ketcode, utf8))
     3972              {
     3973              *bracode += OP_SBRA - OP_BRA;
     3974              break;
     3975              }
     3976            scode += GET(scode, 1);
     3977            }
     3978          while (*scode == OP_ALT);
     3979          }
     3980        }
    27373981      }
    27383982
     
    27453989      }
    27463990
    2747     /* If the character following a repeat is '+', we wrap the entire repeated
    2748     item inside OP_ONCE brackets. This is just syntactic sugar, taken from
    2749     Sun's Java package. The repeated item starts at tempcode, not at previous,
    2750     which might be the first part of a string whose (former) last char we
    2751     repeated. However, we don't support '+' after a greediness '?'. */
     3991    /* If the character following a repeat is '+', or if certain optimization
     3992    tests above succeeded, possessive_quantifier is TRUE. For some of the
     3993    simpler opcodes, there is a special alternative opcode for this. For
     3994    anything else, we wrap the entire repeated item inside OP_ONCE brackets.
     3995    The '+' notation is just syntactic sugar, taken from Sun's Java package,
     3996    but the special opcodes can optimize it a bit. The repeated item starts at
     3997    tempcode, not at previous, which might be the first part of a string whose
     3998    (former) last char we repeated.
     3999
     4000    Possessifying an 'exact' quantifier has no effect, so we can ignore it. But
     4001    an 'upto' may follow. We skip over an 'exact' item, and then test the
     4002    length of what remains before proceeding. */
    27524003
    27534004    if (possessive_quantifier)
    27544005      {
    2755       int len = code - tempcode;
    2756       memmove(tempcode + 1+LINK_SIZE, tempcode, len);
    2757       code += 1 + LINK_SIZE;
    2758       len += 1 + LINK_SIZE;
    2759       tempcode[0] = OP_ONCE;
    2760       *code++ = OP_KET;
    2761       PUTINC(code, 0, len);
    2762       PUT(tempcode, 1, len);
     4006      int len;
     4007      if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
     4008          *tempcode == OP_NOTEXACT)
     4009        tempcode += _pcre_OP_lengths[*tempcode];
     4010      len = code - tempcode;
     4011      if (len > 0) switch (*tempcode)
     4012        {
     4013        case OP_STAR:  *tempcode = OP_POSSTAR; break;
     4014        case OP_PLUS:  *tempcode = OP_POSPLUS; break;
     4015        case OP_QUERY: *tempcode = OP_POSQUERY; break;
     4016        case OP_UPTO:  *tempcode = OP_POSUPTO; break;
     4017
     4018        case OP_TYPESTAR:  *tempcode = OP_TYPEPOSSTAR; break;
     4019        case OP_TYPEPLUS:  *tempcode = OP_TYPEPOSPLUS; break;
     4020        case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
     4021        case OP_TYPEUPTO:  *tempcode = OP_TYPEPOSUPTO; break;
     4022
     4023        case OP_NOTSTAR:  *tempcode = OP_NOTPOSSTAR; break;
     4024        case OP_NOTPLUS:  *tempcode = OP_NOTPOSPLUS; break;
     4025        case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
     4026        case OP_NOTUPTO:  *tempcode = OP_NOTPOSUPTO; break;
     4027
     4028        default:
     4029        memmove(tempcode + 1+LINK_SIZE, tempcode, len);
     4030        code += 1 + LINK_SIZE;
     4031        len += 1 + LINK_SIZE;
     4032        tempcode[0] = OP_ONCE;
     4033        *code++ = OP_KET;
     4034        PUTINC(code, 0, len);
     4035        PUT(tempcode, 1, len);
     4036        break;
     4037        }
    27634038      }
    27644039
     
    27734048
    27744049
    2775     /* Start of nested bracket sub-expression, or comment or lookahead or
    2776     lookbehind or option setting or condition. First deal with special things
    2777     that can come after a bracket; all are introduced by ?, and the appearance
    2778     of any of them means that this is not a referencing group. They were
    2779     checked for validity in the first pass over the string, so we don't have to
    2780     check for syntax errors here.  */
     4050    /* ===================================================================*/
     4051    /* Start of nested parenthesized sub-expression, or comment or lookahead or
     4052    lookbehind or option setting or condition or all the other extended
     4053    parenthesis forms.  */
    27814054
    27824055    case '(':
    27834056    newoptions = options;
    27844057    skipbytes = 0;
    2785 
    2786     if (*(++ptr) == '?')
    2787       {
    2788       int set, unset;
     4058    bravalue = OP_CBRA;
     4059    save_hwm = cd->hwm;
     4060    reset_bracount = FALSE;
     4061
     4062    /* First deal with various "verbs" that can be introduced by '*'. */
     4063
     4064    if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
     4065      {
     4066      int i, namelen;
     4067      const char *vn = verbnames;
     4068      const uschar *name = ++ptr;
     4069      previous = NULL;
     4070      while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
     4071      if (*ptr == ':')
     4072        {
     4073        *errorcodeptr = ERR59;   /* Not supported */
     4074        goto FAILED;
     4075        }
     4076      if (*ptr != ')')
     4077        {
     4078        *errorcodeptr = ERR60;
     4079        goto FAILED;
     4080        }
     4081      namelen = ptr - name;
     4082      for (i = 0; i < verbcount; i++)
     4083        {
     4084        if (namelen == verbs[i].len &&
     4085            strncmp((char *)name, vn, namelen) == 0)
     4086          {
     4087          *code = verbs[i].op;
     4088          if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
     4089          break;
     4090          }
     4091        vn += verbs[i].len + 1;
     4092        }
     4093      if (i < verbcount) continue;
     4094      *errorcodeptr = ERR60;
     4095      goto FAILED;
     4096      }
     4097
     4098    /* Deal with the extended parentheses; all are introduced by '?', and the
     4099    appearance of any of them means that this is not a capturing group. */
     4100
     4101    else if (*ptr == '?')
     4102      {
     4103      int i, set, unset, namelen;
    27894104      int *optset;
     4105      const uschar *name;
     4106      uschar *slot;
    27904107
    27914108      switch (*(++ptr))
     
    27934110        case '#':                 /* Comment; skip to ket */
    27944111        ptr++;
    2795         while (*ptr != ')') ptr++;
     4112        while (*ptr != 0 && *ptr != ')') ptr++;
     4113        if (*ptr == 0)
     4114          {
     4115          *errorcodeptr = ERR18;
     4116          goto FAILED;
     4117          }
    27964118        continue;
    27974119
    2798         case ':':                 /* Non-extracting bracket */
     4120
     4121        /* ------------------------------------------------------------ */
     4122        case '|':                 /* Reset capture count for each branch */
     4123        reset_bracount = TRUE;
     4124        /* Fall through */
     4125
     4126        /* ------------------------------------------------------------ */
     4127        case ':':                 /* Non-capturing bracket */
    27994128        bravalue = OP_BRA;
    28004129        ptr++;
    28014130        break;
    28024131
     4132
     4133        /* ------------------------------------------------------------ */
    28034134        case '(':
    28044135        bravalue = OP_COND;       /* Conditional group */
    28054136
    2806         /* Condition to test for recursion */
    2807 
    2808         if (ptr[1] == 'R')
     4137        /* A condition can be an assertion, a number (referring to a numbered
     4138        group), a name (referring to a named group), or 'R', referring to
     4139        recursion. R<digits> and R&name are also permitted for recursion tests.
     4140
     4141        There are several syntaxes for testing a named group: (?(name)) is used
     4142        by Python; Perl 5.10 onwards uses (?(<name>) or (?('name')).
     4143
     4144        There are two unfortunate ambiguities, caused by history. (a) 'R' can
     4145        be the recursive thing or the name 'R' (and similarly for 'R' followed
     4146        by digits), and (b) a number could be a name that consists of digits.
     4147        In both cases, we look for a name first; if not found, we try the other
     4148        cases. */
     4149
     4150        /* For conditions that are assertions, check the syntax, and then exit
     4151        the switch. This will take control down to where bracketed groups,
     4152        including assertions, are processed. */
     4153
     4154        if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))
     4155          break;
     4156
     4157        /* Most other conditions use OP_CREF (a couple change to OP_RREF
     4158        below), and all need to skip 3 bytes at the start of the group. */
     4159
     4160        code[1+LINK_SIZE] = OP_CREF;
     4161        skipbytes = 3;
     4162        refsign = -1;
     4163
     4164        /* Check for a test for recursion in a named group. */
     4165
     4166        if (ptr[1] == 'R' && ptr[2] == '&')
    28094167          {
    2810           code[1+LINK_SIZE] = OP_CREF;
    2811           PUT2(code, 2+LINK_SIZE, CREF_RECURSE);
    2812           skipbytes = 3;
    2813           ptr += 3;
     4168          terminator = -1;
     4169          ptr += 2;
     4170          code[1+LINK_SIZE] = OP_RREF;    /* Change the type of test */
    28144171          }
    28154172
    2816         /* Condition to test for a numbered subpattern match. We know that
    2817         if a digit follows ( then there will just be digits until ) because
    2818         the syntax was checked in the first pass. */
    2819 
    2820         else if ((digitab[ptr[1]] && ctype_digit) != 0)
     4173        /* Check for a test for a named group's having been set, using the Perl
     4174        syntax (?(<name>) or (?('name') */
     4175
     4176        else if (ptr[1] == '<')
    28214177          {
    2822           int condref;                 /* Don't amalgamate; some compilers */
    2823           condref = *(++ptr) - '0';    /* grumble at autoincrement in declaration */
    2824           while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
    2825           if (condref == 0)
     4178          terminator = '>';
     4179          ptr++;
     4180          }
     4181        else if (ptr[1] == '\'')
     4182          {
     4183          terminator = '\'';
     4184          ptr++;
     4185          }
     4186        else
     4187          {
     4188          terminator = 0;
     4189          if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
     4190          }
     4191
     4192        /* We now expect to read a name; anything else is an error */
     4193
     4194        if ((cd->ctypes[ptr[1]] & ctype_word) == 0)
     4195          {
     4196          ptr += 1;  /* To get the right offset */
     4197          *errorcodeptr = ERR28;
     4198          goto FAILED;
     4199          }
     4200
     4201        /* Read the name, but also get it as a number if it's all digits */
     4202
     4203        recno = 0;
     4204        name = ++ptr;
     4205        while ((cd->ctypes[*ptr] & ctype_word) != 0)
     4206          {
     4207          if (recno >= 0)
     4208            recno = ((digitab[*ptr] & ctype_digit) != 0)?
     4209              recno * 10 + *ptr - '0' : -1;
     4210          ptr++;
     4211          }
     4212        namelen = ptr - name;
     4213
     4214        if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')
     4215          {
     4216          ptr--;      /* Error offset */
     4217          *errorcodeptr = ERR26;
     4218          goto FAILED;
     4219          }
     4220
     4221        /* Do no further checking in the pre-compile phase. */
     4222
     4223        if (lengthptr != NULL) break;
     4224
     4225        /* In the real compile we do the work of looking for the actual
     4226        reference. If the string started with "+" or "-" we require the rest to
     4227        be digits, in which case recno will be set. */
     4228
     4229        if (refsign > 0)
     4230          {
     4231          if (recno <= 0)
    28264232            {
    2827             *errorcodeptr = ERR35;
     4233            *errorcodeptr = ERR58;
    28284234            goto FAILED;
    28294235            }
    2830           ptr++;
    2831           code[1+LINK_SIZE] = OP_CREF;
    2832           PUT2(code, 2+LINK_SIZE, condref);
    2833           skipbytes = 3;
     4236          if (refsign == '-')
     4237            {
     4238            recno = cd->bracount - recno + 1;
     4239            if (recno <= 0)
     4240              {
     4241              *errorcodeptr = ERR15;
     4242              goto FAILED;
     4243              }
     4244            }
     4245          else recno += cd->bracount;
     4246          PUT2(code, 2+LINK_SIZE, recno);
     4247          break;
    28344248          }
    2835         /* For conditions that are assertions, we just fall through, having
    2836         set bravalue above. */
     4249
     4250        /* Otherwise (did not start with "+" or "-"), start by looking for the
     4251        name. */
     4252
     4253        slot = cd->name_table;
     4254        for (i = 0; i < cd->names_found; i++)
     4255          {
     4256          if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
     4257          slot += cd->name_entry_size;
     4258          }
     4259
     4260        /* Found a previous named subpattern */
     4261
     4262        if (i < cd->names_found)
     4263          {
     4264          recno = GET2(slot, 0);
     4265          PUT2(code, 2+LINK_SIZE, recno);
     4266          }
     4267
     4268        /* Search the pattern for a forward reference */
     4269
     4270        else if ((i = find_parens(ptr, cd->bracount, name, namelen,
     4271                        (options & PCRE_EXTENDED) != 0)) > 0)
     4272          {
     4273          PUT2(code, 2+LINK_SIZE, i);
     4274          }
     4275
     4276        /* If terminator == 0 it means that the name followed directly after
     4277        the opening parenthesis [e.g. (?(abc)...] and in this case there are
     4278        some further alternatives to try. For the cases where terminator != 0
     4279        [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have
     4280        now checked all the possibilities, so give an error. */
     4281
     4282        else if (terminator != 0)
     4283          {
     4284          *errorcodeptr = ERR15;
     4285          goto FAILED;
     4286          }
     4287
     4288        /* Check for (?(R) for recursion. Allow digits after R to specify a
     4289        specific group number. */
     4290
     4291        else if (*name == 'R')
     4292          {
     4293          recno = 0;
     4294          for (i = 1; i < namelen; i++)
     4295            {
     4296            if ((digitab[name[i]] & ctype_digit) == 0)
     4297              {
     4298              *errorcodeptr = ERR15;
     4299              goto FAILED;
     4300              }
     4301            recno = recno * 10 + name[i] - '0';
     4302            }
     4303          if (recno == 0) recno = RREF_ANY;
     4304          code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
     4305          PUT2(code, 2+LINK_SIZE, recno);
     4306          }
     4307
     4308        /* Similarly, check for the (?(DEFINE) "condition", which is always
     4309        false. */
     4310
     4311        else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)
     4312          {
     4313          code[1+LINK_SIZE] = OP_DEF;
     4314          skipbytes = 1;
     4315          }
     4316
     4317        /* Check for the "name" actually being a subpattern number. */
     4318
     4319        else if (recno > 0)
     4320          {
     4321          PUT2(code, 2+LINK_SIZE, recno);
     4322          }
     4323
     4324        /* Either an unidentified subpattern, or a reference to (?(0) */
     4325
     4326        else
     4327          {
     4328          *errorcodeptr = (recno == 0)? ERR35: ERR15;
     4329          goto FAILED;
     4330          }
    28374331        break;
    28384332
     4333
     4334        /* ------------------------------------------------------------ */
    28394335        case '=':                 /* Positive lookahead */
    28404336        bravalue = OP_ASSERT;
     
    28424338        break;
    28434339
     4340
     4341        /* ------------------------------------------------------------ */
    28444342        case '!':                 /* Negative lookahead */
     4343        ptr++;
     4344        if (*ptr == ')')          /* Optimize (?!) */
     4345          {
     4346          *code++ = OP_FAIL;
     4347          previous = NULL;
     4348          continue;
     4349          }
    28454350        bravalue = OP_ASSERT_NOT;
    2846         ptr++;
    28474351        break;
    28484352
    2849         case '<':                 /* Lookbehinds */
    2850         switch (*(++ptr))
     4353
     4354        /* ------------------------------------------------------------ */
     4355        case '<':                 /* Lookbehind or named define */
     4356        switch (ptr[1])
    28514357          {
    28524358          case '=':               /* Positive lookbehind */
    28534359          bravalue = OP_ASSERTBACK;
    2854           ptr++;
     4360          ptr += 2;
    28554361          break;
    28564362
    28574363          case '!':               /* Negative lookbehind */
    28584364          bravalue = OP_ASSERTBACK_NOT;
    2859           ptr++;
     4365          ptr += 2;
    28604366          break;
     4367
     4368          default:                /* Could be name define, else bad */
     4369          if ((cd->ctypes[ptr[1]] & ctype_word) != 0) goto DEFINE_NAME;
     4370          ptr++;                  /* Correct offset for error */
     4371          *errorcodeptr = ERR24;
     4372          goto FAILED;
    28614373          }
    28624374        break;
    28634375
     4376
     4377        /* ------------------------------------------------------------ */
    28644378        case '>':                 /* One-time brackets */
    28654379        bravalue = OP_ONCE;
     
    28674381        break;
    28684382
     4383
     4384        /* ------------------------------------------------------------ */
    28694385        case 'C':                 /* Callout - may be followed by digits; */
    28704386        previous_callout = code;  /* Save for later completion */
    28714387        after_manual_callout = 1; /* Skip one item before completing */
    2872         *code++ = OP_CALLOUT;     /* Already checked that the terminating */
    2873           {                       /* closing parenthesis is present. */
     4388        *code++ = OP_CALLOUT;
     4389          {
    28744390          int n = 0;
    28754391          while ((digitab[*(++ptr)] & ctype_digit) != 0)
    28764392            n = n * 10 + *ptr - '0';
     4393          if (*ptr != ')')
     4394            {
     4395            *errorcodeptr = ERR39;
     4396            goto FAILED;
     4397            }
    28774398          if (n > 255)
    28784399            {
     
    28884409        continue;
    28894410
    2890         case 'P':                 /* Named subpattern handling */
    2891         if (*(++ptr) == '<')      /* Definition */
     4411
     4412        /* ------------------------------------------------------------ */
     4413        case 'P':                 /* Python-style named subpattern handling */
     4414        if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */
    28924415          {
    2893           int i, namelen;
    2894           uschar *slot = cd->name_table;
    2895           const uschar *name;     /* Don't amalgamate; some compilers */
    2896           name = ++ptr;           /* grumble at autoincrement in declaration */
    2897 
    2898           while (*ptr++ != '>');
    2899           namelen = ptr - name - 1;
    2900 
    2901           for (i = 0; i < cd->names_found; i++)
     4416          is_recurse = *ptr == '>';
     4417          terminator = ')';
     4418          goto NAMED_REF_OR_RECURSE;
     4419          }
     4420        else if (*ptr != '<')    /* Test for Python-style definition */
     4421          {
     4422          *errorcodeptr = ERR41;
     4423          goto FAILED;
     4424          }
     4425        /* Fall through to handle (?P< as (?< is handled */
     4426
     4427
     4428        /* ------------------------------------------------------------ */
     4429        DEFINE_NAME:    /* Come here from (?< handling */
     4430        case '\'':
     4431          {
     4432          terminator = (*ptr == '<')? '>' : '\'';
     4433          name = ++ptr;
     4434
     4435          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
     4436          namelen = ptr - name;
     4437
     4438          /* In the pre-compile phase, just do a syntax check. */
     4439
     4440          if (lengthptr != NULL)
    29024441            {
    2903             int crc = memcmp(name, slot+2, namelen);
    2904             if (crc == 0)
     4442            if (*ptr != terminator)
    29054443              {
    2906               if (slot[2+namelen] == 0)
     4444              *errorcodeptr = ERR42;
     4445              goto FAILED;
     4446              }
     4447            if (cd->names_found >= MAX_NAME_COUNT)
     4448              {
     4449              *errorcodeptr = ERR49;
     4450              goto FAILED;
     4451              }
     4452            if (namelen + 3 > cd->name_entry_size)
     4453              {
     4454              cd->name_entry_size = namelen + 3;
     4455              if (namelen > MAX_NAME_SIZE)
    29074456                {
    2908                 *errorcodeptr = ERR43;
     4457                *errorcodeptr = ERR48;
    29094458                goto FAILED;
    29104459                }
    2911               crc = -1;             /* Current name is substring */
    29124460              }
    2913             if (crc < 0)
     4461            }
     4462
     4463          /* In the real compile, create the entry in the table */
     4464
     4465          else
     4466            {
     4467            slot = cd->name_table;
     4468            for (i = 0; i < cd->names_found; i++)
    29144469              {
    2915               memmove(slot + cd->name_entry_size, slot,
    2916                 (cd->names_found - i) * cd->name_entry_size);
    2917               break;
     4470              int crc = memcmp(name, slot+2, namelen);
     4471              if (crc == 0)
     4472                {
     4473                if (slot[2+namelen] == 0)
     4474                  {
     4475                  if ((options & PCRE_DUPNAMES) == 0)
     4476                    {
     4477                    *errorcodeptr = ERR43;
     4478                    goto FAILED;
     4479                    }
     4480                  }
     4481                else crc = -1;      /* Current name is substring */
     4482                }
     4483              if (crc < 0)
     4484                {
     4485                memmove(slot + cd->name_entry_size, slot,
     4486                  (cd->names_found - i) * cd->name_entry_size);
     4487                break;
     4488                }
     4489              slot += cd->name_entry_size;
    29184490              }
    2919             slot += cd->name_entry_size;
     4491
     4492            PUT2(slot, 0, cd->bracount + 1);
     4493            memcpy(slot + 2, name, namelen);
     4494            slot[2+namelen] = 0;
    29204495            }
    2921 
    2922           PUT2(slot, 0, *brackets + 1);
    2923           memcpy(slot + 2, name, namelen);
    2924           slot[2+namelen] = 0;
    2925           cd->names_found++;
    2926           goto NUMBERED_GROUP;
    29274496          }
    29284497
    2929         if (*ptr == '=' || *ptr == '>')  /* Reference or recursion */
     4498        /* In both cases, count the number of names we've encountered. */
     4499
     4500        ptr++;                    /* Move past > or ' */
     4501        cd->names_found++;
     4502        goto NUMBERED_GROUP;
     4503
     4504
     4505        /* ------------------------------------------------------------ */
     4506        case '&':                 /* Perl recursion/subroutine syntax */
     4507        terminator = ')';
     4508        is_recurse = TRUE;
     4509        /* Fall through */
     4510
     4511        /* We come here from the Python syntax above that handles both
     4512        references (?P=name) and recursion (?P>name), as well as falling
     4513        through from the Perl recursion syntax (?&name). */
     4514
     4515        NAMED_REF_OR_RECURSE:
     4516        name = ++ptr;
     4517        while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
     4518        namelen = ptr - name;
     4519
     4520        /* In the pre-compile phase, do a syntax check and set a dummy
     4521        reference number. */
     4522
     4523        if (lengthptr != NULL)
    29304524          {
    2931           int i, namelen;
    2932           int type = *ptr++;
    2933           const uschar *name = ptr;
    2934           uschar *slot = cd->name_table;
    2935 
    2936           while (*ptr != ')') ptr++;
    2937           namelen = ptr - name;
    2938 
     4525          if (*ptr != terminator)
     4526            {
     4527            *errorcodeptr = ERR42;
     4528            goto FAILED;
     4529            }
     4530          if (namelen > MAX_NAME_SIZE)
     4531            {
     4532            *errorcodeptr = ERR48;
     4533            goto FAILED;
     4534            }
     4535          recno = 0;
     4536          }
     4537
     4538        /* In the real compile, seek the name in the table */
     4539
     4540        else
     4541          {
     4542          slot = cd->name_table;
    29394543          for (i = 0; i < cd->names_found; i++)
    29404544            {
     
    29424546            slot += cd->name_entry_size;
    29434547            }
    2944           if (i >= cd->names_found)
     4548
     4549          if (i < cd->names_found)         /* Back reference */
     4550            {
     4551            recno = GET2(slot, 0);
     4552            }
     4553          else if ((recno =                /* Forward back reference */
     4554                    find_parens(ptr, cd->bracount, name, namelen,
     <