Changeset 11874 in project


Ignore:
Timestamp:
09/03/08 06:54:04 (13 years ago)
Author:
Kon Lovett
Message:

Update to PCRE 7.7; changed the "runtime.c" 'C_number_to_string' procedure to only use a forward jump (& especially not a jump into a `dead' block). Bumped version number.

Location:
chicken/trunk
Files:
15 edited

Legend:

Unmodified
Added
Removed
  • chicken/trunk/NEWS

    r11557 r11874  
     13.3.12
     2
     3- PCRE 7.7
     4
    153.3.8
    26
  • chicken/trunk/README

    r11773 r11874  
    33  (c)2000-2008 Felix L. Winkelmann
    44
    5   version 3.3.11
     5  version 3.3.12
    66
    77
  • chicken/trunk/buildversion

    r11773 r11874  
    1 3.3.11
     13.3.12
  • chicken/trunk/defaults.make

    r11533 r11874  
    156156else
    157157PCRE_DIR ?= $(VPATH)/pcre
    158 C_COMPILER_PCRE_OPTIONS = -DPCRE_STATIC
     158C_COMPILER_PCRE_OPTIONS = -DPCRE_STATIC -DHAVE_CONFIG_H
    159159PCRE_INCLUDES = $(INCLUDES) -I$(PCRE_DIR)
    160160endif
  • chicken/trunk/manual/The User's Manual

    r11773 r11874  
    33== The User's Manual
    44
    5 This is the user's manual for the Chicken Scheme compiler, version 3.3.11
     5This is the user's manual for the Chicken Scheme compiler, version 3.3.12
    66
    77; [[Getting started]] : What is CHICKEN and how do I use it?
  • chicken/trunk/pcre/config.h

    r9133 r11874  
    1 /* config.h.  From PCRE 7.6 config.h generated from config.h.in by configure.  */
     1/* config.h.  From PCRE 7.7 config.h generated from config.h.in by configure.  */
    22
    3 #if defined(HAVE_CONFIG_H) || defined(HAVE_CHICKEN_CONFIG_H)
     3/* For HAVE_* macros */
     4#ifdef HAVE_CHICKEN_CONFIG_H
    45# include "chicken-config.h"
    56#endif
    67
    7 /* On Unix-like systems config.h.in is converted by "configure" into config.h.
    8 Some other environments also support the use of "configure". PCRE is written in
    9 Standard C, but there are a few non-standard things it can cope with, allowing
    10 it to run on SunOS4 and other "close to standard" systems.
     8/* By default, the \R escape sequence matches any Unicode line ending
     9   character or sequence of characters. If BSR_ANYCRLF is defined, this is
     10   changed so that backslash-R matches only CR, LF, or CRLF. The build-time
     11   default can be overridden by the user of PCRE at runtime. On systems that
     12   support it, "configure" can be used to override the default. */
     13/* #undef BSR_ANYCRLF */
    1114
    12 If you are going to build PCRE "by hand" on a system without "configure" you
    13 should copy the distributed config.h.generic to config.h, and then set up the
    14 macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
    15 all of your compile commands, so that config.h is included at the start of
    16 every source.
    17 
    18 Alternatively, you can avoid editing by using -D on the compiler command line
    19 to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
    20 
    21 PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
    22 HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
    23 them both to 0; an emulation function will be used. */
    24 
    25 /* Define to 1 if you have the `memmove' function. */
    26 #ifndef HAVE_MEMMOVE
    27 /* hm... there must be a better way */
    28 # define HAVE_MEMMOVE 1
    29 #endif
    3015
    3116/* The value of LINK_SIZE determines the number of bytes used to store links
     
    8267   (ANYCRLF). */
    8368#ifndef NEWLINE
    84 #define NEWLINE '\n'
     69#define NEWLINE 10
    8570#endif
     71
     72/* PCRE uses recursive function calls to handle backtracking while matching.
     73   This can sometimes be a problem on systems that have stacks of limited
     74   size. Define NO_RECURSE to get a version that doesn't use recursion in the
     75   match() function; instead it creates its own stack by steam using
     76   pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
     77   the comments and other stuff just above the match() function. On systems
     78   that support it, "configure" can be used to set this in the Makefile (use
     79   --disable-stack-for-recursion). */
     80/* #undef NO_RECURSE */
     81/* Make independent of Chicken stack - KRL */
     82#define NO_RECURSE 1
    8683
    8784/* Name of package */
     
    9592
    9693/* Define to the full name and version of this package. */
    97 #define PACKAGE_STRING "PCRE 7.6"
     94#define PACKAGE_STRING "PCRE 7.7"
    9895
    9996/* Define to the one symbol short name of this package. */
     
    10198
    10299/* Define to the version of this package. */
    103 #define PACKAGE_VERSION "7.6"
     100#define PACKAGE_VERSION "7.7"
    104101
    105102/* When calling PCRE via the POSIX interface, additional working storage is
     
    131128
    132129/* Version number of package */
    133 #define VERSION "7.6"
     130#define VERSION "7.7"
    134131
    135132/* Define to empty if `const' does not conform to ANSI C. */
  • chicken/trunk/pcre/pcre.h

    r9133 r11874  
    4343
    4444#define PCRE_MAJOR          7
    45 #define PCRE_MINOR          6
     45#define PCRE_MINOR          7
    4646#define PCRE_PRERELEASE     
    47 #define PCRE_DATE           2008-01-28
     47#define PCRE_DATE           2008-05-07
    4848
    4949/* When an application links to a PCRE DLL in Windows, the symbols that are
     
    125125#define PCRE_BSR_ANYCRLF        0x00800000
    126126#define PCRE_BSR_UNICODE        0x01000000
     127#define PCRE_JAVASCRIPT_COMPAT  0x02000000
    127128
    128129/* Exec-time and get/set-time error codes */
  • chicken/trunk/pcre/pcre_compile.c

    r9133 r11874  
    159159  "THEN";
    160160
    161 static verbitem verbs[] = {
     161static const verbitem verbs[] = {
    162162  { 6, OP_ACCEPT },
    163163  { 6, OP_COMMIT },
     
    169169};
    170170
    171 static int verbcount = sizeof(verbs)/sizeof(verbitem);
     171static const int verbcount = sizeof(verbs)/sizeof(verbitem);
    172172
    173173
     
    296296  "repeating a DEFINE group is not allowed\0"
    297297  "inconsistent NEWLINE options\0"
    298   "\\g is not followed by a braced name or an optionally braced non-zero number\0"
    299   "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"
     298  "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
     299  "a numbered reference must not be zero\0"
    300300  "(*VERB) with an argument is not supported\0"
    301301  /* 60 */
     
    303303  "number is too big\0"
    304304  "subpattern name expected\0"
    305   "digit expected after (?+";
     305  "digit expected after (?+\0"
     306  "] is an invalid data character in JavaScript compatibility mode";
    306307
    307308
     
    532533    break;
    533534
    534     /* \g must be followed by a number, either plain or braced. If positive, it
    535     is an absolute backreference. If negative, it is a relative backreference.
    536     This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
    537     reference to a named group. This is part of Perl's movement towards a
    538     unified syntax for back references. As this is synonymous with \k{name}, we
    539     fudge it up by pretending it really was \k. */
     535    /* \g must be followed by one of a number of specific things:
     536
     537    (1) A number, either plain or braced. If positive, it is an absolute
     538    backreference. If negative, it is a relative backreference. This is a Perl
     539    5.10 feature.
     540
     541    (2) Perl 5.10 also supports \g{name} as a reference to a named group. This
     542    is part of Perl's movement towards a unified syntax for back references. As
     543    this is synonymous with \k{name}, we fudge it up by pretending it really
     544    was \k.
     545
     546    (3) For Oniguruma compatibility we also support \g followed by a name or a
     547    number either in angle brackets or in single quotes. However, these are
     548    (possibly recursive) subroutine calls, _not_ backreferences. Just return
     549    the -ESC_g code (cf \k). */
    540550
    541551    case 'g':
     552    if (ptr[1] == '<' || ptr[1] == '\'')
     553      {
     554      c = -ESC_g;
     555      break;
     556      }
     557
     558    /* Handle the Perl-compatible cases */
     559
    542560    if (ptr[1] == '{')
    543561      {
     
    566584      c = c * 10 + *(++ptr) - '0';
    567585
    568     if (c < 0)
     586    if (c < 0)   /* Integer overflow */
    569587      {
    570588      *errorcodeptr = ERR61;
     
    572590      }
    573591
    574     if (c == 0 || (braced && *(++ptr) != '}'))
     592    if (braced && *(++ptr) != '}')
    575593      {
    576594      *errorcodeptr = ERR57;
     595      break;
     596      }
     597
     598    if (c == 0)
     599      {
     600      *errorcodeptr = ERR58;
    577601      break;
    578602      }
     
    612636      while ((digitab[ptr[1]] & ctype_digit) != 0)
    613637        c = c * 10 + *(++ptr) - '0';
    614       if (c < 0)
     638      if (c < 0)    /* Integer overflow */
    615639        {
    616640        *errorcodeptr = ERR61;
     
    953977Arguments:
    954978  ptr          current position in the pattern
    955   count        current count of capturing parens so far encountered
     979  cd           compile background data
    956980  name         name to seek, or NULL if seeking a numbered subpattern
    957981  lorn         name length, or subpattern number if name is NULL
     
    962986
    963987static int
    964 find_parens(const uschar *ptr, int count, const uschar *name, int lorn,
     988find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,
    965989  BOOL xmode)
    966990{
    967991const uschar *thisname;
     992int count = cd->bracount;
    968993
    969994for (; *ptr != 0; ptr++)
     
    9851010    }
    9861011
    987   /* Skip over character classes */
     1012  /* Skip over character classes; this logic must be similar to the way they
     1013  are handled for real. If the first character is '^', skip it. Also, if the
     1014  first few characters (either before or after ^) are \Q\E or \E we skip them
     1015  too. This makes for compatibility with Perl. */
    9881016
    9891017  if (*ptr == '[')
    9901018    {
     1019    BOOL negate_class = FALSE;
     1020    for (;;)
     1021      {
     1022      int c = *(++ptr);
     1023      if (c == '\\')
     1024        {
     1025        if (ptr[1] == 'E') ptr++;
     1026          else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
     1027            else break;
     1028        }
     1029      else if (!negate_class && c == '^')
     1030        negate_class = TRUE;
     1031      else break;
     1032      }
     1033
     1034    /* If the next character is ']', it is a data character that must be
     1035    skipped, except in JavaScript compatibility mode. */
     1036
     1037    if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
     1038      ptr++;
     1039
    9911040    while (*(++ptr) != ']')
    9921041      {
     
    12531302    case OP_WORDCHAR:
    12541303    case OP_ANY:
     1304    case OP_ALLANY:
    12551305    branchlength++;
    12561306    cc++;
     
    15451595  /* Groups with zero repeats can of course be empty; skip them. */
    15461596
    1547   if (c == OP_BRAZERO || c == OP_BRAMINZERO)
     1597  if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
    15481598    {
    15491599    code += _pcre_OP_lengths[c];
     
    16311681    case OP_WORDCHAR:
    16321682    case OP_ANY:
     1683    case OP_ALLANY:
    16331684    case OP_ANYBYTE:
    16341685    case OP_CHAR:
     
    18251876repetition simply by copying (because the recursion is allowed to refer to
    18261877earlier groups that are outside the current group). However, when a group is
    1827 optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before
    1828 it, after it has been compiled. This means that any OP_RECURSE items within it
    1829 that refer to the group itself or any contained groups have to have their
    1830 offsets adjusted. That one of the jobs of this function. Before it is called,
    1831 the partially compiled regex must be temporarily terminated with OP_END.
     1878optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
     1879inserted before it, after it has been compiled. This means that any OP_RECURSE
     1880items within it that refer to the group itself or any contained groups have to
     1881have their offsets adjusted. That is one of the jobs of this function. Before it
     1882is called, the partially compiled regex must be temporarily terminated with
     1883OP_END.
    18321884
    18331885This function has been extended with the possibility of forward references for
     
    21142166
    21152167  case OP_NOT:
    2116   if (next < 0) return FALSE;  /* Not a character */
    21172168  if (item == next) return TRUE;
    21182169  if ((options & PCRE_CASELESS) == 0) return FALSE;
     
    26172668    zeroreqbyte = reqbyte;
    26182669    previous = code;
    2619     *code++ = OP_ANY;
     2670    *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
    26202671    break;
    26212672
     
    26322683    but those above are explicitly listed afterwards. A flag byte tells
    26332684    whether the bitmap is present, and whether this is a negated class or not.
    2634     */
     2685
     2686    In JavaScript compatibility mode, an isolated ']' causes an error. In
     2687    default (Perl) mode, it is treated as a data character. */
     2688
     2689    case ']':
     2690    if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
     2691      {
     2692      *errorcodeptr = ERR64;
     2693      goto FAILED;
     2694      }
     2695    goto NORMAL_CHAR;
    26352696
    26362697    case '[':
     
    26642725        negate_class = TRUE;
    26652726      else break;
     2727      }
     2728
     2729    /* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
     2730    an initial ']' is taken as a data character -- the code below handles
     2731    that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
     2732    [^] must match any character, so generate OP_ALLANY. */
     2733
     2734    if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
     2735      {
     2736      *code++ = negate_class? OP_ALLANY : OP_FAIL;
     2737      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
     2738      zerofirstbyte = firstbyte;
     2739      break;
    26662740      }
    26672741
     
    38213895      if (repeat_min == 0)
    38223896        {
    3823         /* If the maximum is also zero, we just omit the group from the output
    3824         altogether. */
    3825 
    3826         if (repeat_max == 0)
    3827           {
    3828           code = previous;
    3829           goto END_REPEAT;
    3830           }
    3831 
    3832         /* If the maximum is 1 or unlimited, we just have to stick in the
    3833         BRAZERO and do no more at this point. However, we do need to adjust
    3834         any OP_RECURSE calls inside the group that refer to the group itself or
    3835         any internal or forward referenced group, because the offset is from
    3836         the start of the whole regex. Temporarily terminate the pattern while
    3837         doing this. */
    3838 
    3839         if (repeat_max <= 1)
     3897        /* If the maximum is also zero, we used to just omit the group from the
     3898        output altogether, like this:
     3899
     3900        ** if (repeat_max == 0)
     3901        **   {
     3902        **   code = previous;
     3903        **   goto END_REPEAT;
     3904        **   }
     3905
     3906        However, that fails when a group is referenced as a subroutine from
     3907        elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it
     3908        so that it is skipped on execution. As we don't have a list of which
     3909        groups are referenced, we cannot do this selectively.
     3910
     3911        If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
     3912        and do no more at this point. However, we do need to adjust any
     3913        OP_RECURSE calls inside the group that refer to the group itself or any
     3914        internal or forward referenced group, because the offset is from the
     3915        start of the whole regex. Temporarily terminate the pattern while doing
     3916        this. */
     3917
     3918        if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
    38403919          {
    38413920          *code = OP_END;
     
    38433922          memmove(previous+1, previous, len);
    38443923          code++;
     3924          if (repeat_max == 0)
     3925            {
     3926            *previous++ = OP_SKIPZERO;
     3927            goto END_REPEAT;
     3928            }
    38453929          *previous++ = OP_BRAZERO + repeat_type;
    38463930          }
     
    40374121      }
    40384122
     4123    /* If previous is OP_FAIL, it was generated by an empty class [] in
     4124    JavaScript mode. The other ways in which OP_FAIL can be generated, that is
     4125    by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
     4126    error above. We can just ignore the repeat in JS case. */
     4127
     4128    else if (*previous == OP_FAIL) goto END_REPEAT;
     4129
    40394130    /* Else there's some kind of shambles */
    40404131
     
    43234414        /* Search the pattern for a forward reference */
    43244415
    4325         else if ((i = find_parens(ptr, cd->bracount, name, namelen,
     4416        else if ((i = find_parens(ptr, cd, name, namelen,
    43264417                        (options & PCRE_EXTENDED) != 0)) > 0)
    43274418          {
     
    45694660        through from the Perl recursion syntax (?&name). We also come here from
    45704661        the Perl \k<name> or \k'name' back reference syntax and the \k{name}
    4571         .NET syntax. */
     4662        .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
    45724663
    45734664        NAMED_REF_OR_RECURSE:
     
    46204711            }
    46214712          else if ((recno =                /* Forward back reference */
    4622                     find_parens(ptr, cd->bracount, name, namelen,
     4713                    find_parens(ptr, cd, name, namelen,
    46234714                      (options & PCRE_EXTENDED) != 0)) <= 0)
    46244715            {
     
    46474738          {
    46484739          const uschar *called;
     4740          terminator = ')';
     4741
     4742          /* Come here from the \g<...> and \g'...' code (Oniguruma
     4743          compatibility). However, the syntax has been checked to ensure that
     4744          the ... are a (signed) number, so that neither ERR63 nor ERR29 will
     4745          be called on this path, nor with the jump to OTHER_CHAR_AFTER_QUERY
     4746          ever be taken. */
     4747
     4748          HANDLE_NUMERICAL_RECURSION:
    46494749
    46504750          if ((refsign = *ptr) == '+')
     
    46684768            recno = recno * 10 + *ptr++ - '0';
    46694769
    4670           if (*ptr != ')')
     4770          if (*ptr != terminator)
    46714771            {
    46724772            *errorcodeptr = ERR29;
     
    47214821            if (called == NULL)
    47224822              {
    4723               if (find_parens(ptr, cd->bracount, NULL, recno,
    4724                    (options & PCRE_EXTENDED) != 0) < 0)
     4823              if (find_parens(ptr, cd, NULL, recno,
     4824                    (options & PCRE_EXTENDED) != 0) < 0)
    47254825                {
    47264826                *errorcodeptr = ERR15;
     
    50925192      zeroreqbyte = reqbyte;
    50935193
     5194      /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
     5195      is a subroutine call by number (Oniguruma syntax). In fact, the value
     5196      -ESC_g is returned only for these cases. So we don't need to check for <
     5197      or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is
     5198      -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as
     5199      that is a synonym for a named back reference). */
     5200
     5201      if (-c == ESC_g)
     5202        {
     5203        const uschar *p;
     5204        save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
     5205        terminator = (*(++ptr) == '<')? '>' : '\'';
     5206
     5207        /* These two statements stop the compiler from warning about possibly
     5208        unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
     5209        fact, because we actually check for a number below, the paths that
     5210        would actually be in error are never taken. */
     5211
     5212        skipbytes = 0;
     5213        reset_bracount = FALSE;
     5214
     5215        /* Test for a name */
     5216
     5217        if (ptr[1] != '+' && ptr[1] != '-')
     5218          {
     5219          BOOL isnumber = TRUE;
     5220          for (p = ptr + 1; *p != 0 && *p != terminator; p++)
     5221            {
     5222            if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
     5223            if ((cd->ctypes[*p] & ctype_word) == 0) break;
     5224            }
     5225          if (*p != terminator)
     5226            {
     5227            *errorcodeptr = ERR57;
     5228            break;
     5229            }
     5230          if (isnumber)
     5231            {
     5232            ptr++;
     5233            goto HANDLE_NUMERICAL_RECURSION;
     5234            }
     5235          is_recurse = TRUE;
     5236          goto NAMED_REF_OR_RECURSE;
     5237          }
     5238
     5239        /* Test a signed number in angle brackets or quotes. */
     5240
     5241        p = ptr + 2;
     5242        while ((digitab[*p] & ctype_digit) != 0) p++;
     5243        if (*p != terminator)
     5244          {
     5245          *errorcodeptr = ERR57;
     5246          break;
     5247          }
     5248        ptr++;
     5249        goto HANDLE_NUMERICAL_RECURSION;
     5250        }
     5251
    50945252      /* \k<name> or \k'name' is a back reference by name (Perl syntax).
    50955253      We also support \k{name} (.NET syntax) */
     
    55985756     }
    55995757
    5600    /* .* is not anchored unless DOTALL is set and it isn't in brackets that
    5601    are or may be referenced. */
     5758   /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
     5759   it isn't in brackets that are or may be referenced. */
    56025760
    56035761   else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
    5604              op == OP_TYPEPOSSTAR) &&
    5605             (*options & PCRE_DOTALL) != 0)
     5762             op == OP_TYPEPOSSTAR))
    56065763     {
    5607      if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;
     5764     if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
     5765       return FALSE;
    56085766     }
    56095767
  • chicken/trunk/pcre/pcre_dfa_exec.c

    r9133 r11874  
    8585that follow must also be modified. */
    8686
    87 static uschar coptable[] = {
     87static const uschar coptable[] = {
    8888  0,                             /* End                                    */
    8989  0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
    9090  0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
    91   0, 0,                          /* Any, Anybyte                           */
     91  0, 0, 0,                       /* Any, AllAny, Anybyte                   */
    9292  0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
    9393  0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
     
    133133  0, 0,                          /* BRAZERO, BRAMINZERO                    */
    134134  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
    135   0, 0                           /* FAIL, ACCEPT                           */
     135  0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
    136136};
    137137
     
    139139and \w */
    140140
    141 static uschar toptable1[] = {
     141static const uschar toptable1[] = {
    142142  0, 0, 0, 0, 0, 0,
    143143  ctype_digit, ctype_digit,
    144144  ctype_space, ctype_space,
    145145  ctype_word,  ctype_word,
    146   0                               /* OP_ANY */
     146  0, 0                            /* OP_ANY, OP_ALLANY */
    147147};
    148148
    149 static uschar toptable2[] = {
     149static const uschar toptable2[] = {
    150150  0, 0, 0, 0, 0, 0,
    151151  ctype_digit, 0,
    152152  ctype_space, 0,
    153153  ctype_word,  0,
    154   1                               /* OP_ANY */
     154  1, 1                            /* OP_ANY, OP_ALLANY */
    155155};
    156156
     
    224224  recursing         regex recursive call level
    225225
    226 Returns:            > 0 =>
    227                     = 0 =>
     226Returns:            > 0 => number of match offset pairs placed in offsets
     227                    = 0 => offsets overflowed; longest matches are present
    228228                     -1 => failed to match
    229229                   < -1 => some kind of unexpected problem
     
    695695
    696696      /*-----------------------------------------------------------------*/
     697      case OP_SKIPZERO:
     698      code += 1 + GET(code, 2);
     699      while (*code == OP_ALT) code += GET(code, 1);
     700      ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
     701      break;
     702
     703      /*-----------------------------------------------------------------*/
    697704      case OP_CIRC:
    698705      if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
     
    733740      /*-----------------------------------------------------------------*/
    734741      case OP_ANY:
    735       if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
     742      if (clen > 0 && !IS_NEWLINE(ptr))
     743        { ADD_NEW(state_offset + 1, 0); }
     744      break;
     745
     746      /*-----------------------------------------------------------------*/
     747      case OP_ALLANY:
     748      if (clen > 0)
    736749        { ADD_NEW(state_offset + 1, 0); }
    737750      break;
     
    853866      /* These opcodes likewise inspect the subject character, but have an
    854867      argument that is not a data character. It is one of these opcodes:
    855       OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,
    856       OP_NOT_WORDCHAR. The value is loaded into d. */
     868      OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
     869      OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
    857870
    858871      case OP_TYPEPLUS:
     
    865878        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
    866879            (c < 256 &&
    867               (d != OP_ANY ||
    868                (ims & PCRE_DOTALL) != 0 ||
    869                !IS_NEWLINE(ptr)
    870               ) &&
     880              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
    871881              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
    872882          {
     
    891901        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
    892902            (c < 256 &&
    893               (d != OP_ANY ||
    894                (ims & PCRE_DOTALL) != 0 ||
    895                !IS_NEWLINE(ptr)
    896               ) &&
     903              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
    897904              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
    898905          {
     
    916923        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
    917924            (c < 256 &&
    918               (d != OP_ANY ||
    919                (ims & PCRE_DOTALL) != 0 ||
    920                !IS_NEWLINE(ptr)
    921               ) &&
     925              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
    922926              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
    923927          {
     
    939943        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
    940944            (c < 256 &&
    941               (d != OP_ANY ||
    942                (ims & PCRE_DOTALL) != 0 ||
    943                !IS_NEWLINE(ptr)
    944               ) &&
     945              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
    945946              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
    946947          {
     
    963964        if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
    964965            (c < 256 &&
    965               (d != OP_ANY ||
    966                (ims & PCRE_DOTALL) != 0 ||
    967                !IS_NEWLINE(ptr)
    968               ) &&
     966              (d != OP_ANY || !IS_NEWLINE(ptr)) &&
    969967              ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
    970968          {
     
    21632161/* ========================================================================== */
    21642162      /* These are the opcodes for fancy brackets of various kinds. We have
    2165       to use recursion in order to handle them. */
     2163      to use recursion in order to handle them. The "always failing" assertion
     2164      (?!) is optimised when compiling to OP_FAIL, so we have to support that,
     2165      though the other "backtracking verbs" are not supported. */
     2166
     2167      case OP_FAIL:
     2168      break;
    21662169
    21672170      case OP_ASSERT:
  • chicken/trunk/pcre/pcre_exec.c

    r9133 r11874  
    11491149    break;
    11501150
    1151     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
    1152     that it may occur zero times. It may repeat infinitely, or not at all -
    1153     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
    1154     repeat limits are compiled as a number of copies, with the optional ones
    1155     preceded by BRAZERO or BRAMINZERO. */
     1151    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
     1152    indicating that it may occur zero times. It may repeat infinitely, or not
     1153    at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
     1154    with fixed upper repeat limits are compiled as a number of copies, with the
     1155    optional ones preceded by BRAZERO or BRAMINZERO. */
    11561156
    11571157    case OP_BRAZERO:
     
    11721172      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    11731173      ecode++;
     1174      }
     1175    break;
     1176
     1177    case OP_SKIPZERO:
     1178      {
     1179      next = ecode+1;
     1180      do next += GET(next,1); while (*next == OP_ALT);
     1181      ecode = next + 1 + LINK_SIZE;
    11741182      }
    11751183    break;
     
    14221430
    14231431    case OP_ANY:
    1424     if ((ims & PCRE_DOTALL) == 0)
    1425       {
    1426       if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
    1427       }
     1432    if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
     1433    /* Fall through */
     1434
     1435    case OP_ALLANY:
    14281436    if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
    1429     if (utf8)
    1430       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     1437    if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    14311438    ecode++;
    14321439    break;
     
    17241731      {
    17251732      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
    1726       ecode += 3;                                 /* Advance past item */
    1727 
    1728       /* If the reference is unset, set the length to be longer than the amount
    1729       of subject left; this ensures that every attempt at a match fails. We
    1730       can't just fail here, because of the possibility of quantifiers with zero
    1731       minima. */
    1732 
    1733       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
    1734         md->end_subject - eptr + 1 :
    1735         md->offset_vector[offset+1] - md->offset_vector[offset];
     1733      ecode += 3;
     1734
     1735      /* If the reference is unset, there are two possibilities:
     1736
     1737      (a) In the default, Perl-compatible state, set the length to be longer
     1738      than the amount of subject left; this ensures that every attempt at a
     1739      match fails. We can't just fail here, because of the possibility of
     1740      quantifiers with zero minima.
     1741
     1742      (b) If the JavaScript compatibility flag is set, set the length to zero
     1743      so that the back reference matches an empty string.
     1744
     1745      Otherwise, set the length to the length of what was matched by the
     1746      referenced subpattern. */
     1747
     1748      if (offset >= offset_top || md->offset_vector[offset] < 0)
     1749        length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
     1750      else
     1751        length = md->offset_vector[offset+1] - md->offset_vector[offset];
    17361752
    17371753      /* Set up for repetition, or handle the non-repeated case */
     
    29362952        for (i = 1; i <= min; i++)
    29372953          {
    2938           if (eptr >= md->end_subject ||
    2939                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
     2954          if (eptr >= md->end_subject || IS_NEWLINE(eptr))
    29402955            RRETURN(MATCH_NOMATCH);
     2956          eptr++;
     2957          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     2958          }
     2959        break;
     2960
     2961        case OP_ALLANY:
     2962        for (i = 1; i <= min; i++)
     2963          {
     2964          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    29412965          eptr++;
    29422966          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     
    31523176        {
    31533177        case OP_ANY:
    3154         if ((ims & PCRE_DOTALL) == 0)
    3155           {
    3156           for (i = 1; i <= min; i++)
    3157             {
    3158             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
    3159             eptr++;
    3160             }
    3161           }
    3162         else eptr += min;
     3178        for (i = 1; i <= min; i++)
     3179          {
     3180          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
     3181          eptr++;
     3182          }
     3183        break;
     3184
     3185        case OP_ALLANY:
     3186        eptr += min;
    31633187        break;
    31643188
     
    34173441          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    34183442          if (fi >= max || eptr >= md->end_subject ||
    3419                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
    3420                 IS_NEWLINE(eptr)))
     3443               (ctype == OP_ANY && IS_NEWLINE(eptr)))
    34213444            RRETURN(MATCH_NOMATCH);
    34223445
     
    34243447          switch(ctype)
    34253448            {
    3426             case OP_ANY:        /* This is the DOTALL case */
    3427             break;
    3428 
     3449            case OP_ANY:        /* This is the non-NL case */
     3450            case OP_ALLANY:
    34293451            case OP_ANYBYTE:
    34303452            break;
     
    35783600          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    35793601          if (fi >= max || eptr >= md->end_subject ||
    3580                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
     3602               (ctype == OP_ANY && IS_NEWLINE(eptr)))
    35813603            RRETURN(MATCH_NOMATCH);
    35823604
     
    35843606          switch(ctype)
    35853607            {
    3586             case OP_ANY:   /* This is the DOTALL case */
    3587             break;
    3588 
     3608            case OP_ANY:     /* This is the non-NL case */
     3609            case OP_ALLANY:
    35893610            case OP_ANYBYTE:
    35903611            break;
     
    38403861          if (max < INT_MAX)
    38413862            {
    3842             if ((ims & PCRE_DOTALL) == 0)
     3863            for (i = min; i < max; i++)
    38433864              {
    3844               for (i = min; i < max; i++)
    3845                 {
    3846                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
    3847                 eptr++;
    3848                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3849                 }
     3865              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
     3866              eptr++;
     3867              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    38503868              }
    3851             else
     3869            }
     3870
     3871          /* Handle unlimited UTF-8 repeat */
     3872
     3873          else
     3874            {
     3875            for (i = min; i < max; i++)
    38523876              {
    3853               for (i = min; i < max; i++)
    3854                 {
    3855                 if (eptr >= md->end_subject) break;
    3856                 eptr++;
    3857                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3858                 }
     3877              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
     3878              eptr++;
     3879              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    38593880              }
    38603881            }
    3861 
    3862           /* Handle unlimited UTF-8 repeat */
    3863 
    3864           else
    3865             {
    3866             if ((ims & PCRE_DOTALL) == 0)
     3882          break;
     3883
     3884          case OP_ALLANY:
     3885          if (max < INT_MAX)
     3886            {
     3887            for (i = min; i < max; i++)
    38673888              {
    3868               for (i = min; i < max; i++)
    3869                 {
    3870                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
    3871                 eptr++;
    3872                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    3873                 }
     3889              if (eptr >= md->end_subject) break;
     3890              eptr++;
     3891              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
    38743892              }
    3875             else
    3876               {
    3877               eptr = md->end_subject;
    3878               }
    3879             }
     3893            }
     3894          else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
    38803895          break;
    38813896
     
    40654080          {
    40664081          case OP_ANY:
    4067           if ((ims & PCRE_DOTALL) == 0)
    4068             {
    4069             for (i = min; i < max; i++)
    4070               {
    4071               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
    4072               eptr++;
    4073               }
    4074             break;
    4075             }
    4076           /* For DOTALL case, fall through and treat as \C */
    4077 
     4082          for (i = min; i < max; i++)
     4083            {
     4084            if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
     4085            eptr++;
     4086            }
     4087          break;
     4088
     4089          case OP_ALLANY:
    40784090          case OP_ANYBYTE:
    40794091          c = max - min;
     
    44514463md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
    44524464utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
     4465md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
    44534466
    44544467md->notbol = (options & PCRE_NOTBOL) != 0;
  • chicken/trunk/pcre/pcre_internal.h

    r9133 r11874  
    6666#define DPRINTF(p) /* Nothing */
    6767#endif
    68 
    69 
    70 /* Get the definitions provided by running "configure" */
    71 
    72 #include "config.h"
    7368
    7469
     
    520515   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
    521516   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
    522    PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
     517   PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
     518   PCRE_JAVASCRIPT_COMPAT)
    523519
    524520#define PUBLIC_EXEC_OPTIONS \
     
    610606their negation. Also, they must appear in the same order as in the opcode
    611607definitions below, up to ESC_z. There's a dummy for OP_ANY because it
    612 corresponds to "." rather than an escape sequence. The final one must be
    613 ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).
    614 There are two tests in the code for an escape greater than ESC_b and less than
    615 ESC_Z to detect the types that may be repeated. These are the types that
    616 consume characters. If any new escapes are put in between that don't consume a
    617 character, that code will have to change. */
     608corresponds to "." rather than an escape sequence, and another for OP_ALLANY
     609(which is used for [^] in JavaScript compatibility mode).
     610
     611The final escape must be ESC_REF as subsequent values are used for
     612backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
     613greater than ESC_b and less than ESC_Z to detect the types that may be
     614repeated. These are the types that consume characters. If any new escapes are
     615put in between that don't consume a character, that code will have to change.
     616*/
    618617
    619618enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
    620        ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
    621        ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
     619       ESC_W, ESC_w, ESC_dum1, ESC_dum2, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
     620       ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k,
     621       ESC_REF };
    622622
    623623
     
    645645  OP_NOT_WORDCHAR,       /* 10 \W */
    646646  OP_WORDCHAR,           /* 11 \w */
    647   OP_ANY,            /* 12 Match any character */
    648   OP_ANYBYTE,        /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
    649   OP_NOTPROP,        /* 14 \P (not Unicode property) */
    650   OP_PROP,           /* 15 \p (Unicode property) */
    651   OP_ANYNL,          /* 16 \R (any newline sequence) */
    652   OP_NOT_HSPACE,     /* 17 \H (not horizontal whitespace) */
    653   OP_HSPACE,         /* 18 \h (horizontal whitespace) */
    654   OP_NOT_VSPACE,     /* 19 \V (not vertical whitespace) */
    655   OP_VSPACE,         /* 20 \v (vertical whitespace) */
    656   OP_EXTUNI,         /* 21 \X (extended Unicode sequence */
    657   OP_EODN,           /* 22 End of data or \n at end of data: \Z. */
    658   OP_EOD,            /* 23 End of data: \z */
    659 
    660   OP_OPT,            /* 24 Set runtime options */
    661   OP_CIRC,           /* 25 Start of line - varies with multiline switch */
    662   OP_DOLL,           /* 26 End of line - varies with multiline switch */
    663   OP_CHAR,           /* 27 Match one character, casefully */
    664   OP_CHARNC,         /* 28 Match one character, caselessly */
    665   OP_NOT,            /* 29 Match one character, not the following one */
    666 
    667   OP_STAR,           /* 30 The maximizing and minimizing versions of */
    668   OP_MINSTAR,        /* 31 these six opcodes must come in pairs, with */
    669   OP_PLUS,           /* 32 the minimizing one second. */
    670   OP_MINPLUS,        /* 33 This first set applies to single characters.*/
    671   OP_QUERY,          /* 34 */
    672   OP_MINQUERY,       /* 35 */
    673 
    674   OP_UPTO,           /* 36 From 0 to n matches */
    675   OP_MINUPTO,        /* 37 */
    676   OP_EXACT,          /* 38 Exactly n matches */
    677 
    678   OP_POSSTAR,        /* 39 Possessified star */
    679   OP_POSPLUS,        /* 40 Possessified plus */
    680   OP_POSQUERY,       /* 41 Posesssified query */
    681   OP_POSUPTO,        /* 42 Possessified upto */
    682 
    683   OP_NOTSTAR,        /* 43 The maximizing and minimizing versions of */
    684   OP_NOTMINSTAR,     /* 44 these six opcodes must come in pairs, with */
    685   OP_NOTPLUS,        /* 45 the minimizing one second. They must be in */
    686   OP_NOTMINPLUS,     /* 46 exactly the same order as those above. */
    687   OP_NOTQUERY,       /* 47 This set applies to "not" single characters. */
    688   OP_NOTMINQUERY,    /* 48 */
    689 
    690   OP_NOTUPTO,        /* 49 From 0 to n matches */
    691   OP_NOTMINUPTO,     /* 50 */
    692   OP_NOTEXACT,       /* 51 Exactly n matches */
    693 
    694   OP_NOTPOSSTAR,     /* 52 Possessified versions */
    695   OP_NOTPOSPLUS,     /* 53 */
    696   OP_NOTPOSQUERY,    /* 54 */
    697   OP_NOTPOSUPTO,     /* 55 */
    698 
    699   OP_TYPESTAR,       /* 56 The maximizing and minimizing versions of */
    700   OP_TYPEMINSTAR,    /* 57 these six opcodes must come in pairs, with */
    701   OP_TYPEPLUS,       /* 58 the minimizing one second. These codes must */
    702   OP_TYPEMINPLUS,    /* 59 be in exactly the same order as those above. */
    703   OP_TYPEQUERY,      /* 60 This set applies to character types such as \d */
    704   OP_TYPEMINQUERY,   /* 61 */
    705 
    706   OP_TYPEUPTO,       /* 62 From 0 to n matches */
    707   OP_TYPEMINUPTO,    /* 63 */
    708   OP_TYPEEXACT,      /* 64 Exactly n matches */
    709 
    710   OP_TYPEPOSSTAR,    /* 65 Possessified versions */
    711   OP_TYPEPOSPLUS,    /* 66 */
    712   OP_TYPEPOSQUERY,   /* 67 */
    713   OP_TYPEPOSUPTO,    /* 68 */
    714 
    715   OP_CRSTAR,         /* 69 The maximizing and minimizing versions of */
    716   OP_CRMINSTAR,      /* 70 all these opcodes must come in pairs, with */
    717   OP_CRPLUS,         /* 71 the minimizing one second. These codes must */
    718   OP_CRMINPLUS,      /* 72 be in exactly the same order as those above. */
    719   OP_CRQUERY,        /* 73 These are for character classes and back refs */
    720   OP_CRMINQUERY,     /* 74 */
    721   OP_CRRANGE,        /* 75 These are different to the three sets above. */
    722   OP_CRMINRANGE,     /* 76 */
    723 
    724   OP_CLASS,          /* 77 Match a character class, chars < 256 only */
    725   OP_NCLASS,         /* 78 Same, but the bitmap was created from a negative
     647  OP_ANY,            /* 12 Match any character (subject to DOTALL) */
     648  OP_ALLANY,         /* 13 Match any character (not subject to DOTALL) */
     649  OP_ANYBYTE,        /* 14 Match any byte (\C); different to OP_ANY for UTF-8 */
     650  OP_NOTPROP,        /* 15 \P (not Unicode property) */
     651  OP_PROP,           /* 16 \p (Unicode property) */
     652  OP_ANYNL,          /* 17 \R (any newline sequence) */
     653  OP_NOT_HSPACE,     /* 18 \H (not horizontal whitespace) */
     654  OP_HSPACE,         /* 19 \h (horizontal whitespace) */
     655  OP_NOT_VSPACE,     /* 20 \V (not vertical whitespace) */
     656  OP_VSPACE,         /* 21 \v (vertical whitespace) */
     657  OP_EXTUNI,         /* 22 \X (extended Unicode sequence) */
     658  OP_EODN,           /* 23 End of data or \n at end of data: \Z. */
     659  OP_EOD,            /* 24 End of data: \z */
     660
     661  OP_OPT,            /* 25 Set runtime options */
     662  OP_CIRC,           /* 26 Start of line - varies with multiline switch */
     663  OP_DOLL,           /* 27 End of line - varies with multiline switch */
     664  OP_CHAR,           /* 28 Match one character, casefully */
     665  OP_CHARNC,         /* 29 Match one character, caselessly */
     666  OP_NOT,            /* 30 Match one character, not the following one */
     667
     668  OP_STAR,           /* 31 The maximizing and minimizing versions of */
     669  OP_MINSTAR,        /* 32 these six opcodes must come in pairs, with */
     670  OP_PLUS,           /* 33 the minimizing one second. */
     671  OP_MINPLUS,        /* 34 This first set applies to single characters.*/
     672  OP_QUERY,          /* 35 */
     673  OP_MINQUERY,       /* 36 */
     674
     675  OP_UPTO,           /* 37 From 0 to n matches */
     676  OP_MINUPTO,        /* 38 */
     677  OP_EXACT,          /* 39 Exactly n matches */
     678
     679  OP_POSSTAR,        /* 40 Possessified star */
     680  OP_POSPLUS,        /* 41 Possessified plus */
     681  OP_POSQUERY,       /* 42 Possessified query */
     682  OP_POSUPTO,        /* 43 Possessified upto */
     683
     684  OP_NOTSTAR,        /* 44 The maximizing and minimizing versions of */
     685  OP_NOTMINSTAR,     /* 45 these six opcodes must come in pairs, with */
     686  OP_NOTPLUS,        /* 46 the minimizing one second. They must be in */
     687  OP_NOTMINPLUS,     /* 47 exactly the same order as those above. */
     688  OP_NOTQUERY,       /* 48 This set applies to "not" single characters. */
     689  OP_NOTMINQUERY,    /* 49 */
     690
     691  OP_NOTUPTO,        /* 50 From 0 to n matches */
     692  OP_NOTMINUPTO,     /* 51 */
     693  OP_NOTEXACT,       /* 52 Exactly n matches */
     694
     695  OP_NOTPOSSTAR,     /* 53 Possessified versions */
     696  OP_NOTPOSPLUS,     /* 54 */
     697  OP_NOTPOSQUERY,    /* 55 */
     698  OP_NOTPOSUPTO,     /* 56 */
     699
     700  OP_TYPESTAR,       /* 57 The maximizing and minimizing versions of */
     701  OP_TYPEMINSTAR,    /* 58 these six opcodes must come in pairs, with */
     702  OP_TYPEPLUS,       /* 59 the minimizing one second. These codes must */
     703  OP_TYPEMINPLUS,    /* 60 be in exactly the same order as those above. */
     704  OP_TYPEQUERY,      /* 61 This set applies to character types such as \d */
     705  OP_TYPEMINQUERY,   /* 62 */
     706
     707  OP_TYPEUPTO,       /* 63 From 0 to n matches */
     708  OP_TYPEMINUPTO,    /* 64 */
     709  OP_TYPEEXACT,      /* 65 Exactly n matches */
     710
     711  OP_TYPEPOSSTAR,    /* 66 Possessified versions */
     712  OP_TYPEPOSPLUS,    /* 67 */
     713  OP_TYPEPOSQUERY,   /* 68 */
     714  OP_TYPEPOSUPTO,    /* 69 */
     715
     716  OP_CRSTAR,         /* 70 The maximizing and minimizing versions of */
     717  OP_CRMINSTAR,      /* 71 all these opcodes must come in pairs, with */
     718  OP_CRPLUS,         /* 72 the minimizing one second. These codes must */
     719  OP_CRMINPLUS,      /* 73 be in exactly the same order as those above. */
     720  OP_CRQUERY,        /* 74 These are for character classes and back refs */
     721  OP_CRMINQUERY,     /* 75 */
     722  OP_CRRANGE,        /* 76 These are different to the three sets above. */
     723  OP_CRMINRANGE,     /* 77 */
     724
     725  OP_CLASS,          /* 78 Match a character class, chars < 256 only */
     726  OP_NCLASS,         /* 79 Same, but the bitmap was created from a negative
    726727                           class - the difference is relevant only when a UTF-8
    727728                           character > 255 is encountered. */
    728729
    729   OP_XCLASS,         /* 79 Extended class for handling UTF-8 chars within the
     730  OP_XCLASS,         /* 80 Extended class for handling UTF-8 chars within the
    730731                           class. This does both positive and negative. */
    731732
    732   OP_REF,            /* 80 Match a back reference */
    733   OP_RECURSE,        /* 81 Match a numbered subpattern (possibly recursive) */
    734   OP_CALLOUT,        /* 82 Call out to external function if provided */
    735 
    736   OP_ALT,            /* 83 Start of alternation */
    737   OP_KET,            /* 84 End of group that doesn't have an unbounded repeat */
    738   OP_KETRMAX,        /* 85 These two must remain together and in this */
    739   OP_KETRMIN,        /* 86 order. They are for groups the repeat for ever. */
     733  OP_REF,            /* 81 Match a back reference */
     734  OP_RECURSE,        /* 82 Match a numbered subpattern (possibly recursive) */
     735  OP_CALLOUT,        /* 83 Call out to external function if provided */
     736
     737  OP_ALT,            /* 84 Start of alternation */
     738  OP_KET,            /* 85 End of group that doesn't have an unbounded repeat */
     739  OP_KETRMAX,        /* 86 These two must remain together and in this */
     740  OP_KETRMIN,        /* 87 order. They are for groups that repeat for ever. */
    740741
    741742  /* The assertions must come before BRA, CBRA, ONCE, and COND.*/
    742743
    743   OP_ASSERT,         /* 87 Positive lookahead */
    744   OP_ASSERT_NOT,     /* 88 Negative lookahead */
    745   OP_ASSERTBACK,     /* 89 Positive lookbehind */
    746   OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
    747   OP_REVERSE,        /* 91 Move pointer back - used in lookbehind assertions */
     744  OP_ASSERT,         /* 88 Positive lookahead */
     745  OP_ASSERT_NOT,     /* 89 Negative lookahead */
     746  OP_ASSERTBACK,     /* 90 Positive lookbehind */
     747  OP_ASSERTBACK_NOT, /* 91 Negative lookbehind */
     748  OP_REVERSE,        /* 92 Move pointer back - used in lookbehind assertions */
    748749
    749750  /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
    750751  as there's a test for >= ONCE for a subpattern that isn't an assertion. */
    751752
    752   OP_ONCE,           /* 92 Atomic group */
    753   OP_BRA,            /* 93 Start of non-capturing bracket */
    754   OP_CBRA,           /* 94 Start of capturing bracket */
    755   OP_COND,           /* 95 Conditional group */
     753  OP_ONCE,           /* 93 Atomic group */
     754  OP_BRA,            /* 94 Start of non-capturing bracket */
     755  OP_CBRA,           /* 95 Start of capturing bracket */
     756  OP_COND,           /* 96 Conditional group */
    756757
    757758  /* These three must follow the previous three, in the same order. There's a
    758759  check for >= SBRA to distinguish the two sets. */
    759760
    760   OP_SBRA,           /* 96 Start of non-capturing bracket, check empty  */
    761   OP_SCBRA,          /* 97 Start of capturing bracket, check empty */
    762   OP_SCOND,          /* 98 Conditional group, check empty */
    763 
    764   OP_CREF,           /* 99 Used to hold a capture number as condition */
    765   OP_RREF,           /* 100 Used to hold a recursion number as condition */
    766   OP_DEF,            /* 101 The DEFINE condition */
    767 
    768   OP_BRAZERO,        /* 102 These two must remain together and in this */
    769   OP_BRAMINZERO,     /* 103 order. */
     761  OP_SBRA,           /* 97 Start of non-capturing bracket, check empty  */
     762  OP_SCBRA,          /* 98 Start of capturing bracket, check empty */
     763  OP_SCOND,          /* 99 Conditional group, check empty */
     764
     765  OP_CREF,           /* 100 Used to hold a capture number as condition */
     766  OP_RREF,           /* 101 Used to hold a recursion number as condition */
     767  OP_DEF,            /* 102 The DEFINE condition */
     768
     769  OP_BRAZERO,        /* 103 These two must remain together and in this */
     770  OP_BRAMINZERO,     /* 104 order. */
    770771
    771772  /* These are backtracking control verbs */
    772773
    773   OP_PRUNE,          /* 104 */
    774   OP_SKIP,           /* 105 */
    775   OP_THEN,           /* 106 */
    776   OP_COMMIT,         /* 107 */
     774  OP_PRUNE,          /* 105 */
     775  OP_SKIP,           /* 106 */
     776  OP_THEN,           /* 107 */
     777  OP_COMMIT,         /* 108 */
    777778
    778779  /* These are forced failure and success verbs */
    779780
    780   OP_FAIL,           /* 108 */
    781   OP_ACCEPT          /* 109 */
     781  OP_FAIL,           /* 109 */
     782  OP_ACCEPT,         /* 110 */
     783
     784  /* This is used to skip a subpattern with a {0} quantifier */
     785
     786  OP_SKIPZERO        /* 111 */
    782787};
    783788
     
    788793#define OP_NAME_LIST \
    789794  "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \
    790   "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \
     795  "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte",         \
    791796  "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \
    792797  "extuni",  "\\Z", "\\z",                                        \
     
    804809  "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
    805810  "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero",    \
    806   "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
     811  "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \
     812  "Skip zero"
    807813
    808814
     
    820826  1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
    821827  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
    822   1, 1,                          /* Any, Anybyte                           */ \
     828  1, 1, 1,                       /* Any, AllAny, Anybyte                   */ \
    823829  3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \
    824830  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
     
    869875  1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
    870876  1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \
    871   1, 1                           /* FAIL, ACCEPT                           */
     877  1, 1, 1                        /* FAIL, ACCEPT, SKIPZERO                 */
    872878
    873879
     
    885891       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
    886892       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
    887        ERR60, ERR61, ERR62, ERR63 };
     893       ERR60, ERR61, ERR62, ERR63, ERR64 };
    888894
    889895/* The real format of the start of the pcre block; the index of names and the
     
    10101016  BOOL   noteol;                /* NOTEOL flag */
    10111017  BOOL   utf8;                  /* UTF8 flag */
     1018  BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
    10121019  BOOL   endonly;               /* Dollar not before final \n */
    10131020  BOOL   notempty;              /* Empty string match not wanted */
  • chicken/trunk/pcre/pcre_study.c

    r9133 r11874  
    218218      break;
    219219
     220      /* SKIPZERO skips the bracket. */
     221
     222      case OP_SKIPZERO:
     223      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
     224      tcode += 1 + LINK_SIZE;
     225      break;
     226
    220227      /* Single-char * or ? sets the bit and tries the next item */
    221228
     
    342349        {
    343350        case OP_ANY:
     351        case OP_ALLANY:
    344352        return SSB_FAIL;
    345353
  • chicken/trunk/pcre/ucpinternal.h

    r6175 r11874  
    1818#define f0_scriptmask   0xff000000  /* Mask for script field */
    1919#define f0_scriptshift          24  /* Shift for script value */
    20 #define f0_rangeflag    0x00f00000  /* Flag for a range item */
     20#define f0_rangeflag    0x00800000  /* Flag for a range item */
    2121#define f0_charmask     0x001fffff  /* Mask for code point value */
    2222
  • chicken/trunk/runtime.c

    r11768 r11874  
    77747774    default: barf(C_BAD_ARGUMENT_TYPE_ERROR, "number->string", C_fix(radix));
    77757775    }
     7776  }
     7777  else if(!C_immediatep(num) && C_block_header(num) == C_FLONUM_TAG) {
     7778    f = C_flonum_magnitude(num);
     7779
     7780    if(C_fits_in_unsigned_int_p(num) == C_SCHEME_TRUE) {
     7781      switch(radix) {
     7782      case 2:
     7783        p = to_binary((unsigned int)f);
     7784        goto fini;
     7785
     7786      case 8:
     7787        C_sprintf(p = buffer, "%o", (unsigned int)f);
     7788        goto fini;
     7789
     7790      case 16:
     7791        C_sprintf(p = buffer, "%x", (unsigned int)f);
     7792        goto fini;
     7793      }
     7794    }
     7795
     7796#if defined(__CYGWIN__) || defined(__MINGW32__)
     7797    if(C_isnan(f)) {
     7798      C_strcpy(p = buffer, "+nan.0");
     7799      goto fini;
     7800    }
     7801    else if(C_isinf(f)) {
     7802      C_sprintf(p = buffer, "%cinf.0", f > 0 ? '+' : '-');
     7803      goto fini;
     7804    }
     7805#endif
     7806
     7807#ifdef HAVE_GCVT
     7808    C_gcvt(f, flonum_print_precision, buffer);
     7809#else
     7810    C_sprintf(buffer, C_text("%.*g"), flonum_print_precision, f);
     7811#endif
     7812
     7813    if((p = C_strpbrk(buffer, C_text(".eE"))) == NULL) {
     7814      if(*buffer == 'i' || *buffer == 'n') { /* inf or nan */
     7815        C_memmove(buffer + 1, buffer, C_strlen(buffer) + 1);
     7816        *buffer = '+';
     7817      }
     7818      else if(buffer[ 1 ] != 'i') C_strcat(buffer, C_text(".0")); /* negative infinity? */
     7819    }
     7820
     7821    p = buffer;
     7822  }
     7823  else
     7824    barf(C_BAD_ARGUMENT_TYPE_ERROR, "number->string", num);
    77767825
    77777826  fini:
     
    77867835    radix = C_string(&a, radix, p);
    77877836    C_kontinue(k, radix);
    7788   }
    7789 
    7790   if(!C_immediatep(num) && C_block_header(num) == C_FLONUM_TAG) {
    7791     f = C_flonum_magnitude(num);
    7792 
    7793     if(C_fits_in_unsigned_int_p(num) == C_SCHEME_TRUE) {
    7794       switch(radix) {
    7795       case 2:
    7796         p = to_binary((unsigned int)f);
    7797         goto fini;
    7798 
    7799       case 8:
    7800         C_sprintf(p = buffer, "%o", (unsigned int)f);
    7801         goto fini;
    7802 
    7803       case 16:
    7804         C_sprintf(p = buffer, "%x", (unsigned int)f);
    7805         goto fini;
    7806       }
    7807     }
    7808 
    7809 #if defined(__CYGWIN__) || defined(__MINGW32__)
    7810     if(C_isnan(f)) {
    7811       C_strcpy(p = buffer, "+nan.0");
    7812       goto fini;
    7813     }
    7814     else if(C_isinf(f)) {
    7815       C_sprintf(p = buffer, "%cinf.0", f > 0 ? '+' : '-');
    7816       goto fini;
    7817     }
    7818 #endif
    7819 
    7820 #ifdef HAVE_GCVT
    7821     C_gcvt(f, flonum_print_precision, buffer);
    7822 #else
    7823     C_sprintf(buffer, C_text("%.*g"), flonum_print_precision, f);
    7824 #endif
    7825 
    7826     if((p = C_strpbrk(buffer, C_text(".eE"))) == NULL) {
    7827       if(*buffer == 'i' || *buffer == 'n') { /* inf or nan */
    7828         C_memmove(buffer + 1, buffer, C_strlen(buffer) + 1);
    7829         *buffer = '+';
    7830       }
    7831       else if(buffer[ 1 ] != 'i') C_strcat(buffer, C_text(".0")); /* negative infinity? */
    7832     }
    7833 
    7834     p = buffer;
    7835     goto fini;
    7836   }
    7837 
    7838   barf(C_BAD_ARGUMENT_TYPE_ERROR, "number->string", num);
    78397837}
    78407838
  • chicken/trunk/version.scm

    r11773 r11874  
    1 (define-constant +build-version+ "3.3.11")
     1(define-constant +build-version+ "3.3.12")
Note: See TracChangeset for help on using the changeset viewer.