Changeset 9133 in project


Ignore:
Timestamp:
02/29/08 18:18:10 (12 years ago)
Author:
Kon Lovett
Message:

PCRE 7.6

Location:
chicken/trunk
Files:
27 edited

Legend:

Unmodified
Added
Removed
  • chicken/trunk/NEWS

    r9040 r9133  
    113.0.4
    22
     3- unit regex: PCRE 7.6
    34- unit tcp: use of offset into string rather than substring for faster
    45  socket write [Jim Ursetto]
  • chicken/trunk/buildsvnrevision

    r9078 r9133  
    1 9077
     19129
  • chicken/trunk/pcre/config.h

    r6175 r9133  
    1 /* config.h.  From PCRE 7.4 config.h generated from config.h.in by configure.  */
     1/* config.h.  From PCRE 7.6 config.h generated from config.h.in by configure.  */
    22
    33#if defined(HAVE_CONFIG_H) || defined(HAVE_CHICKEN_CONFIG_H)
     
    9595
    9696/* Define to the full name and version of this package. */
    97 #define PACKAGE_STRING "PCRE 7.4"
     97#define PACKAGE_STRING "PCRE 7.6"
    9898
    9999/* Define to the one symbol short name of this package. */
     
    101101
    102102/* Define to the version of this package. */
    103 #define PACKAGE_VERSION "7.4"
     103#define PACKAGE_VERSION "7.6"
    104104
    105105/* When calling PCRE via the POSIX interface, additional working storage is
     
    131131
    132132/* Version number of package */
    133 #define VERSION "7.4"
     133#define VERSION "7.6"
    134134
    135135/* Define to empty if `const' does not conform to ANSI C. */
  • chicken/trunk/pcre/dftables.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre.h

    r6175 r9133  
    66applications that call the PCRE functions.
    77
    8            Copyright (c) 1997-2007 University of Cambridge
     8           Copyright (c) 1997-2008 University of Cambridge
    99
    1010-----------------------------------------------------------------------------
     
    4343
    4444#define PCRE_MAJOR          7
    45 #define PCRE_MINOR          4
     45#define PCRE_MINOR          6
    4646#define PCRE_PRERELEASE     
    47 #define PCRE_DATE           2007-09-21
     47#define PCRE_DATE           2008-01-28
    4848
    4949/* When an application links to a PCRE DLL in Windows, the symbols that are
  • chicken/trunk/pcre/pcre_chartables.c

    r6175 r9133  
    33*************************************************/
    44
    5 /* This file contains character tables that are used when no external tables
    6 are passed to PCRE by the application that calls it. The tables are used only
    7 for characters whose code values are less than 256.
     5/* This file was automatically written by the dftables auxiliary
     6program. It contains character tables that are used when no external
     7tables are passed to PCRE by the application that calls it. The tables
     8are used only for characters whose code values are less than 256.
    89
    9 This is a default version of the tables that assumes ASCII encoding. A program
    10 called dftables (which is distributed with PCRE) can be used to build
    11 alternative versions of this file. This is necessary if you are running in an
    12 EBCDIC environment, or if you want to default to a different encoding, for
    13 example ISO-8859-1. When dftables is run, it creates these tables in the
    14 current locale. If PCRE is configured with --enable-rebuild-chartables, this
    15 happens automatically.
    16 
    17 The following #includes are present because without the gcc 4.x may remove the
    18 array definition from the final binary if PCRE is built into a static library
    19 and dead code stripping is activated. This leads to link errors. Pulling in the
    20 header ensures that the array gets flagged as "someone outside this compilation
    21 unit might reference this" and so it will always be supplied to the linker. */
     10The following #includes are present because without them gcc 4.x may remove
     11the array definition from the final binary if PCRE is built into a static
     12library and dead code stripping is activated. This leads to link errors.
     13Pulling in the header ensures that the array gets flagged as "someone
     14outside this compilation unit might reference this" and so it will always
     15be supplied to the linker. */
    2216
    2317#ifdef HAVE_CONFIG_H
     
    9993  248,249,250,251,252,253,254,255,
    10094
    101 /* This table contains bit maps for various character classes. Each map is 32
    102 bytes long and the bits run from the least significant end of each byte. The
    103 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
    104 graph, print, punct, and cntrl. Other classes are built from combinations. */
     95/* This table contains bit maps for various character classes.
     96Each map is 32 bytes long and the bits run from the least
     97significant end of each byte. The classes that have their own
     98maps are: space, xdigit, digit, upper, lower, word, graph,
     99print, punct, and cntrl. Other classes are built from combinations. */
    105100
    106101  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  • chicken/trunk/pcre/pcre_compile.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
     
    242242  "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
    243243  "internal error: unexpected repeat\0"
    244   "unrecognized character after (?\0"
     244  "unrecognized character after (? or (?-\0"
    245245  "POSIX named classes are supported only within a class\0"
    246246  "missing )\0"
     
    301301  /* 60 */
    302302  "(*VERB) not recognized\0"
    303   "number is too big";
     303  "number is too big\0"
     304  "subpattern name expected\0"
     305  "digit expected after (?+";
    304306
    305307
     
    497499if (c == 0) *errorcodeptr = ERR1;
    498500
    499 /* Non-alphamerics are literals. For digits or letters, do an initial lookup in
    500 a table. A non-zero result is something that can be returned immediately.
     501/* Non-alphanumerics are literals. For digits or letters, do an initial lookup
     502in a table. A non-zero result is something that can be returned immediately.
    501503Otherwise further processing may be required. */
    502504
    503505#ifndef EBCDIC  /* ASCII coding */
    504 else if (c < '0' || c > 'z') {}                           /* Not alphameric */
     506else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */
    505507else if ((i = escapes[c - '0']) != 0) c = i;
    506508
    507509#else           /* EBCDIC coding */
    508 else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
     510else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
    509511else if ((i = escapes[c - 0x48]) != 0)  c = i;
    510512#endif
     
    723725
    724726    /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
    725     other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
    726     for Perl compatibility, it is a literal. This code looks a bit odd, but
    727     there used to be some cases other than the default, and there may be again
    728     in future, so I haven't "optimized" it. */
     727    other alphanumeric following \ is an error if PCRE_EXTRA was set;
     728    otherwise, for Perl compatibility, it is a literal. This code looks a bit
     729    odd, but there used to be some cases other than the default, and there may
     730    be again in future, so I haven't "optimized" it. */
    729731
    730732    default:
     
    15071509below and from compile_branch() when checking for an unlimited repeat of a
    15081510group that can match nothing. Note that first_significant_code() skips over
    1509 assertions. If we hit an unclosed bracket, we return "empty" - this means we've
    1510 struck an inner bracket whose current branch will already have been scanned.
     1511backward and negative forward assertions when its final argument is TRUE. If we
     1512hit an unclosed bracket, we return "empty" - this means we've struck an inner
     1513bracket whose current branch will already have been scanned.
    15111514
    15121515Arguments:
     
    15291532
    15301533  c = *code;
     1534
     1535  /* Skip over forward assertions; the other assertions are skipped by
     1536  first_significant_code() with a TRUE final argument. */
     1537
     1538  if (c == OP_ASSERT)
     1539    {
     1540    do code += GET(code, 1); while (*code == OP_ALT);
     1541    c = *code;
     1542    continue;
     1543    }
    15311544
    15321545  /* Groups with zero repeats can of course be empty; skip them. */
     
    17251738
    17261739/* This function is called when the sequence "[:" or "[." or "[=" is
    1727 encountered in a character class. It checks whether this is followed by an
    1728 optional ^ and then a sequence of letters, terminated by a matching ":]" or
    1729 ".]" or "=]".
    1730 
    1731 Argument:
     1740encountered in a character class. It checks whether this is followed by a
     1741sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
     1742reach an unescaped ']' without the special preceding character, return FALSE.
     1743
     1744Originally, this function only recognized a sequence of letters between the
     1745terminators, but it seems that Perl recognizes any sequence of characters,
     1746though of course unknown POSIX names are subsequently rejected. Perl gives an
     1747"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
     1748didn't consider this to be a POSIX class. Likewise for [:1234:].
     1749
     1750The problem in trying to be exactly like Perl is in the handling of escapes. We
     1751have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
     1752class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
     1753below handles the special case of \], but does not try to do any other escape
     1754processing. This makes it different from Perl for cases such as [:l\ower:]
     1755where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
     1756"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
     1757I think.
     1758
     1759Arguments:
    17321760  ptr      pointer to the initial [
    17331761  endptr   where to return the end pointer
    1734   cd       pointer to compile data
    17351762
    17361763Returns:   TRUE or FALSE
     
    17381765
    17391766static BOOL
    1740 check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
     1767check_posix_syntax(const uschar *ptr, const uschar **endptr)
    17411768{
    17421769int terminator;          /* Don't combine these lines; the Solaris cc */
    17431770terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
    1744 if (*(++ptr) == '^') ptr++;
    1745 while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
    1746 if (*ptr == terminator && ptr[1] == ']')
     1771for (++ptr; *ptr != 0; ptr++)
    17471772  {
    1748   *endptr = ptr;
    1749   return TRUE;
     1773  if (*ptr == '\\' && ptr[1] == ']') ptr++; else
     1774    {
     1775    if (*ptr == ']') return FALSE;
     1776    if (*ptr == terminator && ptr[1] == ']')
     1777      {
     1778      *endptr = ptr;
     1779      return TRUE;
     1780      }
     1781    }
    17501782  }
    17511783return FALSE;
     
    23452377BOOL utf8 = (options & PCRE_UTF8) != 0;
    23462378uschar *class_utf8data;
     2379uschar *class_utf8data_base;
    23472380uschar utf8_char[6];
    23482381#else
     
    23842417  {
    23852418  BOOL negate_class;
     2419  BOOL should_flip_negation;
    23862420  BOOL possessive_quantifier;
    23872421  BOOL is_quantifier;
     
    26072641
    26082642    if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
    2609         check_posix_syntax(ptr, &tempptr, cd))
     2643        check_posix_syntax(ptr, &tempptr))
    26102644      {
    26112645      *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
     
    26322666      }
    26332667
     2668    /* If a class contains a negative special such as \S, we need to flip the
     2669    negation flag at the end, so that support for characters > 255 works
     2670    correctly (they are all included in the class). */
     2671
     2672    should_flip_negation = FALSE;
     2673
    26342674    /* Keep a count of chars with values < 256 so that we can optimize the case
    26352675    of just a single character (as long as it's < 256). However, for higher
     
    26492689    class_utf8 = FALSE;                       /* No chars >= 256 */
    26502690    class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
     2691    class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */
    26512692#endif
    26522693
     
    26642705        GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
    26652706        }
     2707
     2708      /* In the pre-compile phase, accumulate the length of any UTF-8 extra
     2709      data and reset the pointer. This is so that very large classes that
     2710      contain a zillion UTF-8 characters no longer overwrite the work space
     2711      (which is on the stack). */
     2712
     2713      if (lengthptr != NULL)
     2714        {
     2715        *lengthptr += class_utf8data - class_utf8data_base;
     2716        class_utf8data = class_utf8data_base;
     2717        }
     2718
    26662719#endif
    26672720
     
    26872740      if (c == '[' &&
    26882741          (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
    2689           check_posix_syntax(ptr, &tempptr, cd))
     2742          check_posix_syntax(ptr, &tempptr))
    26902743        {
    26912744        BOOL local_negate = FALSE;
     
    27042757          {
    27052758          local_negate = TRUE;
     2759          should_flip_negation = TRUE;  /* Note negative special */
    27062760          ptr++;
    27072761          }
     
    27782832        if (*errorcodeptr != 0) goto FAILED;
    27792833
    2780         if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */
     2834        if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
    27812835        else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
    27822836        else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
     
    28062860
    28072861            case ESC_D:
     2862            should_flip_negation = TRUE;
    28082863            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
    28092864            continue;
     
    28142869
    28152870            case ESC_W:
     2871            should_flip_negation = TRUE;
    28162872            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
    28172873            continue;
     
    28232879
    28242880            case ESC_S:
     2881            should_flip_negation = TRUE;
    28252882            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
    28262883            classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
    2827             continue;
    2828 
    2829             case ESC_E: /* Perl ignores an orphan \E */
    28302884            continue;
    28312885
     
    30643118          if (*errorcodeptr != 0) goto FAILED;
    30653119
    3066           /* \b is backslash; \X is literal X; \R is literal R; any other
     3120          /* \b is backspace; \X is literal X; \R is literal R; any other
    30673121          special means the '-' was literal */
    30683122
     
    33283382
    33293383    /* If there are characters with values > 255, we have to compile an
    3330     extended class, with its own opcode. If there are no characters < 256,
    3331     we can omit the bitmap in the actual compiled code. */
     3384    extended class, with its own opcode, unless there was a negated special
     3385    such as \S in the class, because in that case all characters > 255 are in
     3386    the class, so any that were explicitly given as well can be ignored. If
     3387    (when there are explicit characters > 255 that must be listed) there are no
     3388    characters < 256, we can omit the bitmap in the actual compiled code. */
    33323389
    33333390#ifdef SUPPORT_UTF8
    3334     if (class_utf8)
     3391    if (class_utf8 && !should_flip_negation)
    33353392      {
    33363393      *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
     
    33583415#endif
    33593416
    3360     /* If there are no characters > 255, negate the 32-byte map if necessary,
    3361     and copy it into the code vector. If this is the first thing in the branch,
    3362     there can be no first char setting, whatever the repeat count. Any reqbyte
    3363     setting must remain unchanged after any kind of repeat. */
    3364 
     3417    /* If there are no characters > 255, set the opcode to OP_CLASS or
     3418    OP_NCLASS, depending on whether the whole class was negated and whether
     3419    there were negative specials such as \S in the class. Then copy the 32-byte
     3420    map into the code vector, negating it if necessary. */
     3421
     3422    *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
    33653423    if (negate_class)
    33663424      {
    3367       *code++ = OP_NCLASS;
    33683425      if (lengthptr == NULL)    /* Save time in the pre-compile phase */
    33693426        for (c = 0; c < 32; c++) code[c] = ~classbits[c];
     
    33713428    else
    33723429      {
    3373       *code++ = OP_CLASS;
    33743430      memcpy(code, classbits, 32);
    33753431      }
     
    40074063      if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
    40084064          *tempcode == OP_NOTEXACT)
    4009         tempcode += _pcre_OP_lengths[*tempcode];
     4065        tempcode += _pcre_OP_lengths[*tempcode] +
     4066          ((*tempcode == OP_TYPEEXACT &&
     4067             (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
    40104068      len = code - tempcode;
    40114069      if (len > 0) switch (*tempcode)
     
    42344292            goto FAILED;
    42354293            }
    4236           if (refsign == '-')
     4294          recno = (refsign == '-')?
     4295            cd->bracount - recno + 1 : recno +cd->bracount;
     4296          if (recno <= 0 || recno > cd->final_bracount)
    42374297            {
    4238             recno = cd->bracount - recno + 1;
    4239             if (recno <= 0)
    4240               {
    4241               *errorcodeptr = ERR15;
    4242               goto FAILED;
    4243               }
     4298            *errorcodeptr = ERR15;
     4299            goto FAILED;
    42444300            }
    4245           else recno += cd->bracount;
    42464301          PUT2(code, 2+LINK_SIZE, recno);
    42474302          break;
     
    43154370          }
    43164371
    4317         /* Check for the "name" actually being a subpattern number. */
    4318 
    4319         else if (recno > 0)
     4372        /* Check for the "name" actually being a subpattern number. We are
     4373        in the second pass here, so final_bracount is set. */
     4374
     4375        else if (recno > 0 && recno <= cd->final_bracount)
    43204376          {
    43214377          PUT2(code, 2+LINK_SIZE, recno);
     
    45114567        /* We come here from the Python syntax above that handles both
    45124568        references (?P=name) and recursion (?P>name), as well as falling
    4513         through from the Perl recursion syntax (?&name). */
     4569        through from the Perl recursion syntax (?&name). We also come here from
     4570        the Perl \k<name> or \k'name' back reference syntax and the \k{name}
     4571        .NET syntax. */
    45144572
    45154573        NAMED_REF_OR_RECURSE:
     
    45234581        if (lengthptr != NULL)
    45244582          {
     4583          if (namelen == 0)
     4584            {
     4585            *errorcodeptr = ERR62;
     4586            goto FAILED;
     4587            }
    45254588          if (*ptr != terminator)
    45264589            {
     
    45364599          }
    45374600
    4538         /* In the real compile, seek the name in the table */
     4601        /* In the real compile, seek the name in the table. We check the name
     4602        first, and then check that we have reached the end of the name in the
     4603        table. That way, if the name is longer than any in the table,
     4604        the comparison will fail without reading beyond the table entry. */
    45394605
    45404606        else
     
    45434609          for (i = 0; i < cd->names_found; i++)
    45444610            {
    4545             if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
     4611            if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
     4612                slot[2+namelen] == 0)
     4613              break;
    45464614            slot += cd->name_entry_size;
    45474615            }
     
    45804648          const uschar *called;
    45814649
    4582           if ((refsign = *ptr) == '+') ptr++;
     4650          if ((refsign = *ptr) == '+')
     4651            {
     4652            ptr++;
     4653            if ((digitab[*ptr] & ctype_digit) == 0)
     4654              {
     4655              *errorcodeptr = ERR63;
     4656              goto FAILED;
     4657              }
     4658            }
    45834659          else if (refsign == '-')
    45844660            {
     
    57465822uschar cworkspace[COMPILE_WORK_SIZE];
    57475823
    5748 
    57495824/* Set this early so that early errors get offset 0. */
    57505825
     
    59075982is a test for its doing so. */
    59085983
    5909 cd->bracount = 0;
     5984cd->bracount = cd->final_bracount = 0;
    59105985cd->names_found = 0;
    59115986cd->name_entry_size = 0;
     
    59846059*/
    59856060
     6061cd->final_bracount = cd->bracount;  /* Save for checking forward references */
    59866062cd->bracount = 0;
    59876063cd->names_found = 0;
  • chicken/trunk/pcre/pcre_config.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_dfa_exec.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_exec.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
     
    46714671      while (start_match < end_subject &&
    46724672             md->lcc[*start_match] != first_byte)
    4673         start_match++;
     4673        { NEXTCHAR(start_match); }
    46744674    else
    46754675      while (start_match < end_subject && *start_match != first_byte)
    4676         start_match++;
     4676        { NEXTCHAR(start_match); }
    46774677    }
    46784678
     
    46844684      {
    46854685      while (start_match <= end_subject && !WAS_NEWLINE(start_match))
    4686         start_match++;
     4686        { NEXTCHAR(start_match); }
    46874687
    46884688      /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
     
    47054705      {
    47064706      register unsigned int c = *start_match;
    4707       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
     4707      if ((start_bits[c/8] & (1 << (c&7))) == 0)
     4708        { NEXTCHAR(start_match); }
     4709      else break;
    47084710      }
    47094711    }
  • chicken/trunk/pcre/pcre_fullinfo.c

    r6175 r9133  
    33*************************************************/
    44
    5 /*PCRE is a library of functions to support regular expressions whose syntax
     5/* PCRE is a library of functions to support regular expressions whose syntax
    66and semantics are as close as possible to those of the Perl 5 language.
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_get.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_globals.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_info.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_internal.h

    r6175 r9133  
    88
    99                       Written by Philip Hazel
    10            Copyright (c) 1997-2007 University of Cambridge
     10           Copyright (c) 1997-2008 University of Cambridge
    1111
    1212-----------------------------------------------------------------------------
     
    7171
    7272#include "config.h"
     73
    7374
    7475/* Standard C headers plus the external interface definition. The only time
     
    368369
    369370#ifndef SUPPORT_UTF8
     371#define NEXTCHAR(p) p++;
    370372#define GETCHAR(c, eptr) c = *eptr;
    371373#define GETCHARTEST(c, eptr) c = *eptr;
     
    376378
    377379#else   /* SUPPORT_UTF8 */
     380
     381/* Advance a character pointer one byte in non-UTF-8 mode and by one character
     382in UTF-8 mode. */
     383
     384#define NEXTCHAR(p) \
     385  p++; \
     386  if (utf8) { while((*p & 0xc0) == 0x80) p++; }
    378387
    379388/* Get the next UTF-8 character, not advancing the pointer. This is called when
     
    876885       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
    877886       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
    878        ERR60, ERR61 };
     887       ERR60, ERR61, ERR62, ERR63 };
    879888
    880889/* The real format of the start of the pcre block; the index of names and the
     
    939948  int  names_found;             /* Number of entries so far */
    940949  int  name_entry_size;         /* Size of each entry */
    941   int  bracount;                /* Count of capturing parens */
     950  int  bracount;                /* Count of capturing parens as we compile */
     951  int  final_bracount;          /* Saved value after first pass */
    942952  int  top_backref;             /* Maximum back reference */
    943953  unsigned int backref_map;     /* Bitmap of low back refs */
     
    10411051#define ctype_digit   0x04
    10421052#define ctype_xdigit  0x08
    1043 #define ctype_word    0x10   /* alphameric or '_' */
     1053#define ctype_word    0x10   /* alphanumeric or '_' */
    10441054#define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
    10451055
  • chicken/trunk/pcre/pcre_maketables.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_newline.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_ord2utf8.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_refcount.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_study.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_tables.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_try_flipped.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_ucp_searchfuncs.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_valid_utf8.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
     
    6363the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
    6464the canonical format. Once somebody had pointed out RFC 3629 to me (it
    65 obsoletes 2279), additional restrictions were applies. The values are now
     65obsoletes 2279), additional restrictions were applied. The values are now
    6666limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
    6767subrange 0xd800 to 0xdfff is excluded.
  • chicken/trunk/pcre/pcre_version.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/pcre_xclass.c

    r6175 r9133  
    77
    88                       Written by Philip Hazel
    9            Copyright (c) 1997-2007 University of Cambridge
     9           Copyright (c) 1997-2008 University of Cambridge
    1010
    1111-----------------------------------------------------------------------------
  • chicken/trunk/pcre/ucptable.h

    r6175 r9133  
    540540  { 0x21000294, 0x1c000000 },
    541541  { 0x21800295, 0x1400001a },
    542   { 0x218002b0, 0x18000011 },
     542  { 0x218002b0, 0x18000008 },
     543  { 0x098002b9, 0x18000008 },
    543544  { 0x098002c2, 0x60000003 },
    544545  { 0x098002c6, 0x1800000b },
     
    10401041  { 0x09800600, 0x04000003 },
    10411042  { 0x0000060b, 0x5c000000 },
    1042   { 0x0980060c, 0x54000001 },
     1043  { 0x0900060c, 0x54000000 },
     1044  { 0x0000060d, 0x54000000 },
    10431045  { 0x0080060e, 0x68000001 },
    10441046  { 0x00800610, 0x30000005 },
    10451047  { 0x0900061b, 0x54000000 },
    1046   { 0x0080061e, 0x54000001 },
     1048  { 0x0000061e, 0x54000000 },
     1049  { 0x0900061f, 0x54000000 },
    10471050  { 0x00800621, 0x1c000019 },
    10481051  { 0x09000640, 0x18000000 },
    10491052  { 0x00800641, 0x1c000009 },
    1050   { 0x1b80064b, 0x30000013 },
     1053  { 0x1b80064b, 0x3000000a },
     1054  { 0x00800656, 0x30000008 },
    10511055  { 0x09800660, 0x34000009 },
    10521056  { 0x0080066a, 0x54000003 },
     
    10751079  { 0x31800712, 0x1c00001d },
    10761080  { 0x31800730, 0x3000001a },
    1077   { 0x3180074d, 0x1c000020 },
     1081  { 0x3180074d, 0x1c000002 },
     1082  { 0x00800750, 0x1c00001d },
    10781083  { 0x37800780, 0x1c000025 },
    10791084  { 0x378007a6, 0x3000000a },
     
    14611466  { 0x1f8017e0, 0x34000009 },
    14621467  { 0x1f8017f0, 0x3c000009 },
    1463   { 0x25801800, 0x54000005 },
     1468  { 0x25801800, 0x54000001 },
     1469  { 0x09801802, 0x54000001 },
     1470  { 0x25001804, 0x54000000 },
     1471  { 0x09001805, 0x54000000 },
    14641472  { 0x25001806, 0x44000000 },
    14651473  { 0x25801807, 0x54000003 },
     
    15141522  { 0x3d801b6b, 0x30000008 },
    15151523  { 0x3d801b74, 0x68000008 },
    1516   { 0x21801d00, 0x1400002b },
    1517   { 0x21801d2c, 0x18000035 },
    1518   { 0x21801d62, 0x14000015 },
     1524  { 0x21801d00, 0x14000025 },
     1525  { 0x13801d26, 0x14000004 },
     1526  { 0x0c001d2b, 0x14000000 },
     1527  { 0x21801d2c, 0x18000030 },
     1528  { 0x13801d5d, 0x18000004 },
     1529  { 0x21801d62, 0x14000003 },
     1530  { 0x13801d66, 0x14000004 },
     1531  { 0x21801d6b, 0x1400000c },
    15191532  { 0x0c001d78, 0x18000000 },
    15201533  { 0x21801d79, 0x14000003 },
    15211534  { 0x21001d7d, 0x14000ee6 },
    15221535  { 0x21801d7e, 0x1400001c },
    1523   { 0x21801d9b, 0x18000024 },
     1536  { 0x21801d9b, 0x18000023 },
     1537  { 0x13001dbf, 0x18000000 },
    15241538  { 0x1b801dc0, 0x3000000a },
    15251539  { 0x1b801dfe, 0x30000001 },
     
    19831997  { 0x13801ffd, 0x60000001 },
    19841998  { 0x09802000, 0x7400000a },
    1985   { 0x0980200b, 0x04000004 },
     1999  { 0x0900200b, 0x04000000 },
     2000  { 0x1b80200c, 0x04000001 },
     2001  { 0x0980200e, 0x04000001 },
    19862002  { 0x09802010, 0x44000005 },
    19872003  { 0x09802016, 0x54000001 },
     
    26162632  { 0x1d8030a1, 0x1c000059 },
    26172633  { 0x090030fb, 0x54000000 },
    2618   { 0x098030fc, 0x18000002 },
     2634  { 0x090030fc, 0x18000000 },
     2635  { 0x1d8030fd, 0x18000001 },
    26192636  { 0x1d0030ff, 0x1c000000 },
    26202637  { 0x03803105, 0x1c000027 },
     
    26312648  { 0x09003250, 0x68000000 },
    26322649  { 0x09803251, 0x3c00000e },
    2633   { 0x17803260, 0x6800001f },
     2650  { 0x17803260, 0x6800001d },
     2651  { 0x0980327e, 0x68000001 },
    26342652  { 0x09803280, 0x3c000009 },
    26352653  { 0x0980328a, 0x68000026 },
     
    26792697  { 0x1980fb40, 0x1c000001 },
    26802698  { 0x1980fb43, 0x1c000001 },
    2681   { 0x1980fb46, 0x1c00006b },
     2699  { 0x1980fb46, 0x1c000009 },
     2700  { 0x0080fb50, 0x1c000061 },
    26822701  { 0x0080fbd3, 0x1c00016a },
    26832702  { 0x0900fd3e, 0x58000000 },
     
    29452964  { 0x0d01044e, 0x1400ffd8 },
    29462965  { 0x0d01044f, 0x1400ffd8 },
    2947   { 0x2e810450, 0x1c00004d },
     2966  { 0x2e810450, 0x1c00002f },
     2967  { 0x2c810480, 0x1c00001d },
    29482968  { 0x2c8104a0, 0x34000009 },
    29492969  { 0x0b810800, 0x1c000005 },
Note: See TracChangeset for help on using the changeset viewer.