source: project/release/3/regex-extras/trunk/pcre_internal.h @ 8400

Last change on this file since 8400 was 8400, checked in by Kon Lovett, 12 years ago

Save.

File size: 44.9 KB
Line 
1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5
6/* PCRE is a library of functions to support regular expressions whose syntax
7and semantics are as close as possible to those of the Perl 5 language.
8
9                       Written by Philip Hazel
10           Copyright (c) 1997-2007 University of Cambridge
11
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16    * Redistributions of source code must retain the above copyright notice,
17      this list of conditions and the following disclaimer.
18
19    * Redistributions in binary form must reproduce the above copyright
20      notice, this list of conditions and the following disclaimer in the
21      documentation and/or other materials provided with the distribution.
22
23    * Neither the name of the University of Cambridge nor the names of its
24      contributors may be used to endorse or promote products derived from
25      this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41/* This header contains definitions that are shared between the different
42modules, but which are not relevant to the exported API. This includes some
43functions whose names all begin with "_pcre_". */
44
45#ifndef PCRE_INTERNAL_H
46#define PCRE_INTERNAL_H
47
48/* Define DEBUG to get debugging output on stdout. */
49
50#if 0
51#define DEBUG
52#endif
53
54/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
55inline, and there are *still* stupid compilers about that don't like indented
56pre-processor statements, or at least there were when I first wrote this. After
57all, it had only been about 10 years then...
58
59It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
60be absolutely sure we get our version. */
61
/* DPRINTF(p) takes ONE argument: a complete, parenthesized printf() argument
list, for example DPRINTF(("count=%d\n", n)). With DEBUG defined it expands to
"printf p"; otherwise it expands to nothing, so the arguments are not
evaluated at all. */
62#undef DPRINTF
63#ifdef DEBUG
64#define DPRINTF(p) printf p
65#else
66#define DPRINTF(p) /* Nothing */
67#endif
68
69
70/* Get the definitions provided by running "configure" */
71
72#include "config.h"
73
74/* Standard C headers plus the external interface definition. The only time
75setjmp and stdarg are used is when NO_RECURSE is set. */
76
77#include <ctype.h>
78#include <limits.h>
79#include <setjmp.h>
80#include <stdarg.h>
81#include <stddef.h>
82#include <stdio.h>
83#include <stdlib.h>
84#include <string.h>
85
86/* When compiling a DLL for Windows, the exported symbols have to be declared
87using some MS magic. I found some useful information on this web page:
88http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
89information there, using __declspec(dllexport) without "extern" we have a
90definition; with "extern" we have a declaration. The settings here override the
91setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
92which is all that is needed for applications (they just import the symbols). We
93use:
94
95  PCRE_EXP_DECL       for declarations
96  PCRE_EXP_DEFN       for definitions of exported functions
97  PCRE_EXP_DATA_DEFN  for definitions of exported variables
98
99The reason for the two DEFN macros is that in non-Windows environments, one
100does not want to have "extern" before variable definitions because it leads to
101compiler warnings. So we distinguish between functions and variables. In
102Windows, the two should always be the same.
103
104The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
105which is an application, but needs to import this file in order to "peek" at
106internals, can #include pcre.h first to get an application's-eye view.
107
108In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
109special-purpose environments) might want to stick other stuff in front of
110exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
111PCRE_EXP_DATA_DEFN only if they are not already set. */
112
/* Summary of the settings made below. Nothing here is touched when
PCRE_EXP_DECL is already defined.
  _WIN32 without PCRE_STATIC: all three macros use __declspec(dllexport), and
    PCRE_EXP_DECL additionally starts with "extern".
  _WIN32 with PCRE_STATIC: PCRE_EXP_DECL is plain "extern" and both DEFN
    macros are empty.
  Anything else: PCRE_EXP_DECL is extern "C" when compiling as C++ and
    "extern" otherwise; PCRE_EXP_DEFN defaults to PCRE_EXP_DECL and
    PCRE_EXP_DATA_DEFN defaults to empty, each only if not already defined. */
113#ifndef PCRE_EXP_DECL
114#  ifdef _WIN32
115#    ifndef PCRE_STATIC
116#      define PCRE_EXP_DECL       extern __declspec(dllexport)
117#      define PCRE_EXP_DEFN       __declspec(dllexport)
118#      define PCRE_EXP_DATA_DEFN  __declspec(dllexport)
119#    else
120#      define PCRE_EXP_DECL       extern
121#      define PCRE_EXP_DEFN
122#      define PCRE_EXP_DATA_DEFN
123#    endif
124#  else
125#    ifdef __cplusplus
126#      define PCRE_EXP_DECL       extern "C"
127#    else
128#      define PCRE_EXP_DECL       extern
129#    endif
130#    ifndef PCRE_EXP_DEFN
131#      define PCRE_EXP_DEFN       PCRE_EXP_DECL
132#    endif
133#    ifndef PCRE_EXP_DATA_DEFN
134#      define PCRE_EXP_DATA_DEFN
135#    endif
136#  endif
137#endif
138
139/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
140cannot determine these outside the compilation (e.g. by running a program as
141part of "configure") because PCRE is often cross-compiled for use on other
142systems. Instead we make use of the maximum sizes that are available at
143preprocessor time in standard C environments. */
144
/* pcre_uint16 is unsigned short when USHRT_MAX is 65535, otherwise unsigned
int when UINT_MAX is 65535. If neither limit matches, compilation is stopped
with #error. The limits come from <limits.h>. */
145#if USHRT_MAX == 65535
146  typedef unsigned short pcre_uint16;
147#elif UINT_MAX == 65535
148  typedef unsigned int pcre_uint16;
149#else
150  #error Cannot determine a type for 16-bit unsigned integers
151#endif
152
/* pcre_uint32 is unsigned int when UINT_MAX is 4294967295, otherwise unsigned
long int when ULONG_MAX is 4294967295. If neither limit matches, compilation
is stopped with #error. */
153#if UINT_MAX == 4294967295
154  typedef unsigned int pcre_uint32;
155#elif ULONG_MAX == 4294967295
156  typedef unsigned long int pcre_uint32;
157#else
158  #error Cannot determine a type for 32-bit unsigned integers
159#endif
160
161/* All character handling must be done as unsigned characters. Otherwise there
162are problems with top-bit-set characters and functions such as isspace().
163However, we leave the interface to the outside world as char *, because that
164should make things easier for callers. We define a short type for unsigned char
165to save lots of typing. I tried "uchar", but it causes problems on Digital
166Unix, where it is defined in sys/types, so use "uschar" instead. */
167
168typedef unsigned char uschar;
169
170/* This is an unsigned int value that no character can ever have. UTF-8
171characters only go up to 0x7fffffff (though Unicode doesn't go beyond
1720x0010ffff). */
173
174#define NOTACHAR 0xffffffff
175
176/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
177"any" and "anycrlf" at present). The following macros are used to package up
178testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
179modules to indicate in which datablock the parameters exist, and what the
180start/end of string field names are. */
181
182#define NLTYPE_FIXED    0     /* Newline is a fixed length string */
183#define NLTYPE_ANY      1     /* Newline is any Unicode line ending */
184#define NLTYPE_ANYCRLF  2     /* Newline is CR, LF, or CRLF */
185
186/* This macro checks for a newline at the given position */
187
/* IS_NEWLINE(p) is an expression, not a statement. It relies on NLBLOCK and
PSEND having been defined by the including module, and on a variable named
utf8 being in scope where the macro is used. When nltype is not NLTYPE_FIXED
the test is delegated to _pcre_is_newline(), which is handed the address of
NLBLOCK->nllen. Otherwise p must leave room for nllen bytes before PSEND, and
(p)[0] is compared with nl[0] and, unless nllen is 1, (p)[1] with nl[1]. The
argument p is expanded several times, so it must not have side effects. */
188#define IS_NEWLINE(p) \
189  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
190    ((p) < NLBLOCK->PSEND && \
191     _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
192       utf8)) \
193    : \
194    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
195     (p)[0] == NLBLOCK->nl[0] && \
196     (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \
197    ) \
198  )
199
200/* This macro checks for a newline immediately preceding the given position */
201
/* WAS_NEWLINE(p) is an expression, not a statement. It relies on NLBLOCK and
PSSTART having been defined by the including module, and on a variable named
utf8 being in scope where the macro is used. When nltype is not NLTYPE_FIXED
the test is delegated to _pcre_was_newline(), which is handed the address of
NLBLOCK->nllen. Otherwise p must be at least nllen bytes past PSSTART, and
(p)[-nllen] is compared with nl[0] and, unless nllen is 1, (p)[-nllen+1] with
nl[1]. The argument p is expanded several times, so it must not have side
effects. */
202#define WAS_NEWLINE(p) \
203  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
204    ((p) > NLBLOCK->PSSTART && \
205     _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
206       &(NLBLOCK->nllen), utf8)) \
207    : \
208    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
209     (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
210     (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \
211    ) \
212  )
213
214/* When PCRE is compiled as a C++ library, the subject pointer can be replaced
215with a custom type. This makes it possible, for example, to allow pcre_exec()
216to process subject strings that are discontinuous by using a smart pointer
217class. It must always be possible to inspect all of the subject string in
218pcre_exec() because of the way it backtracks. Two macros are required in the
219normal case, for sign-unspecified and unsigned char pointers. The former is
220used for the external interface and appears in pcre.h, which is why its name
221must begin with PCRE_. */
222
/* With CUSTOM_SUBJECT_PTR defined, both PCRE_SPTR and USPTR expand to that
one type. Without it, PCRE_SPTR is "const char *" and USPTR is
"const unsigned char *". These definitions come before pcre.h is included
further down. */
223#ifdef CUSTOM_SUBJECT_PTR
224#define PCRE_SPTR CUSTOM_SUBJECT_PTR
225#define USPTR CUSTOM_SUBJECT_PTR
226#else
227#define PCRE_SPTR const char *
228#define USPTR const unsigned char *
229#endif
230
231
232
233/* Include the public PCRE header and the definitions of UCP character property
234values. */
235
236#include "pcre.h"
237#include "ucp.h"
238
239/* When compiling for use with the Virtual Pascal compiler, these functions
240need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
241option on the command line. */
242
243#ifdef VPCOMPAT
244#define strlen(s)        _strlen(s)
245#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
246#define memcmp(s,c,n)    _memcmp(s,c,n)
247#define memcpy(d,s,n)    _memcpy(d,s,n)
248#define memmove(d,s,n)   _memmove(d,s,n)
249#define memset(s,c,n)    _memset(s,c,n)
250#else  /* VPCOMPAT */
251
252/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
253define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
254is set. Otherwise, include an emulating function for those systems that have
255neither (there are some non-Unix environments where this is the case). */
256
257#ifndef HAVE_MEMMOVE
258#undef  memmove        /* some systems may have a macro */
259#ifdef HAVE_BCOPY
260#define memmove(a, b, c) bcopy(b, a, c)
261#else  /* HAVE_BCOPY */
/* Emulation of memmove() for systems that have neither memmove() nor bcopy().

Arguments:
  d         destination buffer
  s         source buffer
  n         number of bytes to copy

Returns:    d

The two regions may overlap. When the destination starts after the source the
bytes are copied from the last one down to the first, so that source bytes are
read before they can be overwritten; otherwise they are copied in ascending
order. A count of zero copies nothing. */

static void *
pcre_memmove(void *d, const void *s, size_t n)
{
unsigned char *dest = (unsigned char *)d;
const unsigned char *src = (const unsigned char *)s;
size_t i;

if (dest > src)
  {
  for (i = n; i > 0; --i) dest[i - 1] = src[i - 1];
  }
else
  {
  for (i = 0; i < n; ++i) dest[i] = src[i];
  }

return d;
}
281#define memmove(a, b, c) pcre_memmove(a, b, c)
282#endif   /* not HAVE_BCOPY */
283#endif   /* not HAVE_MEMMOVE */
284#endif   /* not VPCOMPAT */
285
286
287/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
288in big-endian order) by default. These are used, for example, to link from the
289start of a subpattern to its alternatives and its end. The use of 2 bytes per
290offset limits the size of the compiled regex to around 64K, which is big enough
291for almost everybody. However, I received a request for an even bigger limit.
292For this reason, and also to make the code easier to maintain, the storing and
293loading of offsets from the byte string is now handled by the macros that are
294defined here.
295
296The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
297the config.h file, but can be overridden by using -D on the command line. This
298is automated on Unix systems via the "configure" command. */
299
/* PUT(a,n,d) stores the offset d at a[n] as LINK_SIZE bytes, most significant
byte first; GET(a,n) reads such a value back as an int. Both are single
parenthesized expressions. The pointer argument is parenthesized in every
expansion so that an expression such as "code + 1" can be passed, but a and n
are still expanded more than once and must not have side effects.
MAX_PATTERN_SIZE is the corresponding limit: 1 << 16, 1 << 24 or 1 << 30. */

#if LINK_SIZE == 2

#define PUT(a,n,d)   \
  (((a)[n] = (d) >> 8), \
   ((a)[(n)+1] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

#define MAX_PATTERN_SIZE (1 << 16)


#elif LINK_SIZE == 3

#define PUT(a,n,d)       \
  (((a)[n] = (d) >> 16),    \
   ((a)[(n)+1] = (d) >> 8), \
   ((a)[(n)+2] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])

#define MAX_PATTERN_SIZE (1 << 24)


#elif LINK_SIZE == 4

#define PUT(a,n,d)        \
  (((a)[n] = (d) >> 24),     \
   ((a)[(n)+1] = (d) >> 16), \
   ((a)[(n)+2] = (d) >> 8),  \
   ((a)[(n)+3] = (d) & 255))

/* Each byte is widened to unsigned int before shifting: left-shifting a
promoted (signed) int so that a bit reaches the sign position is undefined
behaviour. The result is converted back to int so that GET yields the same
type for every LINK_SIZE. */

#define GET(a,n) \
  ((int)(((unsigned int)(a)[n] << 24) | ((unsigned int)(a)[(n)+1] << 16) | \
         ((unsigned int)(a)[(n)+2] << 8) | (unsigned int)(a)[(n)+3]))

#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */


#else
#error LINK_SIZE must be either 2, 3, or 4
#endif


/* Convenience macro defined in terms of the others: store d with PUT and then
advance the pointer a by LINK_SIZE. Here a must be a modifiable pointer. */

#define PUTINC(a,n,d)   (PUT(a,n,d), (a) += LINK_SIZE)
347
348
/* PCRE uses some other 2-byte quantities that do not change when the size of
offsets changes. These are used for repeat counts and for other things such as
capturing parenthesis numbers in back references.

PUT2(a,n,d) stores d at a[n] and a[n+1], high byte first; GET2(a,n) reads the
pair back as an int; PUT2INC stores and then advances the pointer a by 2. Each
is a single parenthesized expression, so it can safely be the body of an
unbraced if/else or loop. The arguments a and n are expanded more than once
and must not have side effects; for PUT2INC, a must be a modifiable pointer. */

#define PUT2(a,n,d)   \
  (((a)[n] = (d) >> 8), \
   ((a)[(n)+1] = (d) & 255))

#define GET2(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

#define PUT2INC(a,n,d)  (PUT2(a,n,d), (a) += 2)
361
362
363/* When UTF-8 encoding is being used, a character is no longer just a single
364byte. The macros for character handling generate simple sequences when used in
365byte-mode, and more complicated ones for UTF-8 characters. BACKCHAR should
366never be called in byte mode. To make sure it can never even appear when UTF-8
367support is omitted, we don't even define it. */
368
/* Usage notes for the GETCHAR* macros defined below. Every one of them expands
to statements rather than to a single expression. The byte-mode versions each
end in their own semicolon. The UTF-8 versions expand to an assignment followed
by a separate "if" block, so when one is used as the body of an unbraced
if/else or loop, only the assignment is governed by that control statement.
In the UTF-8 versions c and eptr are expanded more than once, and eptr is not
parenthesized in any expansion, so pass a plain pointer variable. GETCHARTEST
and GETCHARINCTEST additionally need a variable named utf8 in scope, and the
UTF-8 versions read _pcre_utf8_table3 and _pcre_utf8_table4, which are not
defined in this part of the file. */
369#ifndef SUPPORT_UTF8
370#define GETCHAR(c, eptr) c = *eptr;
371#define GETCHARTEST(c, eptr) c = *eptr;
372#define GETCHARINC(c, eptr) c = *eptr++;
373#define GETCHARINCTEST(c, eptr) c = *eptr++;
374#define GETCHARLEN(c, eptr, len) c = *eptr;
375/* #define BACKCHAR(eptr) */
376
377#else   /* SUPPORT_UTF8 */
378
379/* Get the next UTF-8 character, not advancing the pointer. This is called when
380we know we are in UTF-8 mode. */
381
382#define GETCHAR(c, eptr) \
383  c = *eptr; \
384  if (c >= 0xc0) \
385    { \
386    int gcii; \
387    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
388    int gcss = 6*gcaa; \
389    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
390    for (gcii = 1; gcii <= gcaa; gcii++) \
391      { \
392      gcss -= 6; \
393      c |= (eptr[gcii] & 0x3f) << gcss; \
394      } \
395    }
396
397/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
398pointer. */
399
400#define GETCHARTEST(c, eptr) \
401  c = *eptr; \
402  if (utf8 && c >= 0xc0) \
403    { \
404    int gcii; \
405    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
406    int gcss = 6*gcaa; \
407    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
408    for (gcii = 1; gcii <= gcaa; gcii++) \
409      { \
410      gcss -= 6; \
411      c |= (eptr[gcii] & 0x3f) << gcss; \
412      } \
413    }
414
415/* Get the next UTF-8 character, advancing the pointer. This is called when we
416know we are in UTF-8 mode. */
417
418#define GETCHARINC(c, eptr) \
419  c = *eptr++; \
420  if (c >= 0xc0) \
421    { \
422    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
423    int gcss = 6*gcaa; \
424    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
425    while (gcaa-- > 0) \
426      { \
427      gcss -= 6; \
428      c |= (*eptr++ & 0x3f) << gcss; \
429      } \
430    }
431
432/* Get the next character, testing for UTF-8 mode, and advancing the pointer */
433
434#define GETCHARINCTEST(c, eptr) \
435  c = *eptr++; \
436  if (utf8 && c >= 0xc0) \
437    { \
438    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
439    int gcss = 6*gcaa; \
440    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
441    while (gcaa-- > 0) \
442      { \
443      gcss -= 6; \
444      c |= (*eptr++ & 0x3f) << gcss; \
445      } \
446    }
447
448/* Get the next UTF-8 character, not advancing the pointer, incrementing length
449if there are extra bytes. This is called when we know we are in UTF-8 mode. */
450
451#define GETCHARLEN(c, eptr, len) \
452  c = *eptr; \
453  if (c >= 0xc0) \
454    { \
455    int gcii; \
456    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
457    int gcss = 6*gcaa; \
458    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
459    for (gcii = 1; gcii <= gcaa; gcii++) \
460      { \
461      gcss -= 6; \
462      c |= (eptr[gcii] & 0x3f) << gcss; \
463      } \
464    len += gcaa; \
465    }
466
467/* If the pointer is not at the start of a character, move it back until
468it is. This is called only in UTF-8 mode - we don't put a test within the macro
469because almost all calls are already within a block of UTF-8 only code. */
470
/* NOTE: the loop has no lower bound of its own; it stops only when it reaches
a byte whose top two bits are not 10. */
471#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
472
473#endif
474
475
476/* In case there is no definition of offsetof() provided - though any proper
477Standard C system should have one. */
478
/* Fallback used only when no offsetof macro is already defined: it takes the
address of the named field through a null pointer of type "p_type *" and
converts that address to size_t. */
479#ifndef offsetof
480#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
481#endif
482
483
484/* These are the public options that can change during matching. */
485
486#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
487
488/* Private flags containing information about the compiled regex. They used to
489live at the top end of the options word, but that got almost full, so now they
490are in a 16-bit flags word. */
491
492#define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
493#define PCRE_FIRSTSET      0x0002  /* first_byte is set */
494#define PCRE_REQCHSET      0x0004  /* req_byte is set */
495#define PCRE_STARTLINE     0x0008  /* start after \n for multiline */
496#define PCRE_JCHANGED      0x0010  /* j option used in regex */
497#define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern */
498
499/* Options for the "extra" block produced by pcre_study(). */
500
501#define PCRE_STUDY_MAPPED   0x01     /* a map of starting chars exists */
502
503/* Masks for identifying the public options that are permitted at compile
504time, run time, or study time, respectively. */
505
506#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
507                           PCRE_NEWLINE_ANYCRLF)
508
509#define PUBLIC_OPTIONS \
510  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
511   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
512   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
513   PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
514
515#define PUBLIC_EXEC_OPTIONS \
516  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
517   PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
518
519#define PUBLIC_DFA_EXEC_OPTIONS \
520  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
521   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
522   PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
523
524#define PUBLIC_STUDY_OPTIONS 0   /* None defined */
525
526/* Magic number to provide a small check against being handed junk. Also used
527to detect whether a pattern was compiled on a host of different endianness. */
528
529#define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
530
531/* Negative values for the firstchar and reqchar variables */
532
533#define REQ_UNSET (-2)
534#define REQ_NONE  (-1)
535
536/* The maximum remaining length of subject we are prepared to search for a
537req_byte match. */
538
539#define REQ_BYTE_MAX 1000
540
541/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
542variable-length repeat, or anything other than literal characters. */
543
544#define REQ_CASELESS 0x0100    /* indicates caselessness */
545#define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
546
547/* Miscellaneous definitions */
548
549typedef int BOOL;
550
551#define FALSE   0
552#define TRUE    1
553
554/* Escape items that are just an encoding of a particular data value. */
555
556#ifndef ESC_e
557#define ESC_e 27
558#endif
559
560#ifndef ESC_f
561#define ESC_f '\f'
562#endif
563
564#ifndef ESC_n
565#define ESC_n '\n'
566#endif
567
568#ifndef ESC_r
569#define ESC_r '\r'
570#endif
571
572/* We can't officially use ESC_t because it is a POSIX reserved identifier
573(presumably because of all the others like size_t). */
574
575#ifndef ESC_tee
576#define ESC_tee '\t'
577#endif
578
579/* Codes for different types of Unicode property */
580
581#define PT_ANY        0    /* Any property - matches all chars */
582#define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
583#define PT_GC         2    /* General characteristic (e.g. L) */
584#define PT_PC         3    /* Particular characteristic (e.g. Lu) */
585#define PT_SC         4    /* Script (e.g. Han) */
586
587/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
588contain UTF-8 characters with values greater than 255. */
589
590#define XCL_NOT    0x01    /* Flag: this is a negative class */
591#define XCL_MAP    0x02    /* Flag: a 32-byte map is present */
592
593#define XCL_END       0    /* Marks end of individual items */
594#define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
595#define XCL_RANGE     2    /* A range (two multibyte chars) follows */
596#define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
597#define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
598
599/* These are escaped items that aren't just an encoding of a particular data
600value such as \n. They must have non-zero values, as check_escape() returns
601their negation. Also, they must appear in the same order as in the opcode
602definitions below, up to ESC_z. There's a dummy for OP_ANY because it
603corresponds to "." rather than an escape sequence. The final one must be
604ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).
605There are two tests in the code for an escape greater than ESC_b and less than
606ESC_Z to detect the types that may be repeated. These are the types that
607consume characters. If any new escapes are put in between that don't consume a
608character, that code will have to change. */
609
/* ESC_A is 1 and the values then run consecutively up to ESC_REF, which is 27.
ESC_dum1 is the placeholder entry; its value, 12, is the position OP_ANY has
in the opcode list. */
610enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
611       ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
612       ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
613
614
615/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
616OP_EOD must correspond in order to the list of escapes immediately above.
617
618*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
619that follow must also be updated to match. There is also a table called
620"coptable" in pcre_dfa_exec.c that must be updated. */
621
/* No enumerator below has an explicit value: OP_END is 0 and each following
opcode is one more than the previous one, ending with OP_ACCEPT at 109. The
number at the start of each comment records the resulting value and has to be
kept in step by hand when opcodes are added, removed or moved. */
622enum {
623  OP_END,            /* 0 End of pattern */
624
625  /* Values corresponding to backslashed metacharacters */
626
627  OP_SOD,            /* 1 Start of data: \A */
628  OP_SOM,            /* 2 Start of match (subject + offset): \G */
629  OP_SET_SOM,        /* 3 Set start of match (\K) */
630  OP_NOT_WORD_BOUNDARY,  /*  4 \B */
631  OP_WORD_BOUNDARY,      /*  5 \b */
632  OP_NOT_DIGIT,          /*  6 \D */
633  OP_DIGIT,              /*  7 \d */
634  OP_NOT_WHITESPACE,     /*  8 \S */
635  OP_WHITESPACE,         /*  9 \s */
636  OP_NOT_WORDCHAR,       /* 10 \W */
637  OP_WORDCHAR,           /* 11 \w */
638  OP_ANY,            /* 12 Match any character */
639  OP_ANYBYTE,        /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
640  OP_NOTPROP,        /* 14 \P (not Unicode property) */
641  OP_PROP,           /* 15 \p (Unicode property) */
642  OP_ANYNL,          /* 16 \R (any newline sequence) */
643  OP_NOT_HSPACE,     /* 17 \H (not horizontal whitespace) */
644  OP_HSPACE,         /* 18 \h (horizontal whitespace) */
645  OP_NOT_VSPACE,     /* 19 \V (not vertical whitespace) */
646  OP_VSPACE,         /* 20 \v (vertical whitespace) */
647  OP_EXTUNI,         /* 21 \X (extended Unicode sequence) */
648  OP_EODN,           /* 22 End of data or \n at end of data: \Z. */
649  OP_EOD,            /* 23 End of data: \z */
650
651  OP_OPT,            /* 24 Set runtime options */
652  OP_CIRC,           /* 25 Start of line - varies with multiline switch */
653  OP_DOLL,           /* 26 End of line - varies with multiline switch */
654  OP_CHAR,           /* 27 Match one character, casefully */
655  OP_CHARNC,         /* 28 Match one character, caselessly */
656  OP_NOT,            /* 29 Match one character, not the following one */
657
658  OP_STAR,           /* 30 The maximizing and minimizing versions of */
659  OP_MINSTAR,        /* 31 these six opcodes must come in pairs, with */
660  OP_PLUS,           /* 32 the minimizing one second. */
661  OP_MINPLUS,        /* 33 This first set applies to single characters.*/
662  OP_QUERY,          /* 34 */
663  OP_MINQUERY,       /* 35 */
664
665  OP_UPTO,           /* 36 From 0 to n matches */
666  OP_MINUPTO,        /* 37 */
667  OP_EXACT,          /* 38 Exactly n matches */
668
669  OP_POSSTAR,        /* 39 Possessified star */
670  OP_POSPLUS,        /* 40 Possessified plus */
671  OP_POSQUERY,       /* 41 Possessified query */
672  OP_POSUPTO,        /* 42 Possessified upto */
673
674  OP_NOTSTAR,        /* 43 The maximizing and minimizing versions of */
675  OP_NOTMINSTAR,     /* 44 these six opcodes must come in pairs, with */
676  OP_NOTPLUS,        /* 45 the minimizing one second. They must be in */
677  OP_NOTMINPLUS,     /* 46 exactly the same order as those above. */
678  OP_NOTQUERY,       /* 47 This set applies to "not" single characters. */
679  OP_NOTMINQUERY,    /* 48 */
680
681  OP_NOTUPTO,        /* 49 From 0 to n matches */
682  OP_NOTMINUPTO,     /* 50 */
683  OP_NOTEXACT,       /* 51 Exactly n matches */
684
685  OP_NOTPOSSTAR,     /* 52 Possessified versions */
686  OP_NOTPOSPLUS,     /* 53 */
687  OP_NOTPOSQUERY,    /* 54 */
688  OP_NOTPOSUPTO,     /* 55 */
689
690  OP_TYPESTAR,       /* 56 The maximizing and minimizing versions of */
691  OP_TYPEMINSTAR,    /* 57 these six opcodes must come in pairs, with */
692  OP_TYPEPLUS,       /* 58 the minimizing one second. These codes must */
693  OP_TYPEMINPLUS,    /* 59 be in exactly the same order as those above. */
694  OP_TYPEQUERY,      /* 60 This set applies to character types such as \d */
695  OP_TYPEMINQUERY,   /* 61 */
696
697  OP_TYPEUPTO,       /* 62 From 0 to n matches */
698  OP_TYPEMINUPTO,    /* 63 */
699  OP_TYPEEXACT,      /* 64 Exactly n matches */
700
701  OP_TYPEPOSSTAR,    /* 65 Possessified versions */
702  OP_TYPEPOSPLUS,    /* 66 */
703  OP_TYPEPOSQUERY,   /* 67 */
704  OP_TYPEPOSUPTO,    /* 68 */
705
706  OP_CRSTAR,         /* 69 The maximizing and minimizing versions of */
707  OP_CRMINSTAR,      /* 70 all these opcodes must come in pairs, with */
708  OP_CRPLUS,         /* 71 the minimizing one second. These codes must */
709  OP_CRMINPLUS,      /* 72 be in exactly the same order as those above. */
710  OP_CRQUERY,        /* 73 These are for character classes and back refs */
711  OP_CRMINQUERY,     /* 74 */
712  OP_CRRANGE,        /* 75 These are different to the three sets above. */
713  OP_CRMINRANGE,     /* 76 */
714
715  OP_CLASS,          /* 77 Match a character class, chars < 256 only */
716  OP_NCLASS,         /* 78 Same, but the bitmap was created from a negative
717                           class - the difference is relevant only when a UTF-8
718                           character > 255 is encountered. */
719
720  OP_XCLASS,         /* 79 Extended class for handling UTF-8 chars within the
721                           class. This does both positive and negative. */
722
723  OP_REF,            /* 80 Match a back reference */
724  OP_RECURSE,        /* 81 Match a numbered subpattern (possibly recursive) */
725  OP_CALLOUT,        /* 82 Call out to external function if provided */
726
727  OP_ALT,            /* 83 Start of alternation */
728  OP_KET,            /* 84 End of group that doesn't have an unbounded repeat */
729  OP_KETRMAX,        /* 85 These two must remain together and in this */
730  OP_KETRMIN,        /* 86 order. They are for groups that repeat for ever. */
731
732  /* The assertions must come before BRA, CBRA, ONCE, and COND.*/
733
734  OP_ASSERT,         /* 87 Positive lookahead */
735  OP_ASSERT_NOT,     /* 88 Negative lookahead */
736  OP_ASSERTBACK,     /* 89 Positive lookbehind */
737  OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
738  OP_REVERSE,        /* 91 Move pointer back - used in lookbehind assertions */
739
740  /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
741  as there's a test for >= ONCE for a subpattern that isn't an assertion. */
742
743  OP_ONCE,           /* 92 Atomic group */
744  OP_BRA,            /* 93 Start of non-capturing bracket */
745  OP_CBRA,           /* 94 Start of capturing bracket */
746  OP_COND,           /* 95 Conditional group */
747
748  /* These three must follow the previous three, in the same order. There's a
749  check for >= SBRA to distinguish the two sets. */
750
751  OP_SBRA,           /* 96 Start of non-capturing bracket, check empty  */
752  OP_SCBRA,          /* 97 Start of capturing bracket, check empty */
753  OP_SCOND,          /* 98 Conditional group, check empty */
754
755  OP_CREF,           /* 99 Used to hold a capture number as condition */
756  OP_RREF,           /* 100 Used to hold a recursion number as condition */
757  OP_DEF,            /* 101 The DEFINE condition */
758
759  OP_BRAZERO,        /* 102 These two must remain together and in this */
760  OP_BRAMINZERO,     /* 103 order. */
761
762  /* These are backtracking control verbs */
763
764  OP_PRUNE,          /* 104 */
765  OP_SKIP,           /* 105 */
766  OP_THEN,           /* 106 */
767  OP_COMMIT,         /* 107 */
768
769  /* These are forced failure and success verbs */
770
771  OP_FAIL,           /* 108 */
772  OP_ACCEPT          /* 109 */
773};
774
775
776/* This macro defines textual names for all the opcodes. These are used only
777for debugging. The macro is referenced only in pcre_printint.c. */
778
/* The list holds one string per opcode, in the same order as the opcode enum:
110 entries, from "End" for OP_END to "*ACCEPT" for OP_ACCEPT. The repeat
opcodes in each of the four groups share the same display strings, and "{"
stands for every counted repeat. */
779#define OP_NAME_LIST \
780  "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \
781  "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \
782  "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \
783  "extuni",  "\\Z", "\\z",                                        \
784  "Opt", "^", "$", "char", "charnc", "not",                       \
785  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
786  "*+","++", "?+", "{",                                           \
787  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
788  "*+","++", "?+", "{",                                           \
789  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
790  "*+","++", "?+", "{",                                           \
791  "*", "*?", "+", "+?", "?", "??", "{", "{",                      \
792  "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \
793  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \
794  "AssertB", "AssertB not", "Reverse",                            \
795  "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
796  "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero",    \
797  "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
798
799
/* This macro defines the length of fixed length operations in the compiled
regex. The lengths are used when searching for specific things, and also in the
debugging printing of a compiled regex. We use a macro so that it can be
defined close to the definitions of the opcodes themselves.

As things have been extended, some of these are no longer fixed lengths, but
are minima instead. For example, the length of a single-character repeat may
vary in UTF-8 mode. The code that uses this table must know about such things.

The entries are in the same order as OP_NAME_LIST; in particular the entry for
EXTUNI comes after the five entries for \R, \H, \h, \V and \v. LINK_SIZE must
be defined wherever the macro is expanded. */

#define OP_LENGTHS \
  1,                             /* End                                    */ \
  1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
  1, 1,                          /* Any, Anybyte                           */ \
  3, 3,                          /* NOTPROP, PROP                          */ \
  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
  1,                             /* EXTUNI                                 */ \
  1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
  2,                             /* Char  - the minimum length             */ \
  2,                             /* Charnc  - the minimum length           */ \
  2,                             /* not                                    */ \
  /* Positive single-char repeats                            ** These are  */ \
  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \
  4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \
  2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \
  /* Negative single-char repeats - only for chars < 256                   */ \
  2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
  4, 4, 4,                       /* NOT upto, minupto, exact               */ \
  2, 2, 2, 4,                    /* Possessive *, +, ?, upto               */ \
  /* Positive type repeats                                                 */ \
  2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \
  4, 4, 4,                       /* Type upto, minupto, exact              */ \
  2, 2, 2, 4,                    /* Possessive *+, ++, ?+, upto+           */ \
  /* Character class & ref repeats                                         */ \
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \
  5, 5,                          /* CRRANGE, CRMINRANGE                    */ \
 33,                             /* CLASS                                  */ \
 33,                             /* NCLASS                                 */ \
  0,                             /* XCLASS - variable length               */ \
  3,                             /* REF                                    */ \
  1+LINK_SIZE,                   /* RECURSE                                */ \
  2+2*LINK_SIZE,                 /* CALLOUT                                */ \
  1+LINK_SIZE,                   /* Alt                                    */ \
  1+LINK_SIZE,                   /* Ket                                    */ \
  1+LINK_SIZE,                   /* KetRmax                                */ \
  1+LINK_SIZE,                   /* KetRmin                                */ \
  1+LINK_SIZE,                   /* Assert                                 */ \
  1+LINK_SIZE,                   /* Assert not                             */ \
  1+LINK_SIZE,                   /* Assert behind                          */ \
  1+LINK_SIZE,                   /* Assert behind not                      */ \
  1+LINK_SIZE,                   /* Reverse                                */ \
  1+LINK_SIZE,                   /* ONCE                                   */ \
  1+LINK_SIZE,                   /* BRA                                    */ \
  3+LINK_SIZE,                   /* CBRA                                   */ \
  1+LINK_SIZE,                   /* COND                                   */ \
  1+LINK_SIZE,                   /* SBRA                                   */ \
  3+LINK_SIZE,                   /* SCBRA                                  */ \
  1+LINK_SIZE,                   /* SCOND                                  */ \
  3,                             /* CREF                                   */ \
  3,                             /* RREF                                   */ \
  1,                             /* DEF                                    */ \
  1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
  1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \
  1, 1                           /* FAIL, ACCEPT                           */
863
864
/* A magic value for OP_RREF to indicate the "any recursion" condition, as
opposed to a specific recursion number. */

#define RREF_ANY  0xffff
868
/* Error code numbers. They are given names so that they can more easily be
tracked. The enumerators take the default consecutive values, so ERRn has the
value n. */

enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
       ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
       ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
       ERR60, ERR61 };
879
880/* The real format of the start of the pcre block; the index of names and the
881code vector run on as long as necessary after the end. We store an explicit
882offset to the name table so that if a regex is compiled on one host, saved, and
883then run on another where the size of pointers is different, all might still
884be well. For the case of compiled-on-4 and run-on-8, we include an extra
885pointer that is always NULL. For future-proofing, a few dummy fields were
886originally included - even though you can never get this planning right - but
887there is only one left now.
888
889NOTE NOTE NOTE:
890Because people can now save and re-use compiled patterns, any additions to this
891structure should be made at the end, and something earlier (e.g. a new
892flag in the options or one of the dummy fields) should indicate that the new
893fields are present. Currently PCRE always sets the dummy fields to zero.
894NOTE NOTE NOTE:
895*/
896
897typedef struct real_pcre {
898  pcre_uint32 magic_number;
899  pcre_uint32 size;               /* Total that was malloced */
900  pcre_uint32 options;            /* Public options */
901  pcre_uint16 flags;              /* Private flags */
902  pcre_uint16 dummy1;             /* For future use */
903  pcre_uint16 top_bracket;
904  pcre_uint16 top_backref;
905  pcre_uint16 first_byte;
906  pcre_uint16 req_byte;
907  pcre_uint16 name_table_offset;  /* Offset to name table that follows */
908  pcre_uint16 name_entry_size;    /* Size of any name items */
909  pcre_uint16 name_count;         /* Number of name items */
910  pcre_uint16 ref_count;          /* Reference count */
911
912  const unsigned char *tables;    /* Pointer to tables or NULL for std */
913  const unsigned char *nullpad;   /* NULL padding */
914} real_pcre;
915
916/* The format of the block used to store data from pcre_study(). The same
917remark (see NOTE above) about extending this structure applies. */
918
919typedef struct pcre_study_data {
920  pcre_uint32 size;               /* Total that was malloced */
921  pcre_uint32 options;
922  uschar start_bits[32];
923} pcre_study_data;
924
925/* Structure for passing "static" information around between the functions
926doing the compiling, so that they are thread-safe. */
927
928typedef struct compile_data {
929  const uschar *lcc;            /* Points to lower casing table */
930  const uschar *fcc;            /* Points to case-flipping table */
931  const uschar *cbits;          /* Points to character type table */
932  const uschar *ctypes;         /* Points to table of type maps */
933  const uschar *start_workspace;/* The start of working space */
934  const uschar *start_code;     /* The start of the compiled code */
935  const uschar *start_pattern;  /* The start of the pattern */
936  const uschar *end_pattern;    /* The end of the pattern */
937  uschar *hwm;                  /* High watermark of workspace */
938  uschar *name_table;           /* The name/number table */
939  int  names_found;             /* Number of entries so far */
940  int  name_entry_size;         /* Size of each entry */
941  int  bracount;                /* Count of capturing parens */
942  int  top_backref;             /* Maximum back reference */
943  unsigned int backref_map;     /* Bitmap of low back refs */
944  int  external_options;        /* External (initial) options */
945  int  external_flags;          /* External flag bits to be set */
946  int  req_varyopt;             /* "After variable item" flag for reqbyte */
947  BOOL had_accept;              /* (*ACCEPT) encountered */
948  int  nltype;                  /* Newline type */
949  int  nllen;                   /* Newline string length */
950  uschar nl[4];                 /* Newline string when fixed length */
951} compile_data;
952
953/* Structure for maintaining a chain of pointers to the currently incomplete
954branches, for testing for left recursion. */
955
956typedef struct branch_chain {
957  struct branch_chain *outer;
958  uschar *current;
959} branch_chain;
960
961/* Structure for items in a linked list that represents an explicit recursive
962call within the pattern. */
963
964typedef struct recursion_info {
965  struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
966  int group_num;                /* Number of group that was called */
967  const uschar *after_call;     /* "Return value": points after the call in the expr */
968  USPTR save_start;             /* Old value of mstart */
969  int *offset_save;             /* Pointer to start of saved offsets */
970  int saved_max;                /* Number of saved offsets */
971} recursion_info;
972
973/* Structure for building a chain of data for holding the values of the subject
974pointer at the start of each subpattern, so as to detect when an empty string
975has been matched by a subpattern - to break infinite loops. */
976
977typedef struct eptrblock {
978  struct eptrblock *epb_prev;
979  USPTR epb_saved_eptr;
980} eptrblock;
981
982
983/* Structure for passing "static" information around between the functions
984doing traditional NFA matching, so that they are thread-safe. */
985
986typedef struct match_data {
987  unsigned long int match_call_count;      /* As it says */
988  unsigned long int match_limit;           /* As it says */
989  unsigned long int match_limit_recursion; /* As it says */
990  int   *offset_vector;         /* Offset vector */
991  int    offset_end;            /* One past the end */
992  int    offset_max;            /* The maximum usable for return data */
993  int    nltype;                /* Newline type */
994  int    nllen;                 /* Newline string length */
995  uschar nl[4];                 /* Newline string when fixed */
996  const uschar *lcc;            /* Points to lower casing table */
997  const uschar *ctypes;         /* Points to table of type maps */
998  BOOL   offset_overflow;       /* Set if too many extractions */
999  BOOL   notbol;                /* NOTBOL flag */
1000  BOOL   noteol;                /* NOTEOL flag */
1001  BOOL   utf8;                  /* UTF8 flag */
1002  BOOL   endonly;               /* Dollar not before final \n */
1003  BOOL   notempty;              /* Empty string match not wanted */
1004  BOOL   partial;               /* PARTIAL flag */
1005  BOOL   hitend;                /* Hit the end of the subject at some point */
1006  BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */
1007  const uschar *start_code;     /* For use when recursing */
1008  USPTR  start_subject;         /* Start of the subject string */
1009  USPTR  end_subject;           /* End of the subject string */
1010  USPTR  start_match_ptr;       /* Start of matched string */
1011  USPTR  end_match_ptr;         /* Subject position at end match */
1012  int    end_offset_top;        /* Highwater mark at end of match */
1013  int    capture_last;          /* Most recent capture number */
1014  int    start_offset;          /* The start offset value */
1015  eptrblock *eptrchain;         /* Chain of eptrblocks for tail recursions */
1016  int    eptrn;                 /* Next free eptrblock */
1017  recursion_info *recursive;    /* Linked list of recursion data */
1018  void  *callout_data;          /* To pass back to callouts */
1019} match_data;
1020
1021/* A similar structure is used for the same purpose by the DFA matching
1022functions. */
1023
1024typedef struct dfa_match_data {
1025  const uschar *start_code;     /* Start of the compiled pattern */
1026  const uschar *start_subject;  /* Start of the subject string */
1027  const uschar *end_subject;    /* End of subject string */
1028  const uschar *tables;         /* Character tables */
1029  int   moptions;               /* Match options */
1030  int   poptions;               /* Pattern options */
1031  int    nltype;                /* Newline type */
1032  int    nllen;                 /* Newline string length */
1033  uschar nl[4];                 /* Newline string when fixed */
1034  void  *callout_data;          /* To pass back to callouts */
1035} dfa_match_data;
1036
/* Bit definitions for entries in the pcre_ctypes table. Each value is a
distinct single bit, so they can be combined and tested with bitwise
operators. */

#define ctype_space   0x01
#define ctype_letter  0x02
#define ctype_digit   0x04
#define ctype_xdigit  0x08
#define ctype_word    0x10   /* alphameric or '_' */
#define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
1045
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
of bits for a class map. Some classes are built by combining these tables.
Successive offsets are 32 apart, and cbit_length is the offset just past the
last table. */

#define cbit_space     0      /* [:space:] or \s */
#define cbit_xdigit   32      /* [:xdigit:] */
#define cbit_digit    64      /* [:digit:] or \d */
#define cbit_upper    96      /* [:upper:] */
#define cbit_lower   128      /* [:lower:] */
#define cbit_word    160      /* [:word:] or \w */
#define cbit_graph   192      /* [:graph:] */
#define cbit_print   224      /* [:print:] */
#define cbit_punct   256      /* [:punct:] */
#define cbit_cntrl   288      /* [:cntrl:] */
#define cbit_length  320      /* Length of the cbits table */
1060
/* Offsets of the various tables from the base tables pointer, and
total length. The ctypes table follows the cbits tables, so its offset is
derived from cbit_length, which must be defined where these are used. */

#define lcc_offset      0
#define fcc_offset    256
#define cbits_offset  512
#define ctypes_offset (cbits_offset + cbit_length)
#define tables_length (ctypes_offset + 256)
1069
1070/* Layout of the UCP type table that translates property names into types and
1071codes. Each entry used to point directly to a name, but to reduce the number of
1072relocations in shared libraries, it now has an offset into a single string
1073instead. */
1074
1075typedef struct {
1076  pcre_uint16 name_offset;
1077  pcre_uint16 type;
1078  pcre_uint16 value;
1079} ucp_type_table;
1080
1081
1082/* Internal shared data tables. These are tables that are used by more than one
1083of the exported public functions. They have to be "external" in the C sense,
1084but are not part of the PCRE public API. The data for these tables is in the
1085pcre_tables.c module. */
1086
1087extern const int    _pcre_utf8_table1[];
1088extern const int    _pcre_utf8_table2[];
1089extern const int    _pcre_utf8_table3[];
1090extern const uschar _pcre_utf8_table4[];
1091
1092extern const int    _pcre_utf8_table1_size;
1093
1094extern const char   _pcre_utt_names[];
1095extern const ucp_type_table _pcre_utt[];
1096extern const int _pcre_utt_size;
1097
1098extern const uschar _pcre_default_tables[];
1099
1100extern const uschar _pcre_OP_lengths[];
1101
1102
1103/* Internal shared functions. These are functions that are used by more than
1104one of the exported public functions. They have to be "external" in the C
1105sense, but are not part of the PCRE public API. */
1106
1107extern BOOL         _pcre_is_newline(const uschar *, int, const uschar *,
1108                      int *, BOOL);
1109extern int          _pcre_ord2utf8(int, uschar *);
1110extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
1111                      const pcre_study_data *, pcre_study_data *);
1112extern int          _pcre_ucp_findprop(const unsigned int, int *, int *);
1113extern unsigned int _pcre_ucp_othercase(const unsigned int);
1114extern int          _pcre_valid_utf8(const uschar *, int);
1115extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *,
1116                      int *, BOOL);
1117extern BOOL         _pcre_xclass(int, const uschar *);
1118
1119#endif
1120
1121/* End of pcre_internal.h */
Note: See TracBrowser for help on using the repository browser.