Changeset 12117 in project for chicken


Ignore:
Timestamp:
10/07/08 05:04:46 (11 years ago)
Author:
Kon Lovett
Message:

PCRE 7.8, use of "full" flonum-hash, new scheme-complete by Alex Shinn.

Location:
chicken
Files:
1 added
3 deleted
26 edited

Legend:

Unmodified
Added
Removed
  • chicken/branches/chicken-3/srfi-69.scm

    r11988 r12117  
    4444      ##sys#make-structure
    4545      ##sys#size
    46       ##sys#slot ##sys#setslot
    47       ##srfi-69#%equal?-hash ) ) ] )
     46      ##sys#slot ##sys#setslot ) ) ] )
    4847
    4948(private srfi-69
     
    8180;;; Core Inlines:
    8281
    83 (define-macro ($quick-flonum-truncate ?flo)
    84   `(##core#inline "C_quickflonumtruncate" ,?flo) )
    85 
    8682(define-macro ($fix ?wrd)
    8783  `(##core#inline "C_fix" ,?wrd) )
     
    9995  `(##core#inline "C_byteblockp" ,?obj) )
    10096
    101 (define-macro ($hash-string ?str)
     97(define-macro ($string-hash ?str)
    10298  `(##core#inline "C_hash_string" ,?str) )
    10399
    104 (define-macro ($hash-string-ci ?str)
     100(define-macro ($string-ci-hash ?str)
    105101  `(##core#inline "C_hash_string_ci" ,?str) )
    106102
     
    153149;; Number Hash:
    154150
     151#|
     152(declare
     153  (not usual-integrations + * real? complex? numerator denominator real-part imag-part) )
     154
     155(define-macro ($real-hash-body ?num)
     156  `(+ (numerator ,?num) (denominator ,?num)) )
     157
     158(define-macro ($real-hash ?num)
     159  `($fix ($real-hash-body ,?num)) )
     160
     161(define-macro ($complex-hash ?num)
     162  `($fix (+ ($real-hash-body (real-part ,?num)) (* 3 ($real-hash-body (imag-part ,?num))))) )
     163
     164(define-macro ($non-generic-number-hash ?num)
     165  `(cond [(real? ,?num)         ($real-hash ,?num)]
     166         [(complex? ,?num)      ($complex-hash ,?num)]) )
     167|#
     168
    155169(define-constant flonum-magic 331804471)
    156170
    157 #| Not sure which is "better"; went with speed
    158171(define-macro ($subbyte ?bytvec ?i)
    159172  `(##core#inline "C_subbyte" ,?bytvec ,?i) )
    160173
    161 (define-macro ($hash-flonum ?flo)
     174(define-macro ($flonum-hash ?flo)
    162175  `(fx* flonum-magic
    163         ,(let loop ([idx (fx- (##sys#size 1.0) 1)])
    164             (if (fx= 0 idx)
    165                 `($subbyte ,?flo 0)
    166                 `(fx+ ($subbyte ,?flo ,idx)
    167                       (fxshl ,(loop (fx- idx 1)) 1))))) )
    168 |#
    169 
    170 (define-macro ($hash-flonum ?flo)
    171   `(fx* flonum-magic ($quick-flonum-truncate ,?flo)) )
     176        ,(let loop ([idx (fx- (##sys#size 1.0) 1)])
     177           (if (fx= 0 idx)
     178               `($subbyte ,?flo 0)
     179               `(fx+ ($subbyte ,?flo ,idx) (fxshl ,(loop (fx- idx 1)) 1))))) )
    172180
    173181(define (##sys#number-hash-hook obj)
     
    175183
    176184(define-macro ($non-fixnum-number-hash ?obj)
    177   `(cond [(flonum? obj) ($hash-flonum ,?obj)]
     185  `(cond [(flonum? obj) ($flonum-hash ,?obj)]
    178186         [else          ($fix (##sys#number-hash-hook ,?obj))] ) )
    179187
     
    208216
    209217(define-macro ($symbol-hash ?obj)
    210   `($hash-string (##sys#slot ,?obj 1)) )
     218  `($string-hash (##sys#slot ,?obj 1)) )
    211219
    212220(define (symbol-hash obj #!optional (bound hash-default-bound))
    213221  (##sys#check-symbol obj 'symbol-hash)
    214   (##sys#check-exact bound 'string-hash)
     222  (##sys#check-exact bound 'symbol-hash)
    215223  ($hash/limit ($symbol-hash obj) bound) )
    216224
     
    228236
    229237(define-macro ($keyword-hash ?obj)
    230   `($hash-string (##sys#slot ,?obj 1)) )
     238  `($string-hash (##sys#slot ,?obj 1)) )
    231239
    232240(define (keyword-hash obj #!optional (bound hash-default-bound))
     
    350358          [(number? obj)          ($non-fixnum-number-hash obj)]
    351359          [($immediate? obj)      unknown-immediate-hash-value]
    352           [($byte-block? obj)     ($hash-string obj)]
     360          [($byte-block? obj)     ($string-hash obj)]
    353361          [(list? obj)            ($*list-hash obj)]
    354362          [(pair? obj)            ($*pair-hash obj)]
     
    371379  (##sys#check-string str 'string-hash)
    372380  (##sys#check-exact bound 'string-hash)
    373   ($hash/limit ($hash-string str) bound) )
     381  ($hash/limit ($string-hash str) bound) )
    374382
    375383(define (string-ci-hash str #!optional (bound hash-default-bound))
    376384  (##sys#check-string str 'string-ci-hash)
    377385  (##sys#check-exact bound 'string-ci-hash)
    378   ($hash/limit ($hash-string-ci str) bound) )
     386  ($hash/limit ($string-ci-hash str) bound) )
    379387
    380388
     
    624632;; %hash-table-check-resize!:
    625633
    626 #; ;UNUSED
    627 (define %hash-table-check-resize!
    628        ; Note that these are standard integrations!
    629   (let ([floor floor]
    630         [inexact->exact inexact->exact]
    631         [* *] )
    632     (lambda (ht newsiz)
    633       (let ([vec (##sys#slot ht 1)]
    634             [min-load (##sys#slot ht 5)]
    635             [max-load (##sys#slot ht 6)] )
    636         (let ([len (##sys#size vec)] )
    637           (let ([min-load-len (inexact->exact (floor (* len min-load)))]
    638                 [max-load-len (inexact->exact (floor (* len max-load)))] )
    639             (if (and (fx< len hash-table-max-length)
    640                      (fx<= min-load-len newsiz) (fx<= newsiz max-load-len))
    641                 (%hash-table-resize! ht vec len) ) ) ) ) ) ) )
    642 
    643634(define-inline (%hash-table-check-resize! ht newsiz)
    644635  (let ([vec (##sys#slot ht 1)]
  • chicken/trunk/distribution/manifest

    r12115 r12117  
    234234pcre/pcre_tables.c
    235235pcre/pcre_try_flipped.c
     236pcre/pcre_ucd.c
    236237pcre/pcre_ucp_findchar.c
    237 pcre/pcre_ucp_searchfuncs.c
    238238pcre/pcre_valid_utf8.c
    239239pcre/pcre_version.c
     
    241241pcre/pcre.h
    242242pcre/ucp.h
    243 pcre/ucpinternal.h
    244 pcre/ucptable.c
    245 pcre/ucptable.h
    246243apply-hack.x86.s
    247244apply-hack.x86-64.s
  • chicken/trunk/pcre/AUTHORS

    r6175 r12117  
    99Cambridge, England.
    1010
    11 Copyright (c) 1997-2007 University of Cambridge
     11Copyright (c) 1997-2008 University of Cambridge
    1212All rights reserved
    1313
     
    1818Written by:       Google Inc.
    1919
    20 Copyright (c) 2007 Google Inc
     20Copyright (c) 2007-2008 Google Inc
    2121All rights reserved
    2222
  • chicken/trunk/pcre/LICENCE

    r6175 r12117  
    2323Cambridge, England.
    2424
    25 Copyright (c) 1997-2007 University of Cambridge
     25Copyright (c) 1997-2008 University of Cambridge
    2626All rights reserved.
    2727
     
    3232Contributed by:   Google Inc.
    3333
    34 Copyright (c) 2007, Google Inc.
     34Copyright (c) 2007-2008, Google Inc.
    3535All rights reserved.
    3636
  • chicken/trunk/pcre/NON-UNIX-USE

    r6175 r12117  
    99  Building for virtual Pascal
    1010  Stack size in Windows environments
     11  Linking programs in Windows environments
    1112  Comments about Win32 builds
    12   Building PCRE with CMake
     13  Building PCRE on Windows with CMake
     14  Use of relative paths with CMake on Windows
     15  Testing with runtest.bat
    1316  Building under Windows with BCC5.5
    1417  Building PCRE on OpenVMS
     
    3235wrapper functions are a separate issue (see below).
    3336
    34 The PCRE distribution includes support for CMake. This support is relatively
    35 new, but has already been used successfully to build PCRE in multiple build
    36 environments on Windows. There are some instructions in the section entitled
    37 "Building PCRE with CMake" below.
     37The PCRE distribution includes a "configure" file for use by the Configure/Make
     38build system, as found in many Unix-like environments. There is also support
     39support for CMake, which some users prefer, in particular in Windows
     40environments. There are some instructions for CMake under Windows in the
     41section entitled "Building PCRE with CMake" below. CMake can also be used to
     42build PCRE in Unix-like systems.
    3843
    3944
     
    8186       pcre_internal.h
    8287       ucp.h
    83        ucpinternal.h
    84        ucptable.h
    8588
    8689 (5) Also ensure that you have the following file, which is #included as source
    87      when building a debugging version of PCRE and is also used by pcretest.
     90     when building a debugging version of PCRE, and is also used by pcretest.
    8891
    8992       pcre_printint.src
     
    109112       pcre_tables.c
    110113       pcre_try_flipped.c
    111        pcre_ucp_searchfuncs.c
     114       pcre_ucd.c
    112115       pcre_valid_utf8.c
    113116       pcre_version.c
     
    178181
    179182
     183LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
     184
     185If you want to statically link a program against a PCRE library in the form of
     186a non-dll .a file, you must define PCRE_STATIC before including pcre.h,
     187otherwise the pcre_malloc() and pcre_free() exported functions will be declared
     188__declspec(dllimport), with unwanted results.
     189
     190
     191CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
     192
     193It is possible to compile programs to use different calling conventions using
     194MSVC. Search the web for "calling conventions" for more information. To make it
     195easier to change the calling convention for the exported functions in the
     196PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
     197definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
     198not set, it defaults to empty; the default calling convention is then used
     199(which is what is wanted most of the time).
     200
     201
    180202COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE WITH CMAKE" below)
    181203
     
    183205paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
    184206the same thing; they are completely different from each other. There is also
    185 some experimental, undocumented support for building using "cmake", which you
    186 might like to try if you are familiar with "cmake". However, at the present
    187 time, the "cmake" process builds only a static library (not a dll), and the
    188 tests are not automatically run.
     207support for building using CMake, which some users find a more straightforward
     208way of building PCRE under Windows. However, the tests are not run
     209automatically when CMake is used.
    189210
    190211The MinGW home page (http://www.mingw.org/) says this:
     
    218239longer happens.)
    219240
    220 If you want to statically link your program against a non-dll .a file, you must
    221 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
    222 pcre_free() exported functions will be declared __declspec(dllimport), with
    223 unwanted results.
     241A user submitted a special-purpose patch that makes it easy to create
     242"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
     243as a special target. If you use this target, no other files are built, and in
     244particular, the pcretest and pcregrep programs are not built. An example of how
     245this might be used is:
     246
     247  ./configure --enable-utf --disable-cpp CFLAGS="-03 -s"; make pcre.dll
    224248
    225249Using Cygwin's compiler generates libraries and executables that depend on
     
    253277
    254278
    255 BUILDING PCRE WITH CMAKE
     279BUILDING PCRE ON WINDOWS WITH CMAKE
    256280
    257281CMake is an alternative build facility that can be used instead of the
     
    261285were contributed by a PCRE user.
    262286
    263 1. Download CMake 2.4.7 or above from http://www.cmake.org/, install and ensure
    264    that cmake\bin is on your path.
    265 
    266 2. Unzip (retaining folder structure) the PCRE source tree into a source
    267    directory such as C:\pcre.
    268 
    269 3. Create a new, empty build directory: C:\pcre\build\
    270 
    271 4. Run CMakeSetup from the Shell envirornment of your build tool, e.g., Msys
    272    for Msys/MinGW or Visual Studio Command Prompt for VC/VC++
    273 
    274 5. Enter C:\pcre\pcre-xx and C:\pcre\build for the source and build
    275    directories, respectively
    276 
    277 6. Hit the "Configure" button.
    278 
    279 7. Select the particular IDE / build tool that you are using (Visual Studio,
    280    MSYS makefiles, MinGW makefiles, etc.)
    281 
    282 8. The GUI will then list several configuration options. This is where you can
    283    enable UTF-8 support, etc.
    284 
    285 9. Hit "Configure" again. The adjacent "OK" button should now be active.
     2871.  Download CMake 2.4.7 or above from http://www.cmake.org/, install and ensure
     288    that cmake\bin is on your path.
     289
     2902.  Unzip (retaining folder structure) the PCRE source tree into a source
     291    directory such as C:\pcre.
     292
     2933.  Create a new, empty build directory: C:\pcre\build\
     294
     2954.  Run CMakeSetup from the Shell envirornment of your build tool, e.g., Msys
     296    for Msys/MinGW or Visual Studio Command Prompt for VC/VC++
     297
     2985.  Enter C:\pcre\pcre-xx and C:\pcre\build for the source and build
     299    directories, respectively
     300
     3016.  Hit the "Configure" button.
     302
     3037.  Select the particular IDE / build tool that you are using (Visual Studio,
     304    MSYS makefiles, MinGW makefiles, etc.)
     305
     3068.  The GUI will then list several configuration options. This is where you can
     307    enable UTF-8 support, etc.
     308
     3099.  Hit "Configure" again. The adjacent "OK" button should now be active.
    286310
    28731110. Hit "OK".
     
    290314    solution file for Visual Studio, makefiles for MinGW, etc.
    291315
    292 Testing with RunTest.bat
     316
     317USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
     318
     319A PCRE user comments as follows:
     320
     321I thought that others may want to know the current state of
     322CMAKE_USE_RELATIVE_PATHS support on Windows.
     323
     324Here it is:
     325-- AdditionalIncludeDirectories is only partially modified (only the
     326first path - see below)
     327-- Only some of the contained file paths are modified - shown below for
     328pcre.vcproj
     329-- It properly modifies
     330
     331I am sure CMake people can fix that if they want to. Until then one will
     332need to replace existing absolute paths in project files with relative
     333paths manually (e.g. from VS) - relative to project file location. I did
     334just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
     335deal.
     336
     337AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
     338AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
     339
     340RelativePath="pcre.h">
     341RelativePath="pcre_chartables.c">
     342RelativePath="pcre_chartables.c.rule">
     343
     344
     345TESTING WITH RUNTEST.BAT
    293346
    2943471. Copy RunTest.bat into the directory where pcretest.exe has been created.
     
    385438=========================
    386439
    387 Last Updated: 21 September 2007
     440Last Updated: 05 September 2008
    388441****
  • chicken/trunk/pcre/config.h

    r12021 r12117  
    1 /* config.h.  From PCRE 7.7 config.h generated from config.h.in by configure.  */
     1/* config.h.  From PCRE 7.8 config.h generated from config.h.in by configure.  */
    22
    33/* For HAVE_* macros */
    44#ifdef HAVE_CHICKEN_CONFIG_H
    55# include "chicken-config.h"
     6#else
     7# error "Missing \"chicken-config.h\""
    68#endif
    79
     
    7880   that support it, "configure" can be used to set this in the Makefile (use
    7981   --disable-stack-for-recursion). */
    80 /* #undef NO_RECURSE */
    81 /* Make independent of Chicken stack - KRL */
    82 #define NO_RECURSE 1
     82#define NO_RECURSE
    8383
    8484/* Name of package */
     
    9292
    9393/* Define to the full name and version of this package. */
    94 #define PACKAGE_STRING "PCRE 7.7"
     94#define PACKAGE_STRING "PCRE 7.8"
    9595
    9696/* Define to the one symbol short name of this package. */
     
    9898
    9999/* Define to the version of this package. */
    100 #define PACKAGE_VERSION "7.7"
     100#define PACKAGE_VERSION "7.8"
    101101
    102102/* When calling PCRE via the POSIX interface, additional working storage is
     
    112112#endif
    113113
    114 /* PCRE uses recursive function calls to handle backtracking while matching.
    115 This can sometimes be a problem on systems that have stacks of limited size.
    116 Define NO_RECURSE to get a version that doesn't use recursion in the match()
    117 function; instead it creates its own stack by steam using pcre_recurse_malloc
    118 to get memory. For more detail, see comments and other stuff just above the
    119 match() function. On Unix systems, "configure" can be used to set this in the
    120 Makefile (use --disable-stack-for-recursion). */
    121 /* #define NO_RECURSE */
    122 
    123114/* Define to enable support for Unicode properties */
    124115#define SUPPORT_UCP
     
    128119
    129120/* Version number of package */
    130 #define VERSION "7.7"
    131 
    132 /* Define to empty if `const' does not conform to ANSI C. */
    133 /* #undef const */
    134 
    135 /* Define to `unsigned int' if <sys/types.h> does not define. */
    136 /* #undef size_t */
     121#define VERSION "7.8"
  • chicken/trunk/pcre/pcre.h

    r12021 r12117  
    4343
    4444#define PCRE_MAJOR          7
    45 #define PCRE_MINOR          7
     45#define PCRE_MINOR          8
    4646#define PCRE_PRERELEASE     
    47 #define PCRE_DATE           2008-05-07
     47#define PCRE_DATE           2008-09-05
    4848
    4949/* When an application links to a PCRE DLL in Windows, the symbols that are
  • chicken/trunk/pcre/pcre_compile.c

    r12021 r12117  
    456456{
    457457const char *s = error_texts;
    458 for (; n > 0; n--) while (*s++ != 0);
     458for (; n > 0; n--) while (*s++ != 0) {};
    459459return s;
    460460}
     
    10031003    if (*ptr == 'Q') for (;;)
    10041004      {
    1005       while (*(++ptr) != 0 && *ptr != '\\');
     1005      while (*(++ptr) != 0 && *ptr != '\\') {};
    10061006      if (*ptr == 0) return -1;
    10071007      if (*(++ptr) == 'E') break;
     
    10461046        if (*ptr == 'Q') for (;;)
    10471047          {
    1048           while (*(++ptr) != 0 && *ptr != '\\');
     1048          while (*(++ptr) != 0 && *ptr != '\\') {};
    10491049          if (*ptr == 0) return -1;
    10501050          if (*(++ptr) == 'E') break;
     
    10601060  if (xmode && *ptr == '#')
    10611061    {
    1062     while (*(++ptr) != 0 && *ptr != '\n');
     1062    while (*(++ptr) != 0 && *ptr != '\n') {};
    10631063    if (*ptr == 0) return -1;
    10641064    continue;
     
    14511451      break;
    14521452      }
     1453#else
     1454    (void)(utf8);  /* Keep compiler happy by referencing function argument */
    14531455#endif
    14541456    }
     
    15441546      break;
    15451547      }
     1548#else
     1549    (void)(utf8);  /* Keep compiler happy by referencing function argument */
    15461550#endif
    15471551    }
     
    20162020
    20172021for (c = *cptr; c <= d; c++)
    2018   { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
     2022  { if ((othercase = UCD_OTHERCASE(c)) != c) break; }
    20192023
    20202024if (c > d) return FALSE;
     
    20252029for (++c; c <= d; c++)
    20262030  {
    2027   if (_pcre_ucp_othercase(c) != next) break;
     2031  if (UCD_OTHERCASE(c) != next) break;
    20282032  next++;
    20292033  }
     
    21352139#ifdef SUPPORT_UTF8
    21362140  if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
     2141#else
     2142  (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
    21372143#endif
    21382144  return item != next;
     
    21532159    if (next < 128) othercase = cd->fcc[next]; else
    21542160#ifdef SUPPORT_UCP
    2155     othercase = _pcre_ucp_othercase((unsigned int)next);
     2161    othercase = UCD_OTHERCASE((unsigned int)next);
    21562162#else
    21572163    othercase = NOTACHAR;
     
    21742180    if (next < 128) othercase = cd->fcc[next]; else
    21752181#ifdef SUPPORT_UCP
    2176     othercase = _pcre_ucp_othercase(next);
     2182    othercase = UCD_OTHERCASE(next);
    21772183#else
    21782184    othercase = NOTACHAR;
     
    33403346          {
    33413347          unsigned int othercase;
    3342           if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
     3348          if ((othercase = UCD_OTHERCASE(c)) != c)
    33433349            {
    33443350            *class_utf8data++ = XCL_SINGLE;
     
    42174223      const uschar *name = ++ptr;
    42184224      previous = NULL;
    4219       while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
     4225      while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
    42204226      if (*ptr == ':')
    42214227        {
     
    49214927
    49224928        If we are not at the pattern start, compile code to change the ims
    4923         options if this setting actually changes any of them. We also pass the
    4924         new setting back so that it can be put at the start of any following
    4925         branches, and when this group ends (if we are in a group), a resetting
    4926         item can be compiled. */
     4929        options if this setting actually changes any of them, and reset the
     4930        greedy defaults and the case value for firstbyte and reqbyte. */
    49274931
    49284932        if (*ptr == ')')
     
    49324936            {
    49334937            cd->external_options = newoptions;
    4934             options = newoptions;
    49354938            }
    49364939         else
     
    49414944              *code++ = newoptions & PCRE_IMS;
    49424945              }
    4943 
    4944             /* Change options at this level, and pass them back for use
    4945             in subsequent branches. Reset the greedy defaults and the case
    4946             value for firstbyte and reqbyte. */
    4947 
    4948             *optionsptr = options = newoptions;
    49494946            greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
    49504947            greedy_non_default = greedy_default ^ 1;
    4951             req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
     4948            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
    49524949            }
    49534950
     4951          /* Change options at this level, and pass them back for use
     4952          in subsequent branches. When not at the start of the pattern, this
     4953          information is also necessary so that a resetting item can be
     4954          compiled at the end of a group (if we are in a group). */
     4955
     4956          *optionsptr = options = newoptions;
    49544957          previous = NULL;       /* This item can't be repeated */
    49554958          continue;              /* It is complete */
     
    59455948*/
    59465949
    5947 PCRE_EXP_DEFN pcre *
     5950PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
    59485951pcre_compile(const char *pattern, int options, const char **errorptr,
    59495952  int *erroroffset, const unsigned char *tables)
     
    59535956
    59545957
    5955 PCRE_EXP_DEFN pcre *
     5958PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
    59565959pcre_compile2(const char *pattern, int options, int *errorcodeptr,
    59575960  const char **errorptr, int *erroroffset, const unsigned char *tables)
  • chicken/trunk/pcre/pcre_config.c

    r9133 r12117  
    6363*/
    6464
    65 PCRE_EXP_DEFN int
     65PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    6666pcre_config(int what, void *where)
    6767{
  • chicken/trunk/pcre/pcre_dfa_exec.c

    r12021 r12117  
    513513    int state_offset = current_state->offset;
    514514    int count, codevalue;
    515 #ifdef SUPPORT_UCP
    516     int chartype, script;
    517 #endif
    518515
    519516#ifdef DEBUG
     
    826823        {
    827824        BOOL OK;
    828         int category = _pcre_ucp_findprop(c, &chartype, &script);
     825        const ucd_record * prop = GET_UCD(c);
    829826        switch(code[1])
    830827          {
     
    834831
    835832          case PT_LAMP:
    836           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
     833          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
    837834          break;
    838835
    839836          case PT_GC:
    840           OK = category == code[2];
     837          OK = _pcre_ucp_gentype[prop->chartype] == code[2];
    841838          break;
    842839
    843840          case PT_PC:
    844           OK = chartype == code[2];
     841          OK = prop->chartype == code[2];
    845842          break;
    846843
    847844          case PT_SC:
    848           OK = script == code[2];
     845          OK = prop->script == code[2];
    849846          break;
    850847
     
    995992        {
    996993        BOOL OK;
    997         int category = _pcre_ucp_findprop(c, &chartype, &script);
     994        const ucd_record * prop = GET_UCD(c);
    998995        switch(code[2])
    999996          {
     
    10031000
    10041001          case PT_LAMP:
    1005           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
     1002          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
    10061003          break;
    10071004
    10081005          case PT_GC:
    1009           OK = category == code[3];
     1006          OK = _pcre_ucp_gentype[prop->chartype] == code[3];
    10101007          break;
    10111008
    10121009          case PT_PC:
    1013           OK = chartype == code[3];
     1010          OK = prop->chartype == code[3];
    10141011          break;
    10151012
    10161013          case PT_SC:
    1017           OK = script == code[3];
     1014          OK = prop->script == code[3];
    10181015          break;
    10191016
     
    10441041      count = current_state->count;  /* Already matched */
    10451042      if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
    1046       if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
     1043      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
    10471044        {
    10481045        const uschar *nptr = ptr + clen;
     
    10581055          int ndlen = 1;
    10591056          GETCHARLEN(nd, nptr, ndlen);
    1060           if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
     1057          if (UCD_CATEGORY(nd) != ucp_M) break;
    10611058          ncount++;
    10621059          nptr += ndlen;
     
    12171214        {
    12181215        BOOL OK;
    1219         int category = _pcre_ucp_findprop(c, &chartype, &script);
     1216        const ucd_record * prop = GET_UCD(c);
    12201217        switch(code[2])
    12211218          {
     
    12251222
    12261223          case PT_LAMP:
    1227           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
     1224          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
    12281225          break;
    12291226
    12301227          case PT_GC:
    1231           OK = category == code[3];
     1228          OK = _pcre_ucp_gentype[prop->chartype] == code[3];
    12321229          break;
    12331230
    12341231          case PT_PC:
    1235           OK = chartype == code[3];
     1232          OK = prop->chartype == code[3];
    12361233          break;
    12371234
    12381235          case PT_SC:
    1239           OK = script == code[3];
     1236          OK = prop->script == code[3];
    12401237          break;
    12411238
     
    12751272
    12761273      ADD_ACTIVE(state_offset + 2, 0);
    1277       if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
     1274      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
    12781275        {
    12791276        const uschar *nptr = ptr + clen;
     
    12901287          int ndlen = 1;
    12911288          GETCHARLEN(nd, nptr, ndlen);
    1292           if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
     1289          if (UCD_CATEGORY(nd) != ucp_M) break;
    12931290          ncount++;
    12941291          nptr += ndlen;
     
    14641461        {
    14651462        BOOL OK;
    1466         int category = _pcre_ucp_findprop(c, &chartype, &script);
     1463        const ucd_record * prop = GET_UCD(c);
    14671464        switch(code[4])
    14681465          {
     
    14721469
    14731470          case PT_LAMP:
    1474           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
     1471          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
    14751472          break;
    14761473
    14771474          case PT_GC:
    1478           OK = category == code[5];
     1475          OK = _pcre_ucp_gentype[prop->chartype] == code[5];
    14791476          break;
    14801477
    14811478          case PT_PC:
    1482           OK = chartype == code[5];
     1479          OK = prop->chartype == code[5];
    14831480          break;
    14841481
    14851482          case PT_SC:
    1486           OK = script == code[5];
     1483          OK = prop->script == code[5];
    14871484          break;
    14881485
     
    15171514        { ADD_ACTIVE(state_offset + 4, 0); }
    15181515      count = current_state->count;  /* Number already matched */
    1519       if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
     1516      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
    15201517        {
    15211518        const uschar *nptr = ptr + clen;
     
    15311528          int ndlen = 1;
    15321529          GETCHARLEN(nd, nptr, ndlen);
    1533           if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
     1530          if (UCD_CATEGORY(nd) != ucp_M) break;
    15341531          ncount++;
    15351532          nptr += ndlen;
     
    17111708
    17121709#ifdef SUPPORT_UCP
    1713           othercase = _pcre_ucp_othercase(c);
     1710          othercase = UCD_OTHERCASE(c);
    17141711#else
    17151712          othercase = NOTACHAR;
     
    17361733
    17371734      case OP_EXTUNI:
    1738       if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
     1735      if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
    17391736        {
    17401737        const uschar *nptr = ptr + clen;
     
    17441741          int nclen = 1;
    17451742          GETCHARLEN(c, nptr, nclen);
    1746           if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
     1743          if (UCD_CATEGORY(c) != ucp_M) break;
    17471744          ncount++;
    17481745          nptr += nclen;
     
    19121909            {
    19131910#ifdef SUPPORT_UCP
    1914             otherd = _pcre_ucp_othercase(d);
     1911            otherd = UCD_OTHERCASE(d);
    19151912#endif  /* SUPPORT_UCP */
    19161913            }
     
    19501947            {
    19511948#ifdef SUPPORT_UCP
    1952             otherd = _pcre_ucp_othercase(d);
     1949            otherd = UCD_OTHERCASE(d);
    19531950#endif  /* SUPPORT_UCP */
    19541951            }
     
    19861983            {
    19871984#ifdef SUPPORT_UCP
    1988             otherd = _pcre_ucp_othercase(d);
     1985            otherd = UCD_OTHERCASE(d);
    19891986#endif  /* SUPPORT_UCP */
    19901987            }
     
    20182015            {
    20192016#ifdef SUPPORT_UCP
    2020             otherd = _pcre_ucp_othercase(d);
     2017            otherd = UCD_OTHERCASE(d);
    20212018#endif  /* SUPPORT_UCP */
    20222019            }
     
    20532050            {
    20542051#ifdef SUPPORT_UCP
    2055             otherd = _pcre_ucp_othercase(d);
     2052            otherd = UCD_OTHERCASE(d);
    20562053#endif  /* SUPPORT_UCP */
    20572054            }
     
    25092506*/
    25102507
    2511 PCRE_EXP_DEFN int
     2508PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    25122509pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
    25132510  const char *subject, int length, int start_offset, int options, int *offsets,
     
    27372734    if (firstline)
    27382735      {
    2739       const uschar *t = current_subject;
     2736      USPTR t = current_subject;
     2737#ifdef SUPPORT_UTF8
     2738      if (utf8)
     2739        {
     2740        while (t < md->end_subject && !IS_NEWLINE(t))
     2741          {
     2742          t++;
     2743          while (t < end_subject && (*t & 0xc0) == 0x80) t++;
     2744          }
     2745        }
     2746      else
     2747#endif
    27402748      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
    27412749      end_subject = t;
     
    27592767      if (current_subject > md->start_subject + start_offset)
    27602768        {
    2761         while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
     2769#ifdef SUPPORT_UTF8
     2770        if (utf8)
     2771          {
     2772          while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
     2773            {
     2774            current_subject++;
     2775            while(current_subject < end_subject &&
     2776                  (*current_subject & 0xc0) == 0x80)
     2777              current_subject++;
     2778            }
     2779          }
     2780        else
     2781#endif
     2782        while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
    27622783          current_subject++;
    27632784
  • chicken/trunk/pcre/pcre_exec.c

    r12021 r12117  
    159159if (length > md->end_subject - eptr) return FALSE;
    160160
    161 /* Separate the caselesss case for speed */
     161/* Separate the caseless case for speed. In UTF-8 mode we can only do this
     162properly if Unicode properties are supported. Otherwise, we can check only
     163ASCII characters. */
    162164
    163165if ((ims & PCRE_CASELESS) != 0)
    164166  {
     167#ifdef SUPPORT_UTF8
     168#ifdef SUPPORT_UCP
     169  if (md->utf8)
     170    {
     171    USPTR endptr = eptr + length;
     172    while (eptr < endptr)
     173      {
     174      int c, d;
     175      GETCHARINC(c, eptr);
     176      GETCHARINC(d, p);
     177      if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
     178      }
     179    }
     180  else
     181#endif
     182#endif
     183
     184  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
     185  is no UCP support. */
     186
    165187  while (length-- > 0)
    166     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
     188    { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
    167189  }
     190
     191/* In the caseful case, we can just compare the bytes, whether or not we
     192are in UTF-8 mode. */
     193
    168194else
    169195  { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
     
    16541680    GETCHARINCTEST(c, eptr);
    16551681      {
    1656       int chartype, script;
    1657       int category = _pcre_ucp_findprop(c, &chartype, &script);
     1682      const ucd_record * prop = GET_UCD(c);
    16581683
    16591684      switch(ecode[1])
     
    16641689
    16651690        case PT_LAMP:
    1666         if ((chartype == ucp_Lu ||
    1667              chartype == ucp_Ll ||
    1668              chartype == ucp_Lt) == (op == OP_NOTPROP))
     1691        if ((prop->chartype == ucp_Lu ||
     1692             prop->chartype == ucp_Ll ||
     1693             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
    16691694          RRETURN(MATCH_NOMATCH);
    16701695         break;
    16711696
    16721697        case PT_GC:
    1673         if ((ecode[2] != category) == (op == OP_PROP))
     1698        if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
    16741699          RRETURN(MATCH_NOMATCH);
    16751700        break;
    16761701
    16771702        case PT_PC:
    1678         if ((ecode[2] != chartype) == (op == OP_PROP))
     1703        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
    16791704          RRETURN(MATCH_NOMATCH);
    16801705        break;
    16811706
    16821707        case PT_SC:
    1683         if ((ecode[2] != script) == (op == OP_PROP))
     1708        if ((ecode[2] != prop->script) == (op == OP_PROP))
    16841709          RRETURN(MATCH_NOMATCH);
    16851710        break;
     
    17001725    GETCHARINCTEST(c, eptr);
    17011726      {
    1702       int chartype, script;
    1703       int category = _pcre_ucp_findprop(c, &chartype, &script);
     1727      int category = UCD_CATEGORY(c);
    17041728      if (category == ucp_M) RRETURN(MATCH_NOMATCH);
    17051729      while (eptr < md->end_subject)
     
    17101734          GETCHARLEN(c, eptr, len);
    17111735          }
    1712         category = _pcre_ucp_findprop(c, &chartype, &script);
     1736        category = UCD_CATEGORY(c);
    17131737        if (category != ucp_M) break;
    17141738        eptr += len;
     
    21752199          {
    21762200#ifdef SUPPORT_UCP
    2177           if (dc != _pcre_ucp_othercase(fc))
     2201          if (dc != UCD_OTHERCASE(fc))
    21782202#endif
    21792203            RRETURN(MATCH_NOMATCH);
     
    22662290        unsigned int othercase;
    22672291        if ((ims & PCRE_CASELESS) != 0 &&
    2268             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
     2292            (othercase = UCD_OTHERCASE(fc)) != fc)
    22692293          oclength = _pcre_ord2utf8(othercase, occhars);
    22702294        else oclength = 0;
     
    25862610            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
    25872611            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     2612            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    25882613            GETCHARINC(d, eptr);
    25892614            if (d < 256) d = md->lcc[d];
    2590             if (fi >= max || eptr >= md->end_subject || fc == d)
    2591               RRETURN(MATCH_NOMATCH);
     2615            if (fc == d) RRETURN(MATCH_NOMATCH);
     2616
    25922617            }
    25932618          }
     
    26952720            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
    26962721            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     2722            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    26972723            GETCHARINC(d, eptr);
    2698             if (fi >= max || eptr >= md->end_subject || fc == d)
    2699               RRETURN(MATCH_NOMATCH);
     2724            if (fc == d) RRETURN(MATCH_NOMATCH);
    27002725            }
    27012726          }
     
    28712896            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    28722897            GETCHARINCTEST(c, eptr);
    2873             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     2898            prop_chartype = UCD_CHARTYPE(c);
    28742899            if ((prop_chartype == ucp_Lu ||
    28752900                 prop_chartype == ucp_Ll ||
     
    28842909            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    28852910            GETCHARINCTEST(c, eptr);
    2886             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     2911            prop_category = UCD_CATEGORY(c);
    28872912            if ((prop_category == prop_value) == prop_fail_result)
    28882913              RRETURN(MATCH_NOMATCH);
     
    28952920            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    28962921            GETCHARINCTEST(c, eptr);
    2897             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     2922            prop_chartype = UCD_CHARTYPE(c);
    28982923            if ((prop_chartype == prop_value) == prop_fail_result)
    28992924              RRETURN(MATCH_NOMATCH);
     
    29062931            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    29072932            GETCHARINCTEST(c, eptr);
    2908             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     2933            prop_script = UCD_SCRIPT(c);
    29092934            if ((prop_script == prop_value) == prop_fail_result)
    29102935              RRETURN(MATCH_NOMATCH);
     
    29252950          {
    29262951          GETCHARINCTEST(c, eptr);
    2927           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     2952          prop_category = UCD_CATEGORY(c);
    29282953          if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
    29292954          while (eptr < md->end_subject)
     
    29342959              GETCHARLEN(c, eptr, len);
    29352960              }
    2936             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     2961            prop_category = UCD_CATEGORY(c);
    29372962            if (prop_category != ucp_M) break;
    29382963            eptr += len;
     
    33503375            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    33513376            GETCHARINC(c, eptr);
    3352             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3377            prop_chartype = UCD_CHARTYPE(c);
    33533378            if ((prop_chartype == ucp_Lu ||
    33543379                 prop_chartype == ucp_Ll ||
     
    33653390            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    33663391            GETCHARINC(c, eptr);
    3367             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3392            prop_category = UCD_CATEGORY(c);
    33683393            if ((prop_category == prop_value) == prop_fail_result)
    33693394              RRETURN(MATCH_NOMATCH);
     
    33783403            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    33793404            GETCHARINC(c, eptr);
    3380             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3405            prop_chartype = UCD_CHARTYPE(c);
    33813406            if ((prop_chartype == prop_value) == prop_fail_result)
    33823407              RRETURN(MATCH_NOMATCH);
     
    33913416            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    33923417            GETCHARINC(c, eptr);
    3393             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3418            prop_script = UCD_SCRIPT(c);
    33943419            if ((prop_script == prop_value) == prop_fail_result)
    33953420              RRETURN(MATCH_NOMATCH);
     
    34133438          if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
    34143439          GETCHARINCTEST(c, eptr);
    3415           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3440          prop_category = UCD_CATEGORY(c);
    34163441          if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
    34173442          while (eptr < md->end_subject)
     
    34223447              GETCHARLEN(c, eptr, len);
    34233448              }
    3424             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3449            prop_category = UCD_CATEGORY(c);
    34253450            if (prop_category != ucp_M) break;
    34263451            eptr += len;
     
    37403765            if (eptr >= md->end_subject) break;
    37413766            GETCHARLEN(c, eptr, len);
    3742             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3767            prop_chartype = UCD_CHARTYPE(c);
    37433768            if ((prop_chartype == ucp_Lu ||
    37443769                 prop_chartype == ucp_Ll ||
     
    37553780            if (eptr >= md->end_subject) break;
    37563781            GETCHARLEN(c, eptr, len);
    3757             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3782            prop_category = UCD_CATEGORY(c);
    37583783            if ((prop_category == prop_value) == prop_fail_result)
    37593784              break;
     
    37683793            if (eptr >= md->end_subject) break;
    37693794            GETCHARLEN(c, eptr, len);
    3770             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3795            prop_chartype = UCD_CHARTYPE(c);
    37713796            if ((prop_chartype == prop_value) == prop_fail_result)
    37723797              break;
     
    37813806            if (eptr >= md->end_subject) break;
    37823807            GETCHARLEN(c, eptr, len);
    3783             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3808            prop_script = UCD_SCRIPT(c);
    37843809            if ((prop_script == prop_value) == prop_fail_result)
    37853810              break;
     
    38103835          if (eptr >= md->end_subject) break;
    38113836          GETCHARINCTEST(c, eptr);
    3812           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3837          prop_category = UCD_CATEGORY(c);
    38133838          if (prop_category == ucp_M) break;
    38143839          while (eptr < md->end_subject)
     
    38193844              GETCHARLEN(c, eptr, len);
    38203845              }
    3821             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3846            prop_category = UCD_CATEGORY(c);
    38223847            if (prop_category != ucp_M) break;
    38233848            eptr += len;
     
    38413866              GETCHARLEN(c, eptr, len);
    38423867              }
    3843             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
     3868            prop_category = UCD_CATEGORY(c);
    38443869            if (prop_category != ucp_M) break;
    38453870            eptr--;
     
    43614386*/
    43624387
    4363 PCRE_EXP_DEFN int
     4388PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    43644389pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
    43654390  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
     
    46734698    {
    46744699    USPTR t = start_match;
     4700#ifdef SUPPORT_UTF8
     4701    if (utf8)
     4702      {
     4703      while (t < md->end_subject && !IS_NEWLINE(t))
     4704        {
     4705        t++;
     4706        while (t < end_subject && (*t & 0xc0) == 0x80) t++;
     4707        }
     4708      }
     4709    else
     4710#endif
    46754711    while (t < md->end_subject && !IS_NEWLINE(t)) t++;
    46764712    end_subject = t;
    46774713    }
    46784714
    4679   /* Now test for a unique first byte */
     4715  /* Now advance to a unique first byte if there is one. */
    46804716
    46814717  if (first_byte >= 0)
    46824718    {
    46834719    if (first_byte_caseless)
    4684       while (start_match < end_subject &&
    4685              md->lcc[*start_match] != first_byte)
    4686         { NEXTCHAR(start_match); }
     4720      while (start_match < end_subject && md->lcc[*start_match] != first_byte)
     4721        start_match++;
    46874722    else
    46884723      while (start_match < end_subject && *start_match != first_byte)
    4689         { NEXTCHAR(start_match); }
     4724        start_match++;
    46904725    }
    46914726
    4692   /* Or to just after a linebreak for a multiline match if possible */
     4727  /* Or to just after a linebreak for a multiline match */
    46934728
    46944729  else if (startline)
     
    46964731    if (start_match > md->start_subject + start_offset)
    46974732      {
    4698       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
    4699         { NEXTCHAR(start_match); }
     4733#ifdef SUPPORT_UTF8
     4734      if (utf8)
     4735        {
     4736        while (start_match < end_subject && !WAS_NEWLINE(start_match))
     4737          {
     4738          start_match++;
     4739          while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
     4740            start_match++;
     4741          }
     4742        }
     4743      else
     4744#endif
     4745      while (start_match < end_subject && !WAS_NEWLINE(start_match))
     4746        start_match++;
    47004747
    47014748      /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
     
    47114758    }
    47124759
    4713   /* Or to a non-unique first char after study */
     4760  /* Or to a non-unique first byte after study */
    47144761
    47154762  else if (start_bits != NULL)
     
    47184765      {
    47194766      register unsigned int c = *start_match;
    4720       if ((start_bits[c/8] & (1 << (c&7))) == 0)
    4721         { NEXTCHAR(start_match); }
    4722       else break;
     4767      if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
     4768        else break;
    47234769      }
    47244770    }
  • chicken/trunk/pcre/pcre_fullinfo.c

    r9133 r12117  
    6666*/
    6767
    68 PCRE_EXP_DEFN int
     68PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    6969pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
    7070  void *where)
  • chicken/trunk/pcre/pcre_get.c

    r9133 r12117  
    6666*/
    6767
    68 int
     68PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    6969pcre_get_stringnumber(const pcre *code, const char *stringname)
    7070{
     
    115115*/
    116116
    117 int
     117PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    118118pcre_get_stringtable_entries(const pcre *code, const char *stringname,
    119119  char **firstptr, char **lastptr)
     
    232232*/
    233233
    234 int
     234PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    235235pcre_copy_substring(const char *subject, int *ovector, int stringcount,
    236236  int stringnumber, char *buffer, int size)
     
    277277*/
    278278
    279 int
     279PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    280280pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
    281281  int stringcount, const char *stringname, char *buffer, int size)
     
    309309*/
    310310
    311 int
     311PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    312312pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
    313313  const char ***listptr)
     
    354354*/
    355355
    356 void
     356PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
    357357pcre_free_substring_list(const char **pointer)
    358358{
     
    387387*/
    388388
    389 int
     389PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    390390pcre_get_substring(const char *subject, int *ovector, int stringcount,
    391391  int stringnumber, const char **stringptr)
     
    434434*/
    435435
    436 int
     436PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    437437pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
    438438  int stringcount, const char *stringname, const char **stringptr)
     
    457457*/
    458458
    459 void
     459PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
    460460pcre_free_substring(const char *pointer)
    461461{
  • chicken/trunk/pcre/pcre_info.c

    r9133 r12117  
    7373*/
    7474
    75 PCRE_EXP_DEFN int
     75PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    7676pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
    7777{
  • chicken/trunk/pcre/pcre_internal.h

    r12021 r12117  
    133133#endif
    134134
     135/* When compiling with the MSVC compiler, it is sometimes necessary to include
     136a "calling convention" before exported function names. (This is secondhand
     137information; I know nothing about MSVC myself). For example, something like
     138
     139  void __cdecl function(....)
     140
     141might be needed. In order so make this easy, all the exported functions have
     142PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
     143set, we ensure here that it has no effect. */
     144
     145#ifndef PCRE_CALL_CONVENTION
     146#define PCRE_CALL_CONVENTION
     147#endif
     148
    135149/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
    136150cannot determine these outside the compilation (e.g. by running a program as
     
    141155#if USHRT_MAX == 65535
    142156  typedef unsigned short pcre_uint16;
     157  typedef short pcre_int16;
    143158#elif UINT_MAX == 65535
    144159  typedef unsigned int pcre_uint16;
     160  typedef int pcre_int16;
    145161#else
    146162  #error Cannot determine a type for 16-bit unsigned integers
     
    149165#if UINT_MAX == 4294967295
    150166  typedef unsigned int pcre_uint32;
     167  typedef int pcre_int32;
    151168#elif ULONG_MAX == 4294967295
    152169  typedef unsigned long int pcre_uint32;
     170  typedef long int pcre_int32;
    153171#else
    154172  #error Cannot determine a type for 32-bit unsigned integers
     
    364382
    365383#ifndef SUPPORT_UTF8
    366 #define NEXTCHAR(p) p++;
    367384#define GETCHAR(c, eptr) c = *eptr;
    368385#define GETCHARTEST(c, eptr) c = *eptr;
     
    373390
    374391#else   /* SUPPORT_UTF8 */
    375 
    376 /* Advance a character pointer one byte in non-UTF-8 mode and by one character
    377 in UTF-8 mode. */
    378 
    379 #define NEXTCHAR(p) \
    380   p++; \
    381   if (utf8) { while((*p & 0xc0) == 0x80) p++; }
    382392
    383393/* Get the next UTF-8 character, not advancing the pointer. This is called when
     
    550560#define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
    551561
    552 /* Miscellaneous definitions */
    553 
     562/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
     563environments where these macros are defined elsewhere. */
     564
     565#ifndef FALSE
    554566typedef int BOOL;
    555567
    556568#define FALSE   0
    557569#define TRUE    1
     570#endif
    558571
    559572/* Escape items that are just an encoding of a particular data value. */
     
    11271140extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
    11281141                      const pcre_study_data *, pcre_study_data *);
    1129 extern int          _pcre_ucp_findprop(const unsigned int, int *, int *);
    1130 extern unsigned int _pcre_ucp_othercase(const unsigned int);
    11311142extern int          _pcre_valid_utf8(const uschar *, int);
    11321143extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *,
     
    11341145extern BOOL         _pcre_xclass(int, const uschar *);
    11351146
     1147
     1148/* Unicode character database (UCD) */
     1149
     1150typedef struct {
     1151  uschar script;
     1152  uschar chartype;
     1153  pcre_int32 other_case;
     1154} ucd_record;
     1155
     1156extern const ucd_record  _pcre_ucd_records[];
     1157extern const uschar      _pcre_ucd_stage1[];
     1158extern const pcre_uint16 _pcre_ucd_stage2[];
     1159extern const int         _pcre_ucp_gentype[];
     1160
     1161
     1162/* UCD access macros */
     1163
     1164#define UCD_BLOCK_SIZE 128
     1165#define GET_UCD(ch) (_pcre_ucd_records + \
     1166        _pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \
     1167        UCD_BLOCK_SIZE + ch % UCD_BLOCK_SIZE])
     1168
     1169#define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype
     1170#define UCD_SCRIPT(ch)    GET_UCD(ch)->script
     1171#define UCD_CATEGORY(ch)  _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
     1172#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
     1173
    11361174#endif
    11371175
  • chicken/trunk/pcre/pcre_ord2utf8.c

    r9133 r12117  
    7979return i + 1;
    8080#else
    81 return 0;   /* Keep compiler happy; this function won't ever be */
    82 #endif      /* called when SUPPORT_UTF8 is not defined. */
     81(void)(cvalue);  /* Keep compiler happy; this function won't ever be */
     82(void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
     83return 0;
     84#endif
    8385}
    8486
  • chicken/trunk/pcre/pcre_refcount.c

    r9133 r12117  
    6969*/
    7070
    71 PCRE_EXP_DEFN int
     71PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    7272pcre_refcount(pcre *argument_re, int adjust)
    7373{
  • chicken/trunk/pcre/pcre_study.c

    r12021 r12117  
    221221
    222222      case OP_SKIPZERO:
     223      tcode++;
    223224      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
    224225      tcode += 1 + LINK_SIZE;
     
    504505*/
    505506
    506 PCRE_EXP_DEFN pcre_extra *
     507PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
    507508pcre_study(const pcre *external_re, int options, const char **errorptr)
    508509{
  • chicken/trunk/pcre/pcre_tables.c

    r9133 r12117  
    8888  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
    8989
     90/* Table to translate from particular type value to the general value. */
     91
     92const int _pcre_ucp_gentype[] = {
     93  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
     94  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
     95  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
     96  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
     97  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
     98  ucp_P, ucp_P,                       /* Ps, Po */
     99  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
     100  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
     101};
     102
    90103/* The pcre_utt[] table below translates Unicode property names into type and
    91104code values. It is searched by binary chop, so must be in collating sequence of
     
    95108putting all the names into a single, large string and then using offsets in the
    96109table itself. Maintenance is more error-prone, but frequent changes to this
    97 data is unlikely. */
     110data are unlikely.
     111
     112July 2008: There is now a script called maint/GenerateUtt.py which can be used
     113to generate this data instead of maintaining it entirely by hand. */
    98114
    99115const char _pcre_utt_names[] =
     
    109125  "C\0"
    110126  "Canadian_Aboriginal\0"
     127  "Carian\0"
    111128  "Cc\0"
    112129  "Cf\0"
     130  "Cham\0"
    113131  "Cherokee\0"
    114132  "Cn\0"
     
    137155  "Kannada\0"
    138156  "Katakana\0"
     157  "Kayah_Li\0"
    139158  "Kharoshthi\0"
    140159  "Khmer\0"
     
    143162  "Lao\0"
    144163  "Latin\0"
     164  "Lepcha\0"
    145165  "Limbu\0"
    146166  "Linear_B\0"
     
    150170  "Lt\0"
    151171  "Lu\0"
     172  "Lycian\0"
     173  "Lydian\0"
    152174  "M\0"
    153175  "Malayalam\0"
     
    164186  "No\0"
    165187  "Ogham\0"
     188  "Ol_Chiki\0"
    166189  "Old_Italic\0"
    167190  "Old_Persian\0"
     
    178201  "Po\0"
    179202  "Ps\0"
     203  "Rejang\0"
    180204  "Runic\0"
    181205  "S\0"
     206  "Saurashtra\0"
    182207  "Sc\0"
    183208  "Shavian\0"
     
    186211  "Sm\0"
    187212  "So\0"
     213  "Sundanese\0"
    188214  "Syloti_Nagri\0"
    189215  "Syriac\0"
     
    198224  "Tifinagh\0"
    199225  "Ugaritic\0"
     226  "Vai\0"
    200227  "Yi\0"
    201228  "Z\0"
     
    205232
    206233const ucp_type_table _pcre_utt[] = {
    207   { 0,   PT_ANY, 0 },
    208   { 4,   PT_SC, ucp_Arabic },
    209   { 11,  PT_SC, ucp_Armenian },
    210   { 20,  PT_SC, ucp_Balinese },
    211   { 29,  PT_SC, ucp_Bengali },
    212   { 37,  PT_SC, ucp_Bopomofo },
    213   { 46,  PT_SC, ucp_Braille },
    214   { 54,  PT_SC, ucp_Buginese },
    215   { 63,  PT_SC, ucp_Buhid },
    216   { 69,  PT_GC, ucp_C },
    217   { 71,  PT_SC, ucp_Canadian_Aboriginal },
    218   { 91,  PT_PC, ucp_Cc },
    219   { 94,  PT_PC, ucp_Cf },
    220   { 97,  PT_SC, ucp_Cherokee },
    221   { 106, PT_PC, ucp_Cn },
    222   { 109, PT_PC, ucp_Co },
    223   { 112, PT_SC, ucp_Common },
    224   { 119, PT_SC, ucp_Coptic },
    225   { 126, PT_PC, ucp_Cs },
    226   { 129, PT_SC, ucp_Cuneiform },
    227   { 139, PT_SC, ucp_Cypriot },
    228   { 147, PT_SC, ucp_Cyrillic },
    229   { 156, PT_SC, ucp_Deseret },
    230   { 164, PT_SC, ucp_Devanagari },
    231   { 175, PT_SC, ucp_Ethiopic },
    232   { 184, PT_SC, ucp_Georgian },
    233   { 193, PT_SC, ucp_Glagolitic },
    234   { 204, PT_SC, ucp_Gothic },
    235   { 211, PT_SC, ucp_Greek },
    236   { 217, PT_SC, ucp_Gujarati },
    237   { 226, PT_SC, ucp_Gurmukhi },
    238   { 235, PT_SC, ucp_Han },
    239   { 239, PT_SC, ucp_Hangul },
    240   { 246, PT_SC, ucp_Hanunoo },
    241   { 254, PT_SC, ucp_Hebrew },
    242   { 261, PT_SC, ucp_Hiragana },
    243   { 270, PT_SC, ucp_Inherited },
    244   { 280, PT_SC, ucp_Kannada },
    245   { 288, PT_SC, ucp_Katakana },
    246   { 297, PT_SC, ucp_Kharoshthi },
    247   { 308, PT_SC, ucp_Khmer },
    248   { 314, PT_GC, ucp_L },
    249   { 316, PT_LAMP, 0 },
    250   { 319, PT_SC, ucp_Lao },
    251   { 323, PT_SC, ucp_Latin },
    252   { 329, PT_SC, ucp_Limbu },
    253   { 335, PT_SC, ucp_Linear_B },
    254   { 344, PT_PC, ucp_Ll },
    255   { 347, PT_PC, ucp_Lm },
    256   { 350, PT_PC, ucp_Lo },
    257   { 353, PT_PC, ucp_Lt },
    258   { 356, PT_PC, ucp_Lu },
    259   { 359, PT_GC, ucp_M },
    260   { 361, PT_SC, ucp_Malayalam },
    261   { 371, PT_PC, ucp_Mc },
    262   { 374, PT_PC, ucp_Me },
    263   { 377, PT_PC, ucp_Mn },
    264   { 380, PT_SC, ucp_Mongolian },
    265   { 390, PT_SC, ucp_Myanmar },
    266   { 398, PT_GC, ucp_N },
    267   { 400, PT_PC, ucp_Nd },
    268   { 403, PT_SC, ucp_New_Tai_Lue },
    269   { 415, PT_SC, ucp_Nko },
    270   { 419, PT_PC, ucp_Nl },
    271   { 422, PT_PC, ucp_No },
    272   { 425, PT_SC, ucp_Ogham },
    273   { 431, PT_SC, ucp_Old_Italic },
    274   { 442, PT_SC, ucp_Old_Persian },
    275   { 454, PT_SC, ucp_Oriya },
    276   { 460, PT_SC, ucp_Osmanya },
    277   { 468, PT_GC, ucp_P },
    278   { 470, PT_PC, ucp_Pc },
    279   { 473, PT_PC, ucp_Pd },
    280   { 476, PT_PC, ucp_Pe },
    281   { 479, PT_PC, ucp_Pf },
    282   { 482, PT_SC, ucp_Phags_Pa },
    283   { 491, PT_SC, ucp_Phoenician },
    284   { 502, PT_PC, ucp_Pi },
    285   { 505, PT_PC, ucp_Po },
    286   { 508, PT_PC, ucp_Ps },
    287   { 511, PT_SC, ucp_Runic },
    288   { 517, PT_GC, ucp_S },
    289   { 519, PT_PC, ucp_Sc },
    290   { 522, PT_SC, ucp_Shavian },
    291   { 530, PT_SC, ucp_Sinhala },
    292   { 538, PT_PC, ucp_Sk },
    293   { 541, PT_PC, ucp_Sm },
    294   { 544, PT_PC, ucp_So },
    295   { 547, PT_SC, ucp_Syloti_Nagri },
    296   { 560, PT_SC, ucp_Syriac },
    297   { 567, PT_SC, ucp_Tagalog },
    298   { 575, PT_SC, ucp_Tagbanwa },
    299   { 584, PT_SC, ucp_Tai_Le },
    300   { 591, PT_SC, ucp_Tamil },
    301   { 597, PT_SC, ucp_Telugu },
    302   { 604, PT_SC, ucp_Thaana },
    303   { 611, PT_SC, ucp_Thai },
    304   { 616, PT_SC, ucp_Tibetan },
    305   { 624, PT_SC, ucp_Tifinagh },
    306   { 633, PT_SC, ucp_Ugaritic },
    307   { 642, PT_SC, ucp_Yi },
    308   { 645, PT_GC, ucp_Z },
    309   { 647, PT_PC, ucp_Zl },
    310   { 650, PT_PC, ucp_Zp },
    311   { 653, PT_PC, ucp_Zs }
     234  {   0, PT_ANY, 0 },
     235  {   4, PT_SC, ucp_Arabic },
     236  {  11, PT_SC, ucp_Armenian },
     237  {  20, PT_SC, ucp_Balinese },
     238  {  29, PT_SC, ucp_Bengali },
     239  {  37, PT_SC, ucp_Bopomofo },
     240  {  46, PT_SC, ucp_Braille },
     241  {  54, PT_SC, ucp_Buginese },
     242  {  63, PT_SC, ucp_Buhid },
     243  {  69, PT_GC, ucp_C },
     244  {  71, PT_SC, ucp_Canadian_Aboriginal },
     245  {  91, PT_SC, ucp_Carian },
     246  {  98, PT_PC, ucp_Cc },
     247  { 101, PT_PC, ucp_Cf },
     248  { 104, PT_SC, ucp_Cham },
     249  { 109, PT_SC, ucp_Cherokee },
     250  { 118, PT_PC, ucp_Cn },
     251  { 121, PT_PC, ucp_Co },
     252  { 124, PT_SC, ucp_Common },
     253  { 131, PT_SC, ucp_Coptic },
     254  { 138, PT_PC, ucp_Cs },
     255  { 141, PT_SC, ucp_Cuneiform },
     256  { 151, PT_SC, ucp_Cypriot },
     257  { 159, PT_SC, ucp_Cyrillic },
     258  { 168, PT_SC, ucp_Deseret },
     259  { 176, PT_SC, ucp_Devanagari },
     260  { 187, PT_SC, ucp_Ethiopic },
     261  { 196, PT_SC, ucp_Georgian },
     262  { 205, PT_SC, ucp_Glagolitic },
     263  { 216, PT_SC, ucp_Gothic },
     264  { 223, PT_SC, ucp_Greek },
     265  { 229, PT_SC, ucp_Gujarati },
     266  { 238, PT_SC, ucp_Gurmukhi },
     267  { 247, PT_SC, ucp_Han },
     268  { 251, PT_SC, ucp_Hangul },
     269  { 258, PT_SC, ucp_Hanunoo },
     270  { 266, PT_SC, ucp_Hebrew },
     271  { 273, PT_SC, ucp_Hiragana },
     272  { 282, PT_SC, ucp_Inherited },
     273  { 292, PT_SC, ucp_Kannada },
     274  { 300, PT_SC, ucp_Katakana },
     275  { 309, PT_SC, ucp_Kayah_Li },
     276  { 318, PT_SC, ucp_Kharoshthi },
     277  { 329, PT_SC, ucp_Khmer },
     278  { 335, PT_GC, ucp_L },
     279  { 337, PT_LAMP, 0 },
     280  { 340, PT_SC, ucp_Lao },
     281  { 344, PT_SC, ucp_Latin },
     282  { 350, PT_SC, ucp_Lepcha },
     283  { 357, PT_SC, ucp_Limbu },
     284  { 363, PT_SC, ucp_Linear_B },
     285  { 372, PT_PC, ucp_Ll },
     286  { 375, PT_PC, ucp_Lm },
     287  { 378, PT_PC, ucp_Lo },
     288  { 381, PT_PC, ucp_Lt },
     289  { 384, PT_PC, ucp_Lu },
     290  { 387, PT_SC, ucp_Lycian },
     291  { 394, PT_SC, ucp_Lydian },
     292  { 401, PT_GC, ucp_M },
     293  { 403, PT_SC, ucp_Malayalam },
     294  { 413, PT_PC, ucp_Mc },
     295  { 416, PT_PC, ucp_Me },
     296  { 419, PT_PC, ucp_Mn },
     297  { 422, PT_SC, ucp_Mongolian },
     298  { 432, PT_SC, ucp_Myanmar },
     299  { 440, PT_GC, ucp_N },
     300  { 442, PT_PC, ucp_Nd },
     301  { 445, PT_SC, ucp_New_Tai_Lue },
     302  { 457, PT_SC, ucp_Nko },
     303  { 461, PT_PC, ucp_Nl },
     304  { 464, PT_PC, ucp_No },
     305  { 467, PT_SC, ucp_Ogham },
     306  { 473, PT_SC, ucp_Ol_Chiki },
     307  { 482, PT_SC, ucp_Old_Italic },
     308  { 493, PT_SC, ucp_Old_Persian },
     309  { 505, PT_SC, ucp_Oriya },
     310  { 511, PT_SC, ucp_Osmanya },
     311  { 519, PT_GC, ucp_P },
     312  { 521, PT_PC, ucp_Pc },
     313  { 524, PT_PC, ucp_Pd },
     314  { 527, PT_PC, ucp_Pe },
     315  { 530, PT_PC, ucp_Pf },
     316  { 533, PT_SC, ucp_Phags_Pa },
     317  { 542, PT_SC, ucp_Phoenician },
     318  { 553, PT_PC, ucp_Pi },
     319  { 556, PT_PC, ucp_Po },
     320  { 559, PT_PC, ucp_Ps },
     321  { 562, PT_SC, ucp_Rejang },
     322  { 569, PT_SC, ucp_Runic },
     323  { 575, PT_GC, ucp_S },
     324  { 577, PT_SC, ucp_Saurashtra },
     325  { 588, PT_PC, ucp_Sc },
     326  { 591, PT_SC, ucp_Shavian },
     327  { 599, PT_SC, ucp_Sinhala },
     328  { 607, PT_PC, ucp_Sk },
     329  { 610, PT_PC, ucp_Sm },
     330  { 613, PT_PC, ucp_So },
     331  { 616, PT_SC, ucp_Sundanese },
     332  { 626, PT_SC, ucp_Syloti_Nagri },
     333  { 639, PT_SC, ucp_Syriac },
     334  { 646, PT_SC, ucp_Tagalog },
     335  { 654, PT_SC, ucp_Tagbanwa },
     336  { 663, PT_SC, ucp_Tai_Le },
     337  { 670, PT_SC, ucp_Tamil },
     338  { 676, PT_SC, ucp_Telugu },
     339  { 683, PT_SC, ucp_Thaana },
     340  { 690, PT_SC, ucp_Thai },
     341  { 695, PT_SC, ucp_Tibetan },
     342  { 703, PT_SC, ucp_Tifinagh },
     343  { 712, PT_SC, ucp_Ugaritic },
     344  { 721, PT_SC, ucp_Vai },
     345  { 725, PT_SC, ucp_Yi },
     346  { 728, PT_GC, ucp_Z },
     347  { 730, PT_PC, ucp_Zl },
     348  { 733, PT_PC, ucp_Zp },
     349  { 736, PT_PC, ucp_Zs }
    312350};
    313351
  • chicken/trunk/pcre/pcre_valid_utf8.c

    r9133 r12117  
    155155    }
    156156  }
     157#else
     158(void)(string);  /* Keep picky compilers happy */
     159(void)(length);
    157160#endif
    158161
  • chicken/trunk/pcre/pcre_version.c

    r9133 r12117  
    8080the STRING macro with an empty argument when doing the test. */
    8181
    82 PCRE_EXP_DEFN const char *
     82PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
    8383pcre_version(void)
    8484{
  • chicken/trunk/pcre/pcre_xclass.c

    r9133 r12117  
    105105  else  /* XCL_PROP & XCL_NOTPROP */
    106106    {
    107     int chartype, script;
    108     int category = _pcre_ucp_findprop(c, &chartype, &script);
     107    const ucd_record * prop = GET_UCD(c);
    109108
    110109    switch(*data)
     
    115114
    116115      case PT_LAMP:
    117       if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
     116      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt) ==
    118117          (t == XCL_PROP)) return !negated;
    119118      break;
    120119
    121120      case PT_GC:
    122       if ((data[1] == category) == (t == XCL_PROP)) return !negated;
     121      if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP)) return !negated;
    123122      break;
    124123
    125124      case PT_PC:
    126       if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
     125      if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
    127126      break;
    128127
    129128      case PT_SC:
    130       if ((data[1] == script) == (t == XCL_PROP)) return !negated;
     129      if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
    131130      break;
    132131
  • chicken/trunk/pcre/ucp.h

    r6175 r12117  
    122122  ucp_Ugaritic,
    123123  ucp_Yi,
    124   ucp_Balinese,      /* New for Unicode 5.0.0 */
    125   ucp_Cuneiform,     /* New for Unicode 5.0.0 */
    126   ucp_Nko,           /* New for Unicode 5.0.0 */
    127   ucp_Phags_Pa,      /* New for Unicode 5.0.0 */
    128   ucp_Phoenician     /* New for Unicode 5.0.0 */
     124  /* New for Unicode 5.0: */
     125  ucp_Balinese,
     126  ucp_Cuneiform,
     127  ucp_Nko,
     128  ucp_Phags_Pa,
     129  ucp_Phoenician,
     130  /* New for Unicode 5.1: */
     131  ucp_Carian,
     132  ucp_Cham,
     133  ucp_Kayah_Li,
     134  ucp_Lepcha,
     135  ucp_Lycian,
     136  ucp_Lydian,
     137  ucp_Ol_Chiki,
     138  ucp_Rejang,
     139  ucp_Saurashtra,
     140  ucp_Sundanese,
     141  ucp_Vai
    129142};
    130143
  • chicken/trunk/rules.make

    r12021 r12117  
    5252ifeq ($(USE_HOST_PCRE),)
    5353PCRE_OBJECTS_1 ?= \
     54       $(PCRE_DIR)pcre_chartables \
    5455       $(PCRE_DIR)pcre_compile \
    5556       $(PCRE_DIR)pcre_config \
     
    6768       $(PCRE_DIR)pcre_tables \
    6869       $(PCRE_DIR)pcre_try_flipped \
    69        $(PCRE_DIR)pcre_ucp_searchfuncs \
     70       $(PCRE_DIR)pcre_ucd \
    7071       $(PCRE_DIR)pcre_valid_utf8 \
    7172       $(PCRE_DIR)pcre_version \
    72        $(PCRE_DIR)pcre_xclass \
    73        $(PCRE_DIR)pcre_chartables
     73       $(PCRE_DIR)pcre_xclass
    7474PCRE_SHARED_OBJECTS = $(PCRE_OBJECTS_1:=$(O))
    7575PCRE_STATIC_OBJECTS = $(PCRE_OBJECTS_1:=-static$(O))
     
    657657          $(C_COMPILER_OPTIMIZATION_OPTIONS) $(C_COMPILER_SHARED_OPTIONS) $< $(C_COMPILER_OUTPUT) \
    658658          $(C_COMPILER_PCRE_OPTIONS)
    659 $(PCRE_DIR)pcre_ucp_searchfuncs$(O): pcre_ucp_searchfuncs.c pcre_internal.h config.h $(CHICKEN_CONFIG_H) pcre.h ucp.h ucptable.h ucpinternal.h
     659$(PCRE_DIR)pcre_ucd$(O): pcre_ucd.c pcre_internal.h config.h $(CHICKEN_CONFIG_H) pcre.h ucp.h
    660660        $(C_COMPILER) $(C_COMPILER_OPTIONS) $(PCRE_INCLUDES) $(C_COMPILER_COMPILE_OPTION) \
    661661          $(C_COMPILER_OPTIMIZATION_OPTIONS) $(C_COMPILER_SHARED_OPTIONS) $< $(C_COMPILER_OUTPUT) \
     
    740740          $(C_COMPILER_OPTIMIZATION_OPTIONS) $(C_COMPILER_SHARED_OPTIONS) $< $(C_COMPILER_OUTPUT) \
    741741          $(C_COMPILER_PCRE_OPTIONS)
    742 $(PCRE_DIR)pcre_ucp_searchfuncs-static$(O): pcre_ucp_searchfuncs.c pcre_internal.h config.h $(CHICKEN_CONFIG_H) pcre.h ucp.h ucptable.h ucpinternal.h
     742$(PCRE_DIR)pcre_ucd-static$(O): pcre_ucd.c pcre_internal.h config.h $(CHICKEN_CONFIG_H) pcre.h ucp.h
    743743        $(C_COMPILER) $(C_COMPILER_OPTIONS) $(PCRE_INCLUDES) $(C_COMPILER_COMPILE_OPTION) \
    744744          $(C_COMPILER_OPTIMIZATION_OPTIONS) $(C_COMPILER_SHARED_OPTIONS) $< $(C_COMPILER_OUTPUT) \
  • chicken/trunk/scheme-complete.el

    r11158 r12117  
    1313;;; (autoload 'scheme-smart-complete "scheme-complete" nil t)
    1414;;; (eval-after-load 'scheme
    15 ;;;   '(progn (define-key scheme-mode-map "\e\t" 'scheme-smart-complete)))
     15;;;   '(define-key scheme-mode-map "\e\t" 'scheme-smart-complete))
    1616;;;
    1717;;; Alternately, you may want to just bind TAB to the
     
    2020;;;
    2121;;; (eval-after-load 'scheme
    22 ;;;   '(progn (define-key scheme-mode-map "\t" 'scheme-complete-or-indent)))
     22;;;   '(define-key scheme-mode-map "\t" 'scheme-complete-or-indent))
    2323;;;
    2424;;; If you use eldoc-mode (included in Emacs), you can also get live
     
    3232;;;     (eldoc-mode)))
    3333;;;
     34;;; You can enable slightly smarter indentation with
     35;;;
     36;;; (setq lisp-indent-function 'scheme-smart-indent-function)
     37;;;
     38;;; which basically ignores the scheme-indent-function property for
     39;;; locally overridden symbols (e.g. if you use the (let loop () ...)
     40;;; idiom it won't use the special loop indentation inside).
     41;;;
    3442;;; There's a single custom variable, `scheme-default-implementation',
    3543;;; which you can use to specify your preferred implementation when we
     
    3947
    4048;;; History:
     49;;; 0.8.3: 2008/10/06 - smart indent, inferring types from imported modules,
     50;;                      optionally caching exports, chicken 4 support
    4151;;; 0.8.2: 2008/07/04 - both TAB and M-TAB scroll results (thanks Peter Bex),
    4252;;;                     better MATCH handling, fixed SRFI-55, other bugfixes
     
    27562766           (let* ((res (shell-command-to-string "gauche-config --syslibdir"))
    27572767                  (res (substring res 0 (- (length res) 1))))
    2758              (and res (file-directory-p res) res)))
     2768             (and res (file-directory-p res)
     2769                  (let* ((dir (file-name-directory res))
     2770                         (dir2 (file-name-directory
     2771                                (substring dir 0 (- (length dir) 1)))))
     2772                    (substring dir2 0 (- (length dir2) 1))))))
    27592773      "/usr/local/share/gauche"))
    27602774
     
    28172831(defun scheme-find-file-in-path (file path)
    28182832  (car (remove-if-not
    2819         #'(lambda (dir) (file-exists-p (concat dir file)))
     2833        #'(lambda (dir) (file-exists-p (concat dir "/" file)))
    28202834        path)))
    28212835
     
    30023016      ((= $ @)
    30033017       (if (consp (cdr x)) (scheme-extract-match-clause-vars (cddr x)) '()))
    3004       ((\?)
     3018      ((\? ? ) ; XXXX this is a hack, the lone ? gets read as a char (space)
    30053019       (if (and (consp (cdr x))
    30063020                (consp (cddr x))
     
    30133027       (if (consp (cdr x)) (scheme-extract-match-clause-vars (cadr x)) '()))
    30143028      ((quote) '())
    3015       ((quasiquote) '()) ; XXXX
    3016       (t (union (scheme-extract-match-clause-vars (car x))
    3017                 (scheme-extract-match-clause-vars (cdr x))))))
     3029      ((quasiquote) '())                ; XXXX
     3030      (t
     3031       (union (scheme-extract-match-clause-vars (car x))
     3032              (scheme-extract-match-clause-vars (cdr x))))))
    30183033   ((vectorp x)
    30193034    (scheme-extract-match-clause-vars (concatenate 'list x)))
     
    30623077  :group 'scheme-complete)
    30633078
     3079(defcustom scheme-complete-smart-indent-p t
     3080  "Toggles using `scheme-smart-indent' for `scheme-complete-or-indent'."
     3081  :type 'boolean
     3082  :group 'scheme-complete)
     3083
     3084(defcustom scheme-complete-cache-p t
     3085  "Toggles caching of module/load export information."
     3086  :type 'boolean
     3087  :group 'scheme-complete)
     3088
     3089;; (defcustom scheme-complete-learn-syntax-p nil
     3090;;   "Toggles parsing of syntax-rules macros for completion info."
     3091;;   :type 'boolean
     3092;;   :group 'scheme-complete)
     3093
     3094(defvar *scheme-complete-module-cache* '())
     3095
    30643096(defvar *scheme-current-implementation* nil)
    30653097(make-variable-buffer-local '*scheme-current-implementation*)
     
    30673099;; most implementations use their name as the script name
    30683100(defvar *scheme-interpreter-alist*
    3069   '(("csi" . chicken)
     3101  '(("csi"  . chicken)
    30703102    ("gosh" . gauche)
    3071     ("gsi" . gambit)
     3103    ("gsi"  . gambit)
     3104    ("mred" . mzscheme)
    30723105    ))
    30733106
     
    30793112          (save-excursion
    30803113            (goto-char (point-min))
    3081             (or (if (looking-at "#! *\\([^ \t\n]+\\)")
    3082                     (let ((script (file-name-nondirectory (match-string 1))))
    3083                       (or (cdr (assoc script *scheme-interpreter-alist*))
    3084                           (intern script))))
    3085                 (cond
    3086                  ((re-search-forward "(define-module +\\(.\\)" nil t)
    3087                   (if (equal "(" (match-string 1))
    3088                       'guile
    3089                     'gauche))
    3090                  ((re-search-forward "(use " nil t)
    3091                   'chicken)
    3092                  ((re-search-forward "(module " nil t)
    3093                   'mzscheme))))))
     3114            (or
     3115             (and (looking-at "#! *\\([^ \t\n]+\\)")
     3116                  (let ((script (file-name-nondirectory (match-string 1))))
     3117                    (cdr (assoc script *scheme-interpreter-alist*))))
     3118             (cond
     3119              ((re-search-forward "(define-module +\\(.\\)" nil t)
     3120               (if (equal "(" (match-string 1))
     3121                   'guile
     3122                 'gauche))
     3123              ((re-search-forward "(use " nil t)
     3124               'chicken)
     3125              ((re-search-forward
     3126                "\\(?:(module \\|#\\(?:lang\\|reader\\)\\)" nil t)
     3127               'mzscheme))))))
    30943128  (or *scheme-current-implementation*
    30953129      scheme-default-implementation))
     
    32073241    (reverse vars)))
    32083242
    3209 (defun scheme-extract-import-module-name (sexp &optional mzschemep)
    3210   (case (car sexp)
    3211     ((prefix)
    3212      (scheme-extract-import-module-name
    3213       (if mzschemep (caddr sexp) (cadr sexp))))
    3214     ((prefix-all-except)
    3215      (scheme-extract-import-module-name (caddr sexp)))
    3216     ((for only except rename lib library)
    3217      (scheme-extract-import-module-name (cadr sexp) mzschemep))
    3218     ((import)
    3219      (scheme-extract-import-module-name (cadr sexp) mzschemep))
    3220     ((require)
    3221      (scheme-extract-import-module-name (cadr sexp) t))
    3222     (t sexp)))
    3223 
    3224 (defun scheme-extract-import-module-imports (sexp &optional mzschemep)
    3225   (case (car sexp)
    3226     ((prefix)
    3227      (let* ((ids (scheme-extract-import-module-name
    3228                   (if mzschemep (caddr sexp) (cadr sexp))
    3229                   mzschemep))
    3230             (prefix0 (if mzschemep (cadr sexp) (caddr sexp)))
     3243(defun scheme-extract-import-module-imports (sexp)
     3244  (case (and (consp sexp) (car sexp))
     3245    ((prefix prefix-in)
     3246     (let* ((ids (scheme-extract-import-module-imports (cadr sexp)))
     3247            (prefix0 (caddr sexp))
    32313248            (prefix (if (symbolp prefix0) (symbol-name prefix0) prefix0)))
    3232        (mapcar #'(lambda (x) (intern (concat prefix (symbol-name x)))) ids)))
     3249       (mapcar #'(lambda (x)
     3250                   (cons (intern (concat prefix (symbol-name (car x))))
     3251                         (cdr x)))
     3252               ids)))
    32333253    ((prefix-all-except)
    32343254     (let ((prefix
     
    32363256           (exceptions (cddr sexp)))
    32373257       (mapcar #'(lambda (x)
    3238                    (if (memq x exceptions)
     3258                   (if (memq (car x) exceptions)
    32393259                       x
    3240                      (intern (concat prefix (symbol-name x)))))
    3241                (scheme-extract-import-module-name (caddr sexp) t))))
    3242     ((for)
    3243      (scheme-extract-import-module-name (cadr sexp) mzschemep))
    3244     ((rename)
    3245      (if mzschemep
    3246          (list (caddr sexp))
    3247        (mapcar 'cadr (cddr sexp))))
    3248     ((except)
    3249      (remove-if #'(lambda (x) (memq x (cddr sexp)))
    3250                 (scheme-extract-import-module-imports (cadr sexp) mzschemep)))
    3251     ((only)
    3252      (cddr sexp))
    3253     ((import)
    3254      (scheme-extract-import-module-imports (cadr sexp) mzschemep))
    3255     ((require for-syntax)
    3256      (scheme-extract-import-module-imports (cadr sexp) t))
     3260                     (cons (intern (concat prefix (symbol-name (car x))))
     3261                           (cdr x))))
     3262               (scheme-extract-import-module-imports (caddr sexp)))))
     3263    ((for for-syntax for-template for-label for-meta)
     3264     (scheme-extract-import-module-imports (cadr sexp)))
     3265    ((rename rename-in)
     3266     (let ((renames (cddr sexp)))
     3267       (mapcar #'(lambda (x)
     3268                   (cons (or (cadr (assq (car x) renames)) (car x)) (cdr x)))
     3269               (scheme-extract-import-module-imports (cadr sexp)))))
     3270    ((except except-in)
     3271     (remove-if #'(lambda (x) (memq (car x) (cddr sexp)))
     3272                (scheme-extract-import-module-imports (cadr sexp))))
     3273    ((only only-in)
     3274     (remove-if-not
     3275      #'(lambda (x) (memq (car x) (cddr sexp)))
     3276      (scheme-extract-import-module-imports (cadr sexp))))
     3277    ((import import-for-syntax require)
     3278     (scheme-extract-import-module-imports (cadr sexp)))
    32573279    ((library)
    32583280     (if (and (stringp (cadr sexp)) (file-exists-p (cadr sexp)))
     
    32663288       (scheme-module-exports
    32673289        (intern (apply 'concat (append (cddr sexp) (list (cadr sexp))))))))
    3268     (t sexp)))
     3290    (t
     3291     (scheme-module-exports sexp))))
    32693292
    32703293(defun scheme-extract-sexp-imports (sexp)
    3271   (case (car sexp)
    3272     ((begin)
     3294  (case (and (consp sexp) (car sexp))
     3295    ((begin define-module)
    32733296     (scheme-append-map #'scheme-extract-sexp-imports (cdr sexp)))
    32743297    ((cond-expand)
     
    32773300    ((use require-extension)
    32783301     (scheme-append-map #'scheme-module-exports (cdr sexp)))
     3302    ((import)
     3303     (scheme-extract-import-module-imports (cadr sexp)))
    32793304    ((autoload)
    32803305     (unless (member (cadr sexp) *scheme-imported-modules*)
     
    32903315    ((library module)
    32913316     (scheme-append-map #'scheme-extract-import-module-imports
    3292                  (remove-if #'(lambda (x) (memq (car x) '(import require)))
    3293                             (cdr sexp))))
    3294     (t '())))
     3317                        (remove-if #'(lambda (x)
     3318                                       (memq (car x) '(import require)))
     3319                                   (cdr sexp))))
     3320    ))
    32953321
    32963322(defun scheme-module-symbol-p (sym)
    32973323  (memq sym '(use require require-extension begin cond-expand
    3298               module library define-module autoload load)))
     3324              module library define-module autoload load import)))
    32993325
    33003326(defun scheme-skip-shebang ()
     
    33593385                                          (scheme-symbol-at-point))))))
    33603386      (case sym
    3361         ((define-syntax defmacro define-macro)
     3387        ((define-syntax define-compiled-syntax defmacro define-macro)
    33623388         (list (list (scheme-name-of-define) '(syntax))))
    33633389        ((define define-inline define-constant define-primitive defun)
     
    34363462    defs))
    34373463
     3464(defun scheme-current-exports ()
     3465  (let ((res '()))
     3466    (save-excursion
     3467      (goto-char (point-min))
     3468      (or (ignore-errors (end-of-defun) (beginning-of-defun) t)
     3469          (re-search-forward "^(" nil t)
     3470          (goto-char (point-max)))
     3471      (while (not (eobp))
     3472        (when (and (eq ?\( (char-syntax (char-after)))
     3473                   (eq ?w (char-syntax (char-after (1+ (point))))))
     3474          (let ((sym (save-excursion (forward-char) (scheme-symbol-at-point))))
     3475            (case sym
     3476              ((declare define-module)
     3477               (let ((decls (scheme-nth-sexp-at-point 0)))
     3478                 (cond
     3479                  ((and (listp decls) (assq 'export decls))
     3480                   (setq res (nconc (cdr (assq 'export decls)) res)))
     3481                  ((and (listp decls) (assq 'export-all decls))
     3482                   (goto-char (point-max))))))
     3483              ((export provide)
     3484               (unless (and (eq 'provide sym)
     3485                            (eq 'chicken (scheme-current-implementation)))
     3486                 (setq res (nconc (cdr (scheme-nth-sexp-at-point 0)) res))))
     3487              ((export-all)
     3488               (goto-char (point-max)))
     3489              ((extend)
     3490               (let ((parents (cdr (scheme-nth-sexp-at-point 0))))
     3491                 (setq res (nconc (mapcar #'car
     3492                                          (scheme-append-map
     3493                                           #'scheme-module-exports
     3494                                           parents))
     3495                                  res))))
     3496              ((module)
     3497               (forward-char)
     3498               (forward-sexp)
     3499               (let ((x (scheme-nth-sexp-at-point 0)))
     3500                 (cond
     3501                  ((eq '* x)
     3502                   (goto-char (point-max)))
     3503                  ((listp x)
     3504                   (setq res
     3505                         (nconc (remove-if-not #'symbolp (cdr x)) res))))))
     3506              )))
     3507        (scheme-goto-next-top-level)))
     3508    res))
     3509
    34383510(defun scheme-srfi-exports (i)
    34393511  (and (integerp i)
     
    34453517           info))))
    34463518
     3519(defvar scheme-module-exports-function nil)
     3520
     3521(defvar *scheme-module-exports-functions*
     3522  '((chicken  . scheme-module-exports/chicken)
     3523    (gauche   . scheme-module-exports/gauche)
     3524    (mzscheme . scheme-module-exports/mzscheme)))
     3525
    34473526(defun scheme-module-exports (mod)
    34483527  (unless (member mod *scheme-imported-modules*)
     
    34513530     ((and (consp mod) (eq 'srfi (car mod)))
    34523531      (scheme-append-map #'scheme-srfi-exports (cdr mod)))
    3453      ((not (symbolp mod))
    3454       '())
    3455      ((string-match "^srfi-" (symbol-name mod))
     3532     ((and (symbolp mod) (string-match "^srfi-" (symbol-name mod)))
    34563533      (scheme-srfi-exports
    34573534       (string-to-number (substring (symbol-name mod) 5))))
    34583535     (t
    3459       (case (scheme-current-implementation)
    3460         ((chicken)
    3461          (let ((predefined (assq mod *scheme-chicken-modules*)))
    3462            (if predefined
    3463                (cdr predefined)
    3464              (mapcar
    3465               #'(lambda (x) (cons x '((lambda obj))))
    3466               (or (mapcar #'intern
    3467                           (scheme-file->lines
    3468                            (concat "/usr/local/lib/chicken/3/"
    3469                                    (symbol-name mod)
    3470                                    ".exports")))
    3471                   (let ((setup-info (concat "/usr/local/lib/chicken/3/"
    3472                                             (symbol-name mod)
    3473                                             ".setup-info")))
    3474                     (and (file-exists-p setup-info)
    3475                          (scheme-with-find-file setup-info
    3476                            (let* ((alist (scheme-nth-sexp-at-point 0))
    3477                                   (cell (assq 'exports alist)))
    3478                              (cdr cell))))))))))
    3479         ((gauche)
    3480          (let ((path (scheme-find-file-in-path
    3481                       (concat (subst-char-in-string ?. ?/ (symbol-name mod))
    3482                               ".scm")
    3483                       (list (concat
    3484                              (car (directory-files
    3485                                    "/usr/local/share/gauche/"
    3486                                    t
    3487                                    "^[0-9]"))
    3488                              "/lib")
    3489                             "/usr/local/share/gauche/site/lib"))))
    3490            (if (not (file-exists-p path))
    3491                '()
    3492              ;; XXXX parse, don't use regexps
    3493              (scheme-with-find-file path
    3494                (when (re-search-forward "(export" nil t)
    3495                  (backward-sexp)
    3496                  (backward-char)
    3497                  (mapcar #'list (cdr (ignore-errors
    3498                                        (scheme-nth-sexp-at-point 0)))))))))
    3499         ((mzscheme)
    3500          (let ((path (scheme-find-file-in-path
    3501                       (symbol-name mod)
    3502                       '("."
    3503                         "/usr/local/lib/plt/collects"
    3504                         "/usr/local/lib/plt/collects/mzlib"))))
    3505            (if (not (file-exists-p path))
    3506                '()
    3507              ;; XXXX parse, don't use regexps
    3508              (scheme-with-find-file path
    3509                (when (re-search-forward "(provide" nil t)
    3510                  (backward-sexp)
    3511                  (backward-char)
    3512                  (mapcar #'list (cdr (ignore-errors
    3513                                        (scheme-nth-sexp-at-point 0)))))))))
    3514         (t '()))))))
     3536      (let ((cached (assq mod *scheme-complete-module-cache*)))
     3537        ;; remove stale caches
     3538        (when (and cached
     3539                   (stringp (cadr cached))
     3540                   (ignore-errors
     3541                     (let ((mtime (nth 5 (file-attributes (cadr cached))))
     3542                           (ptime (caddr cached)))
     3543                       (or (> (car mtime) (car ptime))
     3544                           (and (= (car mtime) (car ptime))
     3545                                (>= (cadr mtime) (cadr ptime)))))))
     3546          (setq *scheme-complete-module-cache*
     3547                (assq-delete-all mod *scheme-complete-module-cache*))
     3548          (setq cached nil))
     3549        (if cached
     3550            (cadddr cached)
     3551          ;; (re)compute module exports
     3552          (let ((export-fun
     3553                 (or scheme-module-exports-function
     3554                     (cdr (assq (scheme-current-implementation)
     3555                                *scheme-module-exports-functions*)))))
     3556            (when export-fun
     3557              (let ((res (funcall export-fun mod)))
     3558                (when res
     3559                  (when (and scheme-complete-cache-p (car res))
     3560                    (push (list mod
     3561                                (car res)
     3562                                (nth 5 (file-attributes (car res)))
     3563                                (cadr res))
     3564                          *scheme-complete-module-cache*))
     3565                  (cadr res)))))))))))
     3566
     3567(defun scheme-module-exports/chicken (mod)
     3568  (let ((predefined (assq mod *scheme-chicken-modules*)))
     3569    (if predefined
     3570        (list nil (cdr predefined))
     3571      (let ((export-file
     3572             (concat *scheme-chicken-base-repo* "/"
     3573                     (symbol-name mod) ".exports"))
     3574            (setup-file
     3575             (concat *scheme-chicken-base-repo* "/"
     3576                     (symbol-name mod) ".setup-info"))
     3577            (source-file
     3578             (concat (symbol-name mod) ".scm")))
     3579        (cond
     3580         ((file-exists-p source-file)
     3581          (list source-file
     3582                (scheme-with-find-file source-file
     3583                  (let ((env (scheme-current-globals))
     3584                        (exports (scheme-current-exports)))
     3585                    (if (consp exports)
     3586                        (remove-if-not #'(lambda (x) (memq (car x) exports)) env)
     3587                        env)))))
     3588         ((file-exists-p export-file)
     3589          (list export-file
     3590                (mapcar #'(lambda (x) (cons (intern x) '((lambda obj))))
     3591                        (scheme-file->lines export-file))))
     3592         ((file-exists-p setup-file)
     3593          (list setup-file
     3594                (mapcar #'(lambda (x) (cons (intern x) '((lambda obj))))
     3595                        (scheme-with-find-file setup-file
     3596                          (let* ((alist (scheme-nth-sexp-at-point 0))
     3597                                 (cell (assq 'exports alist)))
     3598                            (cdr cell))))))
     3599         )))))
     3600
     3601(defun scheme-module-exports/gauche (mod)
     3602  (let* ((file (concat (subst-char-in-string ?. ?/ (symbol-name mod)) ".scm"))
     3603         (dir
     3604          (scheme-find-file-in-path
     3605           file
     3606           (cons
     3607            (concat *scheme-gauche-site-repo-path* "/site/lib")
     3608            (mapcar
     3609             #'(lambda (x) (concat x "/lib"))
     3610             (reverse
     3611              (directory-files *scheme-gauche-repo-path* t "^[0-9]")))))))
     3612    (when dir
     3613      (list
     3614       (concat dir "/" file)
     3615       (scheme-with-find-file (concat dir "/" file)
     3616         (let ((env (scheme-current-globals))
     3617               (exports (scheme-current-exports)))
     3618           (if (consp exports)
     3619               (remove-if-not #'(lambda (x) (memq (car x) exports)) env)
     3620             env)))))))
     3621
     3622(defun scheme-module-exports/mzscheme (mod)
     3623  (let ((dir (scheme-find-file-in-path
     3624              (symbol-name mod)
     3625              '("."
     3626                "/usr/local/lib/plt/collects"
     3627                "/usr/local/lib/plt/collects/mzlib"))))
     3628    (when dir
     3629      ;; XXXX parse, don't use regexps
     3630      (list
     3631       (concat dir "/" (symbol-name mod))
     3632       (scheme-with-find-file (concat dir "/" (symbol-name mod))
     3633         (when (re-search-forward "(provide" nil t)
     3634           (backward-sexp)
     3635           (backward-char)
     3636           (mapcar #'list (cdr (ignore-errors (scheme-nth-sexp-at-point 0))))
     3637           ))))))
    35153638
    35163639;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     
    35383661      ((dir dirname) 'directory)
    35393662      ((sym id identifier) 'symbol)
    3540       ((ls alist lists) 'list)
     3663      ((ls lis lst alist lists) 'list)
    35413664      ((vec) 'vector)
    35423665      ((exc excn err error) 'exception)
     
    40334156
    40344157;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     4158;; optional indentation handling
     4159
     4160(defvar calculate-lisp-indent-last-sexp)
     4161
     4162;; Copied from scheme-indent-function, but ignore
     4163;; scheme-indent-function property for local variables.
     4164(defun scheme-smart-indent-function (indent-point state)
     4165  (let ((normal-indent (current-column)))
     4166    (goto-char (1+ (elt state 1)))
     4167    (parse-partial-sexp (point) calculate-lisp-indent-last-sexp 0 t)
     4168    (if (and (elt state 2)
     4169             (not (looking-at "\\sw\\|\\s_")))
     4170        ;; car of form doesn't seem to be a symbol
     4171        (progn
     4172          (if (not (> (save-excursion (forward-line 1) (point))
     4173                      calculate-lisp-indent-last-sexp))
     4174              (progn (goto-char calculate-lisp-indent-last-sexp)
     4175                     (beginning-of-line)
     4176                     (parse-partial-sexp (point)
     4177                                         calculate-lisp-indent-last-sexp 0 t)))
     4178          ;; Indent under the list or under the first sexp on the same
     4179          ;; line as calculate-lisp-indent-last-sexp.  Note that first
     4180          ;; thing on that line has to be complete sexp since we are
     4181          ;; inside the innermost containing sexp.
     4182          (backward-prefix-chars)
     4183          (current-column))
     4184      (let* ((function (buffer-substring (point)
     4185                                         (progn (forward-sexp 1) (point))))
     4186             (function-sym (intern-soft function))
     4187             (method (and (not (assq function-sym (scheme-current-local-vars)))
     4188                          (get function-sym 'scheme-indent-function))))
     4189        (cond ((or (eq method 'defun)
     4190                   (and (null method)
     4191                        (> (length function) 3)
     4192                        (string-match "\\`def" function)))
     4193               (lisp-indent-defform state indent-point))
     4194              ((integerp method)
     4195               (lisp-indent-specform method state
     4196                                     indent-point normal-indent))
     4197              (method
     4198               (funcall method state indent-point normal-indent)))))))
     4199
     4200;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    40354201;; optional eldoc function
    40364202
  • chicken/trunk/srfi-69.scm

    r12021 r12117  
    4444      ##sys#make-structure
    4545      ##sys#size
    46       ##sys#slot ##sys#setslot
    47       ##srfi-69#%equal?-hash ) ) ] )
     46      ##sys#slot ##sys#setslot ) ) ] )
    4847
    4948(declare
    5049  (hide
    5150    %object-uid-hash %eq?-hash %eqv?-hash %equal?-hash
    52     %hash-table-copy %hash-table-ref %hash-table-update! %hash-table-merge!
     51    %hash-table-copy %hash-table-merge!
    5352    %hash-table-for-each %hash-table-fold
     53    hash-table-canonical-length
    5454    %hash-table-rehash! %hash-table-check-resize!
    55     %hash-table-update!/default
    56     hash-table-canonical-length hash-table-rehash) )
     55    %hash-table-update!/default) )
    5756
    5857(include "unsafe-declarations.scm")
     
    6362;;; Core Inlines:
    6463
    65 (define-inline ($quick-flonum-truncate ?flo)
    66   (##core#inline "C_quickflonumtruncate" ?flo) )
    67 
    68 (define-inline ($fix ?wrd)
    69   (##core#inline "C_fix" ?wrd) )
    70 
    71 (define-inline ($block? ?obj)
    72   (##core#inline "C_blockp" ?obj) )
    73 
    74 (define-inline ($special? ?obj)
    75   (##core#inline "C_specialp" ?obj) )
    76 
    77 (define-inline ($port? ?obj)
    78   (##core#inline "C_portp" ?obj) )
    79 
    80 (define-inline ($byte-block? ?obj)
    81   (##core#inline "C_byteblockp" ?obj) )
    82 
    83 (define-inline ($hash-string ?str)
    84   (##core#inline "C_hash_string" ?str) )
    85 
    86 (define-inline ($hash-string-ci ?str)
    87   (##core#inline "C_hash_string_ci" ?str) )
     64(define-inline ($fix wrd)
     65  (##core#inline "C_fix" wrd) )
     66
     67(define-inline ($block? obj)
     68  (##core#inline "C_blockp" obj) )
     69
     70(define-inline ($special? obj)
     71  (##core#inline "C_specialp" obj) )
     72
     73(define-inline ($port? obj)
     74  (##core#inline "C_portp" obj) )
     75
     76(define-inline ($byte-block? obj)
     77  (##core#inline "C_byteblockp" obj) )
     78
     79(define-inline ($string-hash str)
     80  (##core#inline "C_hash_string" str) )
     81
     82(define-inline ($string-ci-hash str)
     83  (##core#inline "C_hash_string_ci" str) )
    8884
    8985
    9086;;;
    9187
    92 (define-inline ($immediate? ?obj)
    93   (not ($block? ?obj)) )
     88(define-inline ($immediate? obj)
     89  (not ($block? obj)) )
    9490
    9591
     
    9793
    9894;; Naming Conventions:
    99 ;; $foo - macro
    100 ;; $*foo - local macro (no such thing but at least it looks different)
     95;; $foo - inline primitive
     96;; $*foo - local inline (no such thing but at least it looks different)
    10197;; %foo - private, usually unchecked, procedure
    10298;; ##sys#foo - public, but undocumented, un-checked procedure
     
    125121;; Force Hash to Bounded Fixnum:
    126122
    127 (define-inline ($fxabs ?fxn)
    128   (let ([_fxn ?fxn]) (if (fx< _fxn 0) (fxneg _fxn) _fxn ) ) )
    129 
    130 (define-inline ($hash/limit ?hsh ?lim)
     123(define-inline ($fxabs fxn)
     124  (let ([_fxn fxn]) (if (fx< _fxn 0) (fxneg _fxn) _fxn ) ) )
     125
     126(define-inline ($hash/limit hsh lim)
    131127  (fxmod (fxand (foreign-value "C_MOST_POSITIVE_FIXNUM" int)
    132                 ($fxabs ?hsh))
    133          ?lim) )
     128                ($fxabs hsh))
     129         lim) )
    134130
    135131;; Number Hash:
     
    137133(define-constant flonum-magic 331804471)
    138134
    139 #| Not sure which is "better"; went with speed
    140 (define-macro ($subbyte ?bytvec ?i)
    141   `(##core#inline "C_subbyte" ,?bytvec ,?i) )
    142 
    143 (define-macro ($hash-flonum ?flo)
    144   `(fx* flonum-magic
    145         ,(let loop ([idx (fx- (##sys#size 1.0) 1)])
    146             (if (fx= 0 idx)
    147                 `($subbyte ,?flo 0)
    148                 `(fx+ ($subbyte ,?flo ,idx)
    149                       (fxshl ,(loop (fx- idx 1)) 1))))) )
    150 |#
    151 
    152 (define-inline ($hash-flonum ?flo)
    153   (fx* flonum-magic ($quick-flonum-truncate ?flo)) )
     135(define-inline ($subbyte bytvec i)
     136  (##core#inline "C_subbyte" bytvec i) )
     137
     138(define-syntax $flonum-hash
     139  (lambda (form r c)
     140    (let ( (flo (cadr form))
     141           (%$subbyte (r '$subbyte))
     142           (%flonum-magic (r 'flonum-magic))
     143           (%fx+ (r 'fx+))
     144           (%fx* (r 'fx*))
     145           (%fxshl (r 'fxshl)) )
     146    `(,%fx* ,%flonum-magic
     147            ,(let loop ( (idx (fx- (##sys#size 1.0) 1)) )
     148               (if (fx= 0 idx)
     149                   `(,%$subbyte ,flo 0)
     150                   `(,%fx+ (,%$subbyte ,flo ,idx)
     151                           (,%fxshl ,(loop (fx- idx 1)) 1)) ) ) ) ) ) )
    154152
    155153(define (##sys#number-hash-hook obj)
    156154  (%equal?-hash obj) )
    157155
    158 (define-inline ($non-fixnum-number-hash ?obj)
    159   (cond [(flonum? obj)  ($hash-flonum ?obj)]
    160         [else           ($fix (##sys#number-hash-hook ?obj))] ) )
    161 
    162 (define-inline ($number-hash ?obj)
     156(define-inline ($non-fixnum-number-hash obj)
     157  (cond [(flonum? obj)  ($flonum-hash obj)]
     158        [else           ($fix (##sys#number-hash-hook obj))] ) )
     159
     160(define-inline ($number-hash obj)
    163161  (cond [(fixnum? obj)  ?obj]
    164         [else           ($non-fixnum-number-hash ?obj)] ) )
     162        [else           ($non-fixnum-number-hash obj)] ) )
    165163
    166164(define (number-hash obj #!optional (bound hash-default-bound))
     
    186184
    187185#; ;NOT YET (no unique-symbol-hash)
    188 (define-macro ($symbol-hash ?obj)
    189   `(##sys#slot ,?obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-SYMBOL-CREATION) )
    190 
    191 (define-inline ($symbol-hash ?obj)
    192   ($hash-string (##sys#slot ?obj 1)) )
     186(define-inline ($symbol-hash obj)
     187  (##sys#slot obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-SYMBOL-CREATION) )
     188
     189(define-inline ($symbol-hash obj)
     190  ($string-hash (##sys#slot obj 1)) )
    193191
    194192(define (symbol-hash obj #!optional (bound hash-default-bound))
    195193  (##sys#check-symbol obj 'symbol-hash)
    196   (##sys#check-exact bound 'string-hash)
     194  (##sys#check-exact bound 'symbol-hash)
    197195  ($hash/limit ($symbol-hash obj) bound) )
    198196
     
    206204
    207205#; ;NOT YET (no unique-keyword-hash)
    208 (define-macro ($keyword-hash ?obj)
    209   `(##sys#slot ,?obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-KEYWORD-CREATION) )
    210 
    211 (define-inline ($keyword-hash ?obj)
    212   ($hash-string (##sys#slot ?obj 1)) )
     206(define-inline ($keyword-hash obj)
     207  (##sys#slot obj INDEX-OF-UNIQUE-HASH-VALUE-COMPUTED-DURING-KEYWORD-CREATION) )
     208
     209(define-inline ($keyword-hash obj)
     210  ($string-hash (##sys#slot obj 1)) )
    213211
    214212(define (keyword-hash obj #!optional (bound hash-default-bound))
     
    219217;; Eq Hash:
    220218
    221 (define-inline ($eq?-hash-object? ?obj)
    222   (or ($immediate? ?obj)
    223        (symbol? ?obj)
     219(define-inline ($eq?-hash-object? obj)
     220  (or ($immediate? obj)
     221       (symbol? obj)
    224222       #; ;NOT YET (no keyword vs. symbol issue)
    225        (keyword? ?obj) ) )
     223       (keyword? obj) ) )
    226224
    227225(define (%eq?-hash obj)
     
    246244;; Eqv Hash:
    247245
    248 (define-inline ($eqv?-hash-object? ?obj)
    249   (or ($eq?-hash-object? ?obj)
    250        (number? ?obj)) )
     246(define-inline ($eqv?-hash-object? obj)
     247  (or ($eq?-hash-object? obj)
     248       (number? obj)) )
    251249
    252250(define (%eqv?-hash obj)
     
    274272(define-constant recursive-hash-max-length 4)
    275273
    276 (define-inline ($*list-hash ?obj)
    277   (fx+ (length ?obj)
    278        (recursive-atomic-hash (##sys#slot ?obj 0) depth)) )
    279 
    280 (define-inline ($*pair-hash ?obj)
    281   (fx+ (fxshl (recursive-atomic-hash (##sys#slot ?obj 0) depth) 16)
    282         (recursive-atomic-hash (##sys#slot ?obj 1) depth)) )
    283 
    284 (define-inline ($*port-hash ?obj)
    285   (fx+ (fxshl (##sys#peek-fixnum ?obj 0) 4) ; Little extra "identity"
    286         (if (input-port? ?obj)
     274(define-inline ($*list-hash obj)
     275  (fx+ (length obj)
     276       (recursive-atomic-hash (##sys#slot obj 0) depth)) )
     277
     278(define-inline ($*pair-hash obj)
     279  (fx+ (fxshl (recursive-atomic-hash (##sys#slot obj 0) depth) 16)
     280        (recursive-atomic-hash (##sys#slot obj 1) depth)) )
     281
     282(define-inline ($*port-hash obj)
     283  (fx+ (fxshl (##sys#peek-fixnum obj 0) 4) ; Little extra "identity"
     284        (if (input-port? obj)
    287285            input-port-hash-value
    288286            output-port-hash-value)) )
    289287
    290 (define-inline ($*special-vector-hash ?obj)
    291   (vector-hash ?obj (##sys#peek-fixnum ?obj 0) depth 1) )
    292 
    293 (define-inline ($*regular-vector-hash ?obj)
    294   (vector-hash ?obj 0 depth 0) )
     288(define-inline ($*special-vector-hash obj)
     289  (vector-hash obj (##sys#peek-fixnum obj 0) depth 1) )
     290
     291(define-inline ($*regular-vector-hash obj)
     292  (vector-hash obj 0 depth 0) )
    295293
    296294(define (%equal?-hash obj)
     
    332330          [(number? obj)          ($non-fixnum-number-hash obj)]
    333331          [($immediate? obj)      unknown-immediate-hash-value]
    334           [($byte-block? obj)     ($hash-string obj)]
     332          [($byte-block? obj)     ($string-hash obj)]
    335333          [(list? obj)            ($*list-hash obj)]
    336334          [(pair? obj)            ($*pair-hash obj)]
     
    353351  (##sys#check-string str 'string-hash)
    354352  (##sys#check-exact bound 'string-hash)
    355   ($hash/limit ($hash-string str) bound) )
     353  ($hash/limit ($string-hash str) bound) )
    356354
    357355(define (string-ci-hash str #!optional (bound hash-default-bound))
    358356  (##sys#check-string str 'string-ci-hash)
    359357  (##sys#check-exact bound 'string-ci-hash)
    360   ($hash/limit ($hash-string-ci str) bound) )
     358  ($hash/limit ($string-ci-hash str) bound) )
    361359
    362360
Note: See TracChangeset for help on using the changeset viewer.