src/src/pcre/pcre_exec.c

   1 /* $Cambridge: exim/src/src/pcre/pcre_exec.c,v 1.6 2007/11/12 13:02:19 nm4 Exp $ */
   2
   3 /*************************************************
   4 *      Perl-Compatible Regular Expressions       *
   5 *************************************************/
   6
   7 /* PCRE is a library of functions to support regular expressions whose syntax
   8 and semantics are as close as possible to those of the Perl 5 language.
   9
  10                        Written by Philip Hazel
  11            Copyright (c) 1997-2007 University of Cambridge
  12
  13 -----------------------------------------------------------------------------
  14 Redistribution and use in source and binary forms, with or without
  15 modification, are permitted provided that the following conditions are met:
  16
  17     * Redistributions of source code must retain the above copyright notice,
  18       this list of conditions and the following disclaimer.
  19
  20     * Redistributions in binary form must reproduce the above copyright
  21       notice, this list of conditions and the following disclaimer in the
  22       documentation and/or other materials provided with the distribution.
  23
  24     * Neither the name of the University of Cambridge nor the names of its
  25       contributors may be used to endorse or promote products derived from
  26       this software without specific prior written permission.
  27
  28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 POSSIBILITY OF SUCH DAMAGE.
  39 -----------------------------------------------------------------------------
  40 */
  41
  42
  43 /* This module contains pcre_exec(), the externally visible function that does
  44 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  45 possible. There are also some static supporting functions. */
  46
  47 #ifdef HAVE_CONFIG_H
  48 #include "config.h"
  49 #endif
   50 /* Set before including pcre_internal.h; presumably its macros use them - confirm */
   51 #define NLBLOCK md             /* Block containing newline information */
   52 #define PSSTART start_subject  /* Field containing processed string start */
   53 #define PSEND   end_subject    /* Field containing processed string end */
  54
  55 #include "pcre_internal.h"
  56
   57 /* Undefine some potentially clashing cpp symbols: match() below uses min and
   58 max as the names of its own variables (or as aliases for frame fields). */
   59 #undef min
   60 #undef max
  61
   62 /* Flag bits for the match() function */
   63
   64 #define match_condassert     0x01  /* Called to check a condition assertion */
   65 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
   66
   67 /* Non-error returns from the match() function. Error returns are externally
   68 defined PCRE_ERROR_xxx codes, which are all negative. */
   69
   70 #define MATCH_MATCH        1       /* Reached the end of the pattern */
   71 #define MATCH_NOMATCH      0       /* This path failed; alternatives may be tried */
   72
   73 /* Special internal returns from the match() function. Make them sufficiently
   74 negative to avoid the external error codes. Each is returned by the handler of
   75 the like-named opcode when the rest of the pattern gives MATCH_NOMATCH. */
   76 #define MATCH_COMMIT       (-999)
   77 #define MATCH_PRUNE        (-998)
   78 #define MATCH_SKIP         (-997)  /* Handler also sets md->start_match_ptr */
   79 #define MATCH_THEN         (-996)  /* Bracket alternation treats it like NOMATCH */
   80
   81 /* Maximum number of ints of offset to save on the stack for recursive calls.
   82 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
   83 because the offset vector is always a multiple of 3 long. */
   84
   85 #define REC_STACK_SAVE_MAX 30      /* Size of stacksave[] in match() */
  86
   87 /* Min and max values for the common repeats; for the maxima, 0 => infinity.
   88 Entries pair as 0-inf, 0-inf, 1-inf, 1-inf, 0-1, 0-1 (presumably the greedy and lazy forms of star, plus and query - TODO confirm against the opcode order). */
   89 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
   90 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
  91
  92
  93
  94 #ifdef DEBUG
  95 /*************************************************
  96 *        Debugging function to print chars       *
  97 *************************************************/
  98
   99 /* Print a sequence of chars in printable format (non-printing ones appear as
  100 \xhh hex escapes), stopping at the end of the subject if requested.
  101
  102 Arguments:
  103   p           points to characters
  104   length      number to print; clamped to md->end_subject - p if is_subject
  105   is_subject  TRUE if printing from within md->start_subject
  106   md          pointer to matching data block; only read if is_subject is TRUE
  107
  108 Returns:     nothing
  109 */
  110
  111 static void
  112 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
  113 {
  114 unsigned int c;   /* Current character, fetched once for isprint() and printf() */
  115 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
  116 while (length-- > 0)
  117   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
  118 }
 119 #endif
 120
 121
 122
 123 /*************************************************
 124 *          Match a back-reference                *
 125 *************************************************/
 126
  127 /* If a back reference hasn't been set, the length that is passed is greater
  128 than the number of characters left in the string, so the match fails.
  129
  130 Arguments:
  131   offset      index into the offset vector (locates the referenced text)
  132   eptr        points into the subject
  133   length      length to be matched
  134   md          points to match data block
  135   ims         the ims flags; PCRE_CASELESS selects comparison via md->lcc
  136
  137 Returns:      TRUE if matched; FALSE on a mismatch or if too few chars remain
  138 */
  139
  140 static BOOL
  141 match_ref(int offset, register USPTR eptr, int length, match_data *md,
  142   unsigned long int ims)
  143 {
  144 USPTR p = md->start_subject + md->offset_vector[offset];
  145 /* p now addresses the referenced text, which lies inside the subject itself */
  146 #ifdef DEBUG
  147 if (eptr >= md->end_subject)
  148   printf("matching subject <null>");
  149 else
  150   {
  151   printf("matching subject ");
  152   pchars(eptr, length, TRUE, md);
  153   }
  154 printf(" against backref ");
  155 pchars(p, length, FALSE, md);
  156 printf("\n");
  157 #endif
  158
  159 /* Always fail if not enough characters left. This is also the test that
  160 rejects an unset back reference, given the oversized length passed for it. */
  161 if (length > md->end_subject - eptr) return FALSE;
  162
  163 /* Separate the caseless case for speed; it compares both sides through the
  164 md->lcc table. */
  165 if ((ims & PCRE_CASELESS) != 0)
  166   {
  167   while (length-- > 0)
  168     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
  169   }
  170 else
  171   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
  172
  173 return TRUE;
  174 }
 175
 176
 177
 178 /***************************************************************************
 179 ****************************************************************************
 180                    RECURSION IN THE match() FUNCTION
 181
 182 The match() function is highly recursive, though not every recursive call
 183 increases the recursive depth. Nevertheless, some regular expressions can cause
 184 it to recurse to a great depth. I was writing for Unix, so I just let it call
 185 itself recursively. This uses the stack for saving everything that has to be
 186 saved for a recursive call. On Unix, the stack can be large, and this works
 187 fine.
 188
 189 It turns out that on some non-Unix-like systems there are problems with
 190 programs that use a lot of stack. (This despite the fact that every last chip
 191 has oodles of memory these days, and techniques for extending the stack have
 192 been known for decades.) So....
 193
 194 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 195 calls by keeping local variables that need to be preserved in blocks of memory
  196 obtained from malloc() instead of on the stack. Macros are used to
 197 achieve this so that the actual code doesn't look very different to what it
 198 always used to.
 199
 200 The original heap-recursive code used longjmp(). However, it seems that this
 201 can be very slow on some operating systems. Following a suggestion from Stan
 202 Switzer, the use of longjmp() has been abolished, at the cost of having to
 203 provide a unique number for each call to RMATCH. There is no way of generating
 204 a sequence of numbers at compile time in C. I have given them names, to make
 205 them stand out more clearly.
 206
 207 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
 208 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
 209 tests. Furthermore, not using longjmp() means that local dynamic variables
 210 don't have indeterminate values; this has meant that the frame size can be
 211 reduced because the result can be "passed back" by straight setting of the
 212 variable instead of being passed in the frame.
 213 ****************************************************************************
 214 ***************************************************************************/
 215
  216 /* Numbers for RMATCH calls: each call site passes its own RMn as the last
  217 argument. In the NO_RECURSE build it is saved in Xwhere and forms the L_RMn
  218 label name. When this list is changed, update HEAP_RETURN below in sync. */
  219 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  220        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  221        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  222        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  223        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  224        RM51,  RM52, RM53, RM54 };
 225
  226 /* These versions of the macros use the stack, as normal. There are debugging
  227 versions and production versions. Note that the "rw" argument of RMATCH isn't
  228 actually used in this definition. RMATCH leaves the callee's result in rrc;
  229 mstart is passed through unchanged and the depth argument is rdepth+1. */
  230 #ifndef NO_RECURSE
  231 #define REGISTER register
  232
  233 #ifdef DEBUG
  234 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  235   { \
  236   printf("match() called in line %d\n", __LINE__); \
  237   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
  238   printf("to line %d\n", __LINE__); \
  239   }
  240 #define RRETURN(ra) \
  241   { \
  242   printf("match() returned %d from line %d ", ra, __LINE__); \
  243   return ra; \
  244   }
  245 #else
  246 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  247   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
  248 #define RRETURN(ra) return ra
  249 #endif
 250
 251 #else
 252
 253
 254 /* These versions of the macros manage a private stack on the heap. Note that
 255 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
 256 argument of match(), which never changes. */
 257
 258 #define REGISTER
 259
 260 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
 261   {\
 262   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
 263   frame->Xwhere = rw; \
 264   newframe->Xeptr = ra;\
 265   newframe->Xecode = rb;\
 266   newframe->Xmstart = mstart;\
 267   newframe->Xoffset_top = rc;\
 268   newframe->Xims = re;\
 269   newframe->Xeptrb = rf;\
 270   newframe->Xflags = rg;\
 271   newframe->Xrdepth = frame->Xrdepth + 1;\
 272   newframe->Xprevframe = frame;\
 273   frame = newframe;\
 274   DPRINTF(("restarting from line %d\n", __LINE__));\
 275   goto HEAP_RECURSE;\
 276   L_##rw:\
 277   DPRINTF(("jumped back to line %d\n", __LINE__));\
 278   }
 279
 280 #define RRETURN(ra)\
 281   {\
 282   heapframe *newframe = frame;\
 283   frame = newframe->Xprevframe;\
 284   (pcre_stack_free)(newframe);\
 285   if (frame != NULL)\
 286     {\
 287     rrc = ra;\
 288     goto HEAP_RETURN;\
 289     }\
 290   return ra;\
 291   }
 292
 293
  294 /* Structure for remembering the local variables in a private frame. match()
  295 reaches these fields through #define aliases (eptr is frame->Xeptr, etc.). */
  296 typedef struct heapframe {
  297   struct heapframe *Xprevframe;  /* Caller's frame; NULL marks the top level */
  298
  299   /* Function arguments that may change */
  300
  301   const uschar *Xeptr;
  302   const uschar *Xecode;
  303   const uschar *Xmstart;
  304   int Xoffset_top;
  305   long int Xims;  /* NOTE(review): match() declares ims as unsigned long int */
  306   eptrblock *Xeptrb;
  307   int Xflags;
  308   unsigned int Xrdepth;
  309
  310   /* Function local variables */
  311
  312   const uschar *Xcallpat;
  313   const uschar *Xcharptr;
  314   const uschar *Xdata;
  315   const uschar *Xnext;
  316   const uschar *Xpp;
  317   const uschar *Xprev;
  318   const uschar *Xsaved_eptr;
  319
  320   recursion_info Xnew_recursive;
  321
  322   BOOL Xcur_is_word;
  323   BOOL Xcondition;
  324   BOOL Xprev_is_word;
  325
  326   unsigned long int Xoriginal_ims;
  327
  328 #ifdef SUPPORT_UCP
  329   int Xprop_type;
  330   int Xprop_value;
  331   int Xprop_fail_result;
  332   int Xprop_category;
  333   int Xprop_chartype;
  334   int Xprop_script;
  335   int Xoclength;
  336   uschar Xocchars[8];
  337 #endif
  338
  339   int Xctype;
  340   unsigned int Xfc;
  341   int Xfi;
  342   int Xlength;
  343   int Xmax;
  344   int Xmin;
  345   int Xnumber;
  346   int Xoffset;
  347   int Xop;
  348   int Xsave_capture_last;
  349   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  350   int Xstacksave[REC_STACK_SAVE_MAX];
  351
  352   eptrblock Xnewptrb;
  353
  354   /* Where to jump back to: the RMn number given to the pending RMATCH */
  355
  356   int Xwhere;
  357
  358 } heapframe;
 359
 360 #endif
 361
 362
 363 /***************************************************************************
 364 ***************************************************************************/
 365
 366
 367
 368 /*************************************************
 369 *         Match from current position            *
 370 *************************************************/
 371
 372 /* This function is called recursively in many circumstances. Whenever it
 373 returns a negative (error) response, the outer incarnation must also return the
 374 same response.
 375
 376 Performance note: It might be tempting to extract commonly used fields from the
 377 md structure (e.g. utf8, end_subject) into individual variables to improve
 378 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 379 made performance worse.
 380
 381 Arguments:
 382    eptr        pointer to current character in subject
 383    ecode       pointer to current position in compiled code
 384    mstart      pointer to the current match start position (can be modified
 385                  by encountering \K)
 386    offset_top  current top pointer
 387    md          pointer to "static" info for the match
 388    ims         current /i, /m, and /s options
 389    eptrb       pointer to chain of blocks containing eptr at start of
 390                  brackets - for testing for empty matches
 391    flags       can contain
 392                  match_condassert - this is an assertion condition
 393                  match_cbegroup - this is the start of an unlimited repeat
 394                    group that can match an empty string
 395    rdepth      the recursion depth
 396
 397 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 398                MATCH_NOMATCH if failed to match  )
 399                a negative PCRE_ERROR_xxx value if aborted by an error condition
  400                  (e.g. stopped by repeated call or recursion limit)
  401                or an internal MATCH_COMMIT, MATCH_PRUNE, MATCH_SKIP or MATCH_THEN */
 402
 403 static int
 404 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
 405   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
 406   int flags, unsigned int rdepth)
 407 {
 408 /* These variables do not need to be preserved over recursion in this function,
 409 so they can be ordinary variables in all cases. Mark some of them with
 410 "register" because they are used a lot in loops. */
 411
 412 register int  rrc;         /* Returns from recursive calls */
 413 register int  i;           /* Used for loops not involving calls to RMATCH() */
 414 register unsigned int c;   /* Character values not kept over RMATCH() calls */
 415 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 416
 417 BOOL minimize, possessive; /* Quantifier options */
 418
 419 /* When recursion is not being used, all "local" variables that have to be
 420 preserved over calls to RMATCH() are part of a "frame" which is obtained from
 421 heap storage. Set up the top-level frame here; others are obtained from the
 422 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
 423
 424 #ifdef NO_RECURSE
 425 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
 426 frame->Xprevframe = NULL;            /* Marks the top level */
 427
 428 /* Copy in the original argument variables */
 429
 430 frame->Xeptr = eptr;
 431 frame->Xecode = ecode;
 432 frame->Xmstart = mstart;
 433 frame->Xoffset_top = offset_top;
 434 frame->Xims = ims;
 435 frame->Xeptrb = eptrb;
 436 frame->Xflags = flags;
 437 frame->Xrdepth = rdepth;
 438
 439 /* This is where control jumps back to to effect "recursion" */
 440
 441 HEAP_RECURSE:
 442
 443 /* Macros make the argument variables come from the current frame */
 444
 445 #define eptr               frame->Xeptr
 446 #define ecode              frame->Xecode
 447 #define mstart             frame->Xmstart
 448 #define offset_top         frame->Xoffset_top
 449 #define ims                frame->Xims
 450 #define eptrb              frame->Xeptrb
 451 #define flags              frame->Xflags
 452 #define rdepth             frame->Xrdepth
 453
 454 /* Ditto for the local variables */
 455
 456 #ifdef SUPPORT_UTF8
 457 #define charptr            frame->Xcharptr
 458 #endif
 459 #define callpat            frame->Xcallpat
 460 #define data               frame->Xdata
 461 #define next               frame->Xnext
 462 #define pp                 frame->Xpp
 463 #define prev               frame->Xprev
 464 #define saved_eptr         frame->Xsaved_eptr
 465
 466 #define new_recursive      frame->Xnew_recursive
 467
 468 #define cur_is_word        frame->Xcur_is_word
 469 #define condition          frame->Xcondition
 470 #define prev_is_word       frame->Xprev_is_word
 471
 472 #define original_ims       frame->Xoriginal_ims
 473
 474 #ifdef SUPPORT_UCP
 475 #define prop_type          frame->Xprop_type
 476 #define prop_value         frame->Xprop_value
 477 #define prop_fail_result   frame->Xprop_fail_result
 478 #define prop_category      frame->Xprop_category
 479 #define prop_chartype      frame->Xprop_chartype
 480 #define prop_script        frame->Xprop_script
 481 #define oclength           frame->Xoclength
 482 #define occhars            frame->Xocchars
 483 #endif
 484
 485 #define ctype              frame->Xctype
 486 #define fc                 frame->Xfc
 487 #define fi                 frame->Xfi
 488 #define length             frame->Xlength
 489 #define max                frame->Xmax
 490 #define min                frame->Xmin
 491 #define number             frame->Xnumber
 492 #define offset             frame->Xoffset
 493 #define op                 frame->Xop
 494 #define save_capture_last  frame->Xsave_capture_last
 495 #define save_offset1       frame->Xsave_offset1
 496 #define save_offset2       frame->Xsave_offset2
 497 #define save_offset3       frame->Xsave_offset3
 498 #define stacksave          frame->Xstacksave
 499
 500 #define newptrb            frame->Xnewptrb
 501
 502 /* When recursion is being used, local variables are allocated on the stack and
 503 get preserved during recursion in the normal way. In this environment, fi and
 504 i, and fc and c, can be the same variables. */
 505
 506 #else         /* NO_RECURSE not defined */
 507 #define fi i
 508 #define fc c
 509
 510
 511 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
 512 const uschar *charptr;             /* in small blocks of the code. My normal */
 513 #endif                             /* style of coding would have declared    */
 514 const uschar *callpat;             /* them within each of those blocks.      */
 515 const uschar *data;                /* However, in order to accommodate the   */
 516 const uschar *next;                /* version of this code that uses an      */
 517 USPTR         pp;                  /* external "stack" implemented on the    */
 518 const uschar *prev;                /* heap, it is easier to declare them all */
 519 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
 520                                    /* out in a block. The only declarations  */
 521 recursion_info new_recursive;      /* within blocks below are for variables  */
 522                                    /* that do not have to be preserved over  */
 523 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
 524 BOOL condition;
 525 BOOL prev_is_word;
 526
 527 unsigned long int original_ims;
 528
 529 #ifdef SUPPORT_UCP
 530 int prop_type;
 531 int prop_value;
 532 int prop_fail_result;
 533 int prop_category;
 534 int prop_chartype;
 535 int prop_script;
 536 int oclength;
 537 uschar occhars[8];
 538 #endif
 539
 540 int ctype;
 541 int length;
 542 int max;
 543 int min;
 544 int number;
 545 int offset;
 546 int op;
 547 int save_capture_last;
 548 int save_offset1, save_offset2, save_offset3;
 549 int stacksave[REC_STACK_SAVE_MAX];
 550
 551 eptrblock newptrb;
 552 #endif     /* NO_RECURSE */
 553
  554 /* These statements are here to stop the compiler complaining about uninitialized
 555 variables. */
 556
 557 #ifdef SUPPORT_UCP
 558 prop_value = 0;
 559 prop_fail_result = 0;
 560 #endif
 561
 562
 563 /* This label is used for tail recursion, which is used in a few cases even
 564 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 565 used. Thanks to Ian Taylor for noticing this possibility and sending the
 566 original patch. */
 567
 568 TAIL_RECURSE:
 569
 570 /* OK, now we can get on with the real code of the function. Recursive calls
 571 are specified by the macro RMATCH and RRETURN is used to return. When
 572 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 573 and a "return", respectively (possibly with some debugging if DEBUG is
 574 defined). However, RMATCH isn't like a function call because it's quite a
 575 complicated macro. It has to be used in one particular way. This shouldn't,
 576 however, impact performance when true recursion is being used. */
 577
 578 #ifdef SUPPORT_UTF8
 579 utf8 = md->utf8;       /* Local copy of the flag */
 580 #else
 581 utf8 = FALSE;
 582 #endif
 583
 584 /* First check that we haven't called match() too many times, or that we
 585 haven't exceeded the recursive call limit. */
 586
 587 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 588 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 589
 590 original_ims = ims;    /* Save for resetting on ')' */
 591
 592 /* At the start of a group with an unlimited repeat that may match an empty
 593 string, the match_cbegroup flag is set. When this is the case, add the current
 594 subject pointer to the chain of such remembered pointers, to be checked when we
 595 hit the closing ket, in order to break infinite loops that match no characters.
 596 When match() is called in other circumstances, don't add to the chain. The
 597 match_cbegroup flag must NOT be used with tail recursion, because the memory
 598 block that is used is on the stack, so a new one may be required for each
 599 match(). */
 600
 601 if ((flags & match_cbegroup) != 0)
 602   {
 603   newptrb.epb_saved_eptr = eptr;
 604   newptrb.epb_prev = eptrb;
 605   eptrb = &newptrb;
 606   }
 607
 608 /* Now start processing the opcodes. */
 609
 610 for (;;)
 611   {
 612   minimize = possessive = FALSE;
 613   op = *ecode;
 614
 615   /* For partial matching, remember if we ever hit the end of the subject after
 616   matching at least one subject character. */
 617
 618   if (md->partial &&
 619       eptr >= md->end_subject &&
 620       eptr > mstart)
 621     md->hitend = TRUE;
 622
 623   switch(op)
 624     {
 625     case OP_FAIL:
 626     RRETURN(MATCH_NOMATCH);
 627
 628     case OP_PRUNE:
 629     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 630       ims, eptrb, flags, RM51);
 631     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 632     RRETURN(MATCH_PRUNE);
 633
 634     case OP_COMMIT:
 635     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 636       ims, eptrb, flags, RM52);
 637     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 638     RRETURN(MATCH_COMMIT);
 639
 640     case OP_SKIP:
 641     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 642       ims, eptrb, flags, RM53);
 643     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 644     md->start_match_ptr = eptr;   /* Pass back current position */
 645     RRETURN(MATCH_SKIP);
 646
 647     case OP_THEN:
 648     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 649       ims, eptrb, flags, RM54);
 650     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 651     RRETURN(MATCH_THEN);
 652
 653     /* Handle a capturing bracket. If there is space in the offset vector, save
 654     the current subject position in the working slot at the top of the vector.
 655     We mustn't change the current values of the data slot, because they may be
 656     set from a previous iteration of this group, and be referred to by a
 657     reference inside the group.
 658
 659     If the bracket fails to match, we need to restore this value and also the
 660     values of the final offsets, in case they were set by a previous iteration
 661     of the same bracket.
 662
 663     If there isn't enough space in the offset vector, treat this as if it were
 664     a non-capturing bracket. Don't worry about setting the flag for the error
 665     case here; that is handled in the code for KET. */
 666
 667     case OP_CBRA:
 668     case OP_SCBRA:
 669     number = GET2(ecode, 1+LINK_SIZE);
 670     offset = number << 1;
 671
 672 #ifdef DEBUG
 673     printf("start bracket %d\n", number);
 674     printf("subject=");
 675     pchars(eptr, 16, TRUE, md);
 676     printf("\n");
 677 #endif
 678
 679     if (offset < md->offset_max)
 680       {
 681       save_offset1 = md->offset_vector[offset];
 682       save_offset2 = md->offset_vector[offset+1];
 683       save_offset3 = md->offset_vector[md->offset_end - number];
 684       save_capture_last = md->capture_last;
 685
 686       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 687       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
 688
 689       flags = (op == OP_SCBRA)? match_cbegroup : 0;
 690       do
 691         {
 692         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 693           ims, eptrb, flags, RM1);
 694         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 695         md->capture_last = save_capture_last;
 696         ecode += GET(ecode, 1);
 697         }
 698       while (*ecode == OP_ALT);
 699
 700       DPRINTF(("bracket %d failed\n", number));
 701
 702       md->offset_vector[offset] = save_offset1;
 703       md->offset_vector[offset+1] = save_offset2;
 704       md->offset_vector[md->offset_end - number] = save_offset3;
 705
 706       RRETURN(MATCH_NOMATCH);
 707       }
 708
 709     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
 710     as a non-capturing bracket. */
 711
 712     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 713     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 714
 715     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 716
 717     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 718     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
 719
 720     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
 721     final alternative within the brackets, we would return the result of a
 722     recursive call to match() whatever happened. We can reduce stack usage by
 723     turning this into a tail recursion, except in the case when match_cbegroup
 724     is set.*/
 725
 726     case OP_BRA:
 727     case OP_SBRA:
 728     DPRINTF(("start non-capturing bracket\n"));
 729     flags = (op >= OP_SBRA)? match_cbegroup : 0;
 730     for (;;)
 731       {
 732       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
 733         {
 734         if (flags == 0)    /* Not a possibly empty group */
 735           {
 736           ecode += _pcre_OP_lengths[*ecode];
 737           DPRINTF(("bracket 0 tail recursion\n"));
 738           goto TAIL_RECURSE;
 739           }
 740
 741         /* Possibly empty group; can't use tail recursion. */
 742
 743         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 744           eptrb, flags, RM48);
 745         RRETURN(rrc);
 746         }
 747
 748       /* For non-final alternatives, continue the loop for a NOMATCH result;
 749       otherwise return. */
 750
 751       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 752         eptrb, flags, RM2);
 753       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 754       ecode += GET(ecode, 1);
 755       }
 756     /* Control never reaches here. */
 757
 758     /* Conditional group: compilation checked that there are no more than
 759     two branches. If the condition is false, skipping the first branch takes us
 760     past the end if there is only one branch, but that's OK because that is
 761     exactly what going to the ket would do. As there is only one branch to be
 762     obeyed, we can use tail recursion to avoid using another stack frame. */
 763
 764     case OP_COND:
 765     case OP_SCOND:
 766     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
 767       {
 768       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
 769       condition = md->recursive != NULL &&
 770         (offset == RREF_ANY || offset == md->recursive->group_num);
 771       ecode += condition? 3 : GET(ecode, 1);
 772       }
 773
 774     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
 775       {
 776       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
 777       condition = offset < offset_top && md->offset_vector[offset] >= 0;
 778       ecode += condition? 3 : GET(ecode, 1);
 779       }
 780
 781     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
 782       {
 783       condition = FALSE;
 784       ecode += GET(ecode, 1);
 785       }
 786
 787     /* The condition is an assertion. Call match() to evaluate it - setting
 788     the final argument match_condassert causes it to stop at the end of an
 789     assertion. */
 790
 791     else
 792       {
 793       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
 794           match_condassert, RM3);
 795       if (rrc == MATCH_MATCH)
 796         {
 797         condition = TRUE;
 798         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
 799         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
 800         }
 801       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
 802         {
 803         RRETURN(rrc);         /* Need braces because of following else */
 804         }
 805       else
 806         {
 807         condition = FALSE;
 808         ecode += GET(ecode, 1);
 809         }
 810       }
 811
 812     /* We are now at the branch that is to be obeyed. As there is only one,
 813     we can use tail recursion to avoid using another stack frame, except when
 814     match_cbegroup is required for an unlimited repeat of a possibly empty
 815     group. If the second alternative doesn't exist, we can just plough on. */
 816
 817     if (condition || *ecode == OP_ALT)
 818       {
 819       ecode += 1 + LINK_SIZE;
 820       if (op == OP_SCOND)        /* Possibly empty group */
 821         {
 822         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
 823         RRETURN(rrc);
 824         }
 825       else                       /* Group must match something */
 826         {
 827         flags = 0;
 828         goto TAIL_RECURSE;
 829         }
 830       }
 831     else                         /* Condition false & no 2nd alternative */
 832       {
 833       ecode += 1 + LINK_SIZE;
 834       }
 835     break;
 836
 837
 838     /* End of the pattern, either real or forced. If we are in a top-level
 839     recursion, we should restore the offsets appropriately and continue from
 840     after the call. */
 841
 842     case OP_ACCEPT:
 843     case OP_END:
 844     if (md->recursive != NULL && md->recursive->group_num == 0)
 845       {
 846       recursion_info *rec = md->recursive;
 847       DPRINTF(("End of pattern in a (?0) recursion\n"));
 848       md->recursive = rec->prevrec;
 849       memmove(md->offset_vector, rec->offset_save,
 850         rec->saved_max * sizeof(int));
 851       mstart = rec->save_start;
 852       ims = original_ims;
 853       ecode = rec->after_call;
 854       break;
 855       }
 856
 857     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
 858     string - backtracking will then try other alternatives, if any. */
 859
 860     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
 861     md->end_match_ptr = eptr;           /* Record where we ended */
 862     md->end_offset_top = offset_top;    /* and how many extracts were taken */
 863     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
 864     RRETURN(MATCH_MATCH);
 865
 866     /* Change option settings */
 867
 868     case OP_OPT:
 869     ims = ecode[1];
 870     ecode += 2;
 871     DPRINTF(("ims set to %02lx\n", ims));
 872     break;
 873
 874     /* Assertion brackets. Check the alternative branches in turn - the
 875     matching won't pass the KET for an assertion. If any one branch matches,
 876     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
 877     start of each branch to move the current point backwards, so the code at
 878     this level is identical to the lookahead case. */
 879
 880     case OP_ASSERT:
 881     case OP_ASSERTBACK:
 882     do
 883       {
 884       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 885         RM4);
 886       if (rrc == MATCH_MATCH) break;
 887       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 888       ecode += GET(ecode, 1);
 889       }
 890     while (*ecode == OP_ALT);
 891     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 892
 893     /* If checking an assertion for a condition, return MATCH_MATCH. */
 894
 895     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 896
 897     /* Continue from after the assertion, updating the offsets high water
 898     mark, since extracts may have been taken during the assertion. */
 899
 900     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 901     ecode += 1 + LINK_SIZE;
 902     offset_top = md->end_offset_top;
 903     continue;
 904
 905     /* Negative assertion: all branches must fail to match */
 906
 907     case OP_ASSERT_NOT:
 908     case OP_ASSERTBACK_NOT:
 909     do
 910       {
 911       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
 912         RM5);
 913       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
 914       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
 915       ecode += GET(ecode,1);
 916       }
 917     while (*ecode == OP_ALT);
 918
 919     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 920
 921     ecode += 1 + LINK_SIZE;
 922     continue;
 923
 924     /* Move the subject pointer back. This occurs only at the start of
 925     each branch of a lookbehind assertion. If we are too close to the start to
 926     move back, this match function fails. When working with UTF-8 we move
 927     back a number of characters, not bytes. */
 928
 929     case OP_REVERSE:
 930 #ifdef SUPPORT_UTF8
 931     if (utf8)
 932       {
 933       i = GET(ecode, 1);
 934       while (i-- > 0)
 935         {
 936         eptr--;
 937         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 938         BACKCHAR(eptr);
 939         }
 940       }
 941     else
 942 #endif
 943
 944     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
 945
 946       {
 947       eptr -= GET(ecode, 1);
 948       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 949       }
 950
 951     /* Skip to next op code */
 952
 953     ecode += 1 + LINK_SIZE;
 954     break;
 955
 956     /* The callout item calls an external function, if one is provided, passing
 957     details of the match so far. This is mainly for debugging, though the
 958     function is able to force a failure. */
 959
 960     case OP_CALLOUT:
 961     if (pcre_callout != NULL)
 962       {
 963       pcre_callout_block cb;
 964       cb.version          = 1;   /* Version 1 of the callout block */
 965       cb.callout_number   = ecode[1];
 966       cb.offset_vector    = md->offset_vector;
 967       cb.subject          = (PCRE_SPTR)md->start_subject;
 968       cb.subject_length   = md->end_subject - md->start_subject;
 969       cb.start_match      = mstart - md->start_subject;
 970       cb.current_position = eptr - md->start_subject;
 971       cb.pattern_position = GET(ecode, 2);
 972       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
 973       cb.capture_top      = offset_top/2;
 974       cb.capture_last     = md->capture_last;
 975       cb.callout_data     = md->callout_data;
 976       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 977       if (rrc < 0) RRETURN(rrc);
 978       }
 979     ecode += 2 + 2*LINK_SIZE;
 980     break;
 981
 982     /* Recursion either matches the current regex, or some subexpression. The
 983     offset data is the offset to the starting bracket from the start of the
 984     whole pattern. (This is so that it works from duplicated subpatterns.)
 985
 986     If there are any capturing brackets started but not finished, we have to
 987     save their starting points and reinstate them after the recursion. However,
 988     we don't know how many such there are (offset_top records the completed
 989     total) so we just have to save all the potential data. There may be up to
 990     65535 such values, which is too large to put on the stack, but using malloc
 991     for small numbers seems expensive. As a compromise, the stack is used when
 992     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
 993     is used. If the malloc fails, the match is abandoned by returning
 994     PCRE_ERROR_NOMEMORY, so no attempt is made to carry on with only part
 995     of the offset data saved.
 996
 997     There are also other values that have to be saved. We use a chained
 998     sequence of blocks that actually live on the stack. Thanks to Robin Houston
 999     for the original version of this logic. */
1000
1001     case OP_RECURSE:
1002       {
1003       callpat = md->start_code + GET(ecode, 1);
1004       new_recursive.group_num = (callpat == md->start_code)? 0 :
1005         GET2(callpat, 1 + LINK_SIZE);
1006
1007       /* Add to "recursing stack" */
1008
1009       new_recursive.prevrec = md->recursive;
1010       md->recursive = &new_recursive;
1011
1012       /* Find where to continue from afterwards */
1013
1014       ecode += 1 + LINK_SIZE;
1015       new_recursive.after_call = ecode;
1016
1017       /* Now save the offset data. */
1018
1019       new_recursive.saved_max = md->offset_end;
1020       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1021         new_recursive.offset_save = stacksave;
1022       else
1023         {
1024         new_recursive.offset_save =
1025           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1026         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1027         }
1028
1029       memcpy(new_recursive.offset_save, md->offset_vector,
1030             new_recursive.saved_max * sizeof(int));
1031       new_recursive.save_start = mstart;
1032       mstart = eptr;
1033
1034       /* OK, now we can do the recursion. For each top-level alternative we
1035       restore the offset and recursion data. */
1036
1037       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1038       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1039       do
1040         {
1041         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1042           md, ims, eptrb, flags, RM6);
1043         if (rrc == MATCH_MATCH)
1044           {
1045           DPRINTF(("Recursion matched\n"));
1046           md->recursive = new_recursive.prevrec;
1047           if (new_recursive.offset_save != stacksave)
1048             (pcre_free)(new_recursive.offset_save);
1049           RRETURN(MATCH_MATCH);
1050           }
1051         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1052           {
1053           DPRINTF(("Recursion gave error %d\n", rrc));
1054           RRETURN(rrc);
1055           }
1056
1057         md->recursive = &new_recursive;
1058         memcpy(md->offset_vector, new_recursive.offset_save,
1059             new_recursive.saved_max * sizeof(int));
1060         callpat += GET(callpat, 1);
1061         }
1062       while (*callpat == OP_ALT);
1063
1064       DPRINTF(("Recursion didn't match\n"));
1065       md->recursive = new_recursive.prevrec;
1066       if (new_recursive.offset_save != stacksave)
1067         (pcre_free)(new_recursive.offset_save);
1068       RRETURN(MATCH_NOMATCH);
1069       }
1070     /* Control never reaches here */
1071
1072     /* "Once" brackets are like assertion brackets except that after a match,
1073     the point in the subject string is not moved back. Thus there can never be
1074     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1075     Check the alternative branches in turn - the matching won't pass the KET
1076     for this kind of subpattern. If any one branch matches, we carry on as at
1077     the end of a normal bracket, leaving the subject pointer. */
1078
1079     case OP_ONCE:
1080     prev = ecode;
1081     saved_eptr = eptr;
1082
1083     do
1084       {
1085       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1086       if (rrc == MATCH_MATCH) break;
1087       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1088       ecode += GET(ecode,1);
1089       }
1090     while (*ecode == OP_ALT);
1091
1092     /* If hit the end of the group (which could be repeated), fail */
1093
1094     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1095
1096     /* Continue as from after the assertion, updating the offsets high water
1097     mark, since extracts may have been taken. */
1098
1099     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1100
1101     offset_top = md->end_offset_top;
1102     eptr = md->end_match_ptr;
1103
1104     /* For a non-repeating ket, just continue at this level. This also
1105     happens for a repeating ket if no characters were matched in the group.
1106     This is the forcible breaking of infinite loops as implemented in Perl
1107     5.005. If there is an options reset, it will get obeyed in the normal
1108     course of events. */
1109
1110     if (*ecode == OP_KET || eptr == saved_eptr)
1111       {
1112       ecode += 1+LINK_SIZE;
1113       break;
1114       }
1115
1116     /* The repeating kets try the rest of the pattern or restart from the
1117     preceding bracket, in the appropriate order. The second "call" of match()
1118     uses tail recursion, to avoid using another stack frame. We need to reset
1119     any options that changed within the bracket before re-running it, so
1120     check the next opcode. */
1121
1122     if (ecode[1+LINK_SIZE] == OP_OPT)
1123       {
1124       ims = (ims & ~PCRE_IMS) | ecode[4];
1125       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1126       }
1127
1128     if (*ecode == OP_KETRMIN)
1129       {
1130       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1131       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1132       ecode = prev;
1133       flags = 0;
1134       goto TAIL_RECURSE;
1135       }
1136     else  /* OP_KETRMAX */
1137       {
1138       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1139       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1140       ecode += 1 + LINK_SIZE;
1141       flags = 0;
1142       goto TAIL_RECURSE;
1143       }
1144     /* Control never gets here */
1145
1146     /* An alternation is the end of a branch; scan along to find the end of the
1147     bracketed group and go to there. */
1148
1149     case OP_ALT:
1150     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1151     break;
1152
1153     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1154     that it may occur zero times. It may repeat infinitely, or not at all -
1155     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1156     repeat limits are compiled as a number of copies, with the optional ones
1157     preceded by BRAZERO or BRAMINZERO. */
1158
1159     case OP_BRAZERO:
1160       {
1161       next = ecode+1;
1162       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1163       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1164       do next += GET(next,1); while (*next == OP_ALT);
1165       ecode = next + 1 + LINK_SIZE;
1166       }
1167     break;
1168
1169     case OP_BRAMINZERO:
1170       {
1171       next = ecode+1;
1172       do next += GET(next, 1); while (*next == OP_ALT);
1173       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1174       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1175       ecode++;
1176       }
1177     break;
1178
1179     /* End of a group, repeated or non-repeating. */
1180
1181     case OP_KET:
1182     case OP_KETRMIN:
1183     case OP_KETRMAX:
1184     prev = ecode - GET(ecode, 1);
1185
1186     /* If this was a group that remembered the subject start, in order to break
1187     infinite repeats of empty string matches, retrieve the subject start from
1188     the chain. Otherwise, set it NULL. */
1189
1190     if (*prev >= OP_SBRA)
1191       {
1192       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1193       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1194       }
1195     else saved_eptr = NULL;
1196
1197     /* If we are at the end of an assertion group, stop matching and return
1198     MATCH_MATCH, but record the current high water mark for use by positive
1199     assertions. Do this also for the "once" (atomic) groups. */
1200
1201     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1202         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1203         *prev == OP_ONCE)
1204       {
1205       md->end_match_ptr = eptr;      /* For ONCE */
1206       md->end_offset_top = offset_top;
1207       RRETURN(MATCH_MATCH);
1208       }
1209
1210     /* For capturing groups we have to check the group number back at the start
1211     and if necessary complete handling an extraction by setting the offsets and
1212     bumping the high water mark. Note that whole-pattern recursion is coded as
1213     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1214     when the OP_END is reached. Other recursion is handled here. */
1215
1216     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1217       {
1218       number = GET2(prev, 1+LINK_SIZE);
1219       offset = number << 1;
1220
1221 #ifdef DEBUG
1222       printf("end bracket %d", number);
1223       printf("\n");
1224 #endif
1225
1226       md->capture_last = number;
1227       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1228         {
1229         md->offset_vector[offset] =
1230           md->offset_vector[md->offset_end - number];
1231         md->offset_vector[offset+1] = eptr - md->start_subject;
1232         if (offset_top <= offset) offset_top = offset + 2;
1233         }
1234
1235       /* Handle a recursively called group. Restore the offsets
1236       appropriately and continue from after the call. */
1237
1238       if (md->recursive != NULL && md->recursive->group_num == number)
1239         {
1240         recursion_info *rec = md->recursive;
1241         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1242         md->recursive = rec->prevrec;
1243         mstart = rec->save_start;
1244         memcpy(md->offset_vector, rec->offset_save,
1245           rec->saved_max * sizeof(int));
1246         ecode = rec->after_call;
1247         ims = original_ims;
1248         break;
1249         }
1250       }
1251
1252     /* For both capturing and non-capturing groups, reset the value of the ims
1253     flags, in case they got changed during the group. */
1254
1255     ims = original_ims;
1256     DPRINTF(("ims reset to %02lx\n", ims));
1257
1258     /* For a non-repeating ket, just continue at this level. This also
1259     happens for a repeating ket if no characters were matched in the group.
1260     This is the forcible breaking of infinite loops as implemented in Perl
1261     5.005. If there is an options reset, it will get obeyed in the normal
1262     course of events. */
1263
1264     if (*ecode == OP_KET || eptr == saved_eptr)
1265       {
1266       ecode += 1 + LINK_SIZE;
1267       break;
1268       }
1269
1270     /* The repeating kets try the rest of the pattern or restart from the
1271     preceding bracket, in the appropriate order. In the second case, we can use
1272     tail recursion to avoid using another stack frame, unless we have an
1273     unlimited repeat of a group that can match an empty string. */
1274
1275     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1276
1277     if (*ecode == OP_KETRMIN)
1278       {
1279       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1280       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1281       if (flags != 0)    /* Could match an empty string */
1282         {
1283         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1284         RRETURN(rrc);
1285         }
1286       ecode = prev;
1287       goto TAIL_RECURSE;
1288       }
1289     else  /* OP_KETRMAX */
1290       {
1291       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1292       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1293       ecode += 1 + LINK_SIZE;
1294       flags = 0;
1295       goto TAIL_RECURSE;
1296       }
1297     /* Control never gets here */
1298
1299     /* Start of subject unless notbol, or after internal newline if multiline */
1300
1301     case OP_CIRC:
1302     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1303     if ((ims & PCRE_MULTILINE) != 0)
1304       {
1305       if (eptr != md->start_subject &&
1306           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1307         RRETURN(MATCH_NOMATCH);
1308       ecode++;
1309       break;
1310       }
1311     /* ... else fall through */
1312
1313     /* Start of subject assertion */
1314
1315     case OP_SOD:
1316     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1317     ecode++;
1318     break;
1319
1320     /* Start of match assertion */
1321
1322     case OP_SOM:
1323     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1324     ecode++;
1325     break;
1326
1327     /* Reset the start of match point */
1328
1329     case OP_SET_SOM:
1330     mstart = eptr;
1331     ecode++;
1332     break;
1333
1334     /* Assert before internal newline if multiline, or before a terminating
1335     newline unless endonly is set, else end of subject unless noteol is set. */
1336
1337     case OP_DOLL:
1338     if ((ims & PCRE_MULTILINE) != 0)
1339       {
1340       if (eptr < md->end_subject)
1341         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1342       else
1343         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1344       ecode++;
1345       break;
1346       }
1347     else
1348       {
1349       if (md->noteol) RRETURN(MATCH_NOMATCH);
1350       if (!md->endonly)
1351         {
1352         if (eptr != md->end_subject &&
1353             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1354           RRETURN(MATCH_NOMATCH);
1355         ecode++;
1356         break;
1357         }
1358       }
1359     /* ... else fall through for endonly */
1360
1361     /* End of subject assertion (\z) */
1362
1363     case OP_EOD:
1364     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1365     ecode++;
1366     break;
1367
1368     /* End of subject or ending \n assertion (\Z) */
1369
1370     case OP_EODN:
1371     if (eptr != md->end_subject &&
1372         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1373       RRETURN(MATCH_NOMATCH);
1374     ecode++;
1375     break;
1376
1377     /* Word boundary assertions */
1378
1379     case OP_NOT_WORD_BOUNDARY:
1380     case OP_WORD_BOUNDARY:
1381       {
1382
1383       /* Find out if the previous and current characters are "word" characters.
1384       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1385       be "non-word" characters. */
1386
1387 #ifdef SUPPORT_UTF8
1388       if (utf8)
1389         {
1390         if (eptr == md->start_subject) prev_is_word = FALSE; else
1391           {
1392           const uschar *lastptr = eptr - 1;
1393           while((*lastptr & 0xc0) == 0x80) lastptr--;
1394           GETCHAR(c, lastptr);
1395           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1396           }
1397         if (eptr >= md->end_subject) cur_is_word = FALSE; else
1398           {
1399           GETCHAR(c, eptr);
1400           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1401           }
1402         }
1403       else
1404 #endif
1405
1406       /* More streamlined when not in UTF-8 mode */
1407
1408         {
1409         prev_is_word = (eptr != md->start_subject) &&
1410           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1411         cur_is_word = (eptr < md->end_subject) &&
1412           ((md->ctypes[*eptr] & ctype_word) != 0);
1413         }
1414
1415       /* Now see if the situation is what we want */
1416
1417       if ((*ecode++ == OP_WORD_BOUNDARY)?
1418            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1419         RRETURN(MATCH_NOMATCH);
1420       }
1421     break;
1422
1423     /* Match a single character type; inline for speed */
1424
1425     case OP_ANY:
1426     if ((ims & PCRE_DOTALL) == 0)
1427       {
1428       if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1429       }
1430     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1431     if (utf8)
1432       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1433     ecode++;
1434     break;
1435
1436     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1437     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1438
1439     case OP_ANYBYTE:
1440     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1441     ecode++;
1442     break;
1443
1444     case OP_NOT_DIGIT:
1445     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1446     GETCHARINCTEST(c, eptr);
1447     if (
1448 #ifdef SUPPORT_UTF8
1449        c < 256 &&
1450 #endif
1451        (md->ctypes[c] & ctype_digit) != 0
1452        )
1453       RRETURN(MATCH_NOMATCH);
1454     ecode++;
1455     break;
1456
1457     case OP_DIGIT:
1458     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1459     GETCHARINCTEST(c, eptr);
1460     if (
1461 #ifdef SUPPORT_UTF8
1462        c >= 256 ||
1463 #endif
1464        (md->ctypes[c] & ctype_digit) == 0
1465        )
1466       RRETURN(MATCH_NOMATCH);
1467     ecode++;
1468     break;
1469
1470     case OP_NOT_WHITESPACE:
1471     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1472     GETCHARINCTEST(c, eptr);
1473     if (
1474 #ifdef SUPPORT_UTF8
1475        c < 256 &&
1476 #endif
1477        (md->ctypes[c] & ctype_space) != 0
1478        )
1479       RRETURN(MATCH_NOMATCH);
1480     ecode++;
1481     break;
1482
1483     case OP_WHITESPACE:
1484     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1485     GETCHARINCTEST(c, eptr);
1486     if (
1487 #ifdef SUPPORT_UTF8
1488        c >= 256 ||
1489 #endif
1490        (md->ctypes[c] & ctype_space) == 0
1491        )
1492       RRETURN(MATCH_NOMATCH);
1493     ecode++;
1494     break;
1495
1496     case OP_NOT_WORDCHAR:
1497     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1498     GETCHARINCTEST(c, eptr);
1499     if (
1500 #ifdef SUPPORT_UTF8
1501        c < 256 &&
1502 #endif
1503        (md->ctypes[c] & ctype_word) != 0
1504        )
1505       RRETURN(MATCH_NOMATCH);
1506     ecode++;
1507     break;
1508
1509     case OP_WORDCHAR:
1510     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1511     GETCHARINCTEST(c, eptr);
1512     if (
1513 #ifdef SUPPORT_UTF8
1514        c >= 256 ||
1515 #endif
1516        (md->ctypes[c] & ctype_word) == 0
1517        )
1518       RRETURN(MATCH_NOMATCH);
1519     ecode++;
1520     break;
1521
1522     case OP_ANYNL:
1523     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1524     GETCHARINCTEST(c, eptr);
1525     switch(c)
1526       {
1527       default: RRETURN(MATCH_NOMATCH);
1528       case 0x000d:
1529       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1530       break;
1531
1532       case 0x000a:
1533       break;
1534
1535       case 0x000b:
1536       case 0x000c:
1537       case 0x0085:
1538       case 0x2028:
1539       case 0x2029:
1540       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1541       break;
1542       }
1543     ecode++;
1544     break;
1545
1546     case OP_NOT_HSPACE:
1547     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1548     GETCHARINCTEST(c, eptr);
1549     switch(c)
1550       {
1551       default: break;
1552       case 0x09:      /* HT */
1553       case 0x20:      /* SPACE */
1554       case 0xa0:      /* NBSP */
1555       case 0x1680:    /* OGHAM SPACE MARK */
1556       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1557       case 0x2000:    /* EN QUAD */
1558       case 0x2001:    /* EM QUAD */
1559       case 0x2002:    /* EN SPACE */
1560       case 0x2003:    /* EM SPACE */
1561       case 0x2004:    /* THREE-PER-EM SPACE */
1562       case 0x2005:    /* FOUR-PER-EM SPACE */
1563       case 0x2006:    /* SIX-PER-EM SPACE */
1564       case 0x2007:    /* FIGURE SPACE */
1565       case 0x2008:    /* PUNCTUATION SPACE */
1566       case 0x2009:    /* THIN SPACE */
1567       case 0x200A:    /* HAIR SPACE */
1568       case 0x202f:    /* NARROW NO-BREAK SPACE */
1569       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1570       case 0x3000:    /* IDEOGRAPHIC SPACE */
1571       RRETURN(MATCH_NOMATCH);
1572       }
1573     ecode++;
1574     break;
1575
1576     case OP_HSPACE:
1577     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1578     GETCHARINCTEST(c, eptr);
1579     switch(c)
1580       {
1581       default: RRETURN(MATCH_NOMATCH);
1582       case 0x09:      /* HT */
1583       case 0x20:      /* SPACE */
1584       case 0xa0:      /* NBSP */
1585       case 0x1680:    /* OGHAM SPACE MARK */
1586       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1587       case 0x2000:    /* EN QUAD */
1588       case 0x2001:    /* EM QUAD */
1589       case 0x2002:    /* EN SPACE */
1590       case 0x2003:    /* EM SPACE */
1591       case 0x2004:    /* THREE-PER-EM SPACE */
1592       case 0x2005:    /* FOUR-PER-EM SPACE */
1593       case 0x2006:    /* SIX-PER-EM SPACE */
1594       case 0x2007:    /* FIGURE SPACE */
1595       case 0x2008:    /* PUNCTUATION SPACE */
1596       case 0x2009:    /* THIN SPACE */
1597       case 0x200A:    /* HAIR SPACE */
1598       case 0x202f:    /* NARROW NO-BREAK SPACE */
1599       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1600       case 0x3000:    /* IDEOGRAPHIC SPACE */
1601       break;
1602       }
1603     ecode++;
1604     break;
1605
1606     case OP_NOT_VSPACE:
1607     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1608     GETCHARINCTEST(c, eptr);
1609     switch(c)
1610       {
1611       default: break;
1612       case 0x0a:      /* LF */
1613       case 0x0b:      /* VT */
1614       case 0x0c:      /* FF */
1615       case 0x0d:      /* CR */
1616       case 0x85:      /* NEL */
1617       case 0x2028:    /* LINE SEPARATOR */
1618       case 0x2029:    /* PARAGRAPH SEPARATOR */
1619       RRETURN(MATCH_NOMATCH);
1620       }
1621     ecode++;
1622     break;
1623
1624     case OP_VSPACE:
1625     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1626     GETCHARINCTEST(c, eptr);
1627     switch(c)
1628       {
1629       default: RRETURN(MATCH_NOMATCH);
1630       case 0x0a:      /* LF */
1631       case 0x0b:      /* VT */
1632       case 0x0c:      /* FF */
1633       case 0x0d:      /* CR */
1634       case 0x85:      /* NEL */
1635       case 0x2028:    /* LINE SEPARATOR */
1636       case 0x2029:    /* PARAGRAPH SEPARATOR */
1637       break;
1638       }
1639     ecode++;
1640     break;
1641
1642 #ifdef SUPPORT_UCP
1643     /* Check the next character by Unicode property. We will get here only
1644     if the support is in the binary; otherwise a compile-time error occurs. */
1645
1646     case OP_PROP:
1647     case OP_NOTPROP:
1648     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1649     GETCHARINCTEST(c, eptr);
1650       {
1651       int chartype, script;
1652       int category = _pcre_ucp_findprop(c, &chartype, &script);
1653
1654       switch(ecode[1])
1655         {
1656         case PT_ANY:
1657         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1658         break;
1659
1660         case PT_LAMP:
1661         if ((chartype == ucp_Lu ||
1662              chartype == ucp_Ll ||
1663              chartype == ucp_Lt) == (op == OP_NOTPROP))
1664           RRETURN(MATCH_NOMATCH);
1665          break;
1666
1667         case PT_GC:
1668         if ((ecode[2] != category) == (op == OP_PROP))
1669           RRETURN(MATCH_NOMATCH);
1670         break;
1671
1672         case PT_PC:
1673         if ((ecode[2] != chartype) == (op == OP_PROP))
1674           RRETURN(MATCH_NOMATCH);
1675         break;
1676
1677         case PT_SC:
1678         if ((ecode[2] != script) == (op == OP_PROP))
1679           RRETURN(MATCH_NOMATCH);
1680         break;
1681
1682         default:
1683         RRETURN(PCRE_ERROR_INTERNAL);
1684         }
1685
1686       ecode += 3;
1687       }
1688     break;
1689
1690     /* Match an extended Unicode sequence. We will get here only if the support
1691     is in the binary; otherwise a compile-time error occurs. */
1692
1693     case OP_EXTUNI:
1694     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1695     GETCHARINCTEST(c, eptr);
1696       {
1697       int chartype, script;
1698       int category = _pcre_ucp_findprop(c, &chartype, &script);
1699       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1700       while (eptr < md->end_subject)
1701         {
1702         int len = 1;
1703         if (!utf8) c = *eptr; else
1704           {
1705           GETCHARLEN(c, eptr, len);
1706           }
1707         category = _pcre_ucp_findprop(c, &chartype, &script);
1708         if (category != ucp_M) break;
1709         eptr += len;
1710         }
1711       }
1712     ecode++;
1713     break;
1714 #endif
1715
1716
1717     /* Match a back reference, possibly repeatedly. Look past the end of the
1718     item to see if there is repeat information following. The code is similar
1719     to that for character classes, but repeated for efficiency. Then obey
1720     similar code to character type repeats - written out again for speed.
1721     However, if the referenced string is the empty string, always treat
1722     it as matched, any number of times (otherwise there could be infinite
1723     loops). */
1724
1725     case OP_REF:
1726       {
1727       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1728       ecode += 3;                                 /* Advance past item */
1729
1730       /* If the reference is unset, set the length to be longer than the amount
1731       of subject left; this ensures that every attempt at a match fails. We
1732       can't just fail here, because of the possibility of quantifiers with zero
1733       minima. */
1734
1735       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1736         md->end_subject - eptr + 1 :
1737         md->offset_vector[offset+1] - md->offset_vector[offset];
1738
1739       /* Set up for repetition, or handle the non-repeated case */
1740
1741       switch (*ecode)
1742         {
1743         case OP_CRSTAR:
1744         case OP_CRMINSTAR:
1745         case OP_CRPLUS:
1746         case OP_CRMINPLUS:
1747         case OP_CRQUERY:
1748         case OP_CRMINQUERY:
1749         c = *ecode++ - OP_CRSTAR;
1750         minimize = (c & 1) != 0;
1751         min = rep_min[c];                 /* Pick up values from tables; */
1752         max = rep_max[c];                 /* zero for max => infinity */
1753         if (max == 0) max = INT_MAX;
1754         break;
1755
1756         case OP_CRRANGE:
1757         case OP_CRMINRANGE:
1758         minimize = (*ecode == OP_CRMINRANGE);
1759         min = GET2(ecode, 1);
1760         max = GET2(ecode, 3);
1761         if (max == 0) max = INT_MAX;
1762         ecode += 5;
1763         break;
1764
1765         default:               /* No repeat follows */
1766         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1767         eptr += length;
1768         continue;              /* With the main loop */
1769         }
1770
1771       /* If the length of the reference is zero, just continue with the
1772       main loop. */
1773
1774       if (length == 0) continue;
1775
1776       /* First, ensure the minimum number of matches are present. We get back
1777       the length of the reference string explicitly rather than passing the
1778       address of eptr, so that eptr can be a register variable. */
1779
1780       for (i = 1; i <= min; i++)
1781         {
1782         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1783         eptr += length;
1784         }
1785
1786       /* If min = max, continue at the same level without recursion.
1787       They are not both allowed to be zero. */
1788
1789       if (min == max) continue;
1790
1791       /* If minimizing, keep trying and advancing the pointer */
1792
1793       if (minimize)
1794         {
1795         for (fi = min;; fi++)
1796           {
1797           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1798           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1799           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1800             RRETURN(MATCH_NOMATCH);
1801           eptr += length;
1802           }
1803         /* Control never gets here */
1804         }
1805
1806       /* If maximizing, find the longest string and work backwards */
1807
1808       else
1809         {
1810         pp = eptr;
1811         for (i = min; i < max; i++)
1812           {
1813           if (!match_ref(offset, eptr, length, md, ims)) break;
1814           eptr += length;
1815           }
1816         while (eptr >= pp)
1817           {
1818           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1819           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1820           eptr -= length;
1821           }
1822         RRETURN(MATCH_NOMATCH);
1823         }
1824       }
1825     /* Control never gets here */
1826
1827
1828
1829     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1830     used when all the characters in the class have values in the range 0-255,
1831     and either the matching is caseful, or the characters are in the range
1832     0-127 when UTF-8 processing is enabled. The only difference between
1833     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1834     encountered.
1835
1836     First, look past the end of the item to see if there is repeat information
1837     following. Then obey similar code to character type repeats - written out
1838     again for speed. */
1839
1840     case OP_NCLASS:
1841     case OP_CLASS:
1842       {
1843       data = ecode + 1;                /* Save for matching */
1844       ecode += 33;                     /* Advance past the item */
1845
1846       switch (*ecode)
1847         {
1848         case OP_CRSTAR:
1849         case OP_CRMINSTAR:
1850         case OP_CRPLUS:
1851         case OP_CRMINPLUS:
1852         case OP_CRQUERY:
1853         case OP_CRMINQUERY:
1854         c = *ecode++ - OP_CRSTAR;
1855         minimize = (c & 1) != 0;
1856         min = rep_min[c];                 /* Pick up values from tables; */
1857         max = rep_max[c];                 /* zero for max => infinity */
1858         if (max == 0) max = INT_MAX;
1859         break;
1860
1861         case OP_CRRANGE:
1862         case OP_CRMINRANGE:
1863         minimize = (*ecode == OP_CRMINRANGE);
1864         min = GET2(ecode, 1);
1865         max = GET2(ecode, 3);
1866         if (max == 0) max = INT_MAX;
1867         ecode += 5;
1868         break;
1869
1870         default:               /* No repeat follows */
1871         min = max = 1;
1872         break;
1873         }
1874
1875       /* First, ensure the minimum number of matches are present. */
1876
1877 #ifdef SUPPORT_UTF8
1878       /* UTF-8 mode */
1879       if (utf8)
1880         {
1881         for (i = 1; i <= min; i++)
1882           {
1883           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1884           GETCHARINC(c, eptr);
1885           if (c > 255)
1886             {
1887             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1888             }
1889           else
1890             {
1891             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1892             }
1893           }
1894         }
1895       else
1896 #endif
1897       /* Not UTF-8 mode */
1898         {
1899         for (i = 1; i <= min; i++)
1900           {
1901           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1902           c = *eptr++;
1903           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1904           }
1905         }
1906
1907       /* If max == min we can continue with the main loop without the
1908       need to recurse. */
1909
1910       if (min == max) continue;
1911
1912       /* If minimizing, keep testing the rest of the expression and advancing
1913       the pointer while it matches the class. */
1914
1915       if (minimize)
1916         {
1917 #ifdef SUPPORT_UTF8
1918         /* UTF-8 mode */
1919         if (utf8)
1920           {
1921           for (fi = min;; fi++)
1922             {
1923             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1924             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1925             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1926             GETCHARINC(c, eptr);
1927             if (c > 255)
1928               {
1929               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1930               }
1931             else
1932               {
1933               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1934               }
1935             }
1936           }
1937         else
1938 #endif
1939         /* Not UTF-8 mode */
1940           {
1941           for (fi = min;; fi++)
1942             {
1943             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1944             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1945             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1946             c = *eptr++;
1947             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1948             }
1949           }
1950         /* Control never gets here */
1951         }
1952
1953       /* If maximizing, find the longest possible run, then work backwards. */
1954
1955       else
1956         {
1957         pp = eptr;
1958
1959 #ifdef SUPPORT_UTF8
1960         /* UTF-8 mode */
1961         if (utf8)
1962           {
1963           for (i = min; i < max; i++)
1964             {
1965             int len = 1;
1966             if (eptr >= md->end_subject) break;
1967             GETCHARLEN(c, eptr, len);
1968             if (c > 255)
1969               {
1970               if (op == OP_CLASS) break;
1971               }
1972             else
1973               {
1974               if ((data[c/8] & (1 << (c&7))) == 0) break;
1975               }
1976             eptr += len;
1977             }
1978           for (;;)
1979             {
1980             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1981             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1982             if (eptr-- == pp) break;        /* Stop if tried at original pos */
1983             BACKCHAR(eptr);
1984             }
1985           }
1986         else
1987 #endif
1988           /* Not UTF-8 mode */
1989           {
1990           for (i = min; i < max; i++)
1991             {
1992             if (eptr >= md->end_subject) break;
1993             c = *eptr;
1994             if ((data[c/8] & (1 << (c&7))) == 0) break;
1995             eptr++;
1996             }
1997           while (eptr >= pp)
1998             {
1999             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2000             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2001             eptr--;
2002             }
2003           }
2004
2005         RRETURN(MATCH_NOMATCH);
2006         }
2007       }
2008     /* Control never gets here */
2009
2010
2011     /* Match an extended character class. This opcode is encountered only
2012     in UTF-8 mode, because that's the only time it is compiled. */
2013
2014 #ifdef SUPPORT_UTF8
2015     case OP_XCLASS:
2016       {
2017       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
2018       ecode += GET(ecode, 1);                      /* Advance past the item */
2019
2020       switch (*ecode)
2021         {
2022         case OP_CRSTAR:
2023         case OP_CRMINSTAR:
2024         case OP_CRPLUS:
2025         case OP_CRMINPLUS:
2026         case OP_CRQUERY:
2027         case OP_CRMINQUERY:
2028         c = *ecode++ - OP_CRSTAR;
2029         minimize = (c & 1) != 0;
2030         min = rep_min[c];                 /* Pick up values from tables; */
2031         max = rep_max[c];                 /* zero for max => infinity */
2032         if (max == 0) max = INT_MAX;
2033         break;
2034
2035         case OP_CRRANGE:
2036         case OP_CRMINRANGE:
2037         minimize = (*ecode == OP_CRMINRANGE);
2038         min = GET2(ecode, 1);
2039         max = GET2(ecode, 3);
2040         if (max == 0) max = INT_MAX;
2041         ecode += 5;
2042         break;
2043
2044         default:               /* No repeat follows */
2045         min = max = 1;
2046         break;
2047         }
2048
2049       /* First, ensure the minimum number of matches are present. */
2050
2051       for (i = 1; i <= min; i++)
2052         {
2053         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2054         GETCHARINC(c, eptr);
2055         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2056         }
2057
2058       /* If max == min we can continue with the main loop without the
2059       need to recurse. */
2060
2061       if (min == max) continue;
2062
2063       /* If minimizing, keep testing the rest of the expression and advancing
2064       the pointer while it matches the class. */
2065
2066       if (minimize)
2067         {
2068         for (fi = min;; fi++)
2069           {
2070           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2071           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2072           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2073           GETCHARINC(c, eptr);
2074           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2075           }
2076         /* Control never gets here */
2077         }
2078
2079       /* If maximizing, find the longest possible run, then work backwards. */
2080
2081       else
2082         {
2083         pp = eptr;
2084         for (i = min; i < max; i++)
2085           {
2086           int len = 1;
2087           if (eptr >= md->end_subject) break;
2088           GETCHARLEN(c, eptr, len);
2089           if (!_pcre_xclass(c, data)) break;
2090           eptr += len;
2091           }
2092         for(;;)
2093           {
2094           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2095           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2096           if (eptr-- == pp) break;        /* Stop if tried at original pos */
2097           if (utf8) BACKCHAR(eptr);
2098           }
2099         RRETURN(MATCH_NOMATCH);
2100         }
2101
2102       /* Control never gets here */
2103       }
2104 #endif    /* End of XCLASS */
2105
2106     /* Match a single character, casefully */
2107
2108     case OP_CHAR:
2109 #ifdef SUPPORT_UTF8
2110     if (utf8)
2111       {
2112       length = 1;
2113       ecode++;
2114       GETCHARLEN(fc, ecode, length);
2115       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2116       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2117       }
2118     else
2119 #endif
2120
2121     /* Non-UTF-8 mode */
2122       {
2123       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2124       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2125       ecode += 2;
2126       }
2127     break;
2128
2129     /* Match a single character, caselessly */
2130
2131     case OP_CHARNC:
2132 #ifdef SUPPORT_UTF8
2133     if (utf8)
2134       {
2135       length = 1;
2136       ecode++;
2137       GETCHARLEN(fc, ecode, length);
2138
2139       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2140
2141       /* If the pattern character's value is < 128, we have only one byte, and
2142       can use the fast lookup table. */
2143
2144       if (fc < 128)
2145         {
2146         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2147         }
2148
2149       /* Otherwise we must pick up the subject character */
2150
2151       else
2152         {
2153         unsigned int dc;
2154         GETCHARINC(dc, eptr);
2155         ecode += length;
2156
2157         /* If we have Unicode property support, we can use it to test the other
2158         case of the character, if there is one. */
2159
2160         if (fc != dc)
2161           {
2162 #ifdef SUPPORT_UCP
2163           if (dc != _pcre_ucp_othercase(fc))
2164 #endif
2165             RRETURN(MATCH_NOMATCH);
2166           }
2167         }
2168       }
2169     else
2170 #endif   /* SUPPORT_UTF8 */
2171
2172     /* Non-UTF-8 mode */
2173       {
2174       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2175       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2176       ecode += 2;
2177       }
2178     break;
2179
2180     /* Match a single character repeatedly. */
2181
2182     case OP_EXACT:
2183     min = max = GET2(ecode, 1);
2184     ecode += 3;
2185     goto REPEATCHAR;
2186
2187     case OP_POSUPTO:
2188     possessive = TRUE;
2189     /* Fall through */
2190
2191     case OP_UPTO:
2192     case OP_MINUPTO:
2193     min = 0;
2194     max = GET2(ecode, 1);
2195     minimize = *ecode == OP_MINUPTO;
2196     ecode += 3;
2197     goto REPEATCHAR;
2198
2199     case OP_POSSTAR:
2200     possessive = TRUE;
2201     min = 0;
2202     max = INT_MAX;
2203     ecode++;
2204     goto REPEATCHAR;
2205
2206     case OP_POSPLUS:
2207     possessive = TRUE;
2208     min = 1;
2209     max = INT_MAX;
2210     ecode++;
2211     goto REPEATCHAR;
2212
2213     case OP_POSQUERY:
2214     possessive = TRUE;
2215     min = 0;
2216     max = 1;
2217     ecode++;
2218     goto REPEATCHAR;
2219
2220     case OP_STAR:
2221     case OP_MINSTAR:
2222     case OP_PLUS:
2223     case OP_MINPLUS:
2224     case OP_QUERY:
2225     case OP_MINQUERY:
2226     c = *ecode++ - OP_STAR;
2227     minimize = (c & 1) != 0;
2228     min = rep_min[c];                 /* Pick up values from tables; */
2229     max = rep_max[c];                 /* zero for max => infinity */
2230     if (max == 0) max = INT_MAX;
2231
2232     /* Common code for all repeated single-character matches. We can give
2233     up quickly if there are fewer than the minimum number of characters left in
2234     the subject. */
2235
2236     REPEATCHAR:
2237 #ifdef SUPPORT_UTF8
2238     if (utf8)
2239       {
2240       length = 1;
2241       charptr = ecode;
2242       GETCHARLEN(fc, ecode, length);
2243       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2244       ecode += length;
2245
2246       /* Handle multibyte character matching specially here. There is
2247       support for caseless matching if UCP support is present. */
2248
2249       if (length > 1)
2250         {
2251 #ifdef SUPPORT_UCP
2252         unsigned int othercase;
2253         if ((ims & PCRE_CASELESS) != 0 &&
2254             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2255           oclength = _pcre_ord2utf8(othercase, occhars);
2256         else oclength = 0;
2257 #endif  /* SUPPORT_UCP */
2258
2259         for (i = 1; i <= min; i++)
2260           {
2261           if (memcmp(eptr, charptr, length) == 0) eptr += length;
2262 #ifdef SUPPORT_UCP
2263           /* Need braces because of following else */
2264           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2265           else
2266             {
2267             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2268             eptr += oclength;
2269             }
2270 #else   /* without SUPPORT_UCP */
2271           else { RRETURN(MATCH_NOMATCH); }
2272 #endif  /* SUPPORT_UCP */
2273           }
2274
2275         if (min == max) continue;
2276
2277         if (minimize)
2278           {
2279           for (fi = min;; fi++)
2280             {
2281             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2282             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2283             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2284             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2285 #ifdef SUPPORT_UCP
2286             /* Need braces because of following else */
2287             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2288             else
2289               {
2290               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2291               eptr += oclength;
2292               }
2293 #else   /* without SUPPORT_UCP */
2294             else { RRETURN (MATCH_NOMATCH); }
2295 #endif  /* SUPPORT_UCP */
2296             }
2297           /* Control never gets here */
2298           }
2299
2300         else  /* Maximize */
2301           {
2302           pp = eptr;
2303           for (i = min; i < max; i++)
2304             {
2305             if (eptr > md->end_subject - length) break;
2306             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2307 #ifdef SUPPORT_UCP
2308             else if (oclength == 0) break;
2309             else
2310               {
2311               if (memcmp(eptr, occhars, oclength) != 0) break;
2312               eptr += oclength;
2313               }
2314 #else   /* without SUPPORT_UCP */
2315             else break;
2316 #endif  /* SUPPORT_UCP */
2317             }
2318
2319           if (possessive) continue;
2320           for(;;)
2321            {
2322            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2323            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2324            if (eptr == pp) RRETURN(MATCH_NOMATCH);
2325 #ifdef SUPPORT_UCP
2326            eptr--;
2327            BACKCHAR(eptr);
2328 #else   /* without SUPPORT_UCP */
2329            eptr -= length;
2330 #endif  /* SUPPORT_UCP */
2331            }
2332           }
2333         /* Control never gets here */
2334         }
2335
2336       /* If the length of a UTF-8 character is 1, we fall through here, and
2337       obey the code as for non-UTF-8 characters below, though in this case the
2338       value of fc will always be < 128. */
2339       }
2340     else
2341 #endif  /* SUPPORT_UTF8 */
2342
2343     /* When not in UTF-8 mode, load a single-byte character. */
2344       {
2345       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2346       fc = *ecode++;
2347       }
2348
2349     /* The value of fc at this point is always less than 256, though we may or
2350     may not be in UTF-8 mode. The code is duplicated for the caseless and
2351     caseful cases, for speed, since matching characters is likely to be quite
2352     common. First, ensure the minimum number of matches are present. If min =
2353     max, continue at the same level without recursing. Otherwise, if
2354     minimizing, keep trying the rest of the expression and advancing one
2355     matching character if failing, up to the maximum. Alternatively, if
2356     maximizing, find the maximum number of characters and work backwards. */
2357
2358     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2359       max, eptr));
2360
2361     if ((ims & PCRE_CASELESS) != 0)
2362       {
2363       fc = md->lcc[fc];
2364       for (i = 1; i <= min; i++)
2365         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2366       if (min == max) continue;
2367       if (minimize)
2368         {
2369         for (fi = min;; fi++)
2370           {
2371           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2372           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2373           if (fi >= max || eptr >= md->end_subject ||
2374               fc != md->lcc[*eptr++])
2375             RRETURN(MATCH_NOMATCH);
2376           }
2377         /* Control never gets here */
2378         }
2379       else  /* Maximize */
2380         {
2381         pp = eptr;
2382         for (i = min; i < max; i++)
2383           {
2384           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2385           eptr++;
2386           }
2387         if (possessive) continue;
2388         while (eptr >= pp)
2389           {
2390           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2391           eptr--;
2392           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2393           }
2394         RRETURN(MATCH_NOMATCH);
2395         }
2396       /* Control never gets here */
2397       }
2398
2399     /* Caseful comparisons (multi-byte characters were handled above) */
2400
2401     else
2402       {
2403       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2404       if (min == max) continue;
2405       if (minimize)
2406         {
2407         for (fi = min;; fi++)
2408           {
2409           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2410           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2411           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2412             RRETURN(MATCH_NOMATCH);
2413           }
2414         /* Control never gets here */
2415         }
2416       else  /* Maximize */
2417         {
2418         pp = eptr;
2419         for (i = min; i < max; i++)
2420           {
2421           if (eptr >= md->end_subject || fc != *eptr) break;
2422           eptr++;
2423           }
2424         if (possessive) continue;
2425         while (eptr >= pp)
2426           {
2427           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2428           eptr--;
2429           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2430           }
2431         RRETURN(MATCH_NOMATCH);
2432         }
2433       }
2434     /* Control never gets here */
2435
2436     /* Match a negated single one-byte character. The subject character we
2437     are checking against it can be multibyte. */
2438
2439     case OP_NOT:
2440     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2441     ecode++;
2442     GETCHARINCTEST(c, eptr);
2443     if ((ims & PCRE_CASELESS) != 0)
2444       {
2445 #ifdef SUPPORT_UTF8
2446       if (c < 256)
2447 #endif
2448       c = md->lcc[c];
2449       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2450       }
2451     else
2452       {
2453       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2454       }
2455     break;
2456
2457     /* Match a negated single one-byte character repeatedly. This is almost a
2458     repeat of the code for a repeated single character, but I haven't found a
2459     nice way of commoning these up that doesn't require a test of the
2460     positive/negative option for each character match. Maybe that wouldn't add
2461     very much to the time taken, but character matching *is* what this is all
2462     about... */
2463
2464     case OP_NOTEXACT:
2465     min = max = GET2(ecode, 1);
2466     ecode += 3;
2467     goto REPEATNOTCHAR;
2468
2469     case OP_NOTUPTO:
2470     case OP_NOTMINUPTO:
2471     min = 0;
2472     max = GET2(ecode, 1);
2473     minimize = *ecode == OP_NOTMINUPTO;
2474     ecode += 3;
2475     goto REPEATNOTCHAR;
2476
2477     case OP_NOTPOSSTAR:
2478     possessive = TRUE;
2479     min = 0;
2480     max = INT_MAX;
2481     ecode++;
2482     goto REPEATNOTCHAR;
2483
2484     case OP_NOTPOSPLUS:
2485     possessive = TRUE;
2486     min = 1;
2487     max = INT_MAX;
2488     ecode++;
2489     goto REPEATNOTCHAR;
2490
2491     case OP_NOTPOSQUERY:
2492     possessive = TRUE;
2493     min = 0;
2494     max = 1;
2495     ecode++;
2496     goto REPEATNOTCHAR;
2497
2498     case OP_NOTPOSUPTO:
2499     possessive = TRUE;
2500     min = 0;
2501     max = GET2(ecode, 1);
2502     ecode += 3;
2503     goto REPEATNOTCHAR;
2504
2505     case OP_NOTSTAR:
2506     case OP_NOTMINSTAR:
2507     case OP_NOTPLUS:
2508     case OP_NOTMINPLUS:
2509     case OP_NOTQUERY:
2510     case OP_NOTMINQUERY:
2511     c = *ecode++ - OP_NOTSTAR;
2512     minimize = (c & 1) != 0;
2513     min = rep_min[c];                 /* Pick up values from tables; */
2514     max = rep_max[c];                 /* zero for max => infinity */
2515     if (max == 0) max = INT_MAX;
2516
2517     /* Common code for all repeated negated single-byte matches. We can give
2518     up quickly if there are fewer than the minimum number of bytes left in
2519     the subject. */
2520
2521     REPEATNOTCHAR:
2522     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2523     fc = *ecode++;
2524
2525     /* The code is duplicated for the caseless and caseful cases, for speed,
2526     since matching characters is likely to be quite common. First, ensure the
2527     minimum number of matches are present. If min = max, continue at the same
2528     level without recursing. Otherwise, if minimizing, keep trying the rest of
2529     the expression and advancing one matching character if failing, up to the
2530     maximum. Alternatively, if maximizing, find the maximum number of
2531     characters and work backwards. */
2532
2533     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2534       max, eptr));
2535
2536     if ((ims & PCRE_CASELESS) != 0)
2537       {
2538       fc = md->lcc[fc];
2539
2540 #ifdef SUPPORT_UTF8
2541       /* UTF-8 mode */
2542       if (utf8)
2543         {
2544         register unsigned int d;
2545         for (i = 1; i <= min; i++)
2546           {
2547           GETCHARINC(d, eptr);
2548           if (d < 256) d = md->lcc[d];
2549           if (fc == d) RRETURN(MATCH_NOMATCH);
2550           }
2551         }
2552       else
2553 #endif
2554
2555       /* Not UTF-8 mode */
2556         {
2557         for (i = 1; i <= min; i++)
2558           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2559         }
2560
2561       if (min == max) continue;
2562
2563       if (minimize)
2564         {
2565 #ifdef SUPPORT_UTF8
2566         /* UTF-8 mode */
2567         if (utf8)
2568           {
2569           register unsigned int d;
2570           for (fi = min;; fi++)
2571             {
2572             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2573             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574             GETCHARINC(d, eptr);
2575             if (d < 256) d = md->lcc[d];
2576             if (fi >= max || eptr >= md->end_subject || fc == d)
2577               RRETURN(MATCH_NOMATCH);
2578             }
2579           }
2580         else
2581 #endif
2582         /* Not UTF-8 mode */
2583           {
2584           for (fi = min;; fi++)
2585             {
2586             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2587             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2588             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2589               RRETURN(MATCH_NOMATCH);
2590             }
2591           }
2592         /* Control never gets here */
2593         }
2594
2595       /* Maximize case */
2596
2597       else
2598         {
2599         pp = eptr;
2600
2601 #ifdef SUPPORT_UTF8
2602         /* UTF-8 mode */
2603         if (utf8)
2604           {
2605           register unsigned int d;
2606           for (i = min; i < max; i++)
2607             {
2608             int len = 1;
2609             if (eptr >= md->end_subject) break;
2610             GETCHARLEN(d, eptr, len);
2611             if (d < 256) d = md->lcc[d];
2612             if (fc == d) break;
2613             eptr += len;
2614             }
2615         if (possessive) continue;
2616         for(;;)
2617             {
2618             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2619             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2620             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2621             BACKCHAR(eptr);
2622             }
2623           }
2624         else
2625 #endif
2626         /* Not UTF-8 mode */
2627           {
2628           for (i = min; i < max; i++)
2629             {
2630             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2631             eptr++;
2632             }
2633           if (possessive) continue;
2634           while (eptr >= pp)
2635             {
2636             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2637             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2638             eptr--;
2639             }
2640           }
2641
2642         RRETURN(MATCH_NOMATCH);
2643         }
2644       /* Control never gets here */
2645       }
2646
2647     /* Caseful comparisons */
2648
2649     else
2650       {
2651 #ifdef SUPPORT_UTF8
2652       /* UTF-8 mode */
2653       if (utf8)
2654         {
2655         register unsigned int d;
2656         for (i = 1; i <= min; i++)
2657           {
2658           GETCHARINC(d, eptr);
2659           if (fc == d) RRETURN(MATCH_NOMATCH);
2660           }
2661         }
2662       else
2663 #endif
2664       /* Not UTF-8 mode */
2665         {
2666         for (i = 1; i <= min; i++)
2667           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2668         }
2669
2670       if (min == max) continue;
2671
2672       if (minimize)
2673         {
2674 #ifdef SUPPORT_UTF8
2675         /* UTF-8 mode */
2676         if (utf8)
2677           {
2678           register unsigned int d;
2679           for (fi = min;; fi++)
2680             {
2681             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2682             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2683             GETCHARINC(d, eptr);
2684             if (fi >= max || eptr >= md->end_subject || fc == d)
2685               RRETURN(MATCH_NOMATCH);
2686             }
2687           }
2688         else
2689 #endif
2690         /* Not UTF-8 mode */
2691           {
2692           for (fi = min;; fi++)
2693             {
2694             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2695             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2696             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2697               RRETURN(MATCH_NOMATCH);
2698             }
2699           }
2700         /* Control never gets here */
2701         }
2702
2703       /* Maximize case */
2704
2705       else
2706         {
2707         pp = eptr;
2708
2709 #ifdef SUPPORT_UTF8
2710         /* UTF-8 mode */
2711         if (utf8)
2712           {
2713           register unsigned int d;
2714           for (i = min; i < max; i++)
2715             {
2716             int len = 1;
2717             if (eptr >= md->end_subject) break;
2718             GETCHARLEN(d, eptr, len);
2719             if (fc == d) break;
2720             eptr += len;
2721             }
2722           if (possessive) continue;
2723           for(;;)
2724             {
2725             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2726             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2727             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2728             BACKCHAR(eptr);
2729             }
2730           }
2731         else
2732 #endif
2733         /* Not UTF-8 mode */
2734           {
2735           for (i = min; i < max; i++)
2736             {
2737             if (eptr >= md->end_subject || fc == *eptr) break;
2738             eptr++;
2739             }
2740           if (possessive) continue;
2741           while (eptr >= pp)
2742             {
2743             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2744             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2745             eptr--;
2746             }
2747           }
2748
2749         RRETURN(MATCH_NOMATCH);
2750         }
2751       }
2752     /* Control never gets here */
2753
2754     /* Match a single character type repeatedly; several different opcodes
2755     share code. This is very similar to the code for single characters, but we
2756     repeat it in the interests of efficiency. */
2757
2758     case OP_TYPEEXACT:
2759     min = max = GET2(ecode, 1);
2760     minimize = TRUE;
2761     ecode += 3;
2762     goto REPEATTYPE;
2763
2764     case OP_TYPEUPTO:
2765     case OP_TYPEMINUPTO:
2766     min = 0;
2767     max = GET2(ecode, 1);
2768     minimize = *ecode == OP_TYPEMINUPTO;
2769     ecode += 3;
2770     goto REPEATTYPE;
2771
2772     case OP_TYPEPOSSTAR:
2773     possessive = TRUE;
2774     min = 0;
2775     max = INT_MAX;
2776     ecode++;
2777     goto REPEATTYPE;
2778
2779     case OP_TYPEPOSPLUS:
2780     possessive = TRUE;
2781     min = 1;
2782     max = INT_MAX;
2783     ecode++;
2784     goto REPEATTYPE;
2785
2786     case OP_TYPEPOSQUERY:
2787     possessive = TRUE;
2788     min = 0;
2789     max = 1;
2790     ecode++;
2791     goto REPEATTYPE;
2792
2793     case OP_TYPEPOSUPTO:
2794     possessive = TRUE;
2795     min = 0;
2796     max = GET2(ecode, 1);
2797     ecode += 3;
2798     goto REPEATTYPE;
2799
2800     case OP_TYPESTAR:
2801     case OP_TYPEMINSTAR:
2802     case OP_TYPEPLUS:
2803     case OP_TYPEMINPLUS:
2804     case OP_TYPEQUERY:
2805     case OP_TYPEMINQUERY:
2806     c = *ecode++ - OP_TYPESTAR;
2807     minimize = (c & 1) != 0;
2808     min = rep_min[c];                 /* Pick up values from tables; */
2809     max = rep_max[c];                 /* zero for max => infinity */
2810     if (max == 0) max = INT_MAX;
2811
2812     /* Common code for all repeated single character type matches. Note that
2813     in UTF-8 mode, '.' matches a character of any length, but for the other
2814     character types, the valid characters are all one-byte long. */
2815
2816     REPEATTYPE:
2817     ctype = *ecode++;      /* Code for the character type */
2818
2819 #ifdef SUPPORT_UCP
2820     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2821       {
2822       prop_fail_result = ctype == OP_NOTPROP;
2823       prop_type = *ecode++;
2824       prop_value = *ecode++;
2825       }
2826     else prop_type = -1;
2827 #endif
2828
2829     /* First, ensure the minimum number of matches are present. Use inline
2830     code for maximizing the speed, and do the type test once at the start
2831     (i.e. keep it out of the loop). Also we can test that there are at least
2832     the minimum number of bytes before we start. This isn't as effective in
2833     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2834     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2835     and single-bytes. */
2836
2837     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2838     if (min > 0)
2839       {
2840 #ifdef SUPPORT_UCP
2841       if (prop_type >= 0)
2842         {
2843         switch(prop_type)
2844           {
2845           case PT_ANY:
2846           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2847           for (i = 1; i <= min; i++)
2848             {
2849             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2850             GETCHARINCTEST(c, eptr);
2851             }
2852           break;
2853
2854           case PT_LAMP:
2855           for (i = 1; i <= min; i++)
2856             {
2857             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2858             GETCHARINCTEST(c, eptr);
2859             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2860             if ((prop_chartype == ucp_Lu ||
2861                  prop_chartype == ucp_Ll ||
2862                  prop_chartype == ucp_Lt) == prop_fail_result)
2863               RRETURN(MATCH_NOMATCH);
2864             }
2865           break;
2866
2867           case PT_GC:
2868           for (i = 1; i <= min; i++)
2869             {
2870             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2871             GETCHARINCTEST(c, eptr);
2872             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2873             if ((prop_category == prop_value) == prop_fail_result)
2874               RRETURN(MATCH_NOMATCH);
2875             }
2876           break;
2877
2878           case PT_PC:
2879           for (i = 1; i <= min; i++)
2880             {
2881             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2882             GETCHARINCTEST(c, eptr);
2883             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2884             if ((prop_chartype == prop_value) == prop_fail_result)
2885               RRETURN(MATCH_NOMATCH);
2886             }
2887           break;
2888
2889           case PT_SC:
2890           for (i = 1; i <= min; i++)
2891             {
2892             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2893             GETCHARINCTEST(c, eptr);
2894             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2895             if ((prop_script == prop_value) == prop_fail_result)
2896               RRETURN(MATCH_NOMATCH);
2897             }
2898           break;
2899
2900           default:
2901           RRETURN(PCRE_ERROR_INTERNAL);
2902           }
2903         }
2904
2905       /* Match extended Unicode sequences. We will get here only if the
2906       support is in the binary; otherwise a compile-time error occurs. */
2907
2908       else if (ctype == OP_EXTUNI)
2909         {
2910         for (i = 1; i <= min; i++)
2911           {
2912           GETCHARINCTEST(c, eptr);
2913           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2914           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2915           while (eptr < md->end_subject)
2916             {
2917             int len = 1;
2918             if (!utf8) c = *eptr; else
2919               {
2920               GETCHARLEN(c, eptr, len);
2921               }
2922             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2923             if (prop_category != ucp_M) break;
2924             eptr += len;
2925             }
2926           }
2927         }
2928
2929       else
2930 #endif     /* SUPPORT_UCP */
2931
2932 /* Handle all other cases when the coding is UTF-8 */
2933
2934 #ifdef SUPPORT_UTF8
2935       if (utf8) switch(ctype)
2936         {
2937         case OP_ANY:
2938         for (i = 1; i <= min; i++)
2939           {
2940           if (eptr >= md->end_subject ||
2941                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2942             RRETURN(MATCH_NOMATCH);
2943           eptr++;
2944           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2945           }
2946         break;
2947
2948         case OP_ANYBYTE:
2949         eptr += min;
2950         break;
2951
2952         case OP_ANYNL:
2953         for (i = 1; i <= min; i++)
2954           {
2955           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2956           GETCHARINC(c, eptr);
2957           switch(c)
2958             {
2959             default: RRETURN(MATCH_NOMATCH);
2960             case 0x000d:
2961             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2962             break;
2963
2964             case 0x000a:
2965             break;
2966
2967             case 0x000b:
2968             case 0x000c:
2969             case 0x0085:
2970             case 0x2028:
2971             case 0x2029:
2972             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2973             break;
2974             }
2975           }
2976         break;
2977
2978         case OP_NOT_HSPACE:
2979         for (i = 1; i <= min; i++)
2980           {
2981           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2982           GETCHARINC(c, eptr);
2983           switch(c)
2984             {
2985             default: break;
2986             case 0x09:      /* HT */
2987             case 0x20:      /* SPACE */
2988             case 0xa0:      /* NBSP */
2989             case 0x1680:    /* OGHAM SPACE MARK */
2990             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2991             case 0x2000:    /* EN QUAD */
2992             case 0x2001:    /* EM QUAD */
2993             case 0x2002:    /* EN SPACE */
2994             case 0x2003:    /* EM SPACE */
2995             case 0x2004:    /* THREE-PER-EM SPACE */
2996             case 0x2005:    /* FOUR-PER-EM SPACE */
2997             case 0x2006:    /* SIX-PER-EM SPACE */
2998             case 0x2007:    /* FIGURE SPACE */
2999             case 0x2008:    /* PUNCTUATION SPACE */
3000             case 0x2009:    /* THIN SPACE */
3001             case 0x200A:    /* HAIR SPACE */
3002             case 0x202f:    /* NARROW NO-BREAK SPACE */
3003             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3004             case 0x3000:    /* IDEOGRAPHIC SPACE */
3005             RRETURN(MATCH_NOMATCH);
3006             }
3007           }
3008         break;
3009
3010         case OP_HSPACE:
3011         for (i = 1; i <= min; i++)
3012           {
3013           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3014           GETCHARINC(c, eptr);
3015           switch(c)
3016             {
3017             default: RRETURN(MATCH_NOMATCH);
3018             case 0x09:      /* HT */
3019             case 0x20:      /* SPACE */
3020             case 0xa0:      /* NBSP */
3021             case 0x1680:    /* OGHAM SPACE MARK */
3022             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3023             case 0x2000:    /* EN QUAD */
3024             case 0x2001:    /* EM QUAD */
3025             case 0x2002:    /* EN SPACE */
3026             case 0x2003:    /* EM SPACE */
3027             case 0x2004:    /* THREE-PER-EM SPACE */
3028             case 0x2005:    /* FOUR-PER-EM SPACE */
3029             case 0x2006:    /* SIX-PER-EM SPACE */
3030             case 0x2007:    /* FIGURE SPACE */
3031             case 0x2008:    /* PUNCTUATION SPACE */
3032             case 0x2009:    /* THIN SPACE */
3033             case 0x200A:    /* HAIR SPACE */
3034             case 0x202f:    /* NARROW NO-BREAK SPACE */
3035             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3036             case 0x3000:    /* IDEOGRAPHIC SPACE */
3037             break;
3038             }
3039           }
3040         break;
3041
3042         case OP_NOT_VSPACE:
3043         for (i = 1; i <= min; i++)
3044           {
3045           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3046           GETCHARINC(c, eptr);
3047           switch(c)
3048             {
3049             default: break;
3050             case 0x0a:      /* LF */
3051             case 0x0b:      /* VT */
3052             case 0x0c:      /* FF */
3053             case 0x0d:      /* CR */
3054             case 0x85:      /* NEL */
3055             case 0x2028:    /* LINE SEPARATOR */
3056             case 0x2029:    /* PARAGRAPH SEPARATOR */
3057             RRETURN(MATCH_NOMATCH);
3058             }
3059           }
3060         break;
3061
3062         case OP_VSPACE:
3063         for (i = 1; i <= min; i++)
3064           {
3065           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3066           GETCHARINC(c, eptr);
3067           switch(c)
3068             {
3069             default: RRETURN(MATCH_NOMATCH);
3070             case 0x0a:      /* LF */
3071             case 0x0b:      /* VT */
3072             case 0x0c:      /* FF */
3073             case 0x0d:      /* CR */
3074             case 0x85:      /* NEL */
3075             case 0x2028:    /* LINE SEPARATOR */
3076             case 0x2029:    /* PARAGRAPH SEPARATOR */
3077             break;
3078             }
3079           }
3080         break;
3081
3082         case OP_NOT_DIGIT:
3083         for (i = 1; i <= min; i++)
3084           {
3085           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3086           GETCHARINC(c, eptr);
3087           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3088             RRETURN(MATCH_NOMATCH);
3089           }
3090         break;
3091
3092         case OP_DIGIT:
3093         for (i = 1; i <= min; i++)
3094           {
3095           if (eptr >= md->end_subject ||
3096              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3097             RRETURN(MATCH_NOMATCH);
3098           /* No need to skip more bytes - we know it's a 1-byte character */
3099           }
3100         break;
3101
3102         case OP_NOT_WHITESPACE:
3103         for (i = 1; i <= min; i++)
3104           {
3105           if (eptr >= md->end_subject ||
3106              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3107             RRETURN(MATCH_NOMATCH);
3108           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3109           }
3110         break;
3111
3112         case OP_WHITESPACE:
3113         for (i = 1; i <= min; i++)
3114           {
3115           if (eptr >= md->end_subject ||
3116              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3117             RRETURN(MATCH_NOMATCH);
3118           /* No need to skip more bytes - we know it's a 1-byte character */
3119           }
3120         break;
3121
3122         case OP_NOT_WORDCHAR:
3123         for (i = 1; i <= min; i++)
3124           {
3125           if (eptr >= md->end_subject ||
3126              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3127             RRETURN(MATCH_NOMATCH);
3128           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3129           }
3130         break;
3131
3132         case OP_WORDCHAR:
3133         for (i = 1; i <= min; i++)
3134           {
3135           if (eptr >= md->end_subject ||
3136              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3137             RRETURN(MATCH_NOMATCH);
3138           /* No need to skip more bytes - we know it's a 1-byte character */
3139           }
3140         break;
3141
3142         default:
3143         RRETURN(PCRE_ERROR_INTERNAL);
3144         }  /* End switch(ctype) */
3145
3146       else
3147 #endif     /* SUPPORT_UTF8 */
3148
3149       /* Code for the non-UTF-8 case for minimum matching of operators other
3150       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3151       number of bytes present, as this was tested above. */
3152
3153       switch(ctype)
3154         {
3155         case OP_ANY:
3156         if ((ims & PCRE_DOTALL) == 0)
3157           {
3158           for (i = 1; i <= min; i++)
3159             {
3160             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3161             eptr++;
3162             }
3163           }
3164         else eptr += min;
3165         break;
3166
3167         case OP_ANYBYTE:
3168         eptr += min;
3169         break;
3170
3171         /* Because of the CRLF case, we can't assume the minimum number of
3172         bytes are present in this case. */
3173
3174         case OP_ANYNL:
3175         for (i = 1; i <= min; i++)
3176           {
3177           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3178           switch(*eptr++)
3179             {
3180             default: RRETURN(MATCH_NOMATCH);
3181             case 0x000d:
3182             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3183             break;
3184             case 0x000a:
3185             break;
3186
3187             case 0x000b:
3188             case 0x000c:
3189             case 0x0085:
3190             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3191             break;
3192             }
3193           }
3194         break;
3195
3196         case OP_NOT_HSPACE:
3197         for (i = 1; i <= min; i++)
3198           {
3199           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3200           switch(*eptr++)
3201             {
3202             default: break;
3203             case 0x09:      /* HT */
3204             case 0x20:      /* SPACE */
3205             case 0xa0:      /* NBSP */
3206             RRETURN(MATCH_NOMATCH);
3207             }
3208           }
3209         break;
3210
3211         case OP_HSPACE:
3212         for (i = 1; i <= min; i++)
3213           {
3214           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3215           switch(*eptr++)
3216             {
3217             default: RRETURN(MATCH_NOMATCH);
3218             case 0x09:      /* HT */
3219             case 0x20:      /* SPACE */
3220             case 0xa0:      /* NBSP */
3221             break;
3222             }
3223           }
3224         break;
3225
3226         case OP_NOT_VSPACE:
3227         for (i = 1; i <= min; i++)
3228           {
3229           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3230           switch(*eptr++)
3231             {
3232             default: break;
3233             case 0x0a:      /* LF */
3234             case 0x0b:      /* VT */
3235             case 0x0c:      /* FF */
3236             case 0x0d:      /* CR */
3237             case 0x85:      /* NEL */
3238             RRETURN(MATCH_NOMATCH);
3239             }
3240           }
3241         break;
3242
3243         case OP_VSPACE:
3244         for (i = 1; i <= min; i++)
3245           {
3246           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3247           switch(*eptr++)
3248             {
3249             default: RRETURN(MATCH_NOMATCH);
3250             case 0x0a:      /* LF */
3251             case 0x0b:      /* VT */
3252             case 0x0c:      /* FF */
3253             case 0x0d:      /* CR */
3254             case 0x85:      /* NEL */
3255             break;
3256             }
3257           }
3258         break;
3259
3260         case OP_NOT_DIGIT:
3261         for (i = 1; i <= min; i++)
3262           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3263         break;
3264
3265         case OP_DIGIT:
3266         for (i = 1; i <= min; i++)
3267           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3268         break;
3269
3270         case OP_NOT_WHITESPACE:
3271         for (i = 1; i <= min; i++)
3272           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3273         break;
3274
3275         case OP_WHITESPACE:
3276         for (i = 1; i <= min; i++)
3277           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3278         break;
3279
3280         case OP_NOT_WORDCHAR:
3281         for (i = 1; i <= min; i++)
3282           if ((md->ctypes[*eptr++] & ctype_word) != 0)
3283             RRETURN(MATCH_NOMATCH);
3284         break;
3285
3286         case OP_WORDCHAR:
3287         for (i = 1; i <= min; i++)
3288           if ((md->ctypes[*eptr++] & ctype_word) == 0)
3289             RRETURN(MATCH_NOMATCH);
3290         break;
3291
3292         default:
3293         RRETURN(PCRE_ERROR_INTERNAL);
3294         }
3295       }
3296
3297     /* If min = max, continue at the same level without recursing */
3298
3299     if (min == max) continue;
3300
3301     /* If minimizing, we have to test the rest of the pattern before each
3302     subsequent match. Again, separate the UTF-8 case for speed, and also
3303     separate the UCP cases. */
3304
3305     if (minimize)
3306       {
3307 #ifdef SUPPORT_UCP
3308       if (prop_type >= 0)
3309         {
3310         switch(prop_type)
3311           {
3312           case PT_ANY:
3313           for (fi = min;; fi++)
3314             {
3315             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3316             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3317             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3318             GETCHARINC(c, eptr);
3319             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3320             }
3321           /* Control never gets here */
3322
3323           case PT_LAMP:
3324           for (fi = min;; fi++)
3325             {
3326             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3327             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3328             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3329             GETCHARINC(c, eptr);
3330             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3331             if ((prop_chartype == ucp_Lu ||
3332                  prop_chartype == ucp_Ll ||
3333                  prop_chartype == ucp_Lt) == prop_fail_result)
3334               RRETURN(MATCH_NOMATCH);
3335             }
3336           /* Control never gets here */
3337
3338           case PT_GC:
3339           for (fi = min;; fi++)
3340             {
3341             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3342             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3343             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3344             GETCHARINC(c, eptr);
3345             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3346             if ((prop_category == prop_value) == prop_fail_result)
3347               RRETURN(MATCH_NOMATCH);
3348             }
3349           /* Control never gets here */
3350
3351           case PT_PC:
3352           for (fi = min;; fi++)
3353             {
3354             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3355             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3356             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3357             GETCHARINC(c, eptr);
3358             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3359             if ((prop_chartype == prop_value) == prop_fail_result)
3360               RRETURN(MATCH_NOMATCH);
3361             }
3362           /* Control never gets here */
3363
3364           case PT_SC:
3365           for (fi = min;; fi++)
3366             {
3367             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3368             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3369             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3370             GETCHARINC(c, eptr);
3371             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3372             if ((prop_script == prop_value) == prop_fail_result)
3373               RRETURN(MATCH_NOMATCH);
3374             }
3375           /* Control never gets here */
3376
3377           default:
3378           RRETURN(PCRE_ERROR_INTERNAL);
3379           }
3380         }
3381
3382       /* Match extended Unicode sequences. We will get here only if the
3383       support is in the binary; otherwise a compile-time error occurs. */
3384
3385       else if (ctype == OP_EXTUNI)
3386         {
3387         for (fi = min;; fi++)
3388           {
3389           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3390           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3391           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3392           GETCHARINCTEST(c, eptr);
3393           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3394           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3395           while (eptr < md->end_subject)
3396             {
3397             int len = 1;
3398             if (!utf8) c = *eptr; else
3399               {
3400               GETCHARLEN(c, eptr, len);
3401               }
3402             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3403             if (prop_category != ucp_M) break;
3404             eptr += len;
3405             }
3406           }
3407         }
3408
3409       else
3410 #endif     /* SUPPORT_UCP */
3411
3412 #ifdef SUPPORT_UTF8
3413       /* UTF-8 mode */
3414       if (utf8)
3415         {
3416         for (fi = min;; fi++)
3417           {
3418           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3419           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3420           if (fi >= max || eptr >= md->end_subject ||
3421                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3422                 IS_NEWLINE(eptr)))
3423             RRETURN(MATCH_NOMATCH);
3424
3425           GETCHARINC(c, eptr);
3426           switch(ctype)
3427             {
3428             case OP_ANY:        /* This is the DOTALL case */
3429             break;
3430
3431             case OP_ANYBYTE:
3432             break;
3433
3434             case OP_ANYNL:
3435             switch(c)
3436               {
3437               default: RRETURN(MATCH_NOMATCH);
3438               case 0x000d:
3439               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3440               break;
3441               case 0x000a:
3442               break;
3443
3444               case 0x000b:
3445               case 0x000c:
3446               case 0x0085:
3447               case 0x2028:
3448               case 0x2029:
3449               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3450               break;
3451               }
3452             break;
3453
3454             case OP_NOT_HSPACE:
3455             switch(c)
3456               {
3457               default: break;
3458               case 0x09:      /* HT */
3459               case 0x20:      /* SPACE */
3460               case 0xa0:      /* NBSP */
3461               case 0x1680:    /* OGHAM SPACE MARK */
3462               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3463               case 0x2000:    /* EN QUAD */
3464               case 0x2001:    /* EM QUAD */
3465               case 0x2002:    /* EN SPACE */
3466               case 0x2003:    /* EM SPACE */
3467               case 0x2004:    /* THREE-PER-EM SPACE */
3468               case 0x2005:    /* FOUR-PER-EM SPACE */
3469               case 0x2006:    /* SIX-PER-EM SPACE */
3470               case 0x2007:    /* FIGURE SPACE */
3471               case 0x2008:    /* PUNCTUATION SPACE */
3472               case 0x2009:    /* THIN SPACE */
3473               case 0x200A:    /* HAIR SPACE */
3474               case 0x202f:    /* NARROW NO-BREAK SPACE */
3475               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3476               case 0x3000:    /* IDEOGRAPHIC SPACE */
3477               RRETURN(MATCH_NOMATCH);
3478               }
3479             break;
3480
3481             case OP_HSPACE:
3482             switch(c)
3483               {
3484               default: RRETURN(MATCH_NOMATCH);
3485               case 0x09:      /* HT */
3486               case 0x20:      /* SPACE */
3487               case 0xa0:      /* NBSP */
3488               case 0x1680:    /* OGHAM SPACE MARK */
3489               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3490               case 0x2000:    /* EN QUAD */
3491               case 0x2001:    /* EM QUAD */
3492               case 0x2002:    /* EN SPACE */
3493               case 0x2003:    /* EM SPACE */
3494               case 0x2004:    /* THREE-PER-EM SPACE */
3495               case 0x2005:    /* FOUR-PER-EM SPACE */
3496               case 0x2006:    /* SIX-PER-EM SPACE */
3497               case 0x2007:    /* FIGURE SPACE */
3498               case 0x2008:    /* PUNCTUATION SPACE */
3499               case 0x2009:    /* THIN SPACE */
3500               case 0x200A:    /* HAIR SPACE */
3501               case 0x202f:    /* NARROW NO-BREAK SPACE */
3502               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3503               case 0x3000:    /* IDEOGRAPHIC SPACE */
3504               break;
3505               }
3506             break;
3507
3508             case OP_NOT_VSPACE:
3509             switch(c)
3510               {
3511               default: break;
3512               case 0x0a:      /* LF */
3513               case 0x0b:      /* VT */
3514               case 0x0c:      /* FF */
3515               case 0x0d:      /* CR */
3516               case 0x85:      /* NEL */
3517               case 0x2028:    /* LINE SEPARATOR */
3518               case 0x2029:    /* PARAGRAPH SEPARATOR */
3519               RRETURN(MATCH_NOMATCH);
3520               }
3521             break;
3522
3523             case OP_VSPACE:
3524             switch(c)
3525               {
3526               default: RRETURN(MATCH_NOMATCH);
3527               case 0x0a:      /* LF */
3528               case 0x0b:      /* VT */
3529               case 0x0c:      /* FF */
3530               case 0x0d:      /* CR */
3531               case 0x85:      /* NEL */
3532               case 0x2028:    /* LINE SEPARATOR */
3533               case 0x2029:    /* PARAGRAPH SEPARATOR */
3534               break;
3535               }
3536             break;
3537
3538             case OP_NOT_DIGIT:
3539             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3540               RRETURN(MATCH_NOMATCH);
3541             break;
3542
3543             case OP_DIGIT:
3544             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3545               RRETURN(MATCH_NOMATCH);
3546             break;
3547
3548             case OP_NOT_WHITESPACE:
3549             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3550               RRETURN(MATCH_NOMATCH);
3551             break;
3552
3553             case OP_WHITESPACE:
3554             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3555               RRETURN(MATCH_NOMATCH);
3556             break;
3557
3558             case OP_NOT_WORDCHAR:
3559             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3560               RRETURN(MATCH_NOMATCH);
3561             break;
3562
3563             case OP_WORDCHAR:
3564             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3565               RRETURN(MATCH_NOMATCH);
3566             break;
3567
3568             default:
3569             RRETURN(PCRE_ERROR_INTERNAL);
3570             }
3571           }
3572         }
3573       else
3574 #endif
3575       /* Not UTF-8 mode */
3576         {
3577         for (fi = min;; fi++)
3578           {
3579           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3580           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3581           if (fi >= max || eptr >= md->end_subject ||
3582                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3583             RRETURN(MATCH_NOMATCH);
3584
3585           c = *eptr++;
3586           switch(ctype)
3587             {
3588             case OP_ANY:   /* This is the DOTALL case */
3589             break;
3590
3591             case OP_ANYBYTE:
3592             break;
3593
3594             case OP_ANYNL:
3595             switch(c)
3596               {
3597               default: RRETURN(MATCH_NOMATCH);
3598               case 0x000d:
3599               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3600               break;
3601
3602               case 0x000a:
3603               break;
3604
3605               case 0x000b:
3606               case 0x000c:
3607               case 0x0085:
3608               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3609               break;
3610               }
3611             break;
3612
3613             case OP_NOT_HSPACE:
3614             switch(c)
3615               {
3616               default: break;
3617               case 0x09:      /* HT */
3618               case 0x20:      /* SPACE */
3619               case 0xa0:      /* NBSP */
3620               RRETURN(MATCH_NOMATCH);
3621               }
3622             break;
3623
3624             case OP_HSPACE:
3625             switch(c)
3626               {
3627               default: RRETURN(MATCH_NOMATCH);
3628               case 0x09:      /* HT */
3629               case 0x20:      /* SPACE */
3630               case 0xa0:      /* NBSP */
3631               break;
3632               }
3633             break;
3634
3635             case OP_NOT_VSPACE:
3636             switch(c)
3637               {
3638               default: break;
3639               case 0x0a:      /* LF */
3640               case 0x0b:      /* VT */
3641               case 0x0c:      /* FF */
3642               case 0x0d:      /* CR */
3643               case 0x85:      /* NEL */
3644               RRETURN(MATCH_NOMATCH);
3645               }
3646             break;
3647
3648             case OP_VSPACE:
3649             switch(c)
3650               {
3651               default: RRETURN(MATCH_NOMATCH);
3652               case 0x0a:      /* LF */
3653               case 0x0b:      /* VT */
3654               case 0x0c:      /* FF */
3655               case 0x0d:      /* CR */
3656               case 0x85:      /* NEL */
3657               break;
3658               }
3659             break;
3660
3661             case OP_NOT_DIGIT:
3662             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3663             break;
3664
3665             case OP_DIGIT:
3666             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3667             break;
3668
3669             case OP_NOT_WHITESPACE:
3670             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3671             break;
3672
3673             case OP_WHITESPACE:
3674             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3675             break;
3676
3677             case OP_NOT_WORDCHAR:
3678             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3679             break;
3680
3681             case OP_WORDCHAR:
3682             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3683             break;
3684
3685             default:
3686             RRETURN(PCRE_ERROR_INTERNAL);
3687             }
3688           }
3689         }
3690       /* Control never gets here */
3691       }
3692
3693     /* If maximizing, it is worth using inline code for speed, doing the type
3694     test once at the start (i.e. keep it out of the loop). Again, keep the
3695     UTF-8 and UCP stuff separate. */
3696
3697     else
3698       {
3699       pp = eptr;  /* Remember where we started */
3700
3701 #ifdef SUPPORT_UCP
3702       if (prop_type >= 0)
3703         {
3704         switch(prop_type)
3705           {
3706           case PT_ANY:
3707           for (i = min; i < max; i++)
3708             {
3709             int len = 1;
3710             if (eptr >= md->end_subject) break;
3711             GETCHARLEN(c, eptr, len);
3712             if (prop_fail_result) break;
3713             eptr+= len;
3714             }
3715           break;
3716
3717           case PT_LAMP:
3718           for (i = min; i < max; i++)
3719             {
3720             int len = 1;
3721             if (eptr >= md->end_subject) break;
3722             GETCHARLEN(c, eptr, len);
3723             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3724             if ((prop_chartype == ucp_Lu ||
3725                  prop_chartype == ucp_Ll ||
3726                  prop_chartype == ucp_Lt) == prop_fail_result)
3727               break;
3728             eptr+= len;
3729             }
3730           break;
3731
3732           case PT_GC:
3733           for (i = min; i < max; i++)
3734             {
3735             int len = 1;
3736             if (eptr >= md->end_subject) break;
3737             GETCHARLEN(c, eptr, len);
3738             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3739             if ((prop_category == prop_value) == prop_fail_result)
3740               break;
3741             eptr+= len;
3742             }
3743           break;
3744
3745           case PT_PC:
3746           for (i = min; i < max; i++)
3747             {
3748             int len = 1;
3749             if (eptr >= md->end_subject) break;
3750             GETCHARLEN(c, eptr, len);
3751             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3752             if ((prop_chartype == prop_value) == prop_fail_result)
3753               break;
3754             eptr+= len;
3755             }
3756           break;
3757
3758           case PT_SC:
3759           for (i = min; i < max; i++)
3760             {
3761             int len = 1;
3762             if (eptr >= md->end_subject) break;
3763             GETCHARLEN(c, eptr, len);
3764             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3765             if ((prop_script == prop_value) == prop_fail_result)
3766               break;
3767             eptr+= len;
3768             }
3769           break;
3770           }
3771
3772         /* eptr is now past the end of the maximum run */
3773
3774         if (possessive) continue;
3775         for(;;)
3776           {
3777           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3778           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3779           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3780           if (utf8) BACKCHAR(eptr);
3781           }
3782         }
3783
3784       /* Match extended Unicode sequences. We will get here only if the
3785       support is in the binary; otherwise a compile-time error occurs. */
3786
3787       else if (ctype == OP_EXTUNI)
3788         {
3789         for (i = min; i < max; i++)
3790           {
3791           if (eptr >= md->end_subject) break;
3792           GETCHARINCTEST(c, eptr);
3793           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3794           if (prop_category == ucp_M) break;
3795           while (eptr < md->end_subject)
3796             {
3797             int len = 1;
3798             if (!utf8) c = *eptr; else
3799               {
3800               GETCHARLEN(c, eptr, len);
3801               }
3802             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3803             if (prop_category != ucp_M) break;
3804             eptr += len;
3805             }
3806           }
3807
3808         /* eptr is now past the end of the maximum run */
3809
3810         if (possessive) continue;
3811         for(;;)
3812           {
3813           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3814           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3815           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3816           for (;;)                        /* Move back over one extended */
3817             {
3818             int len = 1;
3819             if (!utf8) c = *eptr; else
3820               {
3821               BACKCHAR(eptr);
3822               GETCHARLEN(c, eptr, len);
3823               }
3824             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3825             if (prop_category != ucp_M) break;
3826             eptr--;
3827             }
3828           }
3829         }
3830
3831       else
3832 #endif   /* SUPPORT_UCP */
3833
3834 #ifdef SUPPORT_UTF8
3835       /* UTF-8 mode */
3836
3837       if (utf8)
3838         {
3839         switch(ctype)
3840           {
3841           case OP_ANY:
3842           if (max < INT_MAX)
3843             {
3844             if ((ims & PCRE_DOTALL) == 0)
3845               {
3846               for (i = min; i < max; i++)
3847                 {
3848                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3849                 eptr++;
3850                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3851                 }
3852               }
3853             else
3854               {
3855               for (i = min; i < max; i++)
3856                 {
3857                 if (eptr >= md->end_subject) break;
3858                 eptr++;
3859                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3860                 }
3861               }
3862             }
3863
3864           /* Handle unlimited UTF-8 repeat */
3865
3866           else
3867             {
3868             if ((ims & PCRE_DOTALL) == 0)
3869               {
3870               for (i = min; i < max; i++)
3871                 {
3872                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3873                 eptr++;
3874                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3875                 }
3876               }
3877             else
3878               {
3879               eptr = md->end_subject;
3880               }
3881             }
3882           break;
3883
3884           /* The byte case is the same as non-UTF8 */
3885
3886           case OP_ANYBYTE:
3887           c = max - min;
3888           if (c > (unsigned int)(md->end_subject - eptr))
3889             c = md->end_subject - eptr;
3890           eptr += c;
3891           break;
3892
3893           case OP_ANYNL:
3894           for (i = min; i < max; i++)
3895             {
3896             int len = 1;
3897             if (eptr >= md->end_subject) break;
3898             GETCHARLEN(c, eptr, len);
3899             if (c == 0x000d)
3900               {
3901               if (++eptr >= md->end_subject) break;
3902               if (*eptr == 0x000a) eptr++;
3903               }
3904             else
3905               {
3906               if (c != 0x000a &&
3907                   (md->bsr_anycrlf ||
3908                    (c != 0x000b && c != 0x000c &&
3909                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
3910                 break;
3911               eptr += len;
3912               }
3913             }
3914           break;
3915
3916           case OP_NOT_HSPACE:
3917           case OP_HSPACE:
3918           for (i = min; i < max; i++)
3919             {
3920             BOOL gotspace;
3921             int len = 1;
3922             if (eptr >= md->end_subject) break;
3923             GETCHARLEN(c, eptr, len);
3924             switch(c)
3925               {
3926               default: gotspace = FALSE; break;
3927               case 0x09:      /* HT */
3928               case 0x20:      /* SPACE */
3929               case 0xa0:      /* NBSP */
3930               case 0x1680:    /* OGHAM SPACE MARK */
3931               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3932               case 0x2000:    /* EN QUAD */
3933               case 0x2001:    /* EM QUAD */
3934               case 0x2002:    /* EN SPACE */
3935               case 0x2003:    /* EM SPACE */
3936               case 0x2004:    /* THREE-PER-EM SPACE */
3937               case 0x2005:    /* FOUR-PER-EM SPACE */
3938               case 0x2006:    /* SIX-PER-EM SPACE */
3939               case 0x2007:    /* FIGURE SPACE */
3940               case 0x2008:    /* PUNCTUATION SPACE */
3941               case 0x2009:    /* THIN SPACE */
3942               case 0x200A:    /* HAIR SPACE */
3943               case 0x202f:    /* NARROW NO-BREAK SPACE */
3944               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3945               case 0x3000:    /* IDEOGRAPHIC SPACE */
3946               gotspace = TRUE;
3947               break;
3948               }
3949             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3950             eptr += len;
3951             }
3952           break;
3953
3954           case OP_NOT_VSPACE:
3955           case OP_VSPACE:
3956           for (i = min; i < max; i++)
3957             {
3958             BOOL gotspace;
3959             int len = 1;
3960             if (eptr >= md->end_subject) break;
3961             GETCHARLEN(c, eptr, len);
3962             switch(c)
3963               {
3964               default: gotspace = FALSE; break;
3965               case 0x0a:      /* LF */
3966               case 0x0b:      /* VT */
3967               case 0x0c:      /* FF */
3968               case 0x0d:      /* CR */
3969               case 0x85:      /* NEL */
3970               case 0x2028:    /* LINE SEPARATOR */
3971               case 0x2029:    /* PARAGRAPH SEPARATOR */
3972               gotspace = TRUE;
3973               break;
3974               }
3975             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3976             eptr += len;
3977             }
3978           break;
3979
3980           case OP_NOT_DIGIT:
3981           for (i = min; i < max; i++)
3982             {
3983             int len = 1;
3984             if (eptr >= md->end_subject) break;
3985             GETCHARLEN(c, eptr, len);
3986             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3987             eptr+= len;
3988             }
3989           break;
3990
3991           case OP_DIGIT:
3992           for (i = min; i < max; i++)
3993             {
3994             int len = 1;
3995             if (eptr >= md->end_subject) break;
3996             GETCHARLEN(c, eptr, len);
3997             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3998             eptr+= len;
3999             }
4000           break;
4001
4002           case OP_NOT_WHITESPACE:
4003           for (i = min; i < max; i++)
4004             {
4005             int len = 1;
4006             if (eptr >= md->end_subject) break;
4007             GETCHARLEN(c, eptr, len);
4008             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4009             eptr+= len;
4010             }
4011           break;
4012
4013           case OP_WHITESPACE:
4014           for (i = min; i < max; i++)
4015             {
4016             int len = 1;
4017             if (eptr >= md->end_subject) break;
4018             GETCHARLEN(c, eptr, len);
4019             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4020             eptr+= len;
4021             }
4022           break;
4023
4024           case OP_NOT_WORDCHAR:
4025           for (i = min; i < max; i++)
4026             {
4027             int len = 1;
4028             if (eptr >= md->end_subject) break;
4029             GETCHARLEN(c, eptr, len);
4030             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4031             eptr+= len;
4032             }
4033           break;
4034
4035           case OP_WORDCHAR:
4036           for (i = min; i < max; i++)
4037             {
4038             int len = 1;
4039             if (eptr >= md->end_subject) break;
4040             GETCHARLEN(c, eptr, len);
4041             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4042             eptr+= len;
4043             }
4044           break;
4045
4046           default:
4047           RRETURN(PCRE_ERROR_INTERNAL);
4048           }
4049
4050         /* eptr is now past the end of the maximum run */
4051
4052         if (possessive) continue;
4053         for(;;)
4054           {
4055           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4056           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4057           if (eptr-- == pp) break;        /* Stop if tried at original pos */
4058           BACKCHAR(eptr);
4059           }
4060         }
4061       else
4062 #endif  /* SUPPORT_UTF8 */
4063
4064       /* Not UTF-8 mode */
4065         {
4066         switch(ctype)
4067           {
4068           case OP_ANY:
4069           if ((ims & PCRE_DOTALL) == 0)
4070             {
4071             for (i = min; i < max; i++)
4072               {
4073               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4074               eptr++;
4075               }
4076             break;
4077             }
4078           /* For DOTALL case, fall through and treat as \C */
4079
4080           case OP_ANYBYTE:
4081           c = max - min;
4082           if (c > (unsigned int)(md->end_subject - eptr))
4083             c = md->end_subject - eptr;
4084           eptr += c;
4085           break;
4086
4087           case OP_ANYNL:
4088           for (i = min; i < max; i++)
4089             {
4090             if (eptr >= md->end_subject) break;
4091             c = *eptr;
4092             if (c == 0x000d)
4093               {
4094               if (++eptr >= md->end_subject) break;
4095               if (*eptr == 0x000a) eptr++;
4096               }
4097             else
4098               {
4099               if (c != 0x000a &&
4100                   (md->bsr_anycrlf ||
4101                     (c != 0x000b && c != 0x000c && c != 0x0085)))
4102                 break;
4103               eptr++;
4104               }
4105             }
4106           break;
4107
4108           case OP_NOT_HSPACE:
4109           for (i = min; i < max; i++)
4110             {
4111             if (eptr >= md->end_subject) break;
4112             c = *eptr;
4113             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4114             eptr++;
4115             }
4116           break;
4117
4118           case OP_HSPACE:
4119           for (i = min; i < max; i++)
4120             {
4121             if (eptr >= md->end_subject) break;
4122             c = *eptr;
4123             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4124             eptr++;
4125             }
4126           break;
4127
4128           case OP_NOT_VSPACE:
4129           for (i = min; i < max; i++)
4130             {
4131             if (eptr >= md->end_subject) break;
4132             c = *eptr;
4133             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4134               break;
4135             eptr++;
4136             }
4137           break;
4138
4139           case OP_VSPACE:
4140           for (i = min; i < max; i++)
4141             {
4142             if (eptr >= md->end_subject) break;
4143             c = *eptr;
4144             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4145               break;
4146             eptr++;
4147             }
4148           break;
4149
4150           case OP_NOT_DIGIT:
4151           for (i = min; i < max; i++)
4152             {
4153             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4154               break;
4155             eptr++;
4156             }
4157           break;
4158
4159           case OP_DIGIT:
4160           for (i = min; i < max; i++)
4161             {
4162             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4163               break;
4164             eptr++;
4165             }
4166           break;
4167
4168           case OP_NOT_WHITESPACE:
4169           for (i = min; i < max; i++)
4170             {
4171             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4172               break;
4173             eptr++;
4174             }
4175           break;
4176
4177           case OP_WHITESPACE:
4178           for (i = min; i < max; i++)
4179             {
4180             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4181               break;
4182             eptr++;
4183             }
4184           break;
4185
4186           case OP_NOT_WORDCHAR:
4187           for (i = min; i < max; i++)
4188             {
4189             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4190               break;
4191             eptr++;
4192             }
4193           break;
4194
4195           case OP_WORDCHAR:
4196           for (i = min; i < max; i++)
4197             {
4198             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4199               break;
4200             eptr++;
4201             }
4202           break;
4203
4204           default:
4205           RRETURN(PCRE_ERROR_INTERNAL);
4206           }
4207
4208         /* eptr is now past the end of the maximum run */
4209
4210         if (possessive) continue;
4211         while (eptr >= pp)
4212           {
4213           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4214           eptr--;
4215           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4216           }
4217         }
4218
4219       /* Get here if we can't make it match with any permitted repetitions */
4220
4221       RRETURN(MATCH_NOMATCH);
4222       }
4223     /* Control never gets here */
4224
4225     /* There's been some horrible disaster. Arrival here can only mean there is
4226     something seriously wrong in the code above or the OP_xxx definitions. */
4227
4228     default:
4229     DPRINTF(("Unknown opcode %d\n", *ecode));
4230     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4231     }
4232
4233   /* Do not stick any code in here without much thought; it is assumed
4234   that "continue" in the code above comes out to here to repeat the main
4235   loop. */
4236
4237   }             /* End of main loop */
4238 /* Control never reaches here */
4239
4240
4241 /* When compiling to use the heap rather than the stack for recursive calls to
4242 match(), the RRETURN() macro jumps here. The number that is saved in
4243 frame->Xwhere indicates which label we actually want to return to. */
4244
4245 #ifdef NO_RECURSE
4246 #define LBL(val) case val: goto L_RM##val;
4247 HEAP_RETURN:
4248 switch (frame->Xwhere)
4249   {
4250   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4251   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4252   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4253   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4254   LBL(53) LBL(54)
4255 #ifdef SUPPORT_UTF8
4256   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4257   LBL(32) LBL(34) LBL(42) LBL(46)
4258 #ifdef SUPPORT_UCP
4259   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4260 #endif  /* SUPPORT_UCP */
4261 #endif  /* SUPPORT_UTF8 */
4262   default:
4263   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4264   return PCRE_ERROR_INTERNAL;
4265   }
4266 #undef LBL
4267 #endif  /* NO_RECURSE */
4268 }
4269
4270
4271 /***************************************************************************
4272 ****************************************************************************
4273                    RECURSION IN THE match() FUNCTION
4274
4275 Undefine all the macros that were defined above to handle this. */
4276
4277 #ifdef NO_RECURSE       /* build that uses the heap, not the stack, for recursive match() calls */
4278 #undef eptr             /* these names were macros while match() was compiled; the definitions are earlier in the file */
4279 #undef ecode
4280 #undef mstart
4281 #undef offset_top
4282 #undef ims              /* pcre_exec() below declares its own local called ims */
4283 #undef eptrb
4284 #undef flags            /* pcre_exec() below declares its own local called flags */
4285
4286 #undef callpat          /* NOTE(review): blank-line grouping presumably mirrors the groups in which the macros were defined - confirm */
4287 #undef charptr
4288 #undef data
4289 #undef next
4290 #undef pp               /* in match(), pp records where a maximizing repeat started */
4291 #undef prev
4292 #undef saved_eptr
4293
4294 #undef new_recursive
4295
4296 #undef cur_is_word
4297 #undef condition
4298 #undef prev_is_word
4299
4300 #undef original_ims
4301
4302 #undef ctype
4303 #undef length           /* pcre_exec() below has a parameter called length */
4304 #undef max
4305 #undef min
4306 #undef number
4307 #undef offset
4308 #undef op
4309 #undef save_capture_last
4310 #undef save_offset1
4311 #undef save_offset2
4312 #undef save_offset3
4313 #undef stacksave
4314
4315 #undef newptrb
4316
4317 #endif                  /* NO_RECURSE */
4318
4319 /* These two are defined as macros in both cases (with or without NO_RECURSE), so they are undefined unconditionally */
4320
4321 #undef fc
4322 #undef fi               /* fi is the loop counter of the minimizing repeat loops in match() */
4323
4324 /***************************************************************************
4325 ***************************************************************************/
4326
4327
4328
4329 /*************************************************
4330 *         Execute a Regular Expression           *
4331 *************************************************/
4332
4333 /* This function applies a compiled re to a subject string and picks out
4334 portions of the string if it matches. Two elements in the vector are set for
4335 each substring: the offsets to the start and end of the substring.
4336
4337 Arguments:
4338   argument_re     points to the compiled expression
4339   extra_data      points to extra data or is NULL
4340   subject         points to the subject string
4341   length          length of subject string (may contain binary zeros)
4342   start_offset    where to start in the subject string
4343   options         option bits
4344   offsets         points to a vector of ints to be filled in with offsets
4345   offsetcount     the number of elements in the vector
4346
4347 Returns:          > 0 => success; value is the number of elements filled in
4348                   = 0 => success, but offsets is not big enough
4349                    -1 => failed to match
4350                  < -1 => some kind of unexpected problem
4351 */
4352
4353 PCRE_EXP_DEFN int
4354 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4355   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4356   int offsetcount)
4357 {
4358 int rc, resetcount, ocount;
4359 int first_byte = -1;
4360 int req_byte = -1;
4361 int req_byte2 = -1;
4362 int newline;
4363 unsigned long int ims;
4364 BOOL using_temporary_offsets = FALSE;
4365 BOOL anchored;
4366 BOOL startline;
4367 BOOL firstline;
4368 BOOL first_byte_caseless = FALSE;
4369 BOOL req_byte_caseless = FALSE;
4370 BOOL utf8;
4371 match_data match_block;
4372 match_data *md = &match_block;
4373 const uschar *tables;
4374 const uschar *start_bits = NULL;
4375 USPTR start_match = (USPTR)subject + start_offset;
4376 USPTR end_subject;
4377 USPTR req_byte_ptr = start_match - 1;
4378
4379 pcre_study_data internal_study;
4380 const pcre_study_data *study;
4381
4382 real_pcre internal_re;
4383 const real_pcre *external_re = (const real_pcre *)argument_re;
4384 const real_pcre *re = external_re;
4385
4386 /* Plausibility checks */
4387
4388 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4389 if (re == NULL || subject == NULL ||
4390    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4391 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4392
4393 /* Fish out the optional data from the extra_data structure, first setting
4394 the default values. */
4395
4396 study = NULL;
4397 md->match_limit = MATCH_LIMIT;
4398 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4399 md->callout_data = NULL;
4400
4401 /* The table pointer is always in native byte order. */
4402
4403 tables = external_re->tables;
4404
4405 if (extra_data != NULL)
4406   {
4407   register unsigned int flags = extra_data->flags;
4408   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4409     study = (const pcre_study_data *)extra_data->study_data;
4410   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4411     md->match_limit = extra_data->match_limit;
4412   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4413     md->match_limit_recursion = extra_data->match_limit_recursion;
4414   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4415     md->callout_data = extra_data->callout_data;
4416   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4417   }
4418
4419 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4420 is a feature that makes it possible to save compiled regex and re-use them
4421 in other programs later. */
4422
4423 if (tables == NULL) tables = _pcre_default_tables;
4424
4425 /* Check that the first field in the block is the magic number. If it is not,
4426 test for a regex that was compiled on a host of opposite endianness. If this is
4427 the case, flipped values are put in internal_re and internal_study if there was
4428 study data too. */
4429
4430 if (re->magic_number != MAGIC_NUMBER)
4431   {
4432   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4433   if (re == NULL) return PCRE_ERROR_BADMAGIC;
4434   if (study != NULL) study = &internal_study;
4435   }
4436
4437 /* Set up other data */
4438
4439 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4440 startline = (re->flags & PCRE_STARTLINE) != 0;
4441 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4442
4443 /* The code starts after the real_pcre block and the capture name table. */
4444
4445 md->start_code = (const uschar *)external_re + re->name_table_offset +
4446   re->name_count * re->name_entry_size;
4447
4448 md->start_subject = (USPTR)subject;
4449 md->start_offset = start_offset;
4450 md->end_subject = md->start_subject + length;
4451 end_subject = md->end_subject;
4452
4453 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4454 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4455
4456 md->notbol = (options & PCRE_NOTBOL) != 0;
4457 md->noteol = (options & PCRE_NOTEOL) != 0;
4458 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4459 md->partial = (options & PCRE_PARTIAL) != 0;
4460 md->hitend = FALSE;
4461
4462 md->recursive = NULL;                   /* No recursion at top level */
4463
4464 md->lcc = tables + lcc_offset;
4465 md->ctypes = tables + ctypes_offset;
4466
4467 /* Handle different \R options. */
4468
4469 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4470   {
4471   case 0:
4472   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4473     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4474   else
4475 #ifdef BSR_ANYCRLF
4476   md->bsr_anycrlf = TRUE;
4477 #else
4478   md->bsr_anycrlf = FALSE;
4479 #endif
4480   break;
4481
4482   case PCRE_BSR_ANYCRLF:
4483   md->bsr_anycrlf = TRUE;
4484   break;
4485
4486   case PCRE_BSR_UNICODE:
4487   md->bsr_anycrlf = FALSE;
4488   break;
4489
4490   default: return PCRE_ERROR_BADNEWLINE;
4491   }
4492
4493 /* Handle different types of newline. The three bits give eight cases. If
4494 nothing is set at run time, whatever was used at compile time applies. */
4495
4496 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4497         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4498   {
4499   case 0: newline = NEWLINE; break;   /* Compile-time default */
4500   case PCRE_NEWLINE_CR: newline = '\r'; break;
4501   case PCRE_NEWLINE_LF: newline = '\n'; break;
4502   case PCRE_NEWLINE_CR+
4503        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4504   case PCRE_NEWLINE_ANY: newline = -1; break;
4505   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4506   default: return PCRE_ERROR_BADNEWLINE;
4507   }
4508
4509 if (newline == -2)
4510   {
4511   md->nltype = NLTYPE_ANYCRLF;
4512   }
4513 else if (newline < 0)
4514   {
4515   md->nltype = NLTYPE_ANY;
4516   }
4517 else
4518   {
4519   md->nltype = NLTYPE_FIXED;
4520   if (newline > 255)
4521     {
4522     md->nllen = 2;
4523     md->nl[0] = (newline >> 8) & 255;
4524     md->nl[1] = newline & 255;
4525     }
4526   else
4527     {
4528     md->nllen = 1;
4529     md->nl[0] = newline;
4530     }
4531   }
4532
4533 /* Partial matching is supported only for a restricted set of regexes at the
4534 moment. */
4535
4536 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4537   return PCRE_ERROR_BADPARTIAL;
4538
4539 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4540 back the character offset. */
4541
4542 #ifdef SUPPORT_UTF8
4543 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4544   {
4545   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4546     return PCRE_ERROR_BADUTF8;
4547   if (start_offset > 0 && start_offset < length)
4548     {
4549     int tb = ((uschar *)subject)[start_offset];
4550     if (tb > 127)
4551       {
4552       tb &= 0xc0;
4553       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4554       }
4555     }
4556   }
4557 #endif
4558
4559 /* The ims options can vary during the matching as a result of the presence
4560 of (?ims) items in the pattern. They are kept in a local variable so that
4561 restoring at the exit of a group is easy. */
4562
4563 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4564
4565 /* If the expression has got more back references than the offsets supplied can
4566 hold, we get a temporary chunk of working store to use during the matching.
4567 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4568 of 3. */
4569
4570 ocount = offsetcount - (offsetcount % 3);
4571
4572 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4573   {
4574   ocount = re->top_backref * 3 + 3;
4575   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4576   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4577   using_temporary_offsets = TRUE;
4578   DPRINTF(("Got memory to hold back references\n"));
4579   }
4580 else md->offset_vector = offsets;
4581
4582 md->offset_end = ocount;
4583 md->offset_max = (2*ocount)/3;
4584 md->offset_overflow = FALSE;
4585 md->capture_last = -1;
4586
4587 /* Compute the minimum number of offsets that we need to reset each time. Doing
4588 this makes a huge difference to execution time when there aren't many brackets
4589 in the pattern. */
4590
4591 resetcount = 2 + re->top_bracket * 2;
4592 if (resetcount > offsetcount) resetcount = ocount;
4593
4594 /* Reset the working variable associated with each extraction. These should
4595 never be used unless previously set, but they get saved and restored, and so we
4596 initialize them to avoid reading uninitialized locations. */
4597
4598 if (md->offset_vector != NULL)
4599   {
4600   register int *iptr = md->offset_vector + ocount;
4601   register int *iend = iptr - resetcount/2 + 1;
4602   while (--iptr >= iend) *iptr = -1;
4603   }
4604
4605 /* Set up the first character to match, if available. The first_byte value is
4606 never set for an anchored regular expression, but the anchoring may be forced
4607 at run time, so we have to test for anchoring. The first char may be unset for
4608 an unanchored pattern, of course. If there's no first char and the pattern was
4609 studied, there may be a bitmap of possible first characters. */
4610
4611 if (!anchored)
4612   {
4613   if ((re->flags & PCRE_FIRSTSET) != 0)
4614     {
4615     first_byte = re->first_byte & 255;
4616     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4617       first_byte = md->lcc[first_byte];
4618     }
4619   else
4620     if (!startline && study != NULL &&
4621       (study->options & PCRE_STUDY_MAPPED) != 0)
4622         start_bits = study->start_bits;
4623   }
4624
4625 /* For anchored or unanchored matches, there may be a "last known required
4626 character" set. */
4627
4628 if ((re->flags & PCRE_REQCHSET) != 0)
4629   {
4630   req_byte = re->req_byte & 255;
4631   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4632   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
4633   }
4634
4635
4636 /* ==========================================================================*/
4637
4638 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4639 the loop runs just once. */
4640
4641 for(;;)
4642   {
4643   USPTR save_end_subject = end_subject;
4644   USPTR new_start_match;
4645
4646   /* Reset the maximum number of extractions we might see. */
4647
4648   if (md->offset_vector != NULL)
4649     {
4650     register int *iptr = md->offset_vector;
4651     register int *iend = iptr + resetcount;
4652     while (iptr < iend) *iptr++ = -1;
4653     }
4654
4655   /* Advance to a unique first char if possible. If firstline is TRUE, the
4656   start of the match is constrained to the first line of a multiline string.
4657   That is, the match must be before or at the first newline. Implement this by
4658   temporarily adjusting end_subject so that we stop scanning at a newline. If
4659   the match fails at the newline, later code breaks this loop. */
4660
4661   if (firstline)
4662     {
4663     USPTR t = start_match;
4664     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4665     end_subject = t;
4666     }
4667
4668   /* Now test for a unique first byte */
4669
4670   if (first_byte >= 0)
4671     {
4672     if (first_byte_caseless)
4673       while (start_match < end_subject &&
4674              md->lcc[*start_match] != first_byte)
4675         start_match++;
4676     else
4677       while (start_match < end_subject && *start_match != first_byte)
4678         start_match++;
4679     }
4680
4681   /* Or to just after a linebreak for a multiline match if possible */
4682
4683   else if (startline)
4684     {
4685     if (start_match > md->start_subject + start_offset)
4686       {
4687       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4688         start_match++;
4689
4690       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4691       and we are now at a LF, advance the match position by one more character.
4692       */
4693
4694       if (start_match[-1] == '\r' &&
4695            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4696            start_match < end_subject &&
4697            *start_match == '\n')
4698         start_match++;
4699       }
4700     }
4701
4702   /* Or to a non-unique first char after study */
4703
4704   else if (start_bits != NULL)
4705     {
4706     while (start_match < end_subject)
4707       {
4708       register unsigned int c = *start_match;
4709       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4710       }
4711     }
4712
4713   /* Restore fudged end_subject */
4714
4715   end_subject = save_end_subject;
4716
4717 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4718   printf(">>>> Match against: ");
4719   pchars(start_match, end_subject - start_match, TRUE, md);
4720   printf("\n");
4721 #endif
4722
4723   /* If req_byte is set, we know that that character must appear in the subject
4724   for the match to succeed. If the first character is set, req_byte must be
4725   later in the subject; otherwise the test starts at the match point. This
4726   optimization can save a huge amount of backtracking in patterns with nested
4727   unlimited repeats that aren't going to match. Writing separate code for
4728   cased/caseless versions makes it go faster, as does using an autoincrement
4729   and backing off on a match.
4730
4731   HOWEVER: when the subject string is very, very long, searching to its end can
4732   take a long time, and give bad performance on quite ordinary patterns. This
4733   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4734   string... so we don't do this when the string is sufficiently long.
4735
4736   ALSO: this processing is disabled when partial matching is requested.
4737   */
4738
4739   if (req_byte >= 0 &&
4740       end_subject - start_match < REQ_BYTE_MAX &&
4741       !md->partial)
4742     {
4743     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4744
4745     /* We don't need to repeat the search if we haven't yet reached the
4746     place we found it at last time. */
4747
4748     if (p > req_byte_ptr)
4749       {
4750       if (req_byte_caseless)
4751         {
4752         while (p < end_subject)
4753           {
4754           register int pp = *p++;
4755           if (pp == req_byte || pp == req_byte2) { p--; break; }
4756           }
4757         }
4758       else
4759         {
4760         while (p < end_subject)
4761           {
4762           if (*p++ == req_byte) { p--; break; }
4763           }
4764         }
4765
4766       /* If we can't find the required character, break the matching loop,
4767       forcing a match failure. */
4768
4769       if (p >= end_subject)
4770         {
4771         rc = MATCH_NOMATCH;
4772         break;
4773         }
4774
4775       /* If we have found the required character, save the point where we
4776       found it, so that we don't search again next time round the loop if
4777       the start hasn't passed this character yet. */
4778
4779       req_byte_ptr = p;
4780       }
4781     }
4782
4783   /* OK, we can now run the match. */
4784
4785   md->start_match_ptr = start_match;
4786   md->match_call_count = 0;
4787   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4788
4789   switch(rc)
4790     {
4791     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4792     exactly like PRUNE. */
4793
4794     case MATCH_NOMATCH:
4795     case MATCH_PRUNE:
4796     case MATCH_THEN:
4797     new_start_match = start_match + 1;
4798 #ifdef SUPPORT_UTF8
4799     if (utf8)
4800       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4801         new_start_match++;
4802 #endif
4803     break;
4804
4805     /* SKIP passes back the next starting point explicitly. */
4806
4807     case MATCH_SKIP:
4808     new_start_match = md->start_match_ptr;
4809     break;
4810
4811     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4812
4813     case MATCH_COMMIT:
4814     rc = MATCH_NOMATCH;
4815     goto ENDLOOP;
4816
4817     /* Any other return is some kind of error. */
4818
4819     default:
4820     goto ENDLOOP;
4821     }
4822
4823   /* Control reaches here for the various types of "no match at this point"
4824   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4825
4826   rc = MATCH_NOMATCH;
4827
4828   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4829   newline in the subject (though it may continue over the newline). Therefore,
4830   if we have just failed to match, starting at a newline, do not continue. */
4831
4832   if (firstline && IS_NEWLINE(start_match)) break;
4833
4834   /* Advance to new matching position */
4835
4836   start_match = new_start_match;
4837
4838   /* Break the loop if the pattern is anchored or if we have passed the end of
4839   the subject. */
4840
4841   if (anchored || start_match > end_subject) break;
4842
4843   /* If we have just passed a CR and we are now at a LF, and the pattern does
4844   not contain any explicit matches for \r or \n, and the newline option is CRLF
4845   or ANY or ANYCRLF, advance the match position by one more character. */
4846
4847   if (start_match[-1] == '\r' &&
4848       start_match < end_subject &&
4849       *start_match == '\n' &&
4850       (re->flags & PCRE_HASCRORLF) == 0 &&
4851         (md->nltype == NLTYPE_ANY ||
4852          md->nltype == NLTYPE_ANYCRLF ||
4853          md->nllen == 2))
4854     start_match++;
4855
4856   }   /* End of for(;;) "bumpalong" loop */
4857
4858 /* ==========================================================================*/
4859
4860 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4861 conditions is true:
4862
4863 (1) The pattern is anchored or the match was failed by (*COMMIT);
4864
4865 (2) We are past the end of the subject;
4866
4867 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4868     this option requests that a match occur at or before the first newline in
4869     the subject.
4870
4871 When we have a match and the offset vector is big enough to deal with any
4872 backreferences, captured substring offsets will already be set up. In the case
4873 where we had to get some local store to hold offsets for backreference
4874 processing, copy those that we can. In this case there need not be overflow if
4875 certain parts of the pattern were not used, even though there are more
4876 capturing parentheses than vector slots. */
4877
4878 ENDLOOP:
4879
4880 if (rc == MATCH_MATCH)
4881   {
4882   if (using_temporary_offsets)
4883     {
4884     if (offsetcount >= 4)
4885       {
4886       memcpy(offsets + 2, md->offset_vector + 2,
4887         (offsetcount - 2) * sizeof(int));
4888       DPRINTF(("Copied offsets from temporary memory\n"));
4889       }
4890     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4891     DPRINTF(("Freeing temporary memory\n"));
4892     (pcre_free)(md->offset_vector);
4893     }
4894
4895   /* Set the return code to the number of captured strings, or 0 if there are
4896   too many to fit into the vector. */
4897
4898   rc = md->offset_overflow? 0 : md->end_offset_top/2;
4899
4900   /* If there is space, set up the whole thing as substring 0. The value of
4901   md->start_match_ptr might be modified if \K was encountered on the
4902   successful matching path. */
4903
4904   if (offsetcount < 2) rc = 0; else
4905     {
4906     offsets[0] = md->start_match_ptr - md->start_subject;
4907     offsets[1] = md->end_match_ptr - md->start_subject;
4908     }
4909
4910   DPRINTF((">>>> returning %d\n", rc));
4911   return rc;
4912   }
4913
4914 /* Control gets here if there has been an error, or if the overall match
4915 attempt has failed at all permitted starting positions. */
4916
4917 if (using_temporary_offsets)
4918   {
4919   DPRINTF(("Freeing temporary memory\n"));
4920   (pcre_free)(md->offset_vector);
4921   }
4922
4923 if (rc != MATCH_NOMATCH)
4924   {
4925   DPRINTF((">>>> error: returning %d\n", rc));
4926   return rc;
4927   }
4928 else if (md->partial && md->hitend)
4929   {
4930   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4931   return PCRE_ERROR_PARTIAL;
4932   }
4933 else
4934   {
4935   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4936   return PCRE_ERROR_NOMATCH;
4937   }
4938 }
4939
4940 /* End of pcre_exec.c */