Allow only accept and warn in the not-QUIT ACL.
[exim.git] / src / src / pcre / pcre_exec.c
1 /* $Cambridge: exim/src/src/pcre/pcre_exec.c,v 1.5 2007/06/26 11:16:54 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
9
10 Written by Philip Hazel
11 Copyright (c) 1997-2007 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42
43 /* This module contains pcre_exec(), the externally visible function that does
44 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
45 possible. There are also some static supporting functions. */
46
47 #define NLBLOCK md /* Block containing newline information */
48 #define PSSTART start_subject /* Field containing processed string start */
49 #define PSEND end_subject /* Field containing processed string end */
50
51 #include "pcre_internal.h"
52
53 /* Undefine some potentially clashing cpp symbols */
54
55 #undef min
56 #undef max
57
58 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
59 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
60
61 #define EPTR_WORK_SIZE (1000)
62
63 /* Flag bits for the match() function */
64
65 #define match_condassert 0x01 /* Called to check a condition assertion */
66 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
67 #define match_tail_recursed 0x04 /* Tail recursive call */
68
69 /* Non-error returns from the match() function. Error returns are externally
70 defined PCRE_ERROR_xxx codes, which are all negative. */
71
72 #define MATCH_MATCH 1
73 #define MATCH_NOMATCH 0
74
75 /* Maximum number of ints of offset to save on the stack for recursive calls.
76 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
77 because the offset vector is always a multiple of 3 long. */
78
79 #define REC_STACK_SAVE_MAX 30
80
81 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
82
83 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
84 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
85
86
87
88 #ifdef DEBUG
89 /*************************************************
90 * Debugging function to print chars *
91 *************************************************/
92
93 /* Print a sequence of chars in printable format, stopping at the end of the
94 subject if requested.
95
96 Arguments:
97 p points to characters
98 length number to print
99 is_subject TRUE if printing from within md->start_subject
100 md pointer to matching data block, if is_subject is TRUE
101
102 Returns: nothing
103 */
104
105 static void
106 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
107 {
108 unsigned int c;
109 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
110 while (length-- > 0)
111 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
112 }
113 #endif
114
115
116
117 /*************************************************
118 * Match a back-reference *
119 *************************************************/
120
121 /* If a back reference hasn't been set, the length that is passed is greater
122 than the number of characters left in the string, so the match fails.
123
124 Arguments:
125 offset index into the offset vector
126 eptr points into the subject
127 length length to be matched
128 md points to match data block
129 ims the ims flags
130
131 Returns: TRUE if matched
132 */
133
134 static BOOL
135 match_ref(int offset, register USPTR eptr, int length, match_data *md,
136 unsigned long int ims)
137 {
138 USPTR p = md->start_subject + md->offset_vector[offset];
139
140 #ifdef DEBUG
141 if (eptr >= md->end_subject)
142 printf("matching subject <null>");
143 else
144 {
145 printf("matching subject ");
146 pchars(eptr, length, TRUE, md);
147 }
148 printf(" against backref ");
149 pchars(p, length, FALSE, md);
150 printf("\n");
151 #endif
152
153 /* Always fail if not enough characters left */
154
155 if (length > md->end_subject - eptr) return FALSE;
156
157 /* Separate the caselesss case for speed */
158
159 if ((ims & PCRE_CASELESS) != 0)
160 {
161 while (length-- > 0)
162 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
163 }
164 else
165 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
166
167 return TRUE;
168 }
169
170
171
172 /***************************************************************************
173 ****************************************************************************
174 RECURSION IN THE match() FUNCTION
175
176 The match() function is highly recursive, though not every recursive call
177 increases the recursive depth. Nevertheless, some regular expressions can cause
178 it to recurse to a great depth. I was writing for Unix, so I just let it call
179 itself recursively. This uses the stack for saving everything that has to be
180 saved for a recursive call. On Unix, the stack can be large, and this works
181 fine.
182
183 It turns out that on some non-Unix-like systems there are problems with
184 programs that use a lot of stack. (This despite the fact that every last chip
185 has oodles of memory these days, and techniques for extending the stack have
186 been known for decades.) So....
187
188 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
189 calls by keeping local variables that need to be preserved in blocks of memory
190 obtained from malloc() instead of on the stack. Macros are used to
191 achieve this so that the actual code doesn't look very different to what it
192 always used to.
193
194 The original heap-recursive code used longjmp(). However, it seems that this
195 can be very slow on some operating systems. Following a suggestion from Stan
196 Switzer, the use of longjmp() has been abolished, at the cost of having to
197 provide a unique number for each call to RMATCH. There is no way of generating
198 a sequence of numbers at compile time in C. I have given them names, to make
199 them stand out more clearly.
200
201 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
202 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
203 tests. Furthermore, not using longjmp() means that local dynamic variables
204 don't have indeterminate values; this has meant that the frame size can be
205 reduced because the result can be "passed back" by straight setting of the
206 variable instead of being passed in the frame.
207 ****************************************************************************
208 ***************************************************************************/
209
210
211 /* Numbers for RMATCH calls */
212
213 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
214 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
215 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
216 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
217 RM41, RM42, RM43, RM44, RM45, RM46, RM47 };
218
219
220 /* These versions of the macros use the stack, as normal. There are debugging
221 versions and production versions. Note that the "rw" argument of RMATCH isn't
222 actually used in this definition. */
223
224 #ifndef NO_RECURSE
225 #define REGISTER register
226
227 #ifdef DEBUG
228 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
229 { \
230 printf("match() called in line %d\n", __LINE__); \
231 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
232 printf("to line %d\n", __LINE__); \
233 }
234 #define RRETURN(ra) \
235 { \
236 printf("match() returned %d from line %d ", ra, __LINE__); \
237 return ra; \
238 }
239 #else
240 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
241 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
242 #define RRETURN(ra) return ra
243 #endif
244
245 #else
246
247
248 /* These versions of the macros manage a private stack on the heap. Note that
249 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
250 argument of match(), which never changes. */
251
252 #define REGISTER
253
/* RMATCH stands in for a recursive call of match(). It obtains a new frame
from pcre_stack_malloc(), notes in the current frame which call site (rw) is
making the call, copies the "arguments" into the new frame, makes the new
frame current, and jumps to HEAP_RECURSE. The label L_<rw> that closes the
macro marks this call site; presumably the code at HEAP_RETURN uses Xwhere to
jump back to it (that code is not part of this definition).

If no memory can be obtained for the new frame, nothing is dereferenced: the
current frame gives up with PCRE_ERROR_NOMEMORY by way of RRETURN, so the
error travels up through the outer frames like any other negative result. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN stands in for "return" from match(). The current frame is unlinked
and freed, and its predecessor becomes current. If there is a predecessor, the
result is left in rrc and control goes to HEAP_RETURN; if the freed frame was
the top-level one, match() itself returns the result. Note that "ra" is
evaluated only after the frame has been released, so it must not refer to
anything held in the frame that is being returned from. */

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
286
287
288 /* Structure for remembering the local variables in a private frame */
289
290 typedef struct heapframe {
291 struct heapframe *Xprevframe;
292
293 /* Function arguments that may change */
294
295 const uschar *Xeptr;
296 const uschar *Xecode;
297 const uschar *Xmstart;
298 int Xoffset_top;
299 long int Xims;
300 eptrblock *Xeptrb;
301 int Xflags;
302 unsigned int Xrdepth;
303
304 /* Function local variables */
305
306 const uschar *Xcallpat;
307 const uschar *Xcharptr;
308 const uschar *Xdata;
309 const uschar *Xnext;
310 const uschar *Xpp;
311 const uschar *Xprev;
312 const uschar *Xsaved_eptr;
313
314 recursion_info Xnew_recursive;
315
316 BOOL Xcur_is_word;
317 BOOL Xcondition;
318 BOOL Xprev_is_word;
319
320 unsigned long int Xoriginal_ims;
321
322 #ifdef SUPPORT_UCP
323 int Xprop_type;
324 int Xprop_value;
325 int Xprop_fail_result;
326 int Xprop_category;
327 int Xprop_chartype;
328 int Xprop_script;
329 int Xoclength;
330 uschar Xocchars[8];
331 #endif
332
333 int Xctype;
334 unsigned int Xfc;
335 int Xfi;
336 int Xlength;
337 int Xmax;
338 int Xmin;
339 int Xnumber;
340 int Xoffset;
341 int Xop;
342 int Xsave_capture_last;
343 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
344 int Xstacksave[REC_STACK_SAVE_MAX];
345
346 eptrblock Xnewptrb;
347
348 /* Where to jump back to */
349
350 int Xwhere;
351
352 } heapframe;
353
354 #endif
355
356
357 /***************************************************************************
358 ***************************************************************************/
359
360
361
362 /*************************************************
363 * Match from current position *
364 *************************************************/
365
366 /* This function is called recursively in many circumstances. Whenever it
367 returns a negative (error) response, the outer incarnation must also return the
368 same response.
369
370 Performance note: It might be tempting to extract commonly used fields from the
371 md structure (e.g. utf8, end_subject) into individual variables to improve
372 performance. Tests using gcc on a SPARC disproved this; in the first case, it
373 made performance worse.
374
375 Arguments:
376 eptr pointer to current character in subject
377 ecode pointer to current position in compiled code
378 mstart pointer to the current match start position (can be modified
379 by encountering \K)
380 offset_top current top pointer
381 md pointer to "static" info for the match
382 ims current /i, /m, and /s options
383 eptrb pointer to chain of blocks containing eptr at start of
384 brackets - for testing for empty matches
385 flags can contain
386 match_condassert - this is an assertion condition
387 match_cbegroup - this is the start of an unlimited repeat
388 group that can match an empty string
389 match_tail_recursed - this is a tail_recursed group
390 rdepth the recursion depth
391
392 Returns: MATCH_MATCH if matched ) these values are >= 0
393 MATCH_NOMATCH if failed to match )
394 a negative PCRE_ERROR_xxx value if aborted by an error condition
395 (e.g. stopped by repeated call or recursion limit)
396 */
397
398 static int
399 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
400 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
401 int flags, unsigned int rdepth)
402 {
403 /* These variables do not need to be preserved over recursion in this function,
404 so they can be ordinary variables in all cases. Mark some of them with
405 "register" because they are used a lot in loops. */
406
407 register int rrc; /* Returns from recursive calls */
408 register int i; /* Used for loops not involving calls to RMATCH() */
409 register unsigned int c; /* Character values not kept over RMATCH() calls */
410 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
411
412 BOOL minimize, possessive; /* Quantifier options */
413
414 /* When recursion is not being used, all "local" variables that have to be
415 preserved over calls to RMATCH() are part of a "frame" which is obtained from
416 heap storage. Set up the top-level frame here; others are obtained from the
417 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
418
419 #ifdef NO_RECURSE
420 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
421 frame->Xprevframe = NULL; /* Marks the top level */
422
423 /* Copy in the original argument variables */
424
425 frame->Xeptr = eptr;
426 frame->Xecode = ecode;
427 frame->Xmstart = mstart;
428 frame->Xoffset_top = offset_top;
429 frame->Xims = ims;
430 frame->Xeptrb = eptrb;
431 frame->Xflags = flags;
432 frame->Xrdepth = rdepth;
433
434 /* This is where control jumps back to to effect "recursion" */
435
436 HEAP_RECURSE:
437
438 /* Macros make the argument variables come from the current frame */
439
440 #define eptr frame->Xeptr
441 #define ecode frame->Xecode
442 #define mstart frame->Xmstart
443 #define offset_top frame->Xoffset_top
444 #define ims frame->Xims
445 #define eptrb frame->Xeptrb
446 #define flags frame->Xflags
447 #define rdepth frame->Xrdepth
448
449 /* Ditto for the local variables */
450
451 #ifdef SUPPORT_UTF8
452 #define charptr frame->Xcharptr
453 #endif
454 #define callpat frame->Xcallpat
455 #define data frame->Xdata
456 #define next frame->Xnext
457 #define pp frame->Xpp
458 #define prev frame->Xprev
459 #define saved_eptr frame->Xsaved_eptr
460
461 #define new_recursive frame->Xnew_recursive
462
463 #define cur_is_word frame->Xcur_is_word
464 #define condition frame->Xcondition
465 #define prev_is_word frame->Xprev_is_word
466
467 #define original_ims frame->Xoriginal_ims
468
469 #ifdef SUPPORT_UCP
470 #define prop_type frame->Xprop_type
471 #define prop_value frame->Xprop_value
472 #define prop_fail_result frame->Xprop_fail_result
473 #define prop_category frame->Xprop_category
474 #define prop_chartype frame->Xprop_chartype
475 #define prop_script frame->Xprop_script
476 #define oclength frame->Xoclength
477 #define occhars frame->Xocchars
478 #endif
479
480 #define ctype frame->Xctype
481 #define fc frame->Xfc
482 #define fi frame->Xfi
483 #define length frame->Xlength
484 #define max frame->Xmax
485 #define min frame->Xmin
486 #define number frame->Xnumber
487 #define offset frame->Xoffset
488 #define op frame->Xop
489 #define save_capture_last frame->Xsave_capture_last
490 #define save_offset1 frame->Xsave_offset1
491 #define save_offset2 frame->Xsave_offset2
492 #define save_offset3 frame->Xsave_offset3
493 #define stacksave frame->Xstacksave
494
495 #define newptrb frame->Xnewptrb
496
497 /* When recursion is being used, local variables are allocated on the stack and
498 get preserved during recursion in the normal way. In this environment, fi and
499 i, and fc and c, can be the same variables. */
500
501 #else /* NO_RECURSE not defined */
502 #define fi i
503 #define fc c
504
505
506 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
507 const uschar *charptr; /* in small blocks of the code. My normal */
508 #endif /* style of coding would have declared */
509 const uschar *callpat; /* them within each of those blocks. */
510 const uschar *data; /* However, in order to accommodate the */
511 const uschar *next; /* version of this code that uses an */
512 USPTR pp; /* external "stack" implemented on the */
513 const uschar *prev; /* heap, it is easier to declare them all */
514 USPTR saved_eptr; /* here, so the declarations can be cut */
515 /* out in a block. The only declarations */
516 recursion_info new_recursive; /* within blocks below are for variables */
517 /* that do not have to be preserved over */
518 BOOL cur_is_word; /* a recursive call to RMATCH(). */
519 BOOL condition;
520 BOOL prev_is_word;
521
522 unsigned long int original_ims;
523
524 #ifdef SUPPORT_UCP
525 int prop_type;
526 int prop_value;
527 int prop_fail_result;
528 int prop_category;
529 int prop_chartype;
530 int prop_script;
531 int oclength;
532 uschar occhars[8];
533 #endif
534
535 int ctype;
536 int length;
537 int max;
538 int min;
539 int number;
540 int offset;
541 int op;
542 int save_capture_last;
543 int save_offset1, save_offset2, save_offset3;
544 int stacksave[REC_STACK_SAVE_MAX];
545
546 eptrblock newptrb;
547 #endif /* NO_RECURSE */
548
549 /* These statements are here to stop the compiler complaining about uninitialized
550 variables. */
551
552 #ifdef SUPPORT_UCP
553 prop_value = 0;
554 prop_fail_result = 0;
555 #endif
556
557
558 /* This label is used for tail recursion, which is used in a few cases even
559 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
560 used. Thanks to Ian Taylor for noticing this possibility and sending the
561 original patch. */
562
563 TAIL_RECURSE:
564
565 /* OK, now we can get on with the real code of the function. Recursive calls
566 are specified by the macro RMATCH and RRETURN is used to return. When
567 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
568 and a "return", respectively (possibly with some debugging if DEBUG is
569 defined). However, RMATCH isn't like a function call because it's quite a
570 complicated macro. It has to be used in one particular way. This shouldn't,
571 however, impact performance when true recursion is being used. */
572
573 #ifdef SUPPORT_UTF8
574 utf8 = md->utf8; /* Local copy of the flag */
575 #else
576 utf8 = FALSE;
577 #endif
578
579 /* First check that we haven't called match() too many times, or that we
580 haven't exceeded the recursive call limit. */
581
582 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
583 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
584
585 original_ims = ims; /* Save for resetting on ')' */
586
587 /* At the start of a group with an unlimited repeat that may match an empty
588 string, the match_cbegroup flag is set. When this is the case, add the current
589 subject pointer to the chain of such remembered pointers, to be checked when we
590 hit the closing ket, in order to break infinite loops that match no characters.
591 When match() is called in other circumstances, don't add to the chain. If this
592 is a tail recursion, use a block from the workspace, as the one on the stack is
593 already used. */
594
595 if ((flags & match_cbegroup) != 0)
596 {
597 eptrblock *p;
598 if ((flags & match_tail_recursed) != 0)
599 {
600 if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
601 p = md->eptrchain + md->eptrn++;
602 }
603 else p = &newptrb;
604 p->epb_saved_eptr = eptr;
605 p->epb_prev = eptrb;
606 eptrb = p;
607 }
608
609 /* Now start processing the opcodes. */
610
611 for (;;)
612 {
613 minimize = possessive = FALSE;
614 op = *ecode;
615
616 /* For partial matching, remember if we ever hit the end of the subject after
617 matching at least one subject character. */
618
619 if (md->partial &&
620 eptr >= md->end_subject &&
621 eptr > mstart)
622 md->hitend = TRUE;
623
624 switch(op)
625 {
626 /* Handle a capturing bracket. If there is space in the offset vector, save
627 the current subject position in the working slot at the top of the vector.
628 We mustn't change the current values of the data slot, because they may be
629 set from a previous iteration of this group, and be referred to by a
630 reference inside the group.
631
632 If the bracket fails to match, we need to restore this value and also the
633 values of the final offsets, in case they were set by a previous iteration
634 of the same bracket.
635
636 If there isn't enough space in the offset vector, treat this as if it were
637 a non-capturing bracket. Don't worry about setting the flag for the error
638 case here; that is handled in the code for KET. */
639
640 case OP_CBRA:
641 case OP_SCBRA:
642 number = GET2(ecode, 1+LINK_SIZE);
643 offset = number << 1;
644
645 #ifdef DEBUG
646 printf("start bracket %d\n", number);
647 printf("subject=");
648 pchars(eptr, 16, TRUE, md);
649 printf("\n");
650 #endif
651
652 if (offset < md->offset_max)
653 {
654 save_offset1 = md->offset_vector[offset];
655 save_offset2 = md->offset_vector[offset+1];
656 save_offset3 = md->offset_vector[md->offset_end - number];
657 save_capture_last = md->capture_last;
658
659 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
660 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
661
662 flags = (op == OP_SCBRA)? match_cbegroup : 0;
663 do
664 {
665 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
666 ims, eptrb, flags, RM1);
667 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
668 md->capture_last = save_capture_last;
669 ecode += GET(ecode, 1);
670 }
671 while (*ecode == OP_ALT);
672
673 DPRINTF(("bracket %d failed\n", number));
674
675 md->offset_vector[offset] = save_offset1;
676 md->offset_vector[offset+1] = save_offset2;
677 md->offset_vector[md->offset_end - number] = save_offset3;
678
679 RRETURN(MATCH_NOMATCH);
680 }
681
682 /* Insufficient room for saving captured contents. Treat as a non-capturing
683 bracket. */
684
685 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
686
687 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
688 final alternative within the brackets, we would return the result of a
689 recursive call to match() whatever happened. We can reduce stack usage by
690 turning this into a tail recursion. */
691
692 case OP_BRA:
693 case OP_SBRA:
694 DPRINTF(("start non-capturing bracket\n"));
695 flags = (op >= OP_SBRA)? match_cbegroup : 0;
696 for (;;)
697 {
698 if (ecode[GET(ecode, 1)] != OP_ALT)
699 {
700 ecode += _pcre_OP_lengths[*ecode];
701 flags |= match_tail_recursed;
702 DPRINTF(("bracket 0 tail recursion\n"));
703 goto TAIL_RECURSE;
704 }
705
706 /* For non-final alternatives, continue the loop for a NOMATCH result;
707 otherwise return. */
708
709 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
710 eptrb, flags, RM2);
711 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
712 ecode += GET(ecode, 1);
713 }
714 /* Control never reaches here. */
715
716 /* Conditional group: compilation checked that there are no more than
717 two branches. If the condition is false, skipping the first branch takes us
718 past the end if there is only one branch, but that's OK because that is
719 exactly what going to the ket would do. As there is only one branch to be
720 obeyed, we can use tail recursion to avoid using another stack frame. */
721
722 case OP_COND:
723 case OP_SCOND:
724 if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
725 {
726 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
727 condition = md->recursive != NULL &&
728 (offset == RREF_ANY || offset == md->recursive->group_num);
729 ecode += condition? 3 : GET(ecode, 1);
730 }
731
732 else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
733 {
734 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
735 condition = offset < offset_top && md->offset_vector[offset] >= 0;
736 ecode += condition? 3 : GET(ecode, 1);
737 }
738
739 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
740 {
741 condition = FALSE;
742 ecode += GET(ecode, 1);
743 }
744
745 /* The condition is an assertion. Call match() to evaluate it - setting
746 the final argument match_condassert causes it to stop at the end of an
747 assertion. */
748
749 else
750 {
751 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
752 match_condassert, RM3);
753 if (rrc == MATCH_MATCH)
754 {
755 condition = TRUE;
756 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
757 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
758 }
759 else if (rrc != MATCH_NOMATCH)
760 {
761 RRETURN(rrc); /* Need braces because of following else */
762 }
763 else
764 {
765 condition = FALSE;
766 ecode += GET(ecode, 1);
767 }
768 }
769
770 /* We are now at the branch that is to be obeyed. As there is only one,
771 we can use tail recursion to avoid using another stack frame. If the second
772 alternative doesn't exist, we can just plough on. */
773
774 if (condition || *ecode == OP_ALT)
775 {
776 ecode += 1 + LINK_SIZE;
777 flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
778 goto TAIL_RECURSE;
779 }
780 else
781 {
782 ecode += 1 + LINK_SIZE;
783 }
784 break;
785
786
787 /* End of the pattern. If we are in a top-level recursion, we should
788 restore the offsets appropriately and continue from after the call. */
789
790 case OP_END:
791 if (md->recursive != NULL && md->recursive->group_num == 0)
792 {
793 recursion_info *rec = md->recursive;
794 DPRINTF(("End of pattern in a (?0) recursion\n"));
795 md->recursive = rec->prevrec;
796 memmove(md->offset_vector, rec->offset_save,
797 rec->saved_max * sizeof(int));
798 mstart = rec->save_start;
799 ims = original_ims;
800 ecode = rec->after_call;
801 break;
802 }
803
804 /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
805 string - backtracking will then try other alternatives, if any. */
806
807 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
808 md->end_match_ptr = eptr; /* Record where we ended */
809 md->end_offset_top = offset_top; /* and how many extracts were taken */
810 md->start_match_ptr = mstart; /* and the start (\K can modify) */
811 RRETURN(MATCH_MATCH);
812
813 /* Change option settings */
814
815 case OP_OPT:
816 ims = ecode[1];
817 ecode += 2;
818 DPRINTF(("ims set to %02lx\n", ims));
819 break;
820
821 /* Assertion brackets. Check the alternative branches in turn - the
822 matching won't pass the KET for an assertion. If any one branch matches,
823 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
824 start of each branch to move the current point backwards, so the code at
825 this level is identical to the lookahead case. */
826
827 case OP_ASSERT:
828 case OP_ASSERTBACK:
829 do
830 {
831 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
832 RM4);
833 if (rrc == MATCH_MATCH) break;
834 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
835 ecode += GET(ecode, 1);
836 }
837 while (*ecode == OP_ALT);
838 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
839
840 /* If checking an assertion for a condition, return MATCH_MATCH. */
841
842 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
843
844 /* Continue from after the assertion, updating the offsets high water
845 mark, since extracts may have been taken during the assertion. */
846
847 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
848 ecode += 1 + LINK_SIZE;
849 offset_top = md->end_offset_top;
850 continue;
851
852 /* Negative assertion: all branches must fail to match */
853
854 case OP_ASSERT_NOT:
855 case OP_ASSERTBACK_NOT:
856 do
857 {
858 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
859 RM5);
860 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
861 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
862 ecode += GET(ecode,1);
863 }
864 while (*ecode == OP_ALT);
865
866 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
867
868 ecode += 1 + LINK_SIZE;
869 continue;
870
871 /* Move the subject pointer back. This occurs only at the start of
872 each branch of a lookbehind assertion. If we are too close to the start to
873 move back, this match function fails. When working with UTF-8 we move
874 back a number of characters, not bytes. */
875
876 case OP_REVERSE:
877 #ifdef SUPPORT_UTF8
878 if (utf8)
879 {
880 i = GET(ecode, 1);
881 while (i-- > 0)
882 {
883 eptr--;
884 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
885 BACKCHAR(eptr)
886 }
887 }
888 else
889 #endif
890
891 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
892
893 {
894 eptr -= GET(ecode, 1);
895 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
896 }
897
898 /* Skip to next op code */
899
900 ecode += 1 + LINK_SIZE;
901 break;
902
903 /* The callout item calls an external function, if one is provided, passing
904 details of the match so far. This is mainly for debugging, though the
905 function is able to force a failure. */
906
907 case OP_CALLOUT:
908 if (pcre_callout != NULL)
909 {
910 pcre_callout_block cb;
911 cb.version = 1; /* Version 1 of the callout block */
912 cb.callout_number = ecode[1];
913 cb.offset_vector = md->offset_vector;
914 cb.subject = (PCRE_SPTR)md->start_subject;
915 cb.subject_length = md->end_subject - md->start_subject;
916 cb.start_match = mstart - md->start_subject;
917 cb.current_position = eptr - md->start_subject;
918 cb.pattern_position = GET(ecode, 2);
919 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
920 cb.capture_top = offset_top/2;
921 cb.capture_last = md->capture_last;
922 cb.callout_data = md->callout_data;
923 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
924 if (rrc < 0) RRETURN(rrc);
925 }
926 ecode += 2 + 2*LINK_SIZE;
927 break;
928
929 /* Recursion either matches the current regex, or some subexpression. The
930 offset data is the offset to the starting bracket from the start of the
931 whole pattern. (This is so that it works from duplicated subpatterns.)
932
933 If there are any capturing brackets started but not finished, we have to
934 save their starting points and reinstate them after the recursion. However,
935 we don't know how many such there are (offset_top records the completed
936 total) so we just have to save all the potential data. There may be up to
937 65535 such values, which is too large to put on the stack, but using malloc
938 for small numbers seems expensive. As a compromise, the stack is used when
939 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
940 is used. If the malloc fails, no attempt is made to carry on with partial
941 data: the recursion is abandoned straight away and PCRE_ERROR_NOMEMORY is
942 handed back via RRETURN().
943
944 There are also other values that have to be saved. We use a chained
945 sequence of blocks that actually live on the stack. Thanks to Robin Houston
946 for the original version of this logic. */
947
948 case OP_RECURSE:
949 {
950 callpat = md->start_code + GET(ecode, 1);
951 new_recursive.group_num = (callpat == md->start_code)? 0 :
952 GET2(callpat, 1 + LINK_SIZE);
953
954 /* Add to "recursing stack" */
955
956 new_recursive.prevrec = md->recursive;
957 md->recursive = &new_recursive;
958
959 /* Find where to continue from afterwards */
960
961 ecode += 1 + LINK_SIZE;
962 new_recursive.after_call = ecode;
963
964 /* Now save the offset data. */
965
966 new_recursive.saved_max = md->offset_end;
967 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
968 new_recursive.offset_save = stacksave;
969 else
970 {
971 new_recursive.offset_save =
972 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
973 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
974 }
975
976 memcpy(new_recursive.offset_save, md->offset_vector,
977 new_recursive.saved_max * sizeof(int));
978 new_recursive.save_start = mstart;
979 mstart = eptr;
980
981 /* OK, now we can do the recursion. For each top-level alternative we
982 restore the offset and recursion data. */
983
984 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
985 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
986 do
987 {
988 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
989 md, ims, eptrb, flags, RM6);
990 if (rrc == MATCH_MATCH)
991 {
992 DPRINTF(("Recursion matched\n"));
993 md->recursive = new_recursive.prevrec;
994 if (new_recursive.offset_save != stacksave)
995 (pcre_free)(new_recursive.offset_save);
996 RRETURN(MATCH_MATCH);
997 }
998 else if (rrc != MATCH_NOMATCH)
999 {
1000 DPRINTF(("Recursion gave error %d\n", rrc));
1001 RRETURN(rrc);
1002 }
1003
1004 md->recursive = &new_recursive;
1005 memcpy(md->offset_vector, new_recursive.offset_save,
1006 new_recursive.saved_max * sizeof(int));
1007 callpat += GET(callpat, 1);
1008 }
1009 while (*callpat == OP_ALT);
1010
1011 DPRINTF(("Recursion didn't match\n"));
1012 md->recursive = new_recursive.prevrec;
1013 if (new_recursive.offset_save != stacksave)
1014 (pcre_free)(new_recursive.offset_save);
1015 RRETURN(MATCH_NOMATCH);
1016 }
1017 /* Control never reaches here */
1018
1019 /* "Once" brackets are like assertion brackets except that after a match,
1020 the point in the subject string is not moved back. Thus there can never be
1021 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1022 Check the alternative branches in turn - the matching won't pass the KET
1023 for this kind of subpattern. If any one branch matches, we carry on as at
1024 the end of a normal bracket, leaving the subject pointer. */
1025
1026 case OP_ONCE:
1027 prev = ecode;
1028 saved_eptr = eptr;
1029
1030 do
1031 {
1032 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1033 eptrb, 0, RM7);
1034 if (rrc == MATCH_MATCH) break;
1035 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1036 ecode += GET(ecode,1);
1037 }
1038 while (*ecode == OP_ALT);
1039
1040 /* If we hit the end of the group (which could be repeated), fail */
1041
1042 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1043
1044 /* Continue as from after the assertion, updating the offsets high water
1045 mark, since extracts may have been taken. */
1046
1047 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1048
1049 offset_top = md->end_offset_top;
1050 eptr = md->end_match_ptr;
1051
1052 /* For a non-repeating ket, just continue at this level. This also
1053 happens for a repeating ket if no characters were matched in the group.
1054 This is the forcible breaking of infinite loops as implemented in Perl
1055 5.005. If there is an options reset, it will get obeyed in the normal
1056 course of events. */
1057
1058 if (*ecode == OP_KET || eptr == saved_eptr)
1059 {
1060 ecode += 1+LINK_SIZE;
1061 break;
1062 }
1063
1064 /* The repeating kets try the rest of the pattern or restart from the
1065 preceding bracket, in the appropriate order. The second "call" of match()
1066 uses tail recursion, to avoid using another stack frame. We need to reset
1067 any options that changed within the bracket before re-running it, so
1068 check the next opcode. */
1069
1070 if (ecode[1+LINK_SIZE] == OP_OPT)
1071 {
1072 ims = (ims & ~PCRE_IMS) | ecode[4];
1073 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1074 }
1075
1076 if (*ecode == OP_KETRMIN)
1077 {
1078 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1079 RM8);
1080 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1081 ecode = prev;
1082 flags = match_tail_recursed;
1083 goto TAIL_RECURSE;
1084 }
1085 else /* OP_KETRMAX */
1086 {
1087 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1088 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1089 ecode += 1 + LINK_SIZE;
1090 flags = match_tail_recursed;
1091 goto TAIL_RECURSE;
1092 }
1093 /* Control never gets here */
1094
1095 /* An alternation is the end of a branch; scan along to find the end of the
1096 bracketed group and go to there. */
1097
1098 case OP_ALT:
1099 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1100 break;
1101
1102 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1103 that it may occur zero times. It may repeat infinitely, or not at all -
1104 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1105 repeat limits are compiled as a number of copies, with the optional ones
1106 preceded by BRAZERO or BRAMINZERO. */
1107
1108 case OP_BRAZERO:
1109 {
1110 next = ecode+1;
1111 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1112 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1113 do next += GET(next,1); while (*next == OP_ALT);
1114 ecode = next + 1 + LINK_SIZE;
1115 }
1116 break;
1117
1118 case OP_BRAMINZERO:
1119 {
1120 next = ecode+1;
1121 do next += GET(next, 1); while (*next == OP_ALT);
1122 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1123 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1124 ecode++;
1125 }
1126 break;
1127
1128 /* End of a group, repeated or non-repeating. */
1129
1130 case OP_KET:
1131 case OP_KETRMIN:
1132 case OP_KETRMAX:
1133 prev = ecode - GET(ecode, 1);
1134
1135 /* If this was a group that remembered the subject start, in order to break
1136 infinite repeats of empty string matches, retrieve the subject start from
1137 the chain. Otherwise, set it NULL. */
1138
1139 if (*prev >= OP_SBRA)
1140 {
1141 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1142 eptrb = eptrb->epb_prev; /* Backup to previous group */
1143 }
1144 else saved_eptr = NULL;
1145
1146 /* If we are at the end of an assertion group, stop matching and return
1147 MATCH_MATCH, but record the current high water mark for use by positive
1148 assertions. Do this also for the "once" (atomic) groups. */
1149
1150 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1151 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1152 *prev == OP_ONCE)
1153 {
1154 md->end_match_ptr = eptr; /* For ONCE */
1155 md->end_offset_top = offset_top;
1156 RRETURN(MATCH_MATCH);
1157 }
1158
1159 /* For capturing groups we have to check the group number back at the start
1160 and if necessary complete handling an extraction by setting the offsets and
1161 bumping the high water mark. Note that whole-pattern recursion is coded as
1162 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1163 when the OP_END is reached. Other recursion is handled here. */
1164
1165 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1166 {
1167 number = GET2(prev, 1+LINK_SIZE);
1168 offset = number << 1;
1169
1170 #ifdef DEBUG
1171 printf("end bracket %d", number);
1172 printf("\n");
1173 #endif
1174
1175 md->capture_last = number;
1176 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1177 {
1178 md->offset_vector[offset] =
1179 md->offset_vector[md->offset_end - number];
1180 md->offset_vector[offset+1] = eptr - md->start_subject;
1181 if (offset_top <= offset) offset_top = offset + 2;
1182 }
1183
1184 /* Handle a recursively called group. Restore the offsets
1185 appropriately and continue from after the call. */
1186
1187 if (md->recursive != NULL && md->recursive->group_num == number)
1188 {
1189 recursion_info *rec = md->recursive;
1190 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1191 md->recursive = rec->prevrec;
1192 mstart = rec->save_start;
1193 memcpy(md->offset_vector, rec->offset_save,
1194 rec->saved_max * sizeof(int));
1195 ecode = rec->after_call;
1196 ims = original_ims;
1197 break;
1198 }
1199 }
1200
1201 /* For both capturing and non-capturing groups, reset the value of the ims
1202 flags, in case they got changed during the group. */
1203
1204 ims = original_ims;
1205 DPRINTF(("ims reset to %02lx\n", ims));
1206
1207 /* For a non-repeating ket, just continue at this level. This also
1208 happens for a repeating ket if no characters were matched in the group.
1209 This is the forcible breaking of infinite loops as implemented in Perl
1210 5.005. If there is an options reset, it will get obeyed in the normal
1211 course of events. */
1212
1213 if (*ecode == OP_KET || eptr == saved_eptr)
1214 {
1215 ecode += 1 + LINK_SIZE;
1216 break;
1217 }
1218
1219 /* The repeating kets try the rest of the pattern or restart from the
1220 preceding bracket, in the appropriate order. In the second case, we can use
1221 tail recursion to avoid using another stack frame. */
1222
1223 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1224
1225 if (*ecode == OP_KETRMIN)
1226 {
1227 RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1228 RM12);
1229 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1230 ecode = prev;
1231 flags |= match_tail_recursed;
1232 goto TAIL_RECURSE;
1233 }
1234 else /* OP_KETRMAX */
1235 {
1236 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1237 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1238 ecode += 1 + LINK_SIZE;
1239 flags = match_tail_recursed;
1240 goto TAIL_RECURSE;
1241 }
1242 /* Control never gets here */
1243
1244 /* Start of subject unless notbol, or after internal newline if multiline */
1245
1246 case OP_CIRC:
1247 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1248 if ((ims & PCRE_MULTILINE) != 0)
1249 {
1250 if (eptr != md->start_subject &&
1251 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1252 RRETURN(MATCH_NOMATCH);
1253 ecode++;
1254 break;
1255 }
1256 /* ... else fall through */
1257
1258 /* Start of subject assertion */
1259
1260 case OP_SOD:
1261 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1262 ecode++;
1263 break;
1264
1265 /* Start of match assertion */
1266
1267 case OP_SOM:
1268 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1269 ecode++;
1270 break;
1271
1272 /* Reset the start of match point */
1273
1274 case OP_SET_SOM:
1275 mstart = eptr;
1276 ecode++;
1277 break;
1278
1279 /* Assert before internal newline if multiline, or before a terminating
1280 newline unless endonly is set, else end of subject unless noteol is set. */
1281
1282 case OP_DOLL:
1283 if ((ims & PCRE_MULTILINE) != 0)
1284 {
1285 if (eptr < md->end_subject)
1286 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1287 else
1288 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1289 ecode++;
1290 break;
1291 }
1292 else
1293 {
1294 if (md->noteol) RRETURN(MATCH_NOMATCH);
1295 if (!md->endonly)
1296 {
1297 if (eptr != md->end_subject &&
1298 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1299 RRETURN(MATCH_NOMATCH);
1300 ecode++;
1301 break;
1302 }
1303 }
1304 /* ... else fall through for endonly */
1305
1306 /* End of subject assertion (\z) */
1307
1308 case OP_EOD:
1309 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1310 ecode++;
1311 break;
1312
1313 /* End of subject or ending \n assertion (\Z) */
1314
1315 case OP_EODN:
1316 if (eptr != md->end_subject &&
1317 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1318 RRETURN(MATCH_NOMATCH);
1319 ecode++;
1320 break;
1321
1322 /* Word boundary assertions */
1323
1324 case OP_NOT_WORD_BOUNDARY:
1325 case OP_WORD_BOUNDARY:
1326 {
1327
1328 /* Find out if the previous and current characters are "word" characters.
1329 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1330 be "non-word" characters. */
1331
1332 #ifdef SUPPORT_UTF8
1333 if (utf8)
1334 {
1335 if (eptr == md->start_subject) prev_is_word = FALSE; else
1336 {
1337 const uschar *lastptr = eptr - 1;
1338 while((*lastptr & 0xc0) == 0x80) lastptr--;
1339 GETCHAR(c, lastptr);
1340 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1341 }
1342 if (eptr >= md->end_subject) cur_is_word = FALSE; else
1343 {
1344 GETCHAR(c, eptr);
1345 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1346 }
1347 }
1348 else
1349 #endif
1350
1351 /* More streamlined when not in UTF-8 mode */
1352
1353 {
1354 prev_is_word = (eptr != md->start_subject) &&
1355 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1356 cur_is_word = (eptr < md->end_subject) &&
1357 ((md->ctypes[*eptr] & ctype_word) != 0);
1358 }
1359
1360 /* Now see if the situation is what we want */
1361
1362 if ((*ecode++ == OP_WORD_BOUNDARY)?
1363 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1364 RRETURN(MATCH_NOMATCH);
1365 }
1366 break;
1367
1368 /* Match a single character type; inline for speed */
1369
1370 case OP_ANY:
1371 if ((ims & PCRE_DOTALL) == 0)
1372 {
1373 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1374 }
1375 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1376 if (utf8)
1377 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1378 ecode++;
1379 break;
1380
1381 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1382 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1383
1384 case OP_ANYBYTE:
1385 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1386 ecode++;
1387 break;
1388
1389 case OP_NOT_DIGIT:
1390 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1391 GETCHARINCTEST(c, eptr);
1392 if (
1393 #ifdef SUPPORT_UTF8
1394 c < 256 &&
1395 #endif
1396 (md->ctypes[c] & ctype_digit) != 0
1397 )
1398 RRETURN(MATCH_NOMATCH);
1399 ecode++;
1400 break;
1401
1402 case OP_DIGIT:
1403 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1404 GETCHARINCTEST(c, eptr);
1405 if (
1406 #ifdef SUPPORT_UTF8
1407 c >= 256 ||
1408 #endif
1409 (md->ctypes[c] & ctype_digit) == 0
1410 )
1411 RRETURN(MATCH_NOMATCH);
1412 ecode++;
1413 break;
1414
1415 case OP_NOT_WHITESPACE:
1416 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1417 GETCHARINCTEST(c, eptr);
1418 if (
1419 #ifdef SUPPORT_UTF8
1420 c < 256 &&
1421 #endif
1422 (md->ctypes[c] & ctype_space) != 0
1423 )
1424 RRETURN(MATCH_NOMATCH);
1425 ecode++;
1426 break;
1427
1428 case OP_WHITESPACE:
1429 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1430 GETCHARINCTEST(c, eptr);
1431 if (
1432 #ifdef SUPPORT_UTF8
1433 c >= 256 ||
1434 #endif
1435 (md->ctypes[c] & ctype_space) == 0
1436 )
1437 RRETURN(MATCH_NOMATCH);
1438 ecode++;
1439 break;
1440
1441 case OP_NOT_WORDCHAR:
1442 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1443 GETCHARINCTEST(c, eptr);
1444 if (
1445 #ifdef SUPPORT_UTF8
1446 c < 256 &&
1447 #endif
1448 (md->ctypes[c] & ctype_word) != 0
1449 )
1450 RRETURN(MATCH_NOMATCH);
1451 ecode++;
1452 break;
1453
1454 case OP_WORDCHAR:
1455 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1456 GETCHARINCTEST(c, eptr);
1457 if (
1458 #ifdef SUPPORT_UTF8
1459 c >= 256 ||
1460 #endif
1461 (md->ctypes[c] & ctype_word) == 0
1462 )
1463 RRETURN(MATCH_NOMATCH);
1464 ecode++;
1465 break;
1466
1467 case OP_ANYNL:
1468 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1469 GETCHARINCTEST(c, eptr);
1470 switch(c)
1471 {
1472 default: RRETURN(MATCH_NOMATCH);
1473 case 0x000d:
1474 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1475 break;
1476 case 0x000a:
1477 case 0x000b:
1478 case 0x000c:
1479 case 0x0085:
1480 case 0x2028:
1481 case 0x2029:
1482 break;
1483 }
1484 ecode++;
1485 break;
1486
1487 case OP_NOT_HSPACE:
1488 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1489 GETCHARINCTEST(c, eptr);
1490 switch(c)
1491 {
1492 default: break;
1493 case 0x09: /* HT */
1494 case 0x20: /* SPACE */
1495 case 0xa0: /* NBSP */
1496 case 0x1680: /* OGHAM SPACE MARK */
1497 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1498 case 0x2000: /* EN QUAD */
1499 case 0x2001: /* EM QUAD */
1500 case 0x2002: /* EN SPACE */
1501 case 0x2003: /* EM SPACE */
1502 case 0x2004: /* THREE-PER-EM SPACE */
1503 case 0x2005: /* FOUR-PER-EM SPACE */
1504 case 0x2006: /* SIX-PER-EM SPACE */
1505 case 0x2007: /* FIGURE SPACE */
1506 case 0x2008: /* PUNCTUATION SPACE */
1507 case 0x2009: /* THIN SPACE */
1508 case 0x200A: /* HAIR SPACE */
1509 case 0x202f: /* NARROW NO-BREAK SPACE */
1510 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1511 case 0x3000: /* IDEOGRAPHIC SPACE */
1512 RRETURN(MATCH_NOMATCH);
1513 }
1514 ecode++;
1515 break;
1516
1517 case OP_HSPACE:
1518 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1519 GETCHARINCTEST(c, eptr);
1520 switch(c)
1521 {
1522 default: RRETURN(MATCH_NOMATCH);
1523 case 0x09: /* HT */
1524 case 0x20: /* SPACE */
1525 case 0xa0: /* NBSP */
1526 case 0x1680: /* OGHAM SPACE MARK */
1527 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1528 case 0x2000: /* EN QUAD */
1529 case 0x2001: /* EM QUAD */
1530 case 0x2002: /* EN SPACE */
1531 case 0x2003: /* EM SPACE */
1532 case 0x2004: /* THREE-PER-EM SPACE */
1533 case 0x2005: /* FOUR-PER-EM SPACE */
1534 case 0x2006: /* SIX-PER-EM SPACE */
1535 case 0x2007: /* FIGURE SPACE */
1536 case 0x2008: /* PUNCTUATION SPACE */
1537 case 0x2009: /* THIN SPACE */
1538 case 0x200A: /* HAIR SPACE */
1539 case 0x202f: /* NARROW NO-BREAK SPACE */
1540 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1541 case 0x3000: /* IDEOGRAPHIC SPACE */
1542 break;
1543 }
1544 ecode++;
1545 break;
1546
1547 case OP_NOT_VSPACE:
1548 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1549 GETCHARINCTEST(c, eptr);
1550 switch(c)
1551 {
1552 default: break;
1553 case 0x0a: /* LF */
1554 case 0x0b: /* VT */
1555 case 0x0c: /* FF */
1556 case 0x0d: /* CR */
1557 case 0x85: /* NEL */
1558 case 0x2028: /* LINE SEPARATOR */
1559 case 0x2029: /* PARAGRAPH SEPARATOR */
1560 RRETURN(MATCH_NOMATCH);
1561 }
1562 ecode++;
1563 break;
1564
1565 case OP_VSPACE:
1566 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1567 GETCHARINCTEST(c, eptr);
1568 switch(c)
1569 {
1570 default: RRETURN(MATCH_NOMATCH);
1571 case 0x0a: /* LF */
1572 case 0x0b: /* VT */
1573 case 0x0c: /* FF */
1574 case 0x0d: /* CR */
1575 case 0x85: /* NEL */
1576 case 0x2028: /* LINE SEPARATOR */
1577 case 0x2029: /* PARAGRAPH SEPARATOR */
1578 break;
1579 }
1580 ecode++;
1581 break;
1582
1583 #ifdef SUPPORT_UCP
1584 /* Check the next character by Unicode property. We will get here only
1585 if the support is in the binary; otherwise a compile-time error occurs. */
1586
1587 case OP_PROP:
1588 case OP_NOTPROP:
1589 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1590 GETCHARINCTEST(c, eptr);
1591 {
1592 int chartype, script;
1593 int category = _pcre_ucp_findprop(c, &chartype, &script);
1594
1595 switch(ecode[1])
1596 {
1597 case PT_ANY:
1598 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1599 break;
1600
1601 case PT_LAMP:
1602 if ((chartype == ucp_Lu ||
1603 chartype == ucp_Ll ||
1604 chartype == ucp_Lt) == (op == OP_NOTPROP))
1605 RRETURN(MATCH_NOMATCH);
1606 break;
1607
1608 case PT_GC:
1609 if ((ecode[2] != category) == (op == OP_PROP))
1610 RRETURN(MATCH_NOMATCH);
1611 break;
1612
1613 case PT_PC:
1614 if ((ecode[2] != chartype) == (op == OP_PROP))
1615 RRETURN(MATCH_NOMATCH);
1616 break;
1617
1618 case PT_SC:
1619 if ((ecode[2] != script) == (op == OP_PROP))
1620 RRETURN(MATCH_NOMATCH);
1621 break;
1622
1623 default:
1624 RRETURN(PCRE_ERROR_INTERNAL);
1625 }
1626
1627 ecode += 3;
1628 }
1629 break;
1630
1631 /* Match an extended Unicode sequence. We will get here only if the support
1632 is in the binary; otherwise a compile-time error occurs. */
1633
1634 case OP_EXTUNI:
1635 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1636 GETCHARINCTEST(c, eptr);
1637 {
1638 int chartype, script;
1639 int category = _pcre_ucp_findprop(c, &chartype, &script);
1640 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1641 while (eptr < md->end_subject)
1642 {
1643 int len = 1;
1644 if (!utf8) c = *eptr; else
1645 {
1646 GETCHARLEN(c, eptr, len);
1647 }
1648 category = _pcre_ucp_findprop(c, &chartype, &script);
1649 if (category != ucp_M) break;
1650 eptr += len;
1651 }
1652 }
1653 ecode++;
1654 break;
1655 #endif
1656
1657
1658 /* Match a back reference, possibly repeatedly. Look past the end of the
1659 item to see if there is repeat information following. The code is similar
1660 to that for character classes, but repeated for efficiency. Then obey
1661 similar code to character type repeats - written out again for speed.
1662 However, if the referenced string is the empty string, always treat
1663 it as matched, any number of times (otherwise there could be infinite
1664 loops). */
1665
1666 case OP_REF:
1667 {
1668 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1669 ecode += 3; /* Advance past item */
1670
1671 /* If the reference is unset, set the length to be longer than the amount
1672 of subject left; this ensures that every attempt at a match fails. We
1673 can't just fail here, because of the possibility of quantifiers with zero
1674 minima. */
1675
1676 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1677 md->end_subject - eptr + 1 :
1678 md->offset_vector[offset+1] - md->offset_vector[offset];
1679
1680 /* Set up for repetition, or handle the non-repeated case */
1681
1682 switch (*ecode)
1683 {
1684 case OP_CRSTAR:
1685 case OP_CRMINSTAR:
1686 case OP_CRPLUS:
1687 case OP_CRMINPLUS:
1688 case OP_CRQUERY:
1689 case OP_CRMINQUERY:
1690 c = *ecode++ - OP_CRSTAR;
1691 minimize = (c & 1) != 0;
1692 min = rep_min[c]; /* Pick up values from tables; */
1693 max = rep_max[c]; /* zero for max => infinity */
1694 if (max == 0) max = INT_MAX;
1695 break;
1696
1697 case OP_CRRANGE:
1698 case OP_CRMINRANGE:
1699 minimize = (*ecode == OP_CRMINRANGE);
1700 min = GET2(ecode, 1);
1701 max = GET2(ecode, 3);
1702 if (max == 0) max = INT_MAX;
1703 ecode += 5;
1704 break;
1705
1706 default: /* No repeat follows */
1707 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1708 eptr += length;
1709 continue; /* With the main loop */
1710 }
1711
1712 /* If the length of the reference is zero, just continue with the
1713 main loop. */
1714
1715 if (length == 0) continue;
1716
1717 /* First, ensure the minimum number of matches are present. We get back
1718 the length of the reference string explicitly rather than passing the
1719 address of eptr, so that eptr can be a register variable. */
1720
1721 for (i = 1; i <= min; i++)
1722 {
1723 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1724 eptr += length;
1725 }
1726
1727 /* If min = max, continue at the same level without recursion.
1728 They are not both allowed to be zero. */
1729
1730 if (min == max) continue;
1731
1732 /* If minimizing, keep trying and advancing the pointer */
1733
1734 if (minimize)
1735 {
1736 for (fi = min;; fi++)
1737 {
1738 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1739 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1740 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1741 RRETURN(MATCH_NOMATCH);
1742 eptr += length;
1743 }
1744 /* Control never gets here */
1745 }
1746
1747 /* If maximizing, find the longest string and work backwards */
1748
1749 else
1750 {
1751 pp = eptr;
1752 for (i = min; i < max; i++)
1753 {
1754 if (!match_ref(offset, eptr, length, md, ims)) break;
1755 eptr += length;
1756 }
1757 while (eptr >= pp)
1758 {
1759 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1760 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1761 eptr -= length;
1762 }
1763 RRETURN(MATCH_NOMATCH);
1764 }
1765 }
1766 /* Control never gets here */
1767
1768
1769
1770 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1771 used when all the characters in the class have values in the range 0-255,
1772 and either the matching is caseful, or the characters are in the range
1773 0-127 when UTF-8 processing is enabled. The only difference between
1774 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1775 encountered.
1776
1777 First, look past the end of the item to see if there is repeat information
1778 following. Then obey similar code to character type repeats - written out
1779 again for speed. */
1780
1781 case OP_NCLASS:
1782 case OP_CLASS:
1783 {
1784 data = ecode + 1; /* Save for matching */
1785 ecode += 33; /* Advance past the item */
1786
1787 switch (*ecode)
1788 {
1789 case OP_CRSTAR:
1790 case OP_CRMINSTAR:
1791 case OP_CRPLUS:
1792 case OP_CRMINPLUS:
1793 case OP_CRQUERY:
1794 case OP_CRMINQUERY:
1795 c = *ecode++ - OP_CRSTAR;
1796 minimize = (c & 1) != 0;
1797 min = rep_min[c]; /* Pick up values from tables; */
1798 max = rep_max[c]; /* zero for max => infinity */
1799 if (max == 0) max = INT_MAX;
1800 break;
1801
1802 case OP_CRRANGE:
1803 case OP_CRMINRANGE:
1804 minimize = (*ecode == OP_CRMINRANGE);
1805 min = GET2(ecode, 1);
1806 max = GET2(ecode, 3);
1807 if (max == 0) max = INT_MAX;
1808 ecode += 5;
1809 break;
1810
1811 default: /* No repeat follows */
1812 min = max = 1;
1813 break;
1814 }
1815
1816 /* First, ensure the minimum number of matches are present. */
1817
1818 #ifdef SUPPORT_UTF8
1819 /* UTF-8 mode */
1820 if (utf8)
1821 {
1822 for (i = 1; i <= min; i++)
1823 {
1824 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1825 GETCHARINC(c, eptr);
1826 if (c > 255)
1827 {
1828 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1829 }
1830 else
1831 {
1832 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1833 }
1834 }
1835 }
1836 else
1837 #endif
1838 /* Not UTF-8 mode */
1839 {
1840 for (i = 1; i <= min; i++)
1841 {
1842 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1843 c = *eptr++;
1844 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1845 }
1846 }
1847
1848 /* If max == min we can continue with the main loop without the
1849 need to recurse. */
1850
1851 if (min == max) continue;
1852
1853 /* If minimizing, keep testing the rest of the expression and advancing
1854 the pointer while it matches the class. */
1855
1856 if (minimize)
1857 {
1858 #ifdef SUPPORT_UTF8
1859 /* UTF-8 mode */
1860 if (utf8)
1861 {
1862 for (fi = min;; fi++)
1863 {
1864 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1865 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1866 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1867 GETCHARINC(c, eptr);
1868 if (c > 255)
1869 {
1870 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1871 }
1872 else
1873 {
1874 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1875 }
1876 }
1877 }
1878 else
1879 #endif
1880 /* Not UTF-8 mode */
1881 {
1882 for (fi = min;; fi++)
1883 {
1884 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1885 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1886 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1887 c = *eptr++;
1888 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1889 }
1890 }
1891 /* Control never gets here */
1892 }
1893
1894 /* If maximizing, find the longest possible run, then work backwards. */
1895
1896 else
1897 {
1898 pp = eptr;
1899
1900 #ifdef SUPPORT_UTF8
1901 /* UTF-8 mode */
1902 if (utf8)
1903 {
1904 for (i = min; i < max; i++)
1905 {
1906 int len = 1;
1907 if (eptr >= md->end_subject) break;
1908 GETCHARLEN(c, eptr, len);
1909 if (c > 255)
1910 {
1911 if (op == OP_CLASS) break;
1912 }
1913 else
1914 {
1915 if ((data[c/8] & (1 << (c&7))) == 0) break;
1916 }
1917 eptr += len;
1918 }
1919 for (;;)
1920 {
1921 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1922 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1923 if (eptr-- == pp) break; /* Stop if tried at original pos */
1924 BACKCHAR(eptr);
1925 }
1926 }
1927 else
1928 #endif
1929 /* Not UTF-8 mode */
1930 {
1931 for (i = min; i < max; i++)
1932 {
1933 if (eptr >= md->end_subject) break;
1934 c = *eptr;
1935 if ((data[c/8] & (1 << (c&7))) == 0) break;
1936 eptr++;
1937 }
1938 while (eptr >= pp)
1939 {
1940 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1941 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1942 eptr--;
1943 }
1944 }
1945
1946 RRETURN(MATCH_NOMATCH);
1947 }
1948 }
1949 /* Control never gets here */
1950
1951
1952 /* Match an extended character class. This opcode is encountered only
1953 in UTF-8 mode, because that's the only time it is compiled. */
1954
1955 #ifdef SUPPORT_UTF8
1956 case OP_XCLASS:
1957 {
1958 data = ecode + 1 + LINK_SIZE; /* Save for matching */
1959 ecode += GET(ecode, 1); /* Advance past the item */
1960
1961 switch (*ecode)
1962 {
1963 case OP_CRSTAR:
1964 case OP_CRMINSTAR:
1965 case OP_CRPLUS:
1966 case OP_CRMINPLUS:
1967 case OP_CRQUERY:
1968 case OP_CRMINQUERY:
1969 c = *ecode++ - OP_CRSTAR;
1970 minimize = (c & 1) != 0;
1971 min = rep_min[c]; /* Pick up values from tables; */
1972 max = rep_max[c]; /* zero for max => infinity */
1973 if (max == 0) max = INT_MAX;
1974 break;
1975
1976 case OP_CRRANGE:
1977 case OP_CRMINRANGE:
1978 minimize = (*ecode == OP_CRMINRANGE);
1979 min = GET2(ecode, 1);
1980 max = GET2(ecode, 3);
1981 if (max == 0) max = INT_MAX;
1982 ecode += 5;
1983 break;
1984
1985 default: /* No repeat follows */
1986 min = max = 1;
1987 break;
1988 }
1989
1990 /* First, ensure the minimum number of matches are present. */
1991
1992 for (i = 1; i <= min; i++)
1993 {
1994 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1995 GETCHARINC(c, eptr);
1996 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1997 }
1998
1999 /* If max == min we can continue with the main loop without the
2000 need to recurse. */
2001
2002 if (min == max) continue;
2003
2004 /* If minimizing, keep testing the rest of the expression and advancing
2005 the pointer while it matches the class. */
2006
2007 if (minimize)
2008 {
2009 for (fi = min;; fi++)
2010 {
2011 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2012 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2013 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2014 GETCHARINC(c, eptr);
2015 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2016 }
2017 /* Control never gets here */
2018 }
2019
2020 /* If maximizing, find the longest possible run, then work backwards. */
2021
2022 else
2023 {
2024 pp = eptr;
2025 for (i = min; i < max; i++)
2026 {
2027 int len = 1;
2028 if (eptr >= md->end_subject) break;
2029 GETCHARLEN(c, eptr, len);
2030 if (!_pcre_xclass(c, data)) break;
2031 eptr += len;
2032 }
2033 for(;;)
2034 {
2035 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2036 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2037 if (eptr-- == pp) break; /* Stop if tried at original pos */
2038 BACKCHAR(eptr)
2039 }
2040 RRETURN(MATCH_NOMATCH);
2041 }
2042
2043 /* Control never gets here */
2044 }
2045 #endif /* End of XCLASS */
2046
2047 /* Match a single character, casefully */
2048
2049 case OP_CHAR:
2050 #ifdef SUPPORT_UTF8
2051 if (utf8)
2052 {
2053 length = 1;
2054 ecode++;
2055 GETCHARLEN(fc, ecode, length);
2056 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2057 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2058 }
2059 else
2060 #endif
2061
2062 /* Non-UTF-8 mode */
2063 {
2064 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2065 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2066 ecode += 2;
2067 }
2068 break;
2069
2070 /* Match a single character, caselessly */
2071
2072 case OP_CHARNC:
2073 #ifdef SUPPORT_UTF8
2074 if (utf8)
2075 {
2076 length = 1;
2077 ecode++;
2078 GETCHARLEN(fc, ecode, length);
2079
2080 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2081
2082 /* If the pattern character's value is < 128, we have only one byte, and
2083 can use the fast lookup table. */
2084
2085 if (fc < 128)
2086 {
2087 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2088 }
2089
2090 /* Otherwise we must pick up the subject character */
2091
2092 else
2093 {
2094 unsigned int dc;
2095 GETCHARINC(dc, eptr);
2096 ecode += length;
2097
2098 /* If we have Unicode property support, we can use it to test the other
2099 case of the character, if there is one. */
2100
2101 if (fc != dc)
2102 {
2103 #ifdef SUPPORT_UCP
2104 if (dc != _pcre_ucp_othercase(fc))
2105 #endif
2106 RRETURN(MATCH_NOMATCH);
2107 }
2108 }
2109 }
2110 else
2111 #endif /* SUPPORT_UTF8 */
2112
2113 /* Non-UTF-8 mode */
2114 {
2115 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2116 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2117 ecode += 2;
2118 }
2119 break;
2120
2121 /* Match a single character repeatedly. */
2122
2123 case OP_EXACT:
2124 min = max = GET2(ecode, 1);
2125 ecode += 3;
2126 goto REPEATCHAR;
2127
2128 case OP_POSUPTO:
2129 possessive = TRUE;
2130 /* Fall through */
2131
2132 case OP_UPTO:
2133 case OP_MINUPTO:
2134 min = 0;
2135 max = GET2(ecode, 1);
2136 minimize = *ecode == OP_MINUPTO;
2137 ecode += 3;
2138 goto REPEATCHAR;
2139
2140 case OP_POSSTAR:
2141 possessive = TRUE;
2142 min = 0;
2143 max = INT_MAX;
2144 ecode++;
2145 goto REPEATCHAR;
2146
2147 case OP_POSPLUS:
2148 possessive = TRUE;
2149 min = 1;
2150 max = INT_MAX;
2151 ecode++;
2152 goto REPEATCHAR;
2153
2154 case OP_POSQUERY:
2155 possessive = TRUE;
2156 min = 0;
2157 max = 1;
2158 ecode++;
2159 goto REPEATCHAR;
2160
2161 case OP_STAR:
2162 case OP_MINSTAR:
2163 case OP_PLUS:
2164 case OP_MINPLUS:
2165 case OP_QUERY:
2166 case OP_MINQUERY:
2167 c = *ecode++ - OP_STAR;
2168 minimize = (c & 1) != 0;
2169 min = rep_min[c]; /* Pick up values from tables; */
2170 max = rep_max[c]; /* zero for max => infinity */
2171 if (max == 0) max = INT_MAX;
2172
2173 /* Common code for all repeated single-character matches. We can give
2174 up quickly if there are fewer than the minimum number of characters left in
2175 the subject. */
2176
2177 REPEATCHAR:
2178 #ifdef SUPPORT_UTF8
2179 if (utf8)
2180 {
2181 length = 1;
2182 charptr = ecode;
2183 GETCHARLEN(fc, ecode, length);
2184 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2185 ecode += length;
2186
2187 /* Handle multibyte character matching specially here. There is
2188 support for caseless matching if UCP support is present. */
2189
2190 if (length > 1)
2191 {
2192 #ifdef SUPPORT_UCP
2193 unsigned int othercase;
2194 if ((ims & PCRE_CASELESS) != 0 &&
2195 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2196 oclength = _pcre_ord2utf8(othercase, occhars);
2197 else oclength = 0;
2198 #endif /* SUPPORT_UCP */
2199
2200 for (i = 1; i <= min; i++)
2201 {
2202 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2203 #ifdef SUPPORT_UCP
2204 /* Need braces because of following else */
2205 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2206 else
2207 {
2208 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2209 eptr += oclength;
2210 }
2211 #else /* without SUPPORT_UCP */
2212 else { RRETURN(MATCH_NOMATCH); }
2213 #endif /* SUPPORT_UCP */
2214 }
2215
2216 if (min == max) continue;
2217
2218 if (minimize)
2219 {
2220 for (fi = min;; fi++)
2221 {
2222 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2223 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2224 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2225 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2226 #ifdef SUPPORT_UCP
2227 /* Need braces because of following else */
2228 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2229 else
2230 {
2231 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2232 eptr += oclength;
2233 }
2234 #else /* without SUPPORT_UCP */
2235 else { RRETURN (MATCH_NOMATCH); }
2236 #endif /* SUPPORT_UCP */
2237 }
2238 /* Control never gets here */
2239 }
2240
2241 else /* Maximize */
2242 {
2243 pp = eptr;
2244 for (i = min; i < max; i++)
2245 {
2246 if (eptr > md->end_subject - length) break;
2247 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2248 #ifdef SUPPORT_UCP
2249 else if (oclength == 0) break;
2250 else
2251 {
2252 if (memcmp(eptr, occhars, oclength) != 0) break;
2253 eptr += oclength;
2254 }
2255 #else /* without SUPPORT_UCP */
2256 else break;
2257 #endif /* SUPPORT_UCP */
2258 }
2259
2260 if (possessive) continue;
2261 for(;;)
2262 {
2263 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2264 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2265 if (eptr == pp) RRETURN(MATCH_NOMATCH);
2266 #ifdef SUPPORT_UCP
2267 eptr--;
2268 BACKCHAR(eptr);
2269 #else /* without SUPPORT_UCP */
2270 eptr -= length;
2271 #endif /* SUPPORT_UCP */
2272 }
2273 }
2274 /* Control never gets here */
2275 }
2276
2277 /* If the length of a UTF-8 character is 1, we fall through here, and
2278 obey the code as for non-UTF-8 characters below, though in this case the
2279 value of fc will always be < 128. */
2280 }
2281 else
2282 #endif /* SUPPORT_UTF8 */
2283
2284 /* When not in UTF-8 mode, load a single-byte character. */
2285 {
2286 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2287 fc = *ecode++;
2288 }
2289
2290 /* The value of fc at this point is always less than 256, though we may or
2291 may not be in UTF-8 mode. The code is duplicated for the caseless and
2292 caseful cases, for speed, since matching characters is likely to be quite
2293 common. First, ensure the minimum number of matches are present. If min =
2294 max, continue at the same level without recursing. Otherwise, if
2295 minimizing, keep trying the rest of the expression and advancing one
2296 matching character if failing, up to the maximum. Alternatively, if
2297 maximizing, find the maximum number of characters and work backwards. */
2298
2299 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2300 max, eptr));
2301
2302 if ((ims & PCRE_CASELESS) != 0)
2303 {
2304 fc = md->lcc[fc];
2305 for (i = 1; i <= min; i++)
2306 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2307 if (min == max) continue;
2308 if (minimize)
2309 {
2310 for (fi = min;; fi++)
2311 {
2312 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2313 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2314 if (fi >= max || eptr >= md->end_subject ||
2315 fc != md->lcc[*eptr++])
2316 RRETURN(MATCH_NOMATCH);
2317 }
2318 /* Control never gets here */
2319 }
2320 else /* Maximize */
2321 {
2322 pp = eptr;
2323 for (i = min; i < max; i++)
2324 {
2325 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2326 eptr++;
2327 }
2328 if (possessive) continue;
2329 while (eptr >= pp)
2330 {
2331 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2332 eptr--;
2333 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2334 }
2335 RRETURN(MATCH_NOMATCH);
2336 }
2337 /* Control never gets here */
2338 }
2339
2340 /* Caseful comparisons (includes all multi-byte characters) */
2341
2342 else
2343 {
2344 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2345 if (min == max) continue;
2346 if (minimize)
2347 {
2348 for (fi = min;; fi++)
2349 {
2350 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2351 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2352 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2353 RRETURN(MATCH_NOMATCH);
2354 }
2355 /* Control never gets here */
2356 }
2357 else /* Maximize */
2358 {
2359 pp = eptr;
2360 for (i = min; i < max; i++)
2361 {
2362 if (eptr >= md->end_subject || fc != *eptr) break;
2363 eptr++;
2364 }
2365 if (possessive) continue;
2366 while (eptr >= pp)
2367 {
2368 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2369 eptr--;
2370 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2371 }
2372 RRETURN(MATCH_NOMATCH);
2373 }
2374 }
2375 /* Control never gets here */
2376
2377 /* Match a negated single one-byte character. The character we are
2378 checking can be multibyte. */
2379
2380 case OP_NOT:
2381 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2382 ecode++;
2383 GETCHARINCTEST(c, eptr);
2384 if ((ims & PCRE_CASELESS) != 0)
2385 {
2386 #ifdef SUPPORT_UTF8
2387 if (c < 256)
2388 #endif
2389 c = md->lcc[c];
2390 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2391 }
2392 else
2393 {
2394 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2395 }
2396 break;
2397
2398 /* Match a negated single one-byte character repeatedly. This is almost a
2399 repeat of the code for a repeated single character, but I haven't found a
2400 nice way of commoning these up that doesn't require a test of the
2401 positive/negative option for each character match. Maybe that wouldn't add
2402 very much to the time taken, but character matching *is* what this is all
2403 about... */
2404
2405 case OP_NOTEXACT:
2406 min = max = GET2(ecode, 1);
2407 ecode += 3;
2408 goto REPEATNOTCHAR;
2409
2410 case OP_NOTUPTO:
2411 case OP_NOTMINUPTO:
2412 min = 0;
2413 max = GET2(ecode, 1);
2414 minimize = *ecode == OP_NOTMINUPTO;
2415 ecode += 3;
2416 goto REPEATNOTCHAR;
2417
2418 case OP_NOTPOSSTAR:
2419 possessive = TRUE;
2420 min = 0;
2421 max = INT_MAX;
2422 ecode++;
2423 goto REPEATNOTCHAR;
2424
2425 case OP_NOTPOSPLUS:
2426 possessive = TRUE;
2427 min = 1;
2428 max = INT_MAX;
2429 ecode++;
2430 goto REPEATNOTCHAR;
2431
2432 case OP_NOTPOSQUERY:
2433 possessive = TRUE;
2434 min = 0;
2435 max = 1;
2436 ecode++;
2437 goto REPEATNOTCHAR;
2438
2439 case OP_NOTPOSUPTO:
2440 possessive = TRUE;
2441 min = 0;
2442 max = GET2(ecode, 1);
2443 ecode += 3;
2444 goto REPEATNOTCHAR;
2445
2446 case OP_NOTSTAR:
2447 case OP_NOTMINSTAR:
2448 case OP_NOTPLUS:
2449 case OP_NOTMINPLUS:
2450 case OP_NOTQUERY:
2451 case OP_NOTMINQUERY:
2452 c = *ecode++ - OP_NOTSTAR;
2453 minimize = (c & 1) != 0;
2454 min = rep_min[c]; /* Pick up values from tables; */
2455 max = rep_max[c]; /* zero for max => infinity */
2456 if (max == 0) max = INT_MAX;
2457
2458 /* Common code for all repeated single-byte matches. We can give up quickly
2459 if there are fewer than the minimum number of bytes left in the
2460 subject. */
2461
2462 REPEATNOTCHAR:
2463 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2464 fc = *ecode++;
2465
2466 /* The code is duplicated for the caseless and caseful cases, for speed,
2467 since matching characters is likely to be quite common. First, ensure the
2468 minimum number of matches are present. If min = max, continue at the same
2469 level without recursing. Otherwise, if minimizing, keep trying the rest of
2470 the expression and advancing one matching character if failing, up to the
2471 maximum. Alternatively, if maximizing, find the maximum number of
2472 characters and work backwards. */
2473
2474 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2475 max, eptr));
2476
2477 if ((ims & PCRE_CASELESS) != 0)
2478 {
2479 fc = md->lcc[fc];
2480
2481 #ifdef SUPPORT_UTF8
2482 /* UTF-8 mode */
2483 if (utf8)
2484 {
2485 register unsigned int d;
2486 for (i = 1; i <= min; i++)
2487 {
2488 GETCHARINC(d, eptr);
2489 if (d < 256) d = md->lcc[d];
2490 if (fc == d) RRETURN(MATCH_NOMATCH);
2491 }
2492 }
2493 else
2494 #endif
2495
2496 /* Not UTF-8 mode */
2497 {
2498 for (i = 1; i <= min; i++)
2499 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2500 }
2501
2502 if (min == max) continue;
2503
2504 if (minimize)
2505 {
2506 #ifdef SUPPORT_UTF8
2507 /* UTF-8 mode */
2508 if (utf8)
2509 {
2510 register unsigned int d;
2511 for (fi = min;; fi++)
2512 {
2513 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2514 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2515 GETCHARINC(d, eptr);
2516 if (d < 256) d = md->lcc[d];
2517 if (fi >= max || eptr >= md->end_subject || fc == d)
2518 RRETURN(MATCH_NOMATCH);
2519 }
2520 }
2521 else
2522 #endif
2523 /* Not UTF-8 mode */
2524 {
2525 for (fi = min;; fi++)
2526 {
2527 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2528 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2529 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2530 RRETURN(MATCH_NOMATCH);
2531 }
2532 }
2533 /* Control never gets here */
2534 }
2535
2536 /* Maximize case */
2537
2538 else
2539 {
2540 pp = eptr;
2541
2542 #ifdef SUPPORT_UTF8
2543 /* UTF-8 mode */
2544 if (utf8)
2545 {
2546 register unsigned int d;
2547 for (i = min; i < max; i++)
2548 {
2549 int len = 1;
2550 if (eptr >= md->end_subject) break;
2551 GETCHARLEN(d, eptr, len);
2552 if (d < 256) d = md->lcc[d];
2553 if (fc == d) break;
2554 eptr += len;
2555 }
2556 if (possessive) continue;
2557 for(;;)
2558 {
2559 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2560 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2561 if (eptr-- == pp) break; /* Stop if tried at original pos */
2562 BACKCHAR(eptr);
2563 }
2564 }
2565 else
2566 #endif
2567 /* Not UTF-8 mode */
2568 {
2569 for (i = min; i < max; i++)
2570 {
2571 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2572 eptr++;
2573 }
2574 if (possessive) continue;
2575 while (eptr >= pp)
2576 {
2577 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2578 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2579 eptr--;
2580 }
2581 }
2582
2583 RRETURN(MATCH_NOMATCH);
2584 }
2585 /* Control never gets here */
2586 }
2587
2588 /* Caseful comparisons */
2589
2590 else
2591 {
2592 #ifdef SUPPORT_UTF8
2593 /* UTF-8 mode */
2594 if (utf8)
2595 {
2596 register unsigned int d;
2597 for (i = 1; i <= min; i++)
2598 {
2599 GETCHARINC(d, eptr);
2600 if (fc == d) RRETURN(MATCH_NOMATCH);
2601 }
2602 }
2603 else
2604 #endif
2605 /* Not UTF-8 mode */
2606 {
2607 for (i = 1; i <= min; i++)
2608 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2609 }
2610
2611 if (min == max) continue;
2612
2613 if (minimize)
2614 {
2615 #ifdef SUPPORT_UTF8
2616 /* UTF-8 mode */
2617 if (utf8)
2618 {
2619 register unsigned int d;
2620 for (fi = min;; fi++)
2621 {
2622 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2623 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2624 GETCHARINC(d, eptr);
2625 if (fi >= max || eptr >= md->end_subject || fc == d)
2626 RRETURN(MATCH_NOMATCH);
2627 }
2628 }
2629 else
2630 #endif
2631 /* Not UTF-8 mode */
2632 {
2633 for (fi = min;; fi++)
2634 {
2635 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2636 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2637 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2638 RRETURN(MATCH_NOMATCH);
2639 }
2640 }
2641 /* Control never gets here */
2642 }
2643
2644 /* Maximize case */
2645
2646 else
2647 {
2648 pp = eptr;
2649
2650 #ifdef SUPPORT_UTF8
2651 /* UTF-8 mode */
2652 if (utf8)
2653 {
2654 register unsigned int d;
2655 for (i = min; i < max; i++)
2656 {
2657 int len = 1;
2658 if (eptr >= md->end_subject) break;
2659 GETCHARLEN(d, eptr, len);
2660 if (fc == d) break;
2661 eptr += len;
2662 }
2663 if (possessive) continue;
2664 for(;;)
2665 {
2666 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2667 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2668 if (eptr-- == pp) break; /* Stop if tried at original pos */
2669 BACKCHAR(eptr);
2670 }
2671 }
2672 else
2673 #endif
2674 /* Not UTF-8 mode */
2675 {
2676 for (i = min; i < max; i++)
2677 {
2678 if (eptr >= md->end_subject || fc == *eptr) break;
2679 eptr++;
2680 }
2681 if (possessive) continue;
2682 while (eptr >= pp)
2683 {
2684 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2685 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2686 eptr--;
2687 }
2688 }
2689
2690 RRETURN(MATCH_NOMATCH);
2691 }
2692 }
2693 /* Control never gets here */
2694
2695 /* Match a single character type repeatedly; several different opcodes
2696 share code. This is very similar to the code for single characters, but we
2697 repeat it in the interests of efficiency. */
2698
2699 case OP_TYPEEXACT:
2700 min = max = GET2(ecode, 1);
2701 minimize = TRUE;
2702 ecode += 3;
2703 goto REPEATTYPE;
2704
2705 case OP_TYPEUPTO:
2706 case OP_TYPEMINUPTO:
2707 min = 0;
2708 max = GET2(ecode, 1);
2709 minimize = *ecode == OP_TYPEMINUPTO;
2710 ecode += 3;
2711 goto REPEATTYPE;
2712
2713 case OP_TYPEPOSSTAR:
2714 possessive = TRUE;
2715 min = 0;
2716 max = INT_MAX;
2717 ecode++;
2718 goto REPEATTYPE;
2719
2720 case OP_TYPEPOSPLUS:
2721 possessive = TRUE;
2722 min = 1;
2723 max = INT_MAX;
2724 ecode++;
2725 goto REPEATTYPE;
2726
2727 case OP_TYPEPOSQUERY:
2728 possessive = TRUE;
2729 min = 0;
2730 max = 1;
2731 ecode++;
2732 goto REPEATTYPE;
2733
2734 case OP_TYPEPOSUPTO:
2735 possessive = TRUE;
2736 min = 0;
2737 max = GET2(ecode, 1);
2738 ecode += 3;
2739 goto REPEATTYPE;
2740
2741 case OP_TYPESTAR:
2742 case OP_TYPEMINSTAR:
2743 case OP_TYPEPLUS:
2744 case OP_TYPEMINPLUS:
2745 case OP_TYPEQUERY:
2746 case OP_TYPEMINQUERY:
2747 c = *ecode++ - OP_TYPESTAR;
2748 minimize = (c & 1) != 0;
2749 min = rep_min[c]; /* Pick up values from tables; */
2750 max = rep_max[c]; /* zero for max => infinity */
2751 if (max == 0) max = INT_MAX;
2752
2753 /* Common code for all repeated single character type matches. Note that
2754 in UTF-8 mode, '.' matches a character of any length, but for the other
2755 character types, the valid characters are all one-byte long. */
2756
2757 REPEATTYPE:
2758 ctype = *ecode++; /* Code for the character type */
2759
2760 #ifdef SUPPORT_UCP
2761 if (ctype == OP_PROP || ctype == OP_NOTPROP)
2762 {
2763 prop_fail_result = ctype == OP_NOTPROP;
2764 prop_type = *ecode++;
2765 prop_value = *ecode++;
2766 }
2767 else prop_type = -1;
2768 #endif
2769
2770 /* First, ensure the minimum number of matches are present. Use inline
2771 code for maximizing the speed, and do the type test once at the start
2772 (i.e. keep it out of the loop). Also we can test that there are at least
2773 the minimum number of bytes before we start. This isn't as effective in
2774 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2775 is tidier. Also separate the UCP code, which can be the same for both UTF-8
2776 and single-bytes. */
2777
2778 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2779 if (min > 0)
2780 {
2781 #ifdef SUPPORT_UCP
2782 if (prop_type >= 0)
2783 {
2784 switch(prop_type)
2785 {
2786 case PT_ANY:
2787 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2788 for (i = 1; i <= min; i++)
2789 {
2790 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2791 GETCHARINCTEST(c, eptr);
2792 }
2793 break;
2794
2795 case PT_LAMP:
2796 for (i = 1; i <= min; i++)
2797 {
2798 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2799 GETCHARINCTEST(c, eptr);
2800 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2801 if ((prop_chartype == ucp_Lu ||
2802 prop_chartype == ucp_Ll ||
2803 prop_chartype == ucp_Lt) == prop_fail_result)
2804 RRETURN(MATCH_NOMATCH);
2805 }
2806 break;
2807
2808 case PT_GC:
2809 for (i = 1; i <= min; i++)
2810 {
2811 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2812 GETCHARINCTEST(c, eptr);
2813 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2814 if ((prop_category == prop_value) == prop_fail_result)
2815 RRETURN(MATCH_NOMATCH);
2816 }
2817 break;
2818
2819 case PT_PC:
2820 for (i = 1; i <= min; i++)
2821 {
2822 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2823 GETCHARINCTEST(c, eptr);
2824 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2825 if ((prop_chartype == prop_value) == prop_fail_result)
2826 RRETURN(MATCH_NOMATCH);
2827 }
2828 break;
2829
2830 case PT_SC:
2831 for (i = 1; i <= min; i++)
2832 {
2833 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2834 GETCHARINCTEST(c, eptr);
2835 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2836 if ((prop_script == prop_value) == prop_fail_result)
2837 RRETURN(MATCH_NOMATCH);
2838 }
2839 break;
2840
2841 default:
2842 RRETURN(PCRE_ERROR_INTERNAL);
2843 }
2844 }
2845
2846 /* Match extended Unicode sequences. We will get here only if the
2847 support is in the binary; otherwise a compile-time error occurs. */
2848
2849 else if (ctype == OP_EXTUNI)
2850 {
2851 for (i = 1; i <= min; i++)
2852 {
2853 GETCHARINCTEST(c, eptr);
2854 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2855 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2856 while (eptr < md->end_subject)
2857 {
2858 int len = 1;
2859 if (!utf8) c = *eptr; else
2860 {
2861 GETCHARLEN(c, eptr, len);
2862 }
2863 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2864 if (prop_category != ucp_M) break;
2865 eptr += len;
2866 }
2867 }
2868 }
2869
2870 else
2871 #endif /* SUPPORT_UCP */
2872
2873 /* Handle all other cases when the coding is UTF-8 */
2874
2875 #ifdef SUPPORT_UTF8
2876 if (utf8) switch(ctype)
2877 {
2878 case OP_ANY:
2879 for (i = 1; i <= min; i++)
2880 {
2881 if (eptr >= md->end_subject ||
2882 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2883 RRETURN(MATCH_NOMATCH);
2884 eptr++;
2885 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2886 }
2887 break;
2888
2889 case OP_ANYBYTE:
2890 eptr += min;
2891 break;
2892
2893 case OP_ANYNL:
2894 for (i = 1; i <= min; i++)
2895 {
2896 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2897 GETCHARINC(c, eptr);
2898 switch(c)
2899 {
2900 default: RRETURN(MATCH_NOMATCH);
2901 case 0x000d:
2902 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2903 break;
2904 case 0x000a:
2905 case 0x000b:
2906 case 0x000c:
2907 case 0x0085:
2908 case 0x2028:
2909 case 0x2029:
2910 break;
2911 }
2912 }
2913 break;
2914
2915 case OP_NOT_HSPACE:
2916 for (i = 1; i <= min; i++)
2917 {
2918 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2919 GETCHARINC(c, eptr);
2920 switch(c)
2921 {
2922 default: break;
2923 case 0x09: /* HT */
2924 case 0x20: /* SPACE */
2925 case 0xa0: /* NBSP */
2926 case 0x1680: /* OGHAM SPACE MARK */
2927 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2928 case 0x2000: /* EN QUAD */
2929 case 0x2001: /* EM QUAD */
2930 case 0x2002: /* EN SPACE */
2931 case 0x2003: /* EM SPACE */
2932 case 0x2004: /* THREE-PER-EM SPACE */
2933 case 0x2005: /* FOUR-PER-EM SPACE */
2934 case 0x2006: /* SIX-PER-EM SPACE */
2935 case 0x2007: /* FIGURE SPACE */
2936 case 0x2008: /* PUNCTUATION SPACE */
2937 case 0x2009: /* THIN SPACE */
2938 case 0x200A: /* HAIR SPACE */
2939 case 0x202f: /* NARROW NO-BREAK SPACE */
2940 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2941 case 0x3000: /* IDEOGRAPHIC SPACE */
2942 RRETURN(MATCH_NOMATCH);
2943 }
2944 }
2945 break;
2946
2947 case OP_HSPACE:
2948 for (i = 1; i <= min; i++)
2949 {
2950 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951 GETCHARINC(c, eptr);
2952 switch(c)
2953 {
2954 default: RRETURN(MATCH_NOMATCH);
2955 case 0x09: /* HT */
2956 case 0x20: /* SPACE */
2957 case 0xa0: /* NBSP */
2958 case 0x1680: /* OGHAM SPACE MARK */
2959 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2960 case 0x2000: /* EN QUAD */
2961 case 0x2001: /* EM QUAD */
2962 case 0x2002: /* EN SPACE */
2963 case 0x2003: /* EM SPACE */
2964 case 0x2004: /* THREE-PER-EM SPACE */
2965 case 0x2005: /* FOUR-PER-EM SPACE */
2966 case 0x2006: /* SIX-PER-EM SPACE */
2967 case 0x2007: /* FIGURE SPACE */
2968 case 0x2008: /* PUNCTUATION SPACE */
2969 case 0x2009: /* THIN SPACE */
2970 case 0x200A: /* HAIR SPACE */
2971 case 0x202f: /* NARROW NO-BREAK SPACE */
2972 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2973 case 0x3000: /* IDEOGRAPHIC SPACE */
2974 break;
2975 }
2976 }
2977 break;
2978
2979 case OP_NOT_VSPACE:
2980 for (i = 1; i <= min; i++)
2981 {
2982 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2983 GETCHARINC(c, eptr);
2984 switch(c)
2985 {
2986 default: break;
2987 case 0x0a: /* LF */
2988 case 0x0b: /* VT */
2989 case 0x0c: /* FF */
2990 case 0x0d: /* CR */
2991 case 0x85: /* NEL */
2992 case 0x2028: /* LINE SEPARATOR */
2993 case 0x2029: /* PARAGRAPH SEPARATOR */
2994 RRETURN(MATCH_NOMATCH);
2995 }
2996 }
2997 break;
2998
2999 case OP_VSPACE:
3000 for (i = 1; i <= min; i++)
3001 {
3002 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3003 GETCHARINC(c, eptr);
3004 switch(c)
3005 {
3006 default: RRETURN(MATCH_NOMATCH);
3007 case 0x0a: /* LF */
3008 case 0x0b: /* VT */
3009 case 0x0c: /* FF */
3010 case 0x0d: /* CR */
3011 case 0x85: /* NEL */
3012 case 0x2028: /* LINE SEPARATOR */
3013 case 0x2029: /* PARAGRAPH SEPARATOR */
3014 break;
3015 }
3016 }
3017 break;
3018
3019 case OP_NOT_DIGIT:
3020 for (i = 1; i <= min; i++)
3021 {
3022 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3023 GETCHARINC(c, eptr);
3024 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3025 RRETURN(MATCH_NOMATCH);
3026 }
3027 break;
3028
3029 case OP_DIGIT:
3030 for (i = 1; i <= min; i++)
3031 {
3032 if (eptr >= md->end_subject ||
3033 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3034 RRETURN(MATCH_NOMATCH);
3035 /* No need to skip more bytes - we know it's a 1-byte character */
3036 }
3037 break;
3038
3039 case OP_NOT_WHITESPACE:
3040 for (i = 1; i <= min; i++)
3041 {
3042 if (eptr >= md->end_subject ||
3043 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
3044 RRETURN(MATCH_NOMATCH);
3045 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3046 }
3047 break;
3048
3049 case OP_WHITESPACE:
3050 for (i = 1; i <= min; i++)
3051 {
3052 if (eptr >= md->end_subject ||
3053 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3054 RRETURN(MATCH_NOMATCH);
3055 /* No need to skip more bytes - we know it's a 1-byte character */
3056 }
3057 break;
3058
3059 case OP_NOT_WORDCHAR:
3060 for (i = 1; i <= min; i++)
3061 {
3062 if (eptr >= md->end_subject ||
3063 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
3064 RRETURN(MATCH_NOMATCH);
3065 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3066 }
3067 break;
3068
3069 case OP_WORDCHAR:
3070 for (i = 1; i <= min; i++)
3071 {
3072 if (eptr >= md->end_subject ||
3073 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3074 RRETURN(MATCH_NOMATCH);
3075 /* No need to skip more bytes - we know it's a 1-byte character */
3076 }
3077 break;
3078
3079 default:
3080 RRETURN(PCRE_ERROR_INTERNAL);
3081 } /* End switch(ctype) */
3082
3083 else
3084 #endif /* SUPPORT_UTF8 */
3085
3086 /* Code for the non-UTF-8 case for minimum matching of operators other
3087 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3088 number of bytes present, as this was tested above. */
3089
3090 switch(ctype)
3091 {
3092 case OP_ANY:
3093 if ((ims & PCRE_DOTALL) == 0)
3094 {
3095 for (i = 1; i <= min; i++)
3096 {
3097 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3098 eptr++;
3099 }
3100 }
3101 else eptr += min;
3102 break;
3103
3104 case OP_ANYBYTE:
3105 eptr += min;
3106 break;
3107
3108 /* Because of the CRLF case, we can't assume the minimum number of
3109 bytes are present in this case. */
3110
3111 case OP_ANYNL:
3112 for (i = 1; i <= min; i++)
3113 {
3114 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3115 switch(*eptr++)
3116 {
3117 default: RRETURN(MATCH_NOMATCH);
3118 case 0x000d:
3119 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3120 break;
3121 case 0x000a:
3122 case 0x000b:
3123 case 0x000c:
3124 case 0x0085:
3125 break;
3126 }
3127 }
3128 break;
3129
3130 case OP_NOT_HSPACE:
3131 for (i = 1; i <= min; i++)
3132 {
3133 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3134 switch(*eptr++)
3135 {
3136 default: break;
3137 case 0x09: /* HT */
3138 case 0x20: /* SPACE */
3139 case 0xa0: /* NBSP */
3140 RRETURN(MATCH_NOMATCH);
3141 }
3142 }
3143 break;
3144
3145 case OP_HSPACE:
3146 for (i = 1; i <= min; i++)
3147 {
3148 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3149 switch(*eptr++)
3150 {
3151 default: RRETURN(MATCH_NOMATCH);
3152 case 0x09: /* HT */
3153 case 0x20: /* SPACE */
3154 case 0xa0: /* NBSP */
3155 break;
3156 }
3157 }
3158 break;
3159
3160 case OP_NOT_VSPACE:
3161 for (i = 1; i <= min; i++)
3162 {
3163 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3164 switch(*eptr++)
3165 {
3166 default: break;
3167 case 0x0a: /* LF */
3168 case 0x0b: /* VT */
3169 case 0x0c: /* FF */
3170 case 0x0d: /* CR */
3171 case 0x85: /* NEL */
3172 RRETURN(MATCH_NOMATCH);
3173 }
3174 }
3175 break;
3176
3177 case OP_VSPACE:
3178 for (i = 1; i <= min; i++)
3179 {
3180 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3181 switch(*eptr++)
3182 {
3183 default: RRETURN(MATCH_NOMATCH);
3184 case 0x0a: /* LF */
3185 case 0x0b: /* VT */
3186 case 0x0c: /* FF */
3187 case 0x0d: /* CR */
3188 case 0x85: /* NEL */
3189 break;
3190 }
3191 }
3192 break;
3193
3194 case OP_NOT_DIGIT:
3195 for (i = 1; i <= min; i++)
3196 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3197 break;
3198
3199 case OP_DIGIT:
3200 for (i = 1; i <= min; i++)
3201 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3202 break;
3203
3204 case OP_NOT_WHITESPACE:
3205 for (i = 1; i <= min; i++)
3206 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3207 break;
3208
3209 case OP_WHITESPACE:
3210 for (i = 1; i <= min; i++)
3211 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3212 break;
3213
3214 case OP_NOT_WORDCHAR:
3215 for (i = 1; i <= min; i++)
3216 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3217 RRETURN(MATCH_NOMATCH);
3218 break;
3219
3220 case OP_WORDCHAR:
3221 for (i = 1; i <= min; i++)
3222 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3223 RRETURN(MATCH_NOMATCH);
3224 break;
3225
3226 default:
3227 RRETURN(PCRE_ERROR_INTERNAL);
3228 }
3229 }
3230
3231 /* If min = max, continue at the same level without recursing */
3232
3233 if (min == max) continue;
3234
3235 /* If minimizing, we have to test the rest of the pattern before each
3236 subsequent match. Again, separate the UTF-8 case for speed, and also
3237 separate the UCP cases. */
3238
3239 if (minimize)
3240 {
3241 #ifdef SUPPORT_UCP
3242 if (prop_type >= 0)
3243 {
3244 switch(prop_type)
3245 {
3246 case PT_ANY:
3247 for (fi = min;; fi++)
3248 {
3249 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3250 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3251 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3252 GETCHARINC(c, eptr);
3253 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3254 }
3255 /* Control never gets here */
3256
3257 case PT_LAMP:
3258 for (fi = min;; fi++)
3259 {
3260 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3261 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3262 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3263 GETCHARINC(c, eptr);
3264 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3265 if ((prop_chartype == ucp_Lu ||
3266 prop_chartype == ucp_Ll ||
3267 prop_chartype == ucp_Lt) == prop_fail_result)
3268 RRETURN(MATCH_NOMATCH);
3269 }
3270 /* Control never gets here */
3271
3272 case PT_GC:
3273 for (fi = min;; fi++)
3274 {
3275 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3276 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3277 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3278 GETCHARINC(c, eptr);
3279 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3280 if ((prop_category == prop_value) == prop_fail_result)
3281 RRETURN(MATCH_NOMATCH);
3282 }
3283 /* Control never gets here */
3284
3285 case PT_PC:
3286 for (fi = min;; fi++)
3287 {
3288 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3289 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3290 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3291 GETCHARINC(c, eptr);
3292 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3293 if ((prop_chartype == prop_value) == prop_fail_result)
3294 RRETURN(MATCH_NOMATCH);
3295 }
3296 /* Control never gets here */
3297
3298 case PT_SC:
3299 for (fi = min;; fi++)
3300 {
3301 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3302 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3303 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3304 GETCHARINC(c, eptr);
3305 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3306 if ((prop_script == prop_value) == prop_fail_result)
3307 RRETURN(MATCH_NOMATCH);
3308 }
3309 /* Control never gets here */
3310
3311 default:
3312 RRETURN(PCRE_ERROR_INTERNAL);
3313 }
3314 }
3315
3316 /* Match extended Unicode sequences. We will get here only if the
3317 support is in the binary; otherwise a compile-time error occurs. */
3318
3319 else if (ctype == OP_EXTUNI)
3320 {
3321 for (fi = min;; fi++)
3322 {
3323 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3324 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3325 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3326 GETCHARINCTEST(c, eptr);
3327 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3328 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3329 while (eptr < md->end_subject)
3330 {
3331 int len = 1;
3332 if (!utf8) c = *eptr; else
3333 {
3334 GETCHARLEN(c, eptr, len);
3335 }
3336 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3337 if (prop_category != ucp_M) break;
3338 eptr += len;
3339 }
3340 }
3341 }
3342
3343 else
3344 #endif /* SUPPORT_UCP */
3345
3346 #ifdef SUPPORT_UTF8
3347 /* UTF-8 mode */
3348 if (utf8)
3349 {
3350 for (fi = min;; fi++)
3351 {
3352 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3353 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3354 if (fi >= max || eptr >= md->end_subject ||
3355 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3356 IS_NEWLINE(eptr)))
3357 RRETURN(MATCH_NOMATCH);
3358
3359 GETCHARINC(c, eptr);
3360 switch(ctype)
3361 {
3362 case OP_ANY: /* This is the DOTALL case */
3363 break;
3364
3365 case OP_ANYBYTE:
3366 break;
3367
3368 case OP_ANYNL:
3369 switch(c)
3370 {
3371 default: RRETURN(MATCH_NOMATCH);
3372 case 0x000d:
3373 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3374 break;
3375 case 0x000a:
3376 case 0x000b:
3377 case 0x000c:
3378 case 0x0085:
3379 case 0x2028:
3380 case 0x2029:
3381 break;
3382 }
3383 break;
3384
3385 case OP_NOT_HSPACE:
3386 switch(c)
3387 {
3388 default: break;
3389 case 0x09: /* HT */
3390 case 0x20: /* SPACE */
3391 case 0xa0: /* NBSP */
3392 case 0x1680: /* OGHAM SPACE MARK */
3393 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3394 case 0x2000: /* EN QUAD */
3395 case 0x2001: /* EM QUAD */
3396 case 0x2002: /* EN SPACE */
3397 case 0x2003: /* EM SPACE */
3398 case 0x2004: /* THREE-PER-EM SPACE */
3399 case 0x2005: /* FOUR-PER-EM SPACE */
3400 case 0x2006: /* SIX-PER-EM SPACE */
3401 case 0x2007: /* FIGURE SPACE */
3402 case 0x2008: /* PUNCTUATION SPACE */
3403 case 0x2009: /* THIN SPACE */
3404 case 0x200A: /* HAIR SPACE */
3405 case 0x202f: /* NARROW NO-BREAK SPACE */
3406 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3407 case 0x3000: /* IDEOGRAPHIC SPACE */
3408 RRETURN(MATCH_NOMATCH);
3409 }
3410 break;
3411
3412 case OP_HSPACE:
3413 switch(c)
3414 {
3415 default: RRETURN(MATCH_NOMATCH);
3416 case 0x09: /* HT */
3417 case 0x20: /* SPACE */
3418 case 0xa0: /* NBSP */
3419 case 0x1680: /* OGHAM SPACE MARK */
3420 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3421 case 0x2000: /* EN QUAD */
3422 case 0x2001: /* EM QUAD */
3423 case 0x2002: /* EN SPACE */
3424 case 0x2003: /* EM SPACE */
3425 case 0x2004: /* THREE-PER-EM SPACE */
3426 case 0x2005: /* FOUR-PER-EM SPACE */
3427 case 0x2006: /* SIX-PER-EM SPACE */
3428 case 0x2007: /* FIGURE SPACE */
3429 case 0x2008: /* PUNCTUATION SPACE */
3430 case 0x2009: /* THIN SPACE */
3431 case 0x200A: /* HAIR SPACE */
3432 case 0x202f: /* NARROW NO-BREAK SPACE */
3433 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3434 case 0x3000: /* IDEOGRAPHIC SPACE */
3435 break;
3436 }
3437 break;
3438
3439 case OP_NOT_VSPACE:
3440 switch(c)
3441 {
3442 default: break;
3443 case 0x0a: /* LF */
3444 case 0x0b: /* VT */
3445 case 0x0c: /* FF */
3446 case 0x0d: /* CR */
3447 case 0x85: /* NEL */
3448 case 0x2028: /* LINE SEPARATOR */
3449 case 0x2029: /* PARAGRAPH SEPARATOR */
3450 RRETURN(MATCH_NOMATCH);
3451 }
3452 break;
3453
3454 case OP_VSPACE:
3455 switch(c)
3456 {
3457 default: RRETURN(MATCH_NOMATCH);
3458 case 0x0a: /* LF */
3459 case 0x0b: /* VT */
3460 case 0x0c: /* FF */
3461 case 0x0d: /* CR */
3462 case 0x85: /* NEL */
3463 case 0x2028: /* LINE SEPARATOR */
3464 case 0x2029: /* PARAGRAPH SEPARATOR */
3465 break;
3466 }
3467 break;
3468
3469 case OP_NOT_DIGIT:
3470 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3471 RRETURN(MATCH_NOMATCH);
3472 break;
3473
3474 case OP_DIGIT:
3475 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3476 RRETURN(MATCH_NOMATCH);
3477 break;
3478
3479 case OP_NOT_WHITESPACE:
3480 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3481 RRETURN(MATCH_NOMATCH);
3482 break;
3483
3484 case OP_WHITESPACE:
3485 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3486 RRETURN(MATCH_NOMATCH);
3487 break;
3488
3489 case OP_NOT_WORDCHAR:
3490 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3491 RRETURN(MATCH_NOMATCH);
3492 break;
3493
3494 case OP_WORDCHAR:
3495 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3496 RRETURN(MATCH_NOMATCH);
3497 break;
3498
3499 default:
3500 RRETURN(PCRE_ERROR_INTERNAL);
3501 }
3502 }
3503 }
3504 else
3505 #endif
3506 /* Not UTF-8 mode */
3507 {
3508 for (fi = min;; fi++)
3509 {
3510 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3511 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3512 if (fi >= max || eptr >= md->end_subject ||
3513 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3514 RRETURN(MATCH_NOMATCH);
3515
3516 c = *eptr++;
3517 switch(ctype)
3518 {
3519 case OP_ANY: /* This is the DOTALL case */
3520 break;
3521
3522 case OP_ANYBYTE:
3523 break;
3524
3525 case OP_ANYNL:
3526 switch(c)
3527 {
3528 default: RRETURN(MATCH_NOMATCH);
3529 case 0x000d:
3530 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3531 break;
3532 case 0x000a:
3533 case 0x000b:
3534 case 0x000c:
3535 case 0x0085:
3536 break;
3537 }
3538 break;
3539
3540 case OP_NOT_HSPACE:
3541 switch(c)
3542 {
3543 default: break;
3544 case 0x09: /* HT */
3545 case 0x20: /* SPACE */
3546 case 0xa0: /* NBSP */
3547 RRETURN(MATCH_NOMATCH);
3548 }
3549 break;
3550
3551 case OP_HSPACE:
3552 switch(c)
3553 {
3554 default: RRETURN(MATCH_NOMATCH);
3555 case 0x09: /* HT */
3556 case 0x20: /* SPACE */
3557 case 0xa0: /* NBSP */
3558 break;
3559 }
3560 break;
3561
3562 case OP_NOT_VSPACE:
3563 switch(c)
3564 {
3565 default: break;
3566 case 0x0a: /* LF */
3567 case 0x0b: /* VT */
3568 case 0x0c: /* FF */
3569 case 0x0d: /* CR */
3570 case 0x85: /* NEL */
3571 RRETURN(MATCH_NOMATCH);
3572 }
3573 break;
3574
3575 case OP_VSPACE:
3576 switch(c)
3577 {
3578 default: RRETURN(MATCH_NOMATCH);
3579 case 0x0a: /* LF */
3580 case 0x0b: /* VT */
3581 case 0x0c: /* FF */
3582 case 0x0d: /* CR */
3583 case 0x85: /* NEL */
3584 break;
3585 }
3586 break;
3587
3588 case OP_NOT_DIGIT:
3589 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3590 break;
3591
3592 case OP_DIGIT:
3593 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3594 break;
3595
3596 case OP_NOT_WHITESPACE:
3597 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3598 break;
3599
3600 case OP_WHITESPACE:
3601 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3602 break;
3603
3604 case OP_NOT_WORDCHAR:
3605 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3606 break;
3607
3608 case OP_WORDCHAR:
3609 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3610 break;
3611
3612 default:
3613 RRETURN(PCRE_ERROR_INTERNAL);
3614 }
3615 }
3616 }
3617 /* Control never gets here */
3618 }
3619
3620 /* If maximizing, it is worth using inline code for speed, doing the type
3621 test once at the start (i.e. keep it out of the loop). Again, keep the
3622 UTF-8 and UCP stuff separate. */
3623
3624 else
3625 {
3626 pp = eptr; /* Remember where we started */
3627
3628 #ifdef SUPPORT_UCP
3629 if (prop_type >= 0)
3630 {
3631 switch(prop_type)
3632 {
3633 case PT_ANY:
3634 for (i = min; i < max; i++)
3635 {
3636 int len = 1;
3637 if (eptr >= md->end_subject) break;
3638 GETCHARLEN(c, eptr, len);
3639 if (prop_fail_result) break;
3640 eptr+= len;
3641 }
3642 break;
3643
3644 case PT_LAMP:
3645 for (i = min; i < max; i++)
3646 {
3647 int len = 1;
3648 if (eptr >= md->end_subject) break;
3649 GETCHARLEN(c, eptr, len);
3650 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3651 if ((prop_chartype == ucp_Lu ||
3652 prop_chartype == ucp_Ll ||
3653 prop_chartype == ucp_Lt) == prop_fail_result)
3654 break;
3655 eptr+= len;
3656 }
3657 break;
3658
3659 case PT_GC:
3660 for (i = min; i < max; i++)
3661 {
3662 int len = 1;
3663 if (eptr >= md->end_subject) break;
3664 GETCHARLEN(c, eptr, len);
3665 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3666 if ((prop_category == prop_value) == prop_fail_result)
3667 break;
3668 eptr+= len;
3669 }
3670 break;
3671
3672 case PT_PC:
3673 for (i = min; i < max; i++)
3674 {
3675 int len = 1;
3676 if (eptr >= md->end_subject) break;
3677 GETCHARLEN(c, eptr, len);
3678 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3679 if ((prop_chartype == prop_value) == prop_fail_result)
3680 break;
3681 eptr+= len;
3682 }
3683 break;
3684
3685 case PT_SC:
3686 for (i = min; i < max; i++)
3687 {
3688 int len = 1;
3689 if (eptr >= md->end_subject) break;
3690 GETCHARLEN(c, eptr, len);
3691 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3692 if ((prop_script == prop_value) == prop_fail_result)
3693 break;
3694 eptr+= len;
3695 }
3696 break;
3697 }
3698
3699 /* eptr is now past the end of the maximum run */
3700
3701 if (possessive) continue;
3702 for(;;)
3703 {
3704 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3705 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3706 if (eptr-- == pp) break; /* Stop if tried at original pos */
3707 BACKCHAR(eptr);
3708 }
3709 }
3710
3711 /* Match extended Unicode sequences. We will get here only if the
3712 support is in the binary; otherwise a compile-time error occurs. */
3713
3714 else if (ctype == OP_EXTUNI)
3715 {
3716 for (i = min; i < max; i++)
3717 {
3718 if (eptr >= md->end_subject) break;
3719 GETCHARINCTEST(c, eptr);
3720 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3721 if (prop_category == ucp_M) break;
3722 while (eptr < md->end_subject)
3723 {
3724 int len = 1;
3725 if (!utf8) c = *eptr; else
3726 {
3727 GETCHARLEN(c, eptr, len);
3728 }
3729 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3730 if (prop_category != ucp_M) break;
3731 eptr += len;
3732 }
3733 }
3734
3735 /* eptr is now past the end of the maximum run */
3736
3737 if (possessive) continue;
3738 for(;;)
3739 {
3740 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3741 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3742 if (eptr-- == pp) break; /* Stop if tried at original pos */
3743 for (;;) /* Move back over one extended */
3744 {
3745 int len = 1;
3746 BACKCHAR(eptr);
3747 if (!utf8) c = *eptr; else
3748 {
3749 GETCHARLEN(c, eptr, len);
3750 }
3751 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3752 if (prop_category != ucp_M) break;
3753 eptr--;
3754 }
3755 }
3756 }
3757
3758 else
3759 #endif /* SUPPORT_UCP */
3760
3761 #ifdef SUPPORT_UTF8
3762 /* UTF-8 mode */
3763
3764 if (utf8)
3765 {
3766 switch(ctype)
3767 {
3768 case OP_ANY:
3769
3770 /* Special code is required for UTF8, but when the maximum is
3771 unlimited we don't need it, so we repeat the non-UTF8 code. This is
3772 probably worth it, because .* is quite a common idiom. */
3773
3774 if (max < INT_MAX)
3775 {
3776 if ((ims & PCRE_DOTALL) == 0)
3777 {
3778 for (i = min; i < max; i++)
3779 {
3780 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3781 eptr++;
3782 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3783 }
3784 }
3785 else
3786 {
3787 for (i = min; i < max; i++)
3788 {
3789 if (eptr >= md->end_subject) break;
3790 eptr++;
3791 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3792 }
3793 }
3794 }
3795
3796 /* Handle unlimited UTF-8 repeat */
3797
3798 else
3799 {
3800 if ((ims & PCRE_DOTALL) == 0)
3801 {
3802 for (i = min; i < max; i++)
3803 {
3804 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3805 eptr++;
3806 }
3807 break;
3808 }
3809 else
3810 {
3811 c = max - min;
3812 if (c > (unsigned int)(md->end_subject - eptr))
3813 c = md->end_subject - eptr;
3814 eptr += c;
3815 }
3816 }
3817 break;
3818
3819 /* The byte case is the same as non-UTF8 */
3820
3821 case OP_ANYBYTE:
3822 c = max - min;
3823 if (c > (unsigned int)(md->end_subject - eptr))
3824 c = md->end_subject - eptr;
3825 eptr += c;
3826 break;
3827
3828 case OP_ANYNL:
3829 for (i = min; i < max; i++)
3830 {
3831 int len = 1;
3832 if (eptr >= md->end_subject) break;
3833 GETCHARLEN(c, eptr, len);
3834 if (c == 0x000d)
3835 {
3836 if (++eptr >= md->end_subject) break;
3837 if (*eptr == 0x000a) eptr++;
3838 }
3839 else
3840 {
3841 if (c != 0x000a && c != 0x000b && c != 0x000c &&
3842 c != 0x0085 && c != 0x2028 && c != 0x2029)
3843 break;
3844 eptr += len;
3845 }
3846 }
3847 break;
3848
3849 case OP_NOT_HSPACE:
3850 case OP_HSPACE:
3851 for (i = min; i < max; i++)
3852 {
3853 BOOL gotspace;
3854 int len = 1;
3855 if (eptr >= md->end_subject) break;
3856 GETCHARLEN(c, eptr, len);
3857 switch(c)
3858 {
3859 default: gotspace = FALSE; break;
3860 case 0x09: /* HT */
3861 case 0x20: /* SPACE */
3862 case 0xa0: /* NBSP */
3863 case 0x1680: /* OGHAM SPACE MARK */
3864 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3865 case 0x2000: /* EN QUAD */
3866 case 0x2001: /* EM QUAD */
3867 case 0x2002: /* EN SPACE */
3868 case 0x2003: /* EM SPACE */
3869 case 0x2004: /* THREE-PER-EM SPACE */
3870 case 0x2005: /* FOUR-PER-EM SPACE */
3871 case 0x2006: /* SIX-PER-EM SPACE */
3872 case 0x2007: /* FIGURE SPACE */
3873 case 0x2008: /* PUNCTUATION SPACE */
3874 case 0x2009: /* THIN SPACE */
3875 case 0x200A: /* HAIR SPACE */
3876 case 0x202f: /* NARROW NO-BREAK SPACE */
3877 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3878 case 0x3000: /* IDEOGRAPHIC SPACE */
3879 gotspace = TRUE;
3880 break;
3881 }
3882 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3883 eptr += len;
3884 }
3885 break;
3886
3887 case OP_NOT_VSPACE:
3888 case OP_VSPACE:
3889 for (i = min; i < max; i++)
3890 {
3891 BOOL gotspace;
3892 int len = 1;
3893 if (eptr >= md->end_subject) break;
3894 GETCHARLEN(c, eptr, len);
3895 switch(c)
3896 {
3897 default: gotspace = FALSE; break;
3898 case 0x0a: /* LF */
3899 case 0x0b: /* VT */
3900 case 0x0c: /* FF */
3901 case 0x0d: /* CR */
3902 case 0x85: /* NEL */
3903 case 0x2028: /* LINE SEPARATOR */
3904 case 0x2029: /* PARAGRAPH SEPARATOR */
3905 gotspace = TRUE;
3906 break;
3907 }
3908 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3909 eptr += len;
3910 }
3911 break;
3912
3913 case OP_NOT_DIGIT:
3914 for (i = min; i < max; i++)
3915 {
3916 int len = 1;
3917 if (eptr >= md->end_subject) break;
3918 GETCHARLEN(c, eptr, len);
3919 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3920 eptr+= len;
3921 }
3922 break;
3923
3924 case OP_DIGIT:
3925 for (i = min; i < max; i++)
3926 {
3927 int len = 1;
3928 if (eptr >= md->end_subject) break;
3929 GETCHARLEN(c, eptr, len);
3930 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3931 eptr+= len;
3932 }
3933 break;
3934
3935 case OP_NOT_WHITESPACE:
3936 for (i = min; i < max; i++)
3937 {
3938 int len = 1;
3939 if (eptr >= md->end_subject) break;
3940 GETCHARLEN(c, eptr, len);
3941 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3942 eptr+= len;
3943 }
3944 break;
3945
3946 case OP_WHITESPACE:
3947 for (i = min; i < max; i++)
3948 {
3949 int len = 1;
3950 if (eptr >= md->end_subject) break;
3951 GETCHARLEN(c, eptr, len);
3952 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3953 eptr+= len;
3954 }
3955 break;
3956
3957 case OP_NOT_WORDCHAR:
3958 for (i = min; i < max; i++)
3959 {
3960 int len = 1;
3961 if (eptr >= md->end_subject) break;
3962 GETCHARLEN(c, eptr, len);
3963 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3964 eptr+= len;
3965 }
3966 break;
3967
3968 case OP_WORDCHAR:
3969 for (i = min; i < max; i++)
3970 {
3971 int len = 1;
3972 if (eptr >= md->end_subject) break;
3973 GETCHARLEN(c, eptr, len);
3974 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3975 eptr+= len;
3976 }
3977 break;
3978
3979 default:
3980 RRETURN(PCRE_ERROR_INTERNAL);
3981 }
3982
3983 /* eptr is now past the end of the maximum run */
3984
3985 if (possessive) continue;
3986 for(;;)
3987 {
3988 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3989 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3990 if (eptr-- == pp) break; /* Stop if tried at original pos */
3991 BACKCHAR(eptr);
3992 }
3993 }
3994 else
3995 #endif
3996
3997 /* Not UTF-8 mode */
3998 {
3999 switch(ctype)
4000 {
4001 case OP_ANY:
4002 if ((ims & PCRE_DOTALL) == 0)
4003 {
4004 for (i = min; i < max; i++)
4005 {
4006 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4007 eptr++;
4008 }
4009 break;
4010 }
4011 /* For DOTALL case, fall through and treat as \C */
4012
4013 case OP_ANYBYTE:
4014 c = max - min;
4015 if (c > (unsigned int)(md->end_subject - eptr))
4016 c = md->end_subject - eptr;
4017 eptr += c;
4018 break;
4019
4020 case OP_ANYNL:
4021 for (i = min; i < max; i++)
4022 {
4023 if (eptr >= md->end_subject) break;
4024 c = *eptr;
4025 if (c == 0x000d)
4026 {
4027 if (++eptr >= md->end_subject) break;
4028 if (*eptr == 0x000a) eptr++;
4029 }
4030 else
4031 {
4032 if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4033 break;
4034 eptr++;
4035 }
4036 }
4037 break;
4038
4039 case OP_NOT_HSPACE:
4040 for (i = min; i < max; i++)
4041 {
4042 if (eptr >= md->end_subject) break;
4043 c = *eptr;
4044 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4045 eptr++;
4046 }
4047 break;
4048
4049 case OP_HSPACE:
4050 for (i = min; i < max; i++)
4051 {
4052 if (eptr >= md->end_subject) break;
4053 c = *eptr;
4054 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4055 eptr++;
4056 }
4057 break;
4058
4059 case OP_NOT_VSPACE:
4060 for (i = min; i < max; i++)
4061 {
4062 if (eptr >= md->end_subject) break;
4063 c = *eptr;
4064 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4065 break;
4066 eptr++;
4067 }
4068 break;
4069
4070 case OP_VSPACE:
4071 for (i = min; i < max; i++)
4072 {
4073 if (eptr >= md->end_subject) break;
4074 c = *eptr;
4075 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4076 break;
4077 eptr++;
4078 }
4079 break;
4080
4081 case OP_NOT_DIGIT:
4082 for (i = min; i < max; i++)
4083 {
4084 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4085 break;
4086 eptr++;
4087 }
4088 break;
4089
4090 case OP_DIGIT:
4091 for (i = min; i < max; i++)
4092 {
4093 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4094 break;
4095 eptr++;
4096 }
4097 break;
4098
4099 case OP_NOT_WHITESPACE:
4100 for (i = min; i < max; i++)
4101 {
4102 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4103 break;
4104 eptr++;
4105 }
4106 break;
4107
4108 case OP_WHITESPACE:
4109 for (i = min; i < max; i++)
4110 {
4111 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4112 break;
4113 eptr++;
4114 }
4115 break;
4116
4117 case OP_NOT_WORDCHAR:
4118 for (i = min; i < max; i++)
4119 {
4120 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4121 break;
4122 eptr++;
4123 }
4124 break;
4125
4126 case OP_WORDCHAR:
4127 for (i = min; i < max; i++)
4128 {
4129 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4130 break;
4131 eptr++;
4132 }
4133 break;
4134
4135 default:
4136 RRETURN(PCRE_ERROR_INTERNAL);
4137 }
4138
4139 /* eptr is now past the end of the maximum run */
4140
4141 if (possessive) continue;
4142 while (eptr >= pp)
4143 {
4144 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4145 eptr--;
4146 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4147 }
4148 }
4149
4150 /* Get here if we can't make it match with any permitted repetitions */
4151
4152 RRETURN(MATCH_NOMATCH);
4153 }
4154 /* Control never gets here */
4155
4156 /* There's been some horrible disaster. Arrival here can only mean there is
4157 something seriously wrong in the code above or the OP_xxx definitions. */
4158
4159 default:
4160 DPRINTF(("Unknown opcode %d\n", *ecode));
4161 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4162 }
4163
4164 /* Do not stick any code in here without much thought; it is assumed
4165 that "continue" in the code above comes out to here to repeat the main
4166 loop. */
4167
4168 } /* End of main loop */
4169 /* Control never reaches here */
4170
4171
4172 /* When compiling to use the heap rather than the stack for recursive calls to
4173 match(), the RRETURN() macro jumps here. The number that is saved in
4174 frame->Xwhere indicates which label we actually want to return to. */
4175
4176 #ifdef NO_RECURSE
4177 #define LBL(val) case val: goto L_RM##val;
4178 HEAP_RETURN:
4179 switch (frame->Xwhere)
4180 {
4181 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4182 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4183 LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4184 LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4185 LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4186 LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4187 default:
4188 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4189 return PCRE_ERROR_INTERNAL;
4190 }
4191 #undef LBL
4192 #endif /* NO_RECURSE */
4193 }
4194
4195
4196 /***************************************************************************
4197 ****************************************************************************
4198 RECURSION IN THE match() FUNCTION
4199
4200 Undefine all the macros that were defined above to handle this. The first group below is removed only when NO_RECURSE is set. The cleanup matters because the pcre_exec() function that follows reuses some of these names (for example length, ims, and flags) as ordinary parameter and variable names. */
4201
4202 #ifdef NO_RECURSE /* Build in which match() uses the heap rather than the stack for its recursive calls */
4203 #undef eptr
4204 #undef ecode
4205 #undef mstart
4206 #undef offset_top
4207 #undef ims
4208 #undef eptrb
4209 #undef flags
4210
4211 #undef callpat
4212 #undef charptr
4213 #undef data
4214 #undef next
4215 #undef pp
4216 #undef prev
4217 #undef saved_eptr
4218
4219 #undef new_recursive
4220
4221 #undef cur_is_word
4222 #undef condition
4223 #undef prev_is_word
4224
4225 #undef original_ims
4226
4227 #undef ctype
4228 #undef length
4229 #undef max
4230 #undef min
4231 #undef number
4232 #undef offset
4233 #undef op
4234 #undef save_capture_last
4235 #undef save_offset1
4236 #undef save_offset2
4237 #undef save_offset3
4238 #undef stacksave
4239
4240 #undef newptrb
4241
4242 #endif /* NO_RECURSE */
4243
4244 /* These two are defined as macros in both cases (with and without NO_RECURSE), which is why they are undefined here, outside the conditional block above. */
4245
4246 #undef fc
4247 #undef fi
4248
4249 /***************************************************************************
4250 ***************************************************************************/
4251
4252
4253
4254 /*************************************************
4255 * Execute a Regular Expression *
4256 *************************************************/
4257
4258 /* This function applies a compiled re to a subject string and picks out
4259 portions of the string if it matches. Two elements in the vector are set for
4260 each substring: the offsets to the start and end of the substring.
4261
4262 Arguments:
4263 argument_re points to the compiled expression
4264 extra_data points to extra data or is NULL
4265 subject points to the subject string
4266 length length of subject string (may contain binary zeros)
4267 start_offset where to start in the subject string
4268 options option bits
4269 offsets points to a vector of ints to be filled in with offsets
4270 offsetcount the number of elements in the vector
4271
4272 Returns: > 0 => success; value is the number of elements filled in
4273 = 0 => success, but offsets is not big enough
4274 -1 => failed to match
4275 < -1 => some kind of unexpected problem
4276 */
4277
4278 PCRE_EXP_DEFN int
4279 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4280 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4281 int offsetcount)
4282 {
4283 int rc, resetcount, ocount;
4284 int first_byte = -1;
4285 int req_byte = -1;
4286 int req_byte2 = -1;
4287 int newline;
4288 unsigned long int ims;
4289 BOOL using_temporary_offsets = FALSE;
4290 BOOL anchored;
4291 BOOL startline;
4292 BOOL firstline;
4293 BOOL first_byte_caseless = FALSE;
4294 BOOL req_byte_caseless = FALSE;
4295 BOOL utf8;
4296 match_data match_block;
4297 match_data *md = &match_block;
4298 const uschar *tables;
4299 const uschar *start_bits = NULL;
4300 USPTR start_match = (USPTR)subject + start_offset;
4301 USPTR end_subject;
4302 USPTR req_byte_ptr = start_match - 1;
4303 eptrblock eptrchain[EPTR_WORK_SIZE];
4304
4305 pcre_study_data internal_study;
4306 const pcre_study_data *study;
4307
4308 real_pcre internal_re;
4309 const real_pcre *external_re = (const real_pcre *)argument_re;
4310 const real_pcre *re = external_re;
4311
4312 /* Plausibility checks */
4313
4314 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4315 if (re == NULL || subject == NULL ||
4316 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4317 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4318
4319 /* Fish out the optional data from the extra_data structure, first setting
4320 the default values. */
4321
4322 study = NULL;
4323 md->match_limit = MATCH_LIMIT;
4324 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4325 md->callout_data = NULL;
4326
4327 /* The table pointer is always in native byte order. */
4328
4329 tables = external_re->tables;
4330
4331 if (extra_data != NULL)
4332 {
4333 register unsigned int flags = extra_data->flags;
4334 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4335 study = (const pcre_study_data *)extra_data->study_data;
4336 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4337 md->match_limit = extra_data->match_limit;
4338 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4339 md->match_limit_recursion = extra_data->match_limit_recursion;
4340 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4341 md->callout_data = extra_data->callout_data;
4342 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4343 }
4344
4345 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4346 is a feature that makes it possible to save compiled regex and re-use them
4347 in other programs later. */
4348
4349 if (tables == NULL) tables = _pcre_default_tables;
4350
4351 /* Check that the first field in the block is the magic number. If it is not,
4352 test for a regex that was compiled on a host of opposite endianness. If this is
4353 the case, flipped values are put in internal_re and internal_study if there was
4354 study data too. */
4355
4356 if (re->magic_number != MAGIC_NUMBER)
4357 {
4358 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4359 if (re == NULL) return PCRE_ERROR_BADMAGIC;
4360 if (study != NULL) study = &internal_study;
4361 }
4362
4363 /* Set up other data */
4364
4365 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4366 startline = (re->options & PCRE_STARTLINE) != 0;
4367 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4368
4369 /* The code starts after the real_pcre block and the capture name table. */
4370
4371 md->start_code = (const uschar *)external_re + re->name_table_offset +
4372 re->name_count * re->name_entry_size;
4373
4374 md->start_subject = (USPTR)subject;
4375 md->start_offset = start_offset;
4376 md->end_subject = md->start_subject + length;
4377 end_subject = md->end_subject;
4378
4379 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4380 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4381
4382 md->notbol = (options & PCRE_NOTBOL) != 0;
4383 md->noteol = (options & PCRE_NOTEOL) != 0;
4384 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4385 md->partial = (options & PCRE_PARTIAL) != 0;
4386 md->hitend = FALSE;
4387
4388 md->recursive = NULL; /* No recursion at top level */
4389 md->eptrchain = eptrchain; /* Make workspace generally available */
4390
4391 md->lcc = tables + lcc_offset;
4392 md->ctypes = tables + ctypes_offset;
4393
4394 /* Handle different types of newline. The three bits give eight cases. If
4395 nothing is set at run time, whatever was used at compile time applies. */
4396
4397 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4398 PCRE_NEWLINE_BITS)
4399 {
4400 case 0: newline = NEWLINE; break; /* Compile-time default */
4401 case PCRE_NEWLINE_CR: newline = '\r'; break;
4402 case PCRE_NEWLINE_LF: newline = '\n'; break;
4403 case PCRE_NEWLINE_CR+
4404 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4405 case PCRE_NEWLINE_ANY: newline = -1; break;
4406 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4407 default: return PCRE_ERROR_BADNEWLINE;
4408 }
4409
4410 if (newline == -2)
4411 {
4412 md->nltype = NLTYPE_ANYCRLF;
4413 }
4414 else if (newline < 0)
4415 {
4416 md->nltype = NLTYPE_ANY;
4417 }
4418 else
4419 {
4420 md->nltype = NLTYPE_FIXED;
4421 if (newline > 255)
4422 {
4423 md->nllen = 2;
4424 md->nl[0] = (newline >> 8) & 255;
4425 md->nl[1] = newline & 255;
4426 }
4427 else
4428 {
4429 md->nllen = 1;
4430 md->nl[0] = newline;
4431 }
4432 }
4433
4434 /* Partial matching is supported only for a restricted set of regexes at the
4435 moment. */
4436
4437 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4438 return PCRE_ERROR_BADPARTIAL;
4439
4440 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4441 back the character offset. */
4442
4443 #ifdef SUPPORT_UTF8
4444 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4445 {
4446 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4447 return PCRE_ERROR_BADUTF8;
4448 if (start_offset > 0 && start_offset < length)
4449 {
4450 int tb = ((uschar *)subject)[start_offset];
4451 if (tb > 127)
4452 {
4453 tb &= 0xc0;
4454 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4455 }
4456 }
4457 }
4458 #endif
4459
4460 /* The ims options can vary during the matching as a result of the presence
4461 of (?ims) items in the pattern. They are kept in a local variable so that
4462 restoring at the exit of a group is easy. */
4463
4464 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4465
4466 /* If the expression has got more back references than the offsets supplied can
4467 hold, we get a temporary chunk of working store to use during the matching.
4468 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4469 of 3. */
4470
4471 ocount = offsetcount - (offsetcount % 3);
4472
4473 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4474 {
4475 ocount = re->top_backref * 3 + 3;
4476 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4477 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4478 using_temporary_offsets = TRUE;
4479 DPRINTF(("Got memory to hold back references\n"));
4480 }
4481 else md->offset_vector = offsets;
4482
4483 md->offset_end = ocount;
4484 md->offset_max = (2*ocount)/3;
4485 md->offset_overflow = FALSE;
4486 md->capture_last = -1;
4487
4488 /* Compute the minimum number of offsets that we need to reset each time. Doing
4489 this makes a huge difference to execution time when there aren't many brackets
4490 in the pattern. */
4491
4492 resetcount = 2 + re->top_bracket * 2;
4493 if (resetcount > offsetcount) resetcount = ocount;
4494
4495 /* Reset the working variables associated with each extraction. These should
4496 never be used unless previously set, but they get saved and restored, and so we
4497 initialize them to avoid reading uninitialized locations. */
4498
4499 if (md->offset_vector != NULL)
4500 {
4501 register int *iptr = md->offset_vector + ocount;
4502 register int *iend = iptr - resetcount/2 + 1;
4503 while (--iptr >= iend) *iptr = -1;
4504 }
4505
4506 /* Set up the first character to match, if available. The first_byte value is
4507 never set for an anchored regular expression, but the anchoring may be forced
4508 at run time, so we have to test for anchoring. The first char may be unset for
4509 an unanchored pattern, of course. If there's no first char and the pattern was
4510 studied, there may be a bitmap of possible first characters. */
4511
4512 if (!anchored)
4513 {
4514 if ((re->options & PCRE_FIRSTSET) != 0)
4515 {
4516 first_byte = re->first_byte & 255;
4517 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4518 first_byte = md->lcc[first_byte];
4519 }
4520 else
4521 if (!startline && study != NULL &&
4522 (study->options & PCRE_STUDY_MAPPED) != 0)
4523 start_bits = study->start_bits;
4524 }
4525
4526 /* For anchored or unanchored matches, there may be a "last known required
4527 character" set. */
4528
4529 if ((re->options & PCRE_REQCHSET) != 0)
4530 {
4531 req_byte = re->req_byte & 255;
4532 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4533 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
4534 }
4535
4536
4537 /* ==========================================================================*/
4538
4539 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4540 the loop runs just once. */
4541
4542 for(;;)
4543 {
4544 USPTR save_end_subject = end_subject;
4545
4546 /* Reset the maximum number of extractions we might see. */
4547
4548 if (md->offset_vector != NULL)
4549 {
4550 register int *iptr = md->offset_vector;
4551 register int *iend = iptr + resetcount;
4552 while (iptr < iend) *iptr++ = -1;
4553 }
4554
4555 /* Advance to a unique first char if possible. If firstline is TRUE, the
4556 start of the match is constrained to the first line of a multiline string.
4557 That is, the match must be before or at the first newline. Implement this by
4558 temporarily adjusting end_subject so that we stop scanning at a newline. If
4559 the match fails at the newline, later code breaks this loop. */
4560
4561 if (firstline)
4562 {
4563 USPTR t = start_match;
4564 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4565 end_subject = t;
4566 }
4567
4568 /* Now test for a unique first byte */
4569
4570 if (first_byte >= 0)
4571 {
4572 if (first_byte_caseless)
4573 while (start_match < end_subject &&
4574 md->lcc[*start_match] != first_byte)
4575 start_match++;
4576 else
4577 while (start_match < end_subject && *start_match != first_byte)
4578 start_match++;
4579 }
4580
4581 /* Or to just after a linebreak for a multiline match if possible */
4582
4583 else if (startline)
4584 {
4585 if (start_match > md->start_subject + start_offset)
4586 {
4587 while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4588 start_match++;
4589
4590 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4591 and we are now at a LF, advance the match position by one more character.
4592 */
4593
4594 if (start_match[-1] == '\r' &&
4595 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4596 start_match < end_subject &&
4597 *start_match == '\n')
4598 start_match++;
4599 }
4600 }
4601
4602 /* Or to a non-unique first char after study */
4603
4604 else if (start_bits != NULL)
4605 {
4606 while (start_match < end_subject)
4607 {
4608 register unsigned int c = *start_match;
4609 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4610 }
4611 }
4612
4613 /* Restore fudged end_subject */
4614
4615 end_subject = save_end_subject;
4616
4617 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4618 printf(">>>> Match against: ");
4619 pchars(start_match, end_subject - start_match, TRUE, md);
4620 printf("\n");
4621 #endif
4622
4623 /* If req_byte is set, we know that that character must appear in the subject
4624 for the match to succeed. If the first character is set, req_byte must be
4625 later in the subject; otherwise the test starts at the match point. This
4626 optimization can save a huge amount of backtracking in patterns with nested
4627 unlimited repeats that aren't going to match. Writing separate code for
4628 cased/caseless versions makes it go faster, as does using an autoincrement
4629 and backing off on a match.
4630
4631 HOWEVER: when the subject string is very, very long, searching to its end can
4632 take a long time, and give bad performance on quite ordinary patterns. This
4633 showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4634 string... so we don't do this when the string is sufficiently long.
4635
4636 ALSO: this processing is disabled when partial matching is requested.
4637 */
4638
4639 if (req_byte >= 0 &&
4640 end_subject - start_match < REQ_BYTE_MAX &&
4641 !md->partial)
4642 {
4643 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4644
4645 /* We don't need to repeat the search if we haven't yet reached the
4646 place we found it at last time. */
4647
4648 if (p > req_byte_ptr)
4649 {
4650 if (req_byte_caseless)
4651 {
4652 while (p < end_subject)
4653 {
4654 register int pp = *p++;
4655 if (pp == req_byte || pp == req_byte2) { p--; break; }
4656 }
4657 }
4658 else
4659 {
4660 while (p < end_subject)
4661 {
4662 if (*p++ == req_byte) { p--; break; }
4663 }
4664 }
4665
4666 /* If we can't find the required character, break the matching loop,
4667 forcing a match failure. */
4668
4669 if (p >= end_subject)
4670 {
4671 rc = MATCH_NOMATCH;
4672 break;
4673 }
4674
4675 /* If we have found the required character, save the point where we
4676 found it, so that we don't search again next time round the loop if
4677 the start hasn't passed this character yet. */
4678
4679 req_byte_ptr = p;
4680 }
4681 }
4682
4683 /* OK, we can now run the match. */
4684
4685 md->start_match_ptr = start_match; /* Insurance */
4686 md->match_call_count = 0;
4687 md->eptrn = 0; /* Next free eptrchain slot */
4688 rc = match(start_match, md->start_code, start_match, 2, md,
4689 ims, NULL, 0, 0);
4690
4691 /* Any return other than MATCH_NOMATCH breaks the loop. */
4692
4693 if (rc != MATCH_NOMATCH) break;
4694
4695 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4696 newline in the subject (though it may continue over the newline). Therefore,
4697 if we have just failed to match, starting at a newline, do not continue. */
4698
4699 if (firstline && IS_NEWLINE(start_match)) break;
4700
4701 /* Advance the match position by one character. */
4702
4703 start_match++;
4704 #ifdef SUPPORT_UTF8
4705 if (utf8)
4706 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4707 start_match++;
4708 #endif
4709
4710 /* Break the loop if the pattern is anchored or if we have passed the end of
4711 the subject. */
4712
4713 if (anchored || start_match > end_subject) break;
4714
4715 /* If we have just passed a CR and the newline option is CRLF or ANY or
4716 ANYCRLF, and we are now at a LF, advance the match position by one more
4717 character. */
4718
4719 if (start_match[-1] == '\r' &&
4720 (md->nltype == NLTYPE_ANY ||
4721 md->nltype == NLTYPE_ANYCRLF ||
4722 md->nllen == 2) &&
4723 start_match < end_subject &&
4724 *start_match == '\n')
4725 start_match++;
4726
4727 } /* End of for(;;) "bumpalong" loop */
4728
4729 /* ==========================================================================*/
4730
4731 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4732 conditions is true:
4733
4734 (1) The pattern is anchored;
4735
4736 (2) We are past the end of the subject;
4737
4738 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4739 this option requests that a match occur at or before the first newline in
4740 the subject.
4741
4742 When we have a match and the offset vector is big enough to deal with any
4743 backreferences, captured substring offsets will already be set up. In the case
4744 where we had to get some local store to hold offsets for backreference
4745 processing, copy those that we can. In this case there need not be overflow if
4746 certain parts of the pattern were not used, even though there are more
4747 capturing parentheses than vector slots. */
4748
4749 if (rc == MATCH_MATCH)
4750 {
4751 if (using_temporary_offsets)
4752 {
4753 if (offsetcount >= 4)
4754 {
4755 memcpy(offsets + 2, md->offset_vector + 2,
4756 (offsetcount - 2) * sizeof(int));
4757 DPRINTF(("Copied offsets from temporary memory\n"));
4758 }
4759 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4760 DPRINTF(("Freeing temporary memory\n"));
4761 (pcre_free)(md->offset_vector);
4762 }
4763
4764 /* Set the return code to the number of captured strings, or 0 if there are
4765 too many to fit into the vector. */
4766
4767 rc = md->offset_overflow? 0 : md->end_offset_top/2;
4768
4769 /* If there is space, set up the whole thing as substring 0. The value of
4770 md->start_match_ptr might be modified if \K was encountered on the successful
4771 matching path. */
4772
4773 if (offsetcount < 2) rc = 0; else
4774 {
4775 offsets[0] = md->start_match_ptr - md->start_subject;
4776 offsets[1] = md->end_match_ptr - md->start_subject;
4777 }
4778
4779 DPRINTF((">>>> returning %d\n", rc));
4780 return rc;
4781 }
4782
4783 /* Control gets here if there has been an error, or if the overall match
4784 attempt has failed at all permitted starting positions. */
4785
4786 if (using_temporary_offsets)
4787 {
4788 DPRINTF(("Freeing temporary memory\n"));
4789 (pcre_free)(md->offset_vector);
4790 }
4791
4792 if (rc != MATCH_NOMATCH)
4793 {
4794 DPRINTF((">>>> error: returning %d\n", rc));
4795 return rc;
4796 }
4797 else if (md->partial && md->hitend)
4798 {
4799 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4800 return PCRE_ERROR_PARTIAL;
4801 }
4802 else
4803 {
4804 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4805 return PCRE_ERROR_NOMATCH;
4806 }
4807 }
4808
4809 /* End of pcre_exec.c */