Fix text type in ACL error message.
[exim.git] / src / src / pcre / pcre_exec.c
1 /* $Cambridge: exim/src/src/pcre/pcre_exec.c,v 1.4 2007/01/23 15:08:45 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
9
10 Written by Philip Hazel
11 Copyright (c) 1997-2006 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42
43 /* This module contains pcre_exec(), the externally visible function that does
44 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
45 possible. There are also some static supporting functions. */
46
/* These three names are set up ahead of the inclusion of pcre_internal.h. */

/* Block containing newline information */
#define NLBLOCK md

/* Field containing processed string start */
#define PSSTART start_subject

/* Field containing processed string end */
#define PSEND end_subject
50
51 #include "pcre_internal.h"
52
/* Tail recursions take the eptrblocks for their chain from a workspace on the
stack that is obtained at top level. EPTR_WORK_SIZE defines the size of that
workspace. */

#define EPTR_WORK_SIZE (1000)

/* Bits that may be set in the flags argument of match() */

#define match_condassert     0x01  /* Called to check a condition assertion */
#define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
#define match_tail_recursed  0x04  /* Tail recursive call */

/* Values that match() returns when no error has occurred. Errors are reported
with the externally defined PCRE_ERROR_xxx codes, all of which are negative. */

#define MATCH_NOMATCH        0
#define MATCH_MATCH          1

/* The largest number of offset-vector ints that is saved on the stack for a
recursive call; malloc is used when the offset vector is bigger. Keep this a
multiple of 3, because the length of the offset vector is always a multiple
of 3. */

#define REC_STACK_SAVE_MAX 30

/* Minimum and maximum counts for the common repeats. In the table of maxima,
0 stands for infinity. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
80
81
82
83 #ifdef DEBUG
84 /*************************************************
85 * Debugging function to print chars *
86 *************************************************/
87
88 /* Print a sequence of chars in printable format, stopping at the end of the
89 subject if the requested.
90
91 Arguments:
92 p points to characters
93 length number to print
94 is_subject TRUE if printing from within md->start_subject
95 md pointer to matching data block, if is_subject is TRUE
96
97 Returns: nothing
98 */
99
100 static void
101 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
102 {
103 unsigned int c;
104 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
105 while (length-- > 0)
106 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
107 }
108 #endif
109
110
111
112 /*************************************************
113 * Match a back-reference *
114 *************************************************/
115
116 /* If a back reference hasn't been set, the length that is passed is greater
117 than the number of characters left in the string, so the match fails.
118
119 Arguments:
120 offset index into the offset vector
121 eptr points into the subject
122 length length to be matched
123 md points to match data block
124 ims the ims flags
125
126 Returns: TRUE if matched
127 */
128
129 static BOOL
130 match_ref(int offset, register USPTR eptr, int length, match_data *md,
131 unsigned long int ims)
132 {
133 USPTR p = md->start_subject + md->offset_vector[offset];
134
135 #ifdef DEBUG
136 if (eptr >= md->end_subject)
137 printf("matching subject <null>");
138 else
139 {
140 printf("matching subject ");
141 pchars(eptr, length, TRUE, md);
142 }
143 printf(" against backref ");
144 pchars(p, length, FALSE, md);
145 printf("\n");
146 #endif
147
148 /* Always fail if not enough characters left */
149
150 if (length > md->end_subject - eptr) return FALSE;
151
152 /* Separate the caselesss case for speed */
153
154 if ((ims & PCRE_CASELESS) != 0)
155 {
156 while (length-- > 0)
157 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
158 }
159 else
160 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
161
162 return TRUE;
163 }
164
165
166
167 /***************************************************************************
168 ****************************************************************************
169 RECURSION IN THE match() FUNCTION
170
171 The match() function is highly recursive, though not every recursive call
172 increases the recursive depth. Nevertheless, some regular expressions can cause
173 it to recurse to a great depth. I was writing for Unix, so I just let it call
174 itself recursively. This uses the stack for saving everything that has to be
175 saved for a recursive call. On Unix, the stack can be large, and this works
176 fine.
177
178 It turns out that on some non-Unix-like systems there are problems with
179 programs that use a lot of stack. (This despite the fact that every last chip
180 has oodles of memory these days, and techniques for extending the stack have
181 been known for decades.) So....
182
183 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
184 calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead of on the stack. Macros are used to
186 achieve this so that the actual code doesn't look very different to what it
187 always used to.
188 ****************************************************************************
189 ***************************************************************************/
190
191
192 /* These versions of the macros use the stack, as normal. There are debugging
193 versions and production versions. */
194
195 #ifndef NO_RECURSE
#define REGISTER register

#ifndef DEBUG

/* Production versions: RMATCH is a plain recursive call of match() at one
greater recursion depth, and RRETURN is a plain return. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)

#define RRETURN(ra) return ra

#else

/* Debugging versions: the same call and return, with the source line number
printed around the call and the returned value printed before the return. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }

#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }

#endif
214
215 #else
216
217
/* These versions of the macros manage a private stack on the heap. Note
that the rd argument of RMATCH isn't actually used. It's the md argument of
match(), which never changes. */

#define REGISTER

/* RMATCH gets a new frame from pcre_stack_malloc(), copies the "call"
arguments into it, chains it to the current frame, and jumps to HEAP_RECURSE.
When the "callee" comes back via longjmp(), the current frame is reloaded from
md->thisframe and its Xresult field is copied into rx.

If the new frame cannot be obtained, this incarnation gives up with
PCRE_ERROR_NOMEMORY by way of RRETURN, so that the error is passed back like
any other negative result instead of a NULL pointer being dereferenced. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  if (setjmp(frame->Xwhere) == 0)\
    {\
    newframe->Xeptr = ra;\
    newframe->Xecode = rb;\
    newframe->Xoffset_top = rc;\
    newframe->Xims = re;\
    newframe->Xeptrb = rf;\
    newframe->Xflags = rg;\
    newframe->Xrdepth = frame->Xrdepth + 1;\
    newframe->Xprevframe = frame;\
    frame = newframe;\
    DPRINTF(("restarting from line %d\n", __LINE__));\
    goto HEAP_RECURSE;\
    }\
  else\
    {\
    DPRINTF(("longjumped back to line %d\n", __LINE__));\
    frame = md->thisframe;\
    rx = frame->Xresult;\
    }\
  }
248
/* RRETURN releases the current frame and makes its predecessor current. At the
top level (no predecessor) it is a real return from match(). Otherwise the
result is left in the predecessor's Xresult field, md->thisframe is pointed at
the predecessor, and control goes back to the setjmp() in the RMATCH that
created the released frame. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  (pcre_stack_free)(oldframe);\
  if (frame == NULL) return ra;\
  frame->Xresult = ra;\
  md->thisframe = frame;\
  longjmp(frame->Xwhere, 1);\
  }
262
263
264 /* Structure for remembering the local variables in a private frame */
265
266 typedef struct heapframe {
267 struct heapframe *Xprevframe;
268
269 /* Function arguments that may change */
270
271 const uschar *Xeptr;
272 const uschar *Xecode;
273 int Xoffset_top;
274 long int Xims;
275 eptrblock *Xeptrb;
276 int Xflags;
277 unsigned int Xrdepth;
278
279 /* Function local variables */
280
281 const uschar *Xcallpat;
282 const uschar *Xcharptr;
283 const uschar *Xdata;
284 const uschar *Xnext;
285 const uschar *Xpp;
286 const uschar *Xprev;
287 const uschar *Xsaved_eptr;
288
289 recursion_info Xnew_recursive;
290
291 BOOL Xcur_is_word;
292 BOOL Xcondition;
293 BOOL Xprev_is_word;
294
295 unsigned long int Xoriginal_ims;
296
297 #ifdef SUPPORT_UCP
298 int Xprop_type;
299 int Xprop_value;
300 int Xprop_fail_result;
301 int Xprop_category;
302 int Xprop_chartype;
303 int Xprop_script;
304 #endif
305
306 int Xctype;
307 unsigned int Xfc;
308 int Xfi;
309 int Xlength;
310 int Xmax;
311 int Xmin;
312 int Xnumber;
313 int Xoffset;
314 int Xop;
315 int Xsave_capture_last;
316 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
317 int Xstacksave[REC_STACK_SAVE_MAX];
318
319 eptrblock Xnewptrb;
320
321 /* Place to pass back result, and where to jump back to */
322
323 int Xresult;
324 jmp_buf Xwhere;
325
326 } heapframe;
327
328 #endif
329
330
331 /***************************************************************************
332 ***************************************************************************/
333
334
335
336 /*************************************************
337 * Match from current position *
338 *************************************************/
339
340 /* This function is called recursively in many circumstances. Whenever it
341 returns a negative (error) response, the outer incarnation must also return the
342 same response.
343
344 Performance note: It might be tempting to extract commonly used fields from the
345 md structure (e.g. utf8, end_subject) into individual variables to improve
346 performance. Tests using gcc on a SPARC disproved this; in the first case, it
347 made performance worse.
348
349 Arguments:
350 eptr pointer to current character in subject
351 ecode pointer to current position in compiled code
352 offset_top current top pointer
353 md pointer to "static" info for the match
354 ims current /i, /m, and /s options
355 eptrb pointer to chain of blocks containing eptr at start of
356 brackets - for testing for empty matches
357 flags can contain
358 match_condassert - this is an assertion condition
359 match_cbegroup - this is the start of an unlimited repeat
360 group that can match an empty string
361 match_tail_recursed - this is a tail_recursed group
362 rdepth the recursion depth
363
364 Returns: MATCH_MATCH if matched ) these values are >= 0
365 MATCH_NOMATCH if failed to match )
366 a negative PCRE_ERROR_xxx value if aborted by an error condition
367 (e.g. stopped by repeated call or recursion limit)
368 */
369
370 static int
371 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
372 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
373 int flags, unsigned int rdepth)
374 {
375 /* These variables do not need to be preserved over recursion in this function,
376 so they can be ordinary variables in all cases. Mark some of them with
377 "register" because they are used a lot in loops. */
378
379 register int rrc; /* Returns from recursive calls */
380 register int i; /* Used for loops not involving calls to RMATCH() */
381 register unsigned int c; /* Character values not kept over RMATCH() calls */
382 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
383
384 BOOL minimize, possessive; /* Quantifier options */
385
386 /* When recursion is not being used, all "local" variables that have to be
387 preserved over calls to RMATCH() are part of a "frame" which is obtained from
388 heap storage. Set up the top-level frame here; others are obtained from the
389 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
390
391 #ifdef NO_RECURSE
392 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
393 frame->Xprevframe = NULL; /* Marks the top level */
394
395 /* Copy in the original argument variables */
396
397 frame->Xeptr = eptr;
398 frame->Xecode = ecode;
399 frame->Xoffset_top = offset_top;
400 frame->Xims = ims;
401 frame->Xeptrb = eptrb;
402 frame->Xflags = flags;
403 frame->Xrdepth = rdepth;
404
405 /* This is where control jumps back to to effect "recursion" */
406
407 HEAP_RECURSE:
408
409 /* Macros make the argument variables come from the current frame */
410
411 #define eptr frame->Xeptr
412 #define ecode frame->Xecode
413 #define offset_top frame->Xoffset_top
414 #define ims frame->Xims
415 #define eptrb frame->Xeptrb
416 #define flags frame->Xflags
417 #define rdepth frame->Xrdepth
418
419 /* Ditto for the local variables */
420
421 #ifdef SUPPORT_UTF8
422 #define charptr frame->Xcharptr
423 #endif
424 #define callpat frame->Xcallpat
425 #define data frame->Xdata
426 #define next frame->Xnext
427 #define pp frame->Xpp
428 #define prev frame->Xprev
429 #define saved_eptr frame->Xsaved_eptr
430
431 #define new_recursive frame->Xnew_recursive
432
433 #define cur_is_word frame->Xcur_is_word
434 #define condition frame->Xcondition
435 #define prev_is_word frame->Xprev_is_word
436
437 #define original_ims frame->Xoriginal_ims
438
439 #ifdef SUPPORT_UCP
440 #define prop_type frame->Xprop_type
441 #define prop_value frame->Xprop_value
442 #define prop_fail_result frame->Xprop_fail_result
443 #define prop_category frame->Xprop_category
444 #define prop_chartype frame->Xprop_chartype
445 #define prop_script frame->Xprop_script
446 #endif
447
448 #define ctype frame->Xctype
449 #define fc frame->Xfc
450 #define fi frame->Xfi
451 #define length frame->Xlength
452 #define max frame->Xmax
453 #define min frame->Xmin
454 #define number frame->Xnumber
455 #define offset frame->Xoffset
456 #define op frame->Xop
457 #define save_capture_last frame->Xsave_capture_last
458 #define save_offset1 frame->Xsave_offset1
459 #define save_offset2 frame->Xsave_offset2
460 #define save_offset3 frame->Xsave_offset3
461 #define stacksave frame->Xstacksave
462
463 #define newptrb frame->Xnewptrb
464
465 /* When recursion is being used, local variables are allocated on the stack and
466 get preserved during recursion in the normal way. In this environment, fi and
467 i, and fc and c, can be the same variables. */
468
469 #else /* NO_RECURSE not defined */
470 #define fi i
471 #define fc c
472
473
474 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
475 const uschar *charptr; /* in small blocks of the code. My normal */
476 #endif /* style of coding would have declared */
477 const uschar *callpat; /* them within each of those blocks. */
478 const uschar *data; /* However, in order to accommodate the */
479 const uschar *next; /* version of this code that uses an */
480 USPTR pp; /* external "stack" implemented on the */
481 const uschar *prev; /* heap, it is easier to declare them all */
482 USPTR saved_eptr; /* here, so the declarations can be cut */
483 /* out in a block. The only declarations */
484 recursion_info new_recursive; /* within blocks below are for variables */
485 /* that do not have to be preserved over */
486 BOOL cur_is_word; /* a recursive call to RMATCH(). */
487 BOOL condition;
488 BOOL prev_is_word;
489
490 unsigned long int original_ims;
491
492 #ifdef SUPPORT_UCP
493 int prop_type;
494 int prop_value;
495 int prop_fail_result;
496 int prop_category;
497 int prop_chartype;
498 int prop_script;
499 #endif
500
501 int ctype;
502 int length;
503 int max;
504 int min;
505 int number;
506 int offset;
507 int op;
508 int save_capture_last;
509 int save_offset1, save_offset2, save_offset3;
510 int stacksave[REC_STACK_SAVE_MAX];
511
512 eptrblock newptrb;
513 #endif /* NO_RECURSE */
514
/* These statements are here to stop the compiler complaining about uninitialized
516 variables. */
517
518 #ifdef SUPPORT_UCP
519 prop_value = 0;
520 prop_fail_result = 0;
521 #endif
522
523
524 /* This label is used for tail recursion, which is used in a few cases even
525 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
526 used. Thanks to Ian Taylor for noticing this possibility and sending the
527 original patch. */
528
529 TAIL_RECURSE:
530
531 /* OK, now we can get on with the real code of the function. Recursive calls
532 are specified by the macro RMATCH and RRETURN is used to return. When
533 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
534 and a "return", respectively (possibly with some debugging if DEBUG is
535 defined). However, RMATCH isn't like a function call because it's quite a
536 complicated macro. It has to be used in one particular way. This shouldn't,
537 however, impact performance when true recursion is being used. */
538
539 /* First check that we haven't called match() too many times, or that we
540 haven't exceeded the recursive call limit. */
541
542 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
543 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
544
545 original_ims = ims; /* Save for resetting on ')' */
546
547 #ifdef SUPPORT_UTF8
548 utf8 = md->utf8; /* Local copy of the flag */
549 #else
550 utf8 = FALSE;
551 #endif
552
553 /* At the start of a group with an unlimited repeat that may match an empty
554 string, the match_cbegroup flag is set. When this is the case, add the current
555 subject pointer to the chain of such remembered pointers, to be checked when we
556 hit the closing ket, in order to break infinite loops that match no characters.
557 When match() is called in other circumstances, don't add to the chain. If this
558 is a tail recursion, use a block from the workspace, as the one on the stack is
559 already used. */
560
561 if ((flags & match_cbegroup) != 0)
562 {
563 eptrblock *p;
564 if ((flags & match_tail_recursed) != 0)
565 {
566 if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
567 p = md->eptrchain + md->eptrn++;
568 }
569 else p = &newptrb;
570 p->epb_saved_eptr = eptr;
571 p->epb_prev = eptrb;
572 eptrb = p;
573 }
574
575 /* Now start processing the opcodes. */
576
577 for (;;)
578 {
579 minimize = possessive = FALSE;
580 op = *ecode;
581
582 /* For partial matching, remember if we ever hit the end of the subject after
583 matching at least one subject character. */
584
585 if (md->partial &&
586 eptr >= md->end_subject &&
587 eptr > md->start_match)
588 md->hitend = TRUE;
589
590 switch(op)
591 {
592 /* Handle a capturing bracket. If there is space in the offset vector, save
593 the current subject position in the working slot at the top of the vector.
594 We mustn't change the current values of the data slot, because they may be
595 set from a previous iteration of this group, and be referred to by a
596 reference inside the group.
597
598 If the bracket fails to match, we need to restore this value and also the
599 values of the final offsets, in case they were set by a previous iteration
600 of the same bracket.
601
602 If there isn't enough space in the offset vector, treat this as if it were
603 a non-capturing bracket. Don't worry about setting the flag for the error
604 case here; that is handled in the code for KET. */
605
606 case OP_CBRA:
607 case OP_SCBRA:
608 number = GET2(ecode, 1+LINK_SIZE);
609 offset = number << 1;
610
611 #ifdef DEBUG
612 printf("start bracket %d\n", number);
613 printf("subject=");
614 pchars(eptr, 16, TRUE, md);
615 printf("\n");
616 #endif
617
618 if (offset < md->offset_max)
619 {
620 save_offset1 = md->offset_vector[offset];
621 save_offset2 = md->offset_vector[offset+1];
622 save_offset3 = md->offset_vector[md->offset_end - number];
623 save_capture_last = md->capture_last;
624
625 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
626 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
627
628 flags = (op == OP_SCBRA)? match_cbegroup : 0;
629 do
630 {
631 RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
632 ims, eptrb, flags);
633 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
634 md->capture_last = save_capture_last;
635 ecode += GET(ecode, 1);
636 }
637 while (*ecode == OP_ALT);
638
639 DPRINTF(("bracket %d failed\n", number));
640
641 md->offset_vector[offset] = save_offset1;
642 md->offset_vector[offset+1] = save_offset2;
643 md->offset_vector[md->offset_end - number] = save_offset3;
644
645 RRETURN(MATCH_NOMATCH);
646 }
647
648 /* Insufficient room for saving captured contents. Treat as a non-capturing
649 bracket. */
650
651 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
652
653 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
654 final alternative within the brackets, we would return the result of a
655 recursive call to match() whatever happened. We can reduce stack usage by
656 turning this into a tail recursion. */
657
658 case OP_BRA:
659 case OP_SBRA:
660 DPRINTF(("start non-capturing bracket\n"));
661 flags = (op >= OP_SBRA)? match_cbegroup : 0;
662 for (;;)
663 {
664 if (ecode[GET(ecode, 1)] != OP_ALT)
665 {
666 ecode += _pcre_OP_lengths[*ecode];
667 flags |= match_tail_recursed;
668 DPRINTF(("bracket 0 tail recursion\n"));
669 goto TAIL_RECURSE;
670 }
671
672 /* For non-final alternatives, continue the loop for a NOMATCH result;
673 otherwise return. */
674
675 RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
676 eptrb, flags);
677 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
678 ecode += GET(ecode, 1);
679 }
680 /* Control never reaches here. */
681
682 /* Conditional group: compilation checked that there are no more than
683 two branches. If the condition is false, skipping the first branch takes us
684 past the end if there is only one branch, but that's OK because that is
685 exactly what going to the ket would do. As there is only one branch to be
686 obeyed, we can use tail recursion to avoid using another stack frame. */
687
688 case OP_COND:
689 case OP_SCOND:
690 if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
691 {
692 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
693 condition = md->recursive != NULL &&
694 (offset == RREF_ANY || offset == md->recursive->group_num);
695 ecode += condition? 3 : GET(ecode, 1);
696 }
697
698 else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
699 {
700 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
701 condition = offset < offset_top && md->offset_vector[offset] >= 0;
702 ecode += condition? 3 : GET(ecode, 1);
703 }
704
705 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
706 {
707 condition = FALSE;
708 ecode += GET(ecode, 1);
709 }
710
711 /* The condition is an assertion. Call match() to evaluate it - setting
712 the final argument match_condassert causes it to stop at the end of an
713 assertion. */
714
715 else
716 {
717 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
718 match_condassert);
719 if (rrc == MATCH_MATCH)
720 {
721 condition = TRUE;
722 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
723 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
724 }
725 else if (rrc != MATCH_NOMATCH)
726 {
727 RRETURN(rrc); /* Need braces because of following else */
728 }
729 else
730 {
731 condition = FALSE;
732 ecode += GET(ecode, 1);
733 }
734 }
735
736 /* We are now at the branch that is to be obeyed. As there is only one,
737 we can use tail recursion to avoid using another stack frame. If the second
738 alternative doesn't exist, we can just plough on. */
739
740 if (condition || *ecode == OP_ALT)
741 {
742 ecode += 1 + LINK_SIZE;
743 flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
744 goto TAIL_RECURSE;
745 }
746 else
747 {
748 ecode += 1 + LINK_SIZE;
749 }
750 break;
751
752
753 /* End of the pattern. If we are in a top-level recursion, we should
754 restore the offsets appropriately and continue from after the call. */
755
756 case OP_END:
757 if (md->recursive != NULL && md->recursive->group_num == 0)
758 {
759 recursion_info *rec = md->recursive;
760 DPRINTF(("End of pattern in a (?0) recursion\n"));
761 md->recursive = rec->prevrec;
762 memmove(md->offset_vector, rec->offset_save,
763 rec->saved_max * sizeof(int));
764 md->start_match = rec->save_start;
765 ims = original_ims;
766 ecode = rec->after_call;
767 break;
768 }
769
770 /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
771 string - backtracking will then try other alternatives, if any. */
772
773 if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
774 md->end_match_ptr = eptr; /* Record where we ended */
775 md->end_offset_top = offset_top; /* and how many extracts were taken */
776 RRETURN(MATCH_MATCH);
777
778 /* Change option settings */
779
780 case OP_OPT:
781 ims = ecode[1];
782 ecode += 2;
783 DPRINTF(("ims set to %02lx\n", ims));
784 break;
785
786 /* Assertion brackets. Check the alternative branches in turn - the
787 matching won't pass the KET for an assertion. If any one branch matches,
788 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
789 start of each branch to move the current point backwards, so the code at
790 this level is identical to the lookahead case. */
791
792 case OP_ASSERT:
793 case OP_ASSERTBACK:
794 do
795 {
796 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
797 if (rrc == MATCH_MATCH) break;
798 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
799 ecode += GET(ecode, 1);
800 }
801 while (*ecode == OP_ALT);
802 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
803
804 /* If checking an assertion for a condition, return MATCH_MATCH. */
805
806 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
807
808 /* Continue from after the assertion, updating the offsets high water
809 mark, since extracts may have been taken during the assertion. */
810
811 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
812 ecode += 1 + LINK_SIZE;
813 offset_top = md->end_offset_top;
814 continue;
815
816 /* Negative assertion: all branches must fail to match */
817
818 case OP_ASSERT_NOT:
819 case OP_ASSERTBACK_NOT:
820 do
821 {
822 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
823 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
824 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
825 ecode += GET(ecode,1);
826 }
827 while (*ecode == OP_ALT);
828
829 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
830
831 ecode += 1 + LINK_SIZE;
832 continue;
833
834 /* Move the subject pointer back. This occurs only at the start of
835 each branch of a lookbehind assertion. If we are too close to the start to
836 move back, this match function fails. When working with UTF-8 we move
837 back a number of characters, not bytes. */
838
839 case OP_REVERSE:
840 #ifdef SUPPORT_UTF8
841 if (utf8)
842 {
843 i = GET(ecode, 1);
844 while (i-- > 0)
845 {
846 eptr--;
847 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
848 BACKCHAR(eptr)
849 }
850 }
851 else
852 #endif
853
854 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
855
856 {
857 eptr -= GET(ecode, 1);
858 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
859 }
860
861 /* Skip to next op code */
862
863 ecode += 1 + LINK_SIZE;
864 break;
865
866 /* The callout item calls an external function, if one is provided, passing
867 details of the match so far. This is mainly for debugging, though the
868 function is able to force a failure. */
869
870 case OP_CALLOUT:
871 if (pcre_callout != NULL)
872 {
873 pcre_callout_block cb;
874 cb.version = 1; /* Version 1 of the callout block */
875 cb.callout_number = ecode[1];
876 cb.offset_vector = md->offset_vector;
877 cb.subject = (PCRE_SPTR)md->start_subject;
878 cb.subject_length = md->end_subject - md->start_subject;
879 cb.start_match = md->start_match - md->start_subject;
880 cb.current_position = eptr - md->start_subject;
881 cb.pattern_position = GET(ecode, 2);
882 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
883 cb.capture_top = offset_top/2;
884 cb.capture_last = md->capture_last;
885 cb.callout_data = md->callout_data;
886 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
887 if (rrc < 0) RRETURN(rrc);
888 }
889 ecode += 2 + 2*LINK_SIZE;
890 break;
891
892 /* Recursion either matches the current regex, or some subexpression. The
893 offset data is the offset to the starting bracket from the start of the
894 whole pattern. (This is so that it works from duplicated subpatterns.)
895
896 If there are any capturing brackets started but not finished, we have to
897 save their starting points and reinstate them after the recursion. However,
898 we don't know how many such there are (offset_top records the completed
899 total) so we just have to save all the potential data. There may be up to
900 65535 such values, which is too large to put on the stack, but using malloc
901 for small numbers seems expensive. As a compromise, the stack is used when
902 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
903 is used. A problem is what to do if the malloc fails ... there is no way of
904 returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
905 values on the stack, and accept that the rest may be wrong.
906
907 There are also other values that have to be saved. We use a chained
908 sequence of blocks that actually live on the stack. Thanks to Robin Houston
909 for the original version of this logic. */
910
911 case OP_RECURSE:
912 {
913 callpat = md->start_code + GET(ecode, 1);
914 new_recursive.group_num = (callpat == md->start_code)? 0 :
915 GET2(callpat, 1 + LINK_SIZE);
916
917 /* Add to "recursing stack" */
918
919 new_recursive.prevrec = md->recursive;
920 md->recursive = &new_recursive;
921
922 /* Find where to continue from afterwards */
923
924 ecode += 1 + LINK_SIZE;
925 new_recursive.after_call = ecode;
926
927 /* Now save the offset data. */
928
929 new_recursive.saved_max = md->offset_end;
930 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
931 new_recursive.offset_save = stacksave;
932 else
933 {
934 new_recursive.offset_save =
935 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
936 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
937 }
938
939 memcpy(new_recursive.offset_save, md->offset_vector,
940 new_recursive.saved_max * sizeof(int));
941 new_recursive.save_start = md->start_match;
942 md->start_match = eptr;
943
944 /* OK, now we can do the recursion. For each top-level alternative we
945 restore the offset and recursion data. */
946
947 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
948 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
949 do
950 {
951 RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
952 md, ims, eptrb, flags);
953 if (rrc == MATCH_MATCH)
954 {
955 DPRINTF(("Recursion matched\n"));
956 md->recursive = new_recursive.prevrec;
957 if (new_recursive.offset_save != stacksave)
958 (pcre_free)(new_recursive.offset_save);
959 RRETURN(MATCH_MATCH);
960 }
961 else if (rrc != MATCH_NOMATCH)
962 {
963 DPRINTF(("Recursion gave error %d\n", rrc));
964 RRETURN(rrc);
965 }
966
967 md->recursive = &new_recursive;
968 memcpy(md->offset_vector, new_recursive.offset_save,
969 new_recursive.saved_max * sizeof(int));
970 callpat += GET(callpat, 1);
971 }
972 while (*callpat == OP_ALT);
973
974 DPRINTF(("Recursion didn't match\n"));
975 md->recursive = new_recursive.prevrec;
976 if (new_recursive.offset_save != stacksave)
977 (pcre_free)(new_recursive.offset_save);
978 RRETURN(MATCH_NOMATCH);
979 }
980 /* Control never reaches here */
981
982 /* "Once" brackets are like assertion brackets except that after a match,
983 the point in the subject string is not moved back. Thus there can never be
984 a move back into the brackets. Friedl calls these "atomic" subpatterns.
985 Check the alternative branches in turn - the matching won't pass the KET
986 for this kind of subpattern. If any one branch matches, we carry on as at
987 the end of a normal bracket, leaving the subject pointer. */
988
989 case OP_ONCE:
990 prev = ecode;
991 saved_eptr = eptr;
992
993 do
994 {
995 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
996 eptrb, 0);
997 if (rrc == MATCH_MATCH) break;
998 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
999 ecode += GET(ecode,1);
1000 }
1001 while (*ecode == OP_ALT);
1002
1003 /* If we hit the end of the group (which could be repeated), fail */
1004
1005 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1006
1007 /* Continue as from after the assertion, updating the offsets high water
1008 mark, since extracts may have been taken. */
1009
1010 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1011
1012 offset_top = md->end_offset_top;
1013 eptr = md->end_match_ptr;
1014
1015 /* For a non-repeating ket, just continue at this level. This also
1016 happens for a repeating ket if no characters were matched in the group.
1017 This is the forcible breaking of infinite loops as implemented in Perl
1018 5.005. If there is an options reset, it will get obeyed in the normal
1019 course of events. */
1020
1021 if (*ecode == OP_KET || eptr == saved_eptr)
1022 {
1023 ecode += 1+LINK_SIZE;
1024 break;
1025 }
1026
1027 /* The repeating kets try the rest of the pattern or restart from the
1028 preceding bracket, in the appropriate order. The second "call" of match()
1029 uses tail recursion, to avoid using another stack frame. We need to reset
1030 any options that changed within the bracket before re-running it, so
1031 check the next opcode. */
1032
1033 if (ecode[1+LINK_SIZE] == OP_OPT)
1034 {
1035 ims = (ims & ~PCRE_IMS) | ecode[4];
1036 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1037 }
1038
1039 if (*ecode == OP_KETRMIN)
1040 {
1041 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1042 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1043 ecode = prev;
1044 flags = match_tail_recursed;
1045 goto TAIL_RECURSE;
1046 }
1047 else /* OP_KETRMAX */
1048 {
1049 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1050 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1051 ecode += 1 + LINK_SIZE;
1052 flags = match_tail_recursed;
1053 goto TAIL_RECURSE;
1054 }
1055 /* Control never gets here */
1056
1057 /* An alternation is the end of a branch; scan along to find the end of the
1058 bracketed group and go to there. */
1059
1060 case OP_ALT:
1061 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1062 break;
1063
1064 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1065 that it may occur zero times. It may repeat infinitely, or not at all -
1066 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1067 repeat limits are compiled as a number of copies, with the optional ones
1068 preceded by BRAZERO or BRAMINZERO. */
1069
1070 case OP_BRAZERO:
1071 {
1072 next = ecode+1;
1073 RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1074 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1075 do next += GET(next,1); while (*next == OP_ALT);
1076 ecode = next + 1 + LINK_SIZE;
1077 }
1078 break;
1079
1080 case OP_BRAMINZERO:
1081 {
1082 next = ecode+1;
1083 do next += GET(next, 1); while (*next == OP_ALT);
1084 RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1085 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1086 ecode++;
1087 }
1088 break;
1089
1090 /* End of a group, repeated or non-repeating. */
1091
1092 case OP_KET:
1093 case OP_KETRMIN:
1094 case OP_KETRMAX:
1095 prev = ecode - GET(ecode, 1);
1096
1097 /* If this was a group that remembered the subject start, in order to break
1098 infinite repeats of empty string matches, retrieve the subject start from
1099 the chain. Otherwise, set it NULL. */
1100
1101 if (*prev >= OP_SBRA)
1102 {
1103 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1104 eptrb = eptrb->epb_prev; /* Backup to previous group */
1105 }
1106 else saved_eptr = NULL;
1107
1108 /* If we are at the end of an assertion group, stop matching and return
1109 MATCH_MATCH, but record the current high water mark for use by positive
1110 assertions. Do this also for the "once" (atomic) groups. */
1111
1112 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1113 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1114 *prev == OP_ONCE)
1115 {
1116 md->end_match_ptr = eptr; /* For ONCE */
1117 md->end_offset_top = offset_top;
1118 RRETURN(MATCH_MATCH);
1119 }
1120
1121 /* For capturing groups we have to check the group number back at the start
1122 and if necessary complete handling an extraction by setting the offsets and
1123 bumping the high water mark. Note that whole-pattern recursion is coded as
1124 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1125 when the OP_END is reached. Other recursion is handled here. */
1126
1127 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1128 {
1129 number = GET2(prev, 1+LINK_SIZE);
1130 offset = number << 1;
1131
1132 #ifdef DEBUG
1133 printf("end bracket %d", number);
1134 printf("\n");
1135 #endif
1136
1137 md->capture_last = number;
1138 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1139 {
1140 md->offset_vector[offset] =
1141 md->offset_vector[md->offset_end - number];
1142 md->offset_vector[offset+1] = eptr - md->start_subject;
1143 if (offset_top <= offset) offset_top = offset + 2;
1144 }
1145
1146 /* Handle a recursively called group. Restore the offsets
1147 appropriately and continue from after the call. */
1148
1149 if (md->recursive != NULL && md->recursive->group_num == number)
1150 {
1151 recursion_info *rec = md->recursive;
1152 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1153 md->recursive = rec->prevrec;
1154 md->start_match = rec->save_start;
1155 memcpy(md->offset_vector, rec->offset_save,
1156 rec->saved_max * sizeof(int));
1157 ecode = rec->after_call;
1158 ims = original_ims;
1159 break;
1160 }
1161 }
1162
1163 /* For both capturing and non-capturing groups, reset the value of the ims
1164 flags, in case they got changed during the group. */
1165
1166 ims = original_ims;
1167 DPRINTF(("ims reset to %02lx\n", ims));
1168
1169 /* For a non-repeating ket, just continue at this level. This also
1170 happens for a repeating ket if no characters were matched in the group.
1171 This is the forcible breaking of infinite loops as implemented in Perl
1172 5.005. If there is an options reset, it will get obeyed in the normal
1173 course of events. */
1174
1175 if (*ecode == OP_KET || eptr == saved_eptr)
1176 {
1177 ecode += 1 + LINK_SIZE;
1178 break;
1179 }
1180
1181 /* The repeating kets try the rest of the pattern or restart from the
1182 preceding bracket, in the appropriate order. In the second case, we can use
1183 tail recursion to avoid using another stack frame. */
1184
1185 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1186
1187 if (*ecode == OP_KETRMIN)
1188 {
1189 RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1190 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1191 ecode = prev;
1192 flags |= match_tail_recursed;
1193 goto TAIL_RECURSE;
1194 }
1195 else /* OP_KETRMAX */
1196 {
1197 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1198 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1199 ecode += 1 + LINK_SIZE;
1200 flags = match_tail_recursed;
1201 goto TAIL_RECURSE;
1202 }
1203 /* Control never gets here */
1204
1205 /* Start of subject unless notbol, or after internal newline if multiline */
1206
1207 case OP_CIRC:
1208 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1209 if ((ims & PCRE_MULTILINE) != 0)
1210 {
1211 if (eptr != md->start_subject &&
1212 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1213 RRETURN(MATCH_NOMATCH);
1214 ecode++;
1215 break;
1216 }
1217 /* ... else fall through */
1218
1219 /* Start of subject assertion */
1220
1221 case OP_SOD:
1222 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1223 ecode++;
1224 break;
1225
1226 /* Start of match assertion */
1227
1228 case OP_SOM:
1229 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1230 ecode++;
1231 break;
1232
1233 /* Assert before internal newline if multiline, or before a terminating
1234 newline unless endonly is set, else end of subject unless noteol is set. */
1235
1236 case OP_DOLL:
1237 if ((ims & PCRE_MULTILINE) != 0)
1238 {
1239 if (eptr < md->end_subject)
1240 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1241 else
1242 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1243 ecode++;
1244 break;
1245 }
1246 else
1247 {
1248 if (md->noteol) RRETURN(MATCH_NOMATCH);
1249 if (!md->endonly)
1250 {
1251 if (eptr != md->end_subject &&
1252 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1253 RRETURN(MATCH_NOMATCH);
1254 ecode++;
1255 break;
1256 }
1257 }
1258 /* ... else fall through for endonly */
1259
1260 /* End of subject assertion (\z) */
1261
1262 case OP_EOD:
1263 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1264 ecode++;
1265 break;
1266
1267 /* End of subject or ending \n assertion (\Z) */
1268
1269 case OP_EODN:
1270 if (eptr != md->end_subject &&
1271 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1272 RRETURN(MATCH_NOMATCH);
1273 ecode++;
1274 break;
1275
1276 /* Word boundary assertions */
1277
1278 case OP_NOT_WORD_BOUNDARY:
1279 case OP_WORD_BOUNDARY:
1280 {
1281
1282 /* Find out if the previous and current characters are "word" characters.
1283 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1284 be "non-word" characters. */
1285
1286 #ifdef SUPPORT_UTF8
1287 if (utf8)
1288 {
1289 if (eptr == md->start_subject) prev_is_word = FALSE; else
1290 {
1291 const uschar *lastptr = eptr - 1;
1292 while((*lastptr & 0xc0) == 0x80) lastptr--;
1293 GETCHAR(c, lastptr);
1294 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1295 }
1296 if (eptr >= md->end_subject) cur_is_word = FALSE; else
1297 {
1298 GETCHAR(c, eptr);
1299 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1300 }
1301 }
1302 else
1303 #endif
1304
1305 /* More streamlined when not in UTF-8 mode */
1306
1307 {
1308 prev_is_word = (eptr != md->start_subject) &&
1309 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1310 cur_is_word = (eptr < md->end_subject) &&
1311 ((md->ctypes[*eptr] & ctype_word) != 0);
1312 }
1313
1314 /* Now see if the situation is what we want */
1315
1316 if ((*ecode++ == OP_WORD_BOUNDARY)?
1317 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1318 RRETURN(MATCH_NOMATCH);
1319 }
1320 break;
1321
1322 /* Match a single character type; inline for speed */
1323
1324 case OP_ANY:
1325 if ((ims & PCRE_DOTALL) == 0)
1326 {
1327 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1328 }
1329 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1330 if (utf8)
1331 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1332 ecode++;
1333 break;
1334
1335 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1336 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1337
1338 case OP_ANYBYTE:
1339 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1340 ecode++;
1341 break;
1342
1343 case OP_NOT_DIGIT:
1344 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1345 GETCHARINCTEST(c, eptr);
1346 if (
1347 #ifdef SUPPORT_UTF8
1348 c < 256 &&
1349 #endif
1350 (md->ctypes[c] & ctype_digit) != 0
1351 )
1352 RRETURN(MATCH_NOMATCH);
1353 ecode++;
1354 break;
1355
1356 case OP_DIGIT:
1357 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1358 GETCHARINCTEST(c, eptr);
1359 if (
1360 #ifdef SUPPORT_UTF8
1361 c >= 256 ||
1362 #endif
1363 (md->ctypes[c] & ctype_digit) == 0
1364 )
1365 RRETURN(MATCH_NOMATCH);
1366 ecode++;
1367 break;
1368
1369 case OP_NOT_WHITESPACE:
1370 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1371 GETCHARINCTEST(c, eptr);
1372 if (
1373 #ifdef SUPPORT_UTF8
1374 c < 256 &&
1375 #endif
1376 (md->ctypes[c] & ctype_space) != 0
1377 )
1378 RRETURN(MATCH_NOMATCH);
1379 ecode++;
1380 break;
1381
1382 case OP_WHITESPACE:
1383 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1384 GETCHARINCTEST(c, eptr);
1385 if (
1386 #ifdef SUPPORT_UTF8
1387 c >= 256 ||
1388 #endif
1389 (md->ctypes[c] & ctype_space) == 0
1390 )
1391 RRETURN(MATCH_NOMATCH);
1392 ecode++;
1393 break;
1394
1395 case OP_NOT_WORDCHAR:
1396 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1397 GETCHARINCTEST(c, eptr);
1398 if (
1399 #ifdef SUPPORT_UTF8
1400 c < 256 &&
1401 #endif
1402 (md->ctypes[c] & ctype_word) != 0
1403 )
1404 RRETURN(MATCH_NOMATCH);
1405 ecode++;
1406 break;
1407
1408 case OP_WORDCHAR:
1409 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1410 GETCHARINCTEST(c, eptr);
1411 if (
1412 #ifdef SUPPORT_UTF8
1413 c >= 256 ||
1414 #endif
1415 (md->ctypes[c] & ctype_word) == 0
1416 )
1417 RRETURN(MATCH_NOMATCH);
1418 ecode++;
1419 break;
1420
1421 case OP_ANYNL:
1422 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1423 GETCHARINCTEST(c, eptr);
1424 switch(c)
1425 {
1426 default: RRETURN(MATCH_NOMATCH);
1427 case 0x000d:
1428 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1429 break;
1430 case 0x000a:
1431 case 0x000b:
1432 case 0x000c:
1433 case 0x0085:
1434 case 0x2028:
1435 case 0x2029:
1436 break;
1437 }
1438 ecode++;
1439 break;
1440
1441 #ifdef SUPPORT_UCP
1442 /* Check the next character by Unicode property. We will get here only
1443 if the support is in the binary; otherwise a compile-time error occurs. */
1444
1445 case OP_PROP:
1446 case OP_NOTPROP:
1447 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1448 GETCHARINCTEST(c, eptr);
1449 {
1450 int chartype, script;
1451 int category = _pcre_ucp_findprop(c, &chartype, &script);
1452
1453 switch(ecode[1])
1454 {
1455 case PT_ANY:
1456 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1457 break;
1458
1459 case PT_LAMP:
1460 if ((chartype == ucp_Lu ||
1461 chartype == ucp_Ll ||
1462 chartype == ucp_Lt) == (op == OP_NOTPROP))
1463 RRETURN(MATCH_NOMATCH);
1464 break;
1465
1466 case PT_GC:
1467 if ((ecode[2] != category) == (op == OP_PROP))
1468 RRETURN(MATCH_NOMATCH);
1469 break;
1470
1471 case PT_PC:
1472 if ((ecode[2] != chartype) == (op == OP_PROP))
1473 RRETURN(MATCH_NOMATCH);
1474 break;
1475
1476 case PT_SC:
1477 if ((ecode[2] != script) == (op == OP_PROP))
1478 RRETURN(MATCH_NOMATCH);
1479 break;
1480
1481 default:
1482 RRETURN(PCRE_ERROR_INTERNAL);
1483 }
1484
1485 ecode += 3;
1486 }
1487 break;
1488
1489 /* Match an extended Unicode sequence. We will get here only if the support
1490 is in the binary; otherwise a compile-time error occurs. */
1491
1492 case OP_EXTUNI:
1493 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1494 GETCHARINCTEST(c, eptr);
1495 {
1496 int chartype, script;
1497 int category = _pcre_ucp_findprop(c, &chartype, &script);
1498 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1499 while (eptr < md->end_subject)
1500 {
1501 int len = 1;
1502 if (!utf8) c = *eptr; else
1503 {
1504 GETCHARLEN(c, eptr, len);
1505 }
1506 category = _pcre_ucp_findprop(c, &chartype, &script);
1507 if (category != ucp_M) break;
1508 eptr += len;
1509 }
1510 }
1511 ecode++;
1512 break;
1513 #endif
1514
1515
1516 /* Match a back reference, possibly repeatedly. Look past the end of the
1517 item to see if there is repeat information following. The code is similar
1518 to that for character classes, but repeated for efficiency. Then obey
1519 similar code to character type repeats - written out again for speed.
1520 However, if the referenced string is the empty string, always treat
1521 it as matched, any number of times (otherwise there could be infinite
1522 loops). */
1523
1524 case OP_REF:
1525 {
1526 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1527 ecode += 3; /* Advance past item */
1528
1529 /* If the reference is unset, set the length to be longer than the amount
1530 of subject left; this ensures that every attempt at a match fails. We
1531 can't just fail here, because of the possibility of quantifiers with zero
1532 minima. */
1533
1534 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1535 md->end_subject - eptr + 1 :
1536 md->offset_vector[offset+1] - md->offset_vector[offset];
1537
1538 /* Set up for repetition, or handle the non-repeated case */
1539
1540 switch (*ecode)
1541 {
1542 case OP_CRSTAR:
1543 case OP_CRMINSTAR:
1544 case OP_CRPLUS:
1545 case OP_CRMINPLUS:
1546 case OP_CRQUERY:
1547 case OP_CRMINQUERY:
1548 c = *ecode++ - OP_CRSTAR;
1549 minimize = (c & 1) != 0;
1550 min = rep_min[c]; /* Pick up values from tables; */
1551 max = rep_max[c]; /* zero for max => infinity */
1552 if (max == 0) max = INT_MAX;
1553 break;
1554
1555 case OP_CRRANGE:
1556 case OP_CRMINRANGE:
1557 minimize = (*ecode == OP_CRMINRANGE);
1558 min = GET2(ecode, 1);
1559 max = GET2(ecode, 3);
1560 if (max == 0) max = INT_MAX;
1561 ecode += 5;
1562 break;
1563
1564 default: /* No repeat follows */
1565 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1566 eptr += length;
1567 continue; /* With the main loop */
1568 }
1569
1570 /* If the length of the reference is zero, just continue with the
1571 main loop. */
1572
1573 if (length == 0) continue;
1574
1575 /* First, ensure the minimum number of matches are present. We get back
1576 the length of the reference string explicitly rather than passing the
1577 address of eptr, so that eptr can be a register variable. */
1578
1579 for (i = 1; i <= min; i++)
1580 {
1581 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1582 eptr += length;
1583 }
1584
1585 /* If min = max, continue at the same level without recursion.
1586 They are not both allowed to be zero. */
1587
1588 if (min == max) continue;
1589
1590 /* If minimizing, keep trying and advancing the pointer */
1591
1592 if (minimize)
1593 {
1594 for (fi = min;; fi++)
1595 {
1596 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1597 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1598 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1599 RRETURN(MATCH_NOMATCH);
1600 eptr += length;
1601 }
1602 /* Control never gets here */
1603 }
1604
1605 /* If maximizing, find the longest string and work backwards */
1606
1607 else
1608 {
1609 pp = eptr;
1610 for (i = min; i < max; i++)
1611 {
1612 if (!match_ref(offset, eptr, length, md, ims)) break;
1613 eptr += length;
1614 }
1615 while (eptr >= pp)
1616 {
1617 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1618 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1619 eptr -= length;
1620 }
1621 RRETURN(MATCH_NOMATCH);
1622 }
1623 }
1624 /* Control never gets here */
1625
1626
1627
1628 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1629 used when all the characters in the class have values in the range 0-255,
1630 and either the matching is caseful, or the characters are in the range
1631 0-127 when UTF-8 processing is enabled. The only difference between
1632 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1633 encountered.
1634
1635 First, look past the end of the item to see if there is repeat information
1636 following. Then obey similar code to character type repeats - written out
1637 again for speed. */
1638
1639 case OP_NCLASS:
1640 case OP_CLASS:
1641 {
1642 data = ecode + 1; /* Save for matching */
1643 ecode += 33; /* Advance past the item */
1644
1645 switch (*ecode)
1646 {
1647 case OP_CRSTAR:
1648 case OP_CRMINSTAR:
1649 case OP_CRPLUS:
1650 case OP_CRMINPLUS:
1651 case OP_CRQUERY:
1652 case OP_CRMINQUERY:
1653 c = *ecode++ - OP_CRSTAR;
1654 minimize = (c & 1) != 0;
1655 min = rep_min[c]; /* Pick up values from tables; */
1656 max = rep_max[c]; /* zero for max => infinity */
1657 if (max == 0) max = INT_MAX;
1658 break;
1659
1660 case OP_CRRANGE:
1661 case OP_CRMINRANGE:
1662 minimize = (*ecode == OP_CRMINRANGE);
1663 min = GET2(ecode, 1);
1664 max = GET2(ecode, 3);
1665 if (max == 0) max = INT_MAX;
1666 ecode += 5;
1667 break;
1668
1669 default: /* No repeat follows */
1670 min = max = 1;
1671 break;
1672 }
1673
1674 /* First, ensure the minimum number of matches are present. */
1675
1676 #ifdef SUPPORT_UTF8
1677 /* UTF-8 mode */
1678 if (utf8)
1679 {
1680 for (i = 1; i <= min; i++)
1681 {
1682 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1683 GETCHARINC(c, eptr);
1684 if (c > 255)
1685 {
1686 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1687 }
1688 else
1689 {
1690 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1691 }
1692 }
1693 }
1694 else
1695 #endif
1696 /* Not UTF-8 mode */
1697 {
1698 for (i = 1; i <= min; i++)
1699 {
1700 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1701 c = *eptr++;
1702 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1703 }
1704 }
1705
1706 /* If max == min we can continue with the main loop without the
1707 need to recurse. */
1708
1709 if (min == max) continue;
1710
1711 /* If minimizing, keep testing the rest of the expression and advancing
1712 the pointer while it matches the class. */
1713
1714 if (minimize)
1715 {
1716 #ifdef SUPPORT_UTF8
1717 /* UTF-8 mode */
1718 if (utf8)
1719 {
1720 for (fi = min;; fi++)
1721 {
1722 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1723 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1724 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1725 GETCHARINC(c, eptr);
1726 if (c > 255)
1727 {
1728 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1729 }
1730 else
1731 {
1732 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1733 }
1734 }
1735 }
1736 else
1737 #endif
1738 /* Not UTF-8 mode */
1739 {
1740 for (fi = min;; fi++)
1741 {
1742 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1743 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1744 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1745 c = *eptr++;
1746 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1747 }
1748 }
1749 /* Control never gets here */
1750 }
1751
1752 /* If maximizing, find the longest possible run, then work backwards. */
1753
1754 else
1755 {
1756 pp = eptr;
1757
1758 #ifdef SUPPORT_UTF8
1759 /* UTF-8 mode */
1760 if (utf8)
1761 {
1762 for (i = min; i < max; i++)
1763 {
1764 int len = 1;
1765 if (eptr >= md->end_subject) break;
1766 GETCHARLEN(c, eptr, len);
1767 if (c > 255)
1768 {
1769 if (op == OP_CLASS) break;
1770 }
1771 else
1772 {
1773 if ((data[c/8] & (1 << (c&7))) == 0) break;
1774 }
1775 eptr += len;
1776 }
1777 for (;;)
1778 {
1779 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1780 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1781 if (eptr-- == pp) break; /* Stop if tried at original pos */
1782 BACKCHAR(eptr);
1783 }
1784 }
1785 else
1786 #endif
1787 /* Not UTF-8 mode */
1788 {
1789 for (i = min; i < max; i++)
1790 {
1791 if (eptr >= md->end_subject) break;
1792 c = *eptr;
1793 if ((data[c/8] & (1 << (c&7))) == 0) break;
1794 eptr++;
1795 }
1796 while (eptr >= pp)
1797 {
1798 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1799 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1800 eptr--;
1801 }
1802 }
1803
1804 RRETURN(MATCH_NOMATCH);
1805 }
1806 }
1807 /* Control never gets here */
1808
1809
1810 /* Match an extended character class. This opcode is encountered only
1811 in UTF-8 mode, because that's the only time it is compiled. */
1812
1813 #ifdef SUPPORT_UTF8
1814 case OP_XCLASS:
1815 {
1816 data = ecode + 1 + LINK_SIZE; /* Save for matching */
1817 ecode += GET(ecode, 1); /* Advance past the item */
1818
1819 switch (*ecode)
1820 {
1821 case OP_CRSTAR:
1822 case OP_CRMINSTAR:
1823 case OP_CRPLUS:
1824 case OP_CRMINPLUS:
1825 case OP_CRQUERY:
1826 case OP_CRMINQUERY:
1827 c = *ecode++ - OP_CRSTAR;
1828 minimize = (c & 1) != 0;
1829 min = rep_min[c]; /* Pick up values from tables; */
1830 max = rep_max[c]; /* zero for max => infinity */
1831 if (max == 0) max = INT_MAX;
1832 break;
1833
1834 case OP_CRRANGE:
1835 case OP_CRMINRANGE:
1836 minimize = (*ecode == OP_CRMINRANGE);
1837 min = GET2(ecode, 1);
1838 max = GET2(ecode, 3);
1839 if (max == 0) max = INT_MAX;
1840 ecode += 5;
1841 break;
1842
1843 default: /* No repeat follows */
1844 min = max = 1;
1845 break;
1846 }
1847
1848 /* First, ensure the minimum number of matches are present. */
1849
1850 for (i = 1; i <= min; i++)
1851 {
1852 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1853 GETCHARINC(c, eptr);
1854 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1855 }
1856
1857 /* If max == min we can continue with the main loop without the
1858 need to recurse. */
1859
1860 if (min == max) continue;
1861
1862 /* If minimizing, keep testing the rest of the expression and advancing
1863 the pointer while it matches the class. */
1864
1865 if (minimize)
1866 {
1867 for (fi = min;; fi++)
1868 {
1869 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1870 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1871 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1872 GETCHARINC(c, eptr);
1873 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1874 }
1875 /* Control never gets here */
1876 }
1877
1878 /* If maximizing, find the longest possible run, then work backwards. */
1879
1880 else
1881 {
1882 pp = eptr;
1883 for (i = min; i < max; i++)
1884 {
1885 int len = 1;
1886 if (eptr >= md->end_subject) break;
1887 GETCHARLEN(c, eptr, len);
1888 if (!_pcre_xclass(c, data)) break;
1889 eptr += len;
1890 }
1891 for(;;)
1892 {
1893 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1894 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1895 if (eptr-- == pp) break; /* Stop if tried at original pos */
1896 BACKCHAR(eptr)
1897 }
1898 RRETURN(MATCH_NOMATCH);
1899 }
1900
1901 /* Control never gets here */
1902 }
1903 #endif /* End of XCLASS */
1904
1905 /* Match a single character, casefully */
1906
1907 case OP_CHAR:
1908 #ifdef SUPPORT_UTF8
1909 if (utf8)
1910 {
1911 length = 1;
1912 ecode++;
1913 GETCHARLEN(fc, ecode, length);
1914 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1915 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
1916 }
1917 else
1918 #endif
1919
1920 /* Non-UTF-8 mode */
1921 {
1922 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1923 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1924 ecode += 2;
1925 }
1926 break;
1927
1928 /* Match a single character, caselessly */
1929
1930 case OP_CHARNC:
1931 #ifdef SUPPORT_UTF8
1932 if (utf8)
1933 {
1934 length = 1;
1935 ecode++;
1936 GETCHARLEN(fc, ecode, length);
1937
1938 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1939
1940 /* If the pattern character's value is < 128, we have only one byte, and
1941 can use the fast lookup table. */
1942
1943 if (fc < 128)
1944 {
1945 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1946 }
1947
1948 /* Otherwise we must pick up the subject character */
1949
1950 else
1951 {
1952 unsigned int dc;
1953 GETCHARINC(dc, eptr);
1954 ecode += length;
1955
1956 /* If we have Unicode property support, we can use it to test the other
1957 case of the character, if there is one. */
1958
1959 if (fc != dc)
1960 {
1961 #ifdef SUPPORT_UCP
1962 if (dc != _pcre_ucp_othercase(fc))
1963 #endif
1964 RRETURN(MATCH_NOMATCH);
1965 }
1966 }
1967 }
1968 else
1969 #endif /* SUPPORT_UTF8 */
1970
1971 /* Non-UTF-8 mode */
1972 {
1973 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1974 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1975 ecode += 2;
1976 }
1977 break;
1978
1979 /* Match a single character repeatedly. */
1980
1981 case OP_EXACT:
1982 min = max = GET2(ecode, 1);
1983 ecode += 3;
1984 goto REPEATCHAR;
1985
1986 case OP_POSUPTO:
1987 possessive = TRUE;
1988 /* Fall through */
1989
1990 case OP_UPTO:
1991 case OP_MINUPTO:
1992 min = 0;
1993 max = GET2(ecode, 1);
1994 minimize = *ecode == OP_MINUPTO;
1995 ecode += 3;
1996 goto REPEATCHAR;
1997
1998 case OP_POSSTAR:
1999 possessive = TRUE;
2000 min = 0;
2001 max = INT_MAX;
2002 ecode++;
2003 goto REPEATCHAR;
2004
2005 case OP_POSPLUS:
2006 possessive = TRUE;
2007 min = 1;
2008 max = INT_MAX;
2009 ecode++;
2010 goto REPEATCHAR;
2011
2012 case OP_POSQUERY:
2013 possessive = TRUE;
2014 min = 0;
2015 max = 1;
2016 ecode++;
2017 goto REPEATCHAR;
2018
2019 case OP_STAR:
2020 case OP_MINSTAR:
2021 case OP_PLUS:
2022 case OP_MINPLUS:
2023 case OP_QUERY:
2024 case OP_MINQUERY:
2025 c = *ecode++ - OP_STAR;
2026 minimize = (c & 1) != 0;
2027 min = rep_min[c]; /* Pick up values from tables; */
2028 max = rep_max[c]; /* zero for max => infinity */
2029 if (max == 0) max = INT_MAX;
2030
2031 /* Common code for all repeated single-character matches. We can give
2032 up quickly if there are fewer than the minimum number of characters left in
2033 the subject. */
2034
2035 REPEATCHAR:
2036 #ifdef SUPPORT_UTF8
2037 if (utf8)
2038 {
2039 length = 1;
2040 charptr = ecode;
2041 GETCHARLEN(fc, ecode, length);
2042 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2043 ecode += length;
2044
2045 /* Handle multibyte character matching specially here. There is
2046 support for caseless matching if UCP support is present. */
2047
2048 if (length > 1)
2049 {
2050 int oclength = 0;
2051 uschar occhars[8];
2052
2053 #ifdef SUPPORT_UCP
2054 unsigned int othercase;
2055 if ((ims & PCRE_CASELESS) != 0 &&
2056 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2057 oclength = _pcre_ord2utf8(othercase, occhars);
2058 #endif /* SUPPORT_UCP */
2059
2060 for (i = 1; i <= min; i++)
2061 {
2062 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2063 /* Need braces because of following else */
2064 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2065 else
2066 {
2067 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2068 eptr += oclength;
2069 }
2070 }
2071
2072 if (min == max) continue;
2073
2074 if (minimize)
2075 {
2076 for (fi = min;; fi++)
2077 {
2078 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2079 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2080 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2081 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2082 /* Need braces because of following else */
2083 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2084 else
2085 {
2086 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2087 eptr += oclength;
2088 }
2089 }
2090 /* Control never gets here */
2091 }
2092
2093 else /* Maximize */
2094 {
2095 pp = eptr;
2096 for (i = min; i < max; i++)
2097 {
2098 if (eptr > md->end_subject - length) break;
2099 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2100 else if (oclength == 0) break;
2101 else
2102 {
2103 if (memcmp(eptr, occhars, oclength) != 0) break;
2104 eptr += oclength;
2105 }
2106 }
2107
2108 if (possessive) continue;
2109 while (eptr >= pp)
2110 {
2111 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2112 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2113 eptr -= length;
2114 }
2115 RRETURN(MATCH_NOMATCH);
2116 }
2117 /* Control never gets here */
2118 }
2119
2120 /* If the length of a UTF-8 character is 1, we fall through here, and
2121 obey the code as for non-UTF-8 characters below, though in this case the
2122 value of fc will always be < 128. */
2123 }
2124 else
2125 #endif /* SUPPORT_UTF8 */
2126
2127 /* When not in UTF-8 mode, load a single-byte character. */
2128 {
2129 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2130 fc = *ecode++;
2131 }
2132
2133 /* The value of fc at this point is always less than 256, though we may or
2134 may not be in UTF-8 mode. The code is duplicated for the caseless and
2135 caseful cases, for speed, since matching characters is likely to be quite
2136 common. First, ensure the minimum number of matches are present. If min =
2137 max, continue at the same level without recursing. Otherwise, if
2138 minimizing, keep trying the rest of the expression and advancing one
2139 matching character if failing, up to the maximum. Alternatively, if
2140 maximizing, find the maximum number of characters and work backwards. */
2141
2142 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2143 max, eptr));
2144
2145 if ((ims & PCRE_CASELESS) != 0)
2146 {
2147 fc = md->lcc[fc];
2148 for (i = 1; i <= min; i++)
2149 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2150 if (min == max) continue;
2151 if (minimize)
2152 {
2153 for (fi = min;; fi++)
2154 {
2155 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2156 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2157 if (fi >= max || eptr >= md->end_subject ||
2158 fc != md->lcc[*eptr++])
2159 RRETURN(MATCH_NOMATCH);
2160 }
2161 /* Control never gets here */
2162 }
2163 else /* Maximize */
2164 {
2165 pp = eptr;
2166 for (i = min; i < max; i++)
2167 {
2168 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2169 eptr++;
2170 }
2171 if (possessive) continue;
2172 while (eptr >= pp)
2173 {
2174 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2175 eptr--;
2176 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2177 }
2178 RRETURN(MATCH_NOMATCH);
2179 }
2180 /* Control never gets here */
2181 }
2182
2183 /* Caseful comparisons (includes all multi-byte characters) */
2184
2185 else
2186 {
2187 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2188 if (min == max) continue;
2189 if (minimize)
2190 {
2191 for (fi = min;; fi++)
2192 {
2193 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2194 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2195 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2196 RRETURN(MATCH_NOMATCH);
2197 }
2198 /* Control never gets here */
2199 }
2200 else /* Maximize */
2201 {
2202 pp = eptr;
2203 for (i = min; i < max; i++)
2204 {
2205 if (eptr >= md->end_subject || fc != *eptr) break;
2206 eptr++;
2207 }
2208 if (possessive) continue;
2209 while (eptr >= pp)
2210 {
2211 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2212 eptr--;
2213 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2214 }
2215 RRETURN(MATCH_NOMATCH);
2216 }
2217 }
2218 /* Control never gets here */
2219
2220 /* Match a negated single one-byte character. The character we are
2221 checking can be multibyte. */
2222
2223 case OP_NOT:
2224 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2225 ecode++;
2226 GETCHARINCTEST(c, eptr);
2227 if ((ims & PCRE_CASELESS) != 0)
2228 {
2229 #ifdef SUPPORT_UTF8
2230 if (c < 256)
2231 #endif
2232 c = md->lcc[c];
2233 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2234 }
2235 else
2236 {
2237 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2238 }
2239 break;
2240
2241 /* Match a negated single one-byte character repeatedly. This is almost a
2242 repeat of the code for a repeated single character, but I haven't found a
2243 nice way of commoning these up that doesn't require a test of the
2244 positive/negative option for each character match. Maybe that wouldn't add
2245 very much to the time taken, but character matching *is* what this is all
2246 about... */
2247
2248 case OP_NOTEXACT:
2249 min = max = GET2(ecode, 1);
2250 ecode += 3;
2251 goto REPEATNOTCHAR;
2252
2253 case OP_NOTUPTO:
2254 case OP_NOTMINUPTO:
2255 min = 0;
2256 max = GET2(ecode, 1);
2257 minimize = *ecode == OP_NOTMINUPTO;
2258 ecode += 3;
2259 goto REPEATNOTCHAR;
2260
2261 case OP_NOTPOSSTAR:
2262 possessive = TRUE;
2263 min = 0;
2264 max = INT_MAX;
2265 ecode++;
2266 goto REPEATNOTCHAR;
2267
2268 case OP_NOTPOSPLUS:
2269 possessive = TRUE;
2270 min = 1;
2271 max = INT_MAX;
2272 ecode++;
2273 goto REPEATNOTCHAR;
2274
2275 case OP_NOTPOSQUERY:
2276 possessive = TRUE;
2277 min = 0;
2278 max = 1;
2279 ecode++;
2280 goto REPEATNOTCHAR;
2281
2282 case OP_NOTPOSUPTO:
2283 possessive = TRUE;
2284 min = 0;
2285 max = GET2(ecode, 1);
2286 ecode += 3;
2287 goto REPEATNOTCHAR;
2288
2289 case OP_NOTSTAR:
2290 case OP_NOTMINSTAR:
2291 case OP_NOTPLUS:
2292 case OP_NOTMINPLUS:
2293 case OP_NOTQUERY:
2294 case OP_NOTMINQUERY:
2295 c = *ecode++ - OP_NOTSTAR;
2296 minimize = (c & 1) != 0;
2297 min = rep_min[c]; /* Pick up values from tables; */
2298 max = rep_max[c]; /* zero for max => infinity */
2299 if (max == 0) max = INT_MAX;
2300
2301 /* Common code for all repeated single-byte matches. We can give up quickly
2302 if there are fewer than the minimum number of bytes left in the
2303 subject. */
2304
2305 REPEATNOTCHAR:
2306 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2307 fc = *ecode++;
2308
2309 /* The code is duplicated for the caseless and caseful cases, for speed,
2310 since matching characters is likely to be quite common. First, ensure the
2311 minimum number of matches are present. If min = max, continue at the same
2312 level without recursing. Otherwise, if minimizing, keep trying the rest of
2313 the expression and advancing one matching character if failing, up to the
2314 maximum. Alternatively, if maximizing, find the maximum number of
2315 characters and work backwards. */
2316
2317 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2318 max, eptr));
2319
2320 if ((ims & PCRE_CASELESS) != 0)
2321 {
2322 fc = md->lcc[fc];
2323
2324 #ifdef SUPPORT_UTF8
2325 /* UTF-8 mode */
2326 if (utf8)
2327 {
2328 register unsigned int d;
2329 for (i = 1; i <= min; i++)
2330 {
2331 GETCHARINC(d, eptr);
2332 if (d < 256) d = md->lcc[d];
2333 if (fc == d) RRETURN(MATCH_NOMATCH);
2334 }
2335 }
2336 else
2337 #endif
2338
2339 /* Not UTF-8 mode */
2340 {
2341 for (i = 1; i <= min; i++)
2342 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2343 }
2344
2345 if (min == max) continue;
2346
2347 if (minimize)
2348 {
2349 #ifdef SUPPORT_UTF8
2350 /* UTF-8 mode */
2351 if (utf8)
2352 {
2353 register unsigned int d;
2354 for (fi = min;; fi++)
2355 {
2356 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2357 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2358 GETCHARINC(d, eptr);
2359 if (d < 256) d = md->lcc[d];
2360 if (fi >= max || eptr >= md->end_subject || fc == d)
2361 RRETURN(MATCH_NOMATCH);
2362 }
2363 }
2364 else
2365 #endif
2366 /* Not UTF-8 mode */
2367 {
2368 for (fi = min;; fi++)
2369 {
2370 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2371 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2372 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2373 RRETURN(MATCH_NOMATCH);
2374 }
2375 }
2376 /* Control never gets here */
2377 }
2378
2379 /* Maximize case */
2380
2381 else
2382 {
2383 pp = eptr;
2384
2385 #ifdef SUPPORT_UTF8
2386 /* UTF-8 mode */
2387 if (utf8)
2388 {
2389 register unsigned int d;
2390 for (i = min; i < max; i++)
2391 {
2392 int len = 1;
2393 if (eptr >= md->end_subject) break;
2394 GETCHARLEN(d, eptr, len);
2395 if (d < 256) d = md->lcc[d];
2396 if (fc == d) break;
2397 eptr += len;
2398 }
2399 if (possessive) continue;
2400 for(;;)
2401 {
2402 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2403 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2404 if (eptr-- == pp) break; /* Stop if tried at original pos */
2405 BACKCHAR(eptr);
2406 }
2407 }
2408 else
2409 #endif
2410 /* Not UTF-8 mode */
2411 {
2412 for (i = min; i < max; i++)
2413 {
2414 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2415 eptr++;
2416 }
2417 if (possessive) continue;
2418 while (eptr >= pp)
2419 {
2420 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2421 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2422 eptr--;
2423 }
2424 }
2425
2426 RRETURN(MATCH_NOMATCH);
2427 }
2428 /* Control never gets here */
2429 }
2430
2431 /* Caseful comparisons */
2432
2433 else
2434 {
2435 #ifdef SUPPORT_UTF8
2436 /* UTF-8 mode */
2437 if (utf8)
2438 {
2439 register unsigned int d;
2440 for (i = 1; i <= min; i++)
2441 {
2442 GETCHARINC(d, eptr);
2443 if (fc == d) RRETURN(MATCH_NOMATCH);
2444 }
2445 }
2446 else
2447 #endif
2448 /* Not UTF-8 mode */
2449 {
2450 for (i = 1; i <= min; i++)
2451 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2452 }
2453
2454 if (min == max) continue;
2455
2456 if (minimize)
2457 {
2458 #ifdef SUPPORT_UTF8
2459 /* UTF-8 mode */
2460 if (utf8)
2461 {
2462 register unsigned int d;
2463 for (fi = min;; fi++)
2464 {
2465 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2466 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2467 GETCHARINC(d, eptr);
2468 if (fi >= max || eptr >= md->end_subject || fc == d)
2469 RRETURN(MATCH_NOMATCH);
2470 }
2471 }
2472 else
2473 #endif
2474 /* Not UTF-8 mode */
2475 {
2476 for (fi = min;; fi++)
2477 {
2478 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2479 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2480 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2481 RRETURN(MATCH_NOMATCH);
2482 }
2483 }
2484 /* Control never gets here */
2485 }
2486
2487 /* Maximize case */
2488
2489 else
2490 {
2491 pp = eptr;
2492
2493 #ifdef SUPPORT_UTF8
2494 /* UTF-8 mode */
2495 if (utf8)
2496 {
2497 register unsigned int d;
2498 for (i = min; i < max; i++)
2499 {
2500 int len = 1;
2501 if (eptr >= md->end_subject) break;
2502 GETCHARLEN(d, eptr, len);
2503 if (fc == d) break;
2504 eptr += len;
2505 }
2506 if (possessive) continue;
2507 for(;;)
2508 {
2509 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2511 if (eptr-- == pp) break; /* Stop if tried at original pos */
2512 BACKCHAR(eptr);
2513 }
2514 }
2515 else
2516 #endif
2517 /* Not UTF-8 mode */
2518 {
2519 for (i = min; i < max; i++)
2520 {
2521 if (eptr >= md->end_subject || fc == *eptr) break;
2522 eptr++;
2523 }
2524 if (possessive) continue;
2525 while (eptr >= pp)
2526 {
2527 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2528 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2529 eptr--;
2530 }
2531 }
2532
2533 RRETURN(MATCH_NOMATCH);
2534 }
2535 }
2536 /* Control never gets here */
2537
2538 /* Match a single character type repeatedly; several different opcodes
2539 share code. This is very similar to the code for single characters, but we
2540 repeat it in the interests of efficiency. */
2541
2542 case OP_TYPEEXACT:
2543 min = max = GET2(ecode, 1);
2544 minimize = TRUE;
2545 ecode += 3;
2546 goto REPEATTYPE;
2547
2548 case OP_TYPEUPTO:
2549 case OP_TYPEMINUPTO:
2550 min = 0;
2551 max = GET2(ecode, 1);
2552 minimize = *ecode == OP_TYPEMINUPTO;
2553 ecode += 3;
2554 goto REPEATTYPE;
2555
2556 case OP_TYPEPOSSTAR:
2557 possessive = TRUE;
2558 min = 0;
2559 max = INT_MAX;
2560 ecode++;
2561 goto REPEATTYPE;
2562
2563 case OP_TYPEPOSPLUS:
2564 possessive = TRUE;
2565 min = 1;
2566 max = INT_MAX;
2567 ecode++;
2568 goto REPEATTYPE;
2569
2570 case OP_TYPEPOSQUERY:
2571 possessive = TRUE;
2572 min = 0;
2573 max = 1;
2574 ecode++;
2575 goto REPEATTYPE;
2576
2577 case OP_TYPEPOSUPTO:
2578 possessive = TRUE;
2579 min = 0;
2580 max = GET2(ecode, 1);
2581 ecode += 3;
2582 goto REPEATTYPE;
2583
2584 case OP_TYPESTAR:
2585 case OP_TYPEMINSTAR:
2586 case OP_TYPEPLUS:
2587 case OP_TYPEMINPLUS:
2588 case OP_TYPEQUERY:
2589 case OP_TYPEMINQUERY:
2590 c = *ecode++ - OP_TYPESTAR;
2591 minimize = (c & 1) != 0;
2592 min = rep_min[c]; /* Pick up values from tables; */
2593 max = rep_max[c]; /* zero for max => infinity */
2594 if (max == 0) max = INT_MAX;
2595
2596 /* Common code for all repeated single character type matches. Note that
2597 in UTF-8 mode, '.' matches a character of any length, but for the other
2598 character types, the valid characters are all one-byte long. */
2599
2600 REPEATTYPE:
2601 ctype = *ecode++; /* Code for the character type */
2602
2603 #ifdef SUPPORT_UCP
2604 if (ctype == OP_PROP || ctype == OP_NOTPROP)
2605 {
2606 prop_fail_result = ctype == OP_NOTPROP;
2607 prop_type = *ecode++;
2608 prop_value = *ecode++;
2609 }
2610 else prop_type = -1;
2611 #endif
2612
2613 /* First, ensure the minimum number of matches are present. Use inline
2614 code for maximizing the speed, and do the type test once at the start
2615 (i.e. keep it out of the loop). Also we can test that there are at least
2616 the minimum number of bytes before we start. This isn't as effective in
2617 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2618 is tidier. Also separate the UCP code, which can be the same for both UTF-8
2619 and single-bytes. */
2620
2621 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2622 if (min > 0)
2623 {
2624 #ifdef SUPPORT_UCP
2625 if (prop_type >= 0)
2626 {
2627 switch(prop_type)
2628 {
2629 case PT_ANY:
2630 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2631 for (i = 1; i <= min; i++)
2632 {
2633 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2634 GETCHARINC(c, eptr);
2635 }
2636 break;
2637
2638 case PT_LAMP:
2639 for (i = 1; i <= min; i++)
2640 {
2641 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2642 GETCHARINC(c, eptr);
2643 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2644 if ((prop_chartype == ucp_Lu ||
2645 prop_chartype == ucp_Ll ||
2646 prop_chartype == ucp_Lt) == prop_fail_result)
2647 RRETURN(MATCH_NOMATCH);
2648 }
2649 break;
2650
2651 case PT_GC:
2652 for (i = 1; i <= min; i++)
2653 {
2654 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2655 GETCHARINC(c, eptr);
2656 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2657 if ((prop_category == prop_value) == prop_fail_result)
2658 RRETURN(MATCH_NOMATCH);
2659 }
2660 break;
2661
2662 case PT_PC:
2663 for (i = 1; i <= min; i++)
2664 {
2665 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2666 GETCHARINC(c, eptr);
2667 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2668 if ((prop_chartype == prop_value) == prop_fail_result)
2669 RRETURN(MATCH_NOMATCH);
2670 }
2671 break;
2672
2673 case PT_SC:
2674 for (i = 1; i <= min; i++)
2675 {
2676 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2677 GETCHARINC(c, eptr);
2678 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2679 if ((prop_script == prop_value) == prop_fail_result)
2680 RRETURN(MATCH_NOMATCH);
2681 }
2682 break;
2683
2684 default:
2685 RRETURN(PCRE_ERROR_INTERNAL);
2686 }
2687 }
2688
2689 /* Match extended Unicode sequences. We will get here only if the
2690 support is in the binary; otherwise a compile-time error occurs. */
2691
2692 else if (ctype == OP_EXTUNI)
2693 {
2694 for (i = 1; i <= min; i++)
2695 {
2696 GETCHARINCTEST(c, eptr);
2697 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2698 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2699 while (eptr < md->end_subject)
2700 {
2701 int len = 1;
2702 if (!utf8) c = *eptr; else
2703 {
2704 GETCHARLEN(c, eptr, len);
2705 }
2706 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2707 if (prop_category != ucp_M) break;
2708 eptr += len;
2709 }
2710 }
2711 }
2712
2713 else
2714 #endif /* SUPPORT_UCP */
2715
2716 /* Handle all other cases when the coding is UTF-8 */
2717
2718 #ifdef SUPPORT_UTF8
2719 if (utf8) switch(ctype)
2720 {
2721 case OP_ANY:
2722 for (i = 1; i <= min; i++)
2723 {
2724 if (eptr >= md->end_subject ||
2725 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2726 RRETURN(MATCH_NOMATCH);
2727 eptr++;
2728 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2729 }
2730 break;
2731
2732 case OP_ANYBYTE:
2733 eptr += min;
2734 break;
2735
2736 case OP_ANYNL:
2737 for (i = 1; i <= min; i++)
2738 {
2739 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2740 GETCHARINC(c, eptr);
2741 switch(c)
2742 {
2743 default: RRETURN(MATCH_NOMATCH);
2744 case 0x000d:
2745 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2746 break;
2747 case 0x000a:
2748 case 0x000b:
2749 case 0x000c:
2750 case 0x0085:
2751 case 0x2028:
2752 case 0x2029:
2753 break;
2754 }
2755 }
2756 break;
2757
2758 case OP_NOT_DIGIT:
2759 for (i = 1; i <= min; i++)
2760 {
2761 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2762 GETCHARINC(c, eptr);
2763 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
2764 RRETURN(MATCH_NOMATCH);
2765 }
2766 break;
2767
2768 case OP_DIGIT:
2769 for (i = 1; i <= min; i++)
2770 {
2771 if (eptr >= md->end_subject ||
2772 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
2773 RRETURN(MATCH_NOMATCH);
2774 /* No need to skip more bytes - we know it's a 1-byte character */
2775 }
2776 break;
2777
2778 case OP_NOT_WHITESPACE:
2779 for (i = 1; i <= min; i++)
2780 {
2781 if (eptr >= md->end_subject ||
2782 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
2783 RRETURN(MATCH_NOMATCH);
2784 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2785 }
2786 break;
2787
2788 case OP_WHITESPACE:
2789 for (i = 1; i <= min; i++)
2790 {
2791 if (eptr >= md->end_subject ||
2792 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
2793 RRETURN(MATCH_NOMATCH);
2794 /* No need to skip more bytes - we know it's a 1-byte character */
2795 }
2796 break;
2797
2798 case OP_NOT_WORDCHAR:
2799 for (i = 1; i <= min; i++)
2800 {
2801 if (eptr >= md->end_subject ||
2802 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
2803 RRETURN(MATCH_NOMATCH);
2804 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2805 }
2806 break;
2807
2808 case OP_WORDCHAR:
2809 for (i = 1; i <= min; i++)
2810 {
2811 if (eptr >= md->end_subject ||
2812 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
2813 RRETURN(MATCH_NOMATCH);
2814 /* No need to skip more bytes - we know it's a 1-byte character */
2815 }
2816 break;
2817
2818 default:
2819 RRETURN(PCRE_ERROR_INTERNAL);
2820 } /* End switch(ctype) */
2821
2822 else
2823 #endif /* SUPPORT_UTF8 */
2824
2825 /* Code for the non-UTF-8 case for minimum matching of operators other
2826 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2827 number of bytes present, as this was tested above. */
2828
2829 switch(ctype)
2830 {
2831 case OP_ANY:
2832 if ((ims & PCRE_DOTALL) == 0)
2833 {
2834 for (i = 1; i <= min; i++)
2835 {
2836 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2837 eptr++;
2838 }
2839 }
2840 else eptr += min;
2841 break;
2842
2843 case OP_ANYBYTE:
2844 eptr += min;
2845 break;
2846
2847 /* Because of the CRLF case, we can't assume the minimum number of
2848 bytes are present in this case. */
2849
2850 case OP_ANYNL:
2851 for (i = 1; i <= min; i++)
2852 {
2853 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2854 switch(*eptr++)
2855 {
2856 default: RRETURN(MATCH_NOMATCH);
2857 case 0x000d:
2858 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2859 break;
2860 case 0x000a:
2861 case 0x000b:
2862 case 0x000c:
2863 case 0x0085:
2864 break;
2865 }
2866 }
2867 break;
2868
2869 case OP_NOT_DIGIT:
2870 for (i = 1; i <= min; i++)
2871 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
2872 break;
2873
2874 case OP_DIGIT:
2875 for (i = 1; i <= min; i++)
2876 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
2877 break;
2878
2879 case OP_NOT_WHITESPACE:
2880 for (i = 1; i <= min; i++)
2881 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
2882 break;
2883
2884 case OP_WHITESPACE:
2885 for (i = 1; i <= min; i++)
2886 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
2887 break;
2888
2889 case OP_NOT_WORDCHAR:
2890 for (i = 1; i <= min; i++)
2891 if ((md->ctypes[*eptr++] & ctype_word) != 0)
2892 RRETURN(MATCH_NOMATCH);
2893 break;
2894
2895 case OP_WORDCHAR:
2896 for (i = 1; i <= min; i++)
2897 if ((md->ctypes[*eptr++] & ctype_word) == 0)
2898 RRETURN(MATCH_NOMATCH);
2899 break;
2900
2901 default:
2902 RRETURN(PCRE_ERROR_INTERNAL);
2903 }
2904 }
2905
2906 /* If min = max, continue at the same level without recursing */
2907
2908 if (min == max) continue;
2909
2910 /* If minimizing, we have to test the rest of the pattern before each
2911 subsequent match. Again, separate the UTF-8 case for speed, and also
2912 separate the UCP cases. */
2913
2914 if (minimize)
2915 {
2916 #ifdef SUPPORT_UCP
2917 if (prop_type >= 0)
2918 {
2919 switch(prop_type)
2920 {
2921 case PT_ANY:
2922 for (fi = min;; fi++)
2923 {
2924 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2925 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2926 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2927 GETCHARINC(c, eptr);
2928 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2929 }
2930 /* Control never gets here */
2931
2932 case PT_LAMP:
2933 for (fi = min;; fi++)
2934 {
2935 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2936 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2937 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2938 GETCHARINC(c, eptr);
2939 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2940 if ((prop_chartype == ucp_Lu ||
2941 prop_chartype == ucp_Ll ||
2942 prop_chartype == ucp_Lt) == prop_fail_result)
2943 RRETURN(MATCH_NOMATCH);
2944 }
2945 /* Control never gets here */
2946
2947 case PT_GC:
2948 for (fi = min;; fi++)
2949 {
2950 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2951 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2952 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2953 GETCHARINC(c, eptr);
2954 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2955 if ((prop_category == prop_value) == prop_fail_result)
2956 RRETURN(MATCH_NOMATCH);
2957 }
2958 /* Control never gets here */
2959
2960 case PT_PC:
2961 for (fi = min;; fi++)
2962 {
2963 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2964 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2965 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2966 GETCHARINC(c, eptr);
2967 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2968 if ((prop_chartype == prop_value) == prop_fail_result)
2969 RRETURN(MATCH_NOMATCH);
2970 }
2971 /* Control never gets here */
2972
2973 case PT_SC:
2974 for (fi = min;; fi++)
2975 {
2976 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2977 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2978 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2979 GETCHARINC(c, eptr);
2980 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2981 if ((prop_script == prop_value) == prop_fail_result)
2982 RRETURN(MATCH_NOMATCH);
2983 }
2984 /* Control never gets here */
2985
2986 default:
2987 RRETURN(PCRE_ERROR_INTERNAL);
2988 }
2989 }
2990
2991 /* Match extended Unicode sequences. We will get here only if the
2992 support is in the binary; otherwise a compile-time error occurs. */
2993
2994 else if (ctype == OP_EXTUNI)
2995 {
2996 for (fi = min;; fi++)
2997 {
2998 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2999 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3000 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3001 GETCHARINCTEST(c, eptr);
3002 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3003 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3004 while (eptr < md->end_subject)
3005 {
3006 int len = 1;
3007 if (!utf8) c = *eptr; else
3008 {
3009 GETCHARLEN(c, eptr, len);
3010 }
3011 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3012 if (prop_category != ucp_M) break;
3013 eptr += len;
3014 }
3015 }
3016 }
3017
3018 else
3019 #endif /* SUPPORT_UCP */
3020
3021 #ifdef SUPPORT_UTF8
3022 /* UTF-8 mode */
3023 if (utf8)
3024 {
3025 for (fi = min;; fi++)
3026 {
3027 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3028 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3029 if (fi >= max || eptr >= md->end_subject ||
3030 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3031 IS_NEWLINE(eptr)))
3032 RRETURN(MATCH_NOMATCH);
3033
3034 GETCHARINC(c, eptr);
3035 switch(ctype)
3036 {
3037 case OP_ANY: /* This is the DOTALL case */
3038 break;
3039
3040 case OP_ANYBYTE:
3041 break;
3042
3043 case OP_ANYNL:
3044 switch(c)
3045 {
3046 default: RRETURN(MATCH_NOMATCH);
3047 case 0x000d:
3048 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3049 break;
3050 case 0x000a:
3051 case 0x000b:
3052 case 0x000c:
3053 case 0x0085:
3054 case 0x2028:
3055 case 0x2029:
3056 break;
3057 }
3058 break;
3059
3060 case OP_NOT_DIGIT:
3061 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3062 RRETURN(MATCH_NOMATCH);
3063 break;
3064
3065 case OP_DIGIT:
3066 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3067 RRETURN(MATCH_NOMATCH);
3068 break;
3069
3070 case OP_NOT_WHITESPACE:
3071 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3072 RRETURN(MATCH_NOMATCH);
3073 break;
3074
3075 case OP_WHITESPACE:
3076 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3077 RRETURN(MATCH_NOMATCH);
3078 break;
3079
3080 case OP_NOT_WORDCHAR:
3081 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3082 RRETURN(MATCH_NOMATCH);
3083 break;
3084
3085 case OP_WORDCHAR:
3086 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3087 RRETURN(MATCH_NOMATCH);
3088 break;
3089
3090 default:
3091 RRETURN(PCRE_ERROR_INTERNAL);
3092 }
3093 }
3094 }
3095 else
3096 #endif
3097 /* Not UTF-8 mode */
3098 {
3099 for (fi = min;; fi++)
3100 {
3101 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3102 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3103 if (fi >= max || eptr >= md->end_subject ||
3104 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3105 RRETURN(MATCH_NOMATCH);
3106
3107 c = *eptr++;
3108 switch(ctype)
3109 {
3110 case OP_ANY: /* This is the DOTALL case */
3111 break;
3112
3113 case OP_ANYBYTE:
3114 break;
3115
3116 case OP_ANYNL:
3117 switch(c)
3118 {
3119 default: RRETURN(MATCH_NOMATCH);
3120 case 0x000d:
3121 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3122 break;
3123 case 0x000a:
3124 case 0x000b:
3125 case 0x000c:
3126 case 0x0085:
3127 break;
3128 }
3129 break;
3130
3131 case OP_NOT_DIGIT:
3132 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3133 break;
3134
3135 case OP_DIGIT:
3136 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3137 break;
3138
3139 case OP_NOT_WHITESPACE:
3140 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3141 break;
3142
3143 case OP_WHITESPACE:
3144 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3145 break;
3146
3147 case OP_NOT_WORDCHAR:
3148 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3149 break;
3150
3151 case OP_WORDCHAR:
3152 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3153 break;
3154
3155 default:
3156 RRETURN(PCRE_ERROR_INTERNAL);
3157 }
3158 }
3159 }
3160 /* Control never gets here */
3161 }
3162
3163 /* If maximizing, it is worth using inline code for speed, doing the type
3164 test once at the start (i.e. keep it out of the loop). Again, keep the
3165 UTF-8 and UCP stuff separate. */
3166
3167 else
3168 {
3169 pp = eptr; /* Remember where we started */
3170
3171 #ifdef SUPPORT_UCP
3172 if (prop_type >= 0)
3173 {
3174 switch(prop_type)
3175 {
3176 case PT_ANY:
3177 for (i = min; i < max; i++)
3178 {
3179 int len = 1;
3180 if (eptr >= md->end_subject) break;
3181 GETCHARLEN(c, eptr, len);
3182 if (prop_fail_result) break;
3183 eptr+= len;
3184 }
3185 break;
3186
3187 case PT_LAMP:
3188 for (i = min; i < max; i++)
3189 {
3190 int len = 1;
3191 if (eptr >= md->end_subject) break;
3192 GETCHARLEN(c, eptr, len);
3193 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3194 if ((prop_chartype == ucp_Lu ||
3195 prop_chartype == ucp_Ll ||
3196 prop_chartype == ucp_Lt) == prop_fail_result)
3197 break;
3198 eptr+= len;
3199 }
3200 break;
3201
3202 case PT_GC:
3203 for (i = min; i < max; i++)
3204 {
3205 int len = 1;
3206 if (eptr >= md->end_subject) break;
3207 GETCHARLEN(c, eptr, len);
3208 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3209 if ((prop_category == prop_value) == prop_fail_result)
3210 break;
3211 eptr+= len;
3212 }
3213 break;
3214
3215 case PT_PC:
3216 for (i = min; i < max; i++)
3217 {
3218 int len = 1;
3219 if (eptr >= md->end_subject) break;
3220 GETCHARLEN(c, eptr, len);
3221 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3222 if ((prop_chartype == prop_value) == prop_fail_result)
3223 break;
3224 eptr+= len;
3225 }
3226 break;
3227
3228 case PT_SC:
3229 for (i = min; i < max; i++)
3230 {
3231 int len = 1;
3232 if (eptr >= md->end_subject) break;
3233 GETCHARLEN(c, eptr, len);
3234 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3235 if ((prop_script == prop_value) == prop_fail_result)
3236 break;
3237 eptr+= len;
3238 }
3239 break;
3240 }
3241
3242 /* eptr is now past the end of the maximum run */
3243
3244 if (possessive) continue;
3245 for(;;)
3246 {
3247 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3248 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249 if (eptr-- == pp) break; /* Stop if tried at original pos */
3250 BACKCHAR(eptr);
3251 }
3252 }
3253
3254 /* Match extended Unicode sequences. We will get here only if the
3255 support is in the binary; otherwise a compile-time error occurs. */
3256
3257 else if (ctype == OP_EXTUNI)
3258 {
3259 for (i = min; i < max; i++)
3260 {
3261 if (eptr >= md->end_subject) break;
3262 GETCHARINCTEST(c, eptr);
3263 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3264 if (prop_category == ucp_M) break;
3265 while (eptr < md->end_subject)
3266 {
3267 int len = 1;
3268 if (!utf8) c = *eptr; else
3269 {
3270 GETCHARLEN(c, eptr, len);
3271 }
3272 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3273 if (prop_category != ucp_M) break;
3274 eptr += len;
3275 }
3276 }
3277
3278 /* eptr is now past the end of the maximum run */
3279
3280 if (possessive) continue;
3281 for(;;)
3282 {
3283 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3284 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3285 if (eptr-- == pp) break; /* Stop if tried at original pos */
3286 for (;;) /* Move back over one extended */
3287 {
3288 int len = 1;
3289 BACKCHAR(eptr);
3290 if (!utf8) c = *eptr; else
3291 {
3292 GETCHARLEN(c, eptr, len);
3293 }
3294 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3295 if (prop_category != ucp_M) break;
3296 eptr--;
3297 }
3298 }
3299 }
3300
3301 else
3302 #endif /* SUPPORT_UCP */
3303
3304 #ifdef SUPPORT_UTF8
3305 /* UTF-8 mode */
3306
3307 if (utf8)
3308 {
3309 switch(ctype)
3310 {
3311 case OP_ANY:
3312
3313 /* Special code is required for UTF8, but when the maximum is
3314 unlimited we don't need it, so we repeat the non-UTF8 code. This is
3315 probably worth it, because .* is quite a common idiom. */
3316
3317 if (max < INT_MAX)
3318 {
3319 if ((ims & PCRE_DOTALL) == 0)
3320 {
3321 for (i = min; i < max; i++)
3322 {
3323 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3324 eptr++;
3325 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3326 }
3327 }
3328 else
3329 {
3330 for (i = min; i < max; i++)
3331 {
3332 if (eptr >= md->end_subject) break;
3333 eptr++;
3334 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3335 }
3336 }
3337 }
3338
3339 /* Handle unlimited UTF-8 repeat */
3340
3341 else
3342 {
3343 if ((ims & PCRE_DOTALL) == 0)
3344 {
3345 for (i = min; i < max; i++)
3346 {
3347 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3348 eptr++;
3349 }
3350 break;
3351 }
3352 else
3353 {
3354 c = max - min;
3355 if (c > (unsigned int)(md->end_subject - eptr))
3356 c = md->end_subject - eptr;
3357 eptr += c;
3358 }
3359 }
3360 break;
3361
3362 /* The byte case is the same as non-UTF8 */
3363
3364 case OP_ANYBYTE:
3365 c = max - min;
3366 if (c > (unsigned int)(md->end_subject - eptr))
3367 c = md->end_subject - eptr;
3368 eptr += c;
3369 break;
3370
3371 case OP_ANYNL:
3372 for (i = min; i < max; i++)
3373 {
3374 int len = 1;
3375 if (eptr >= md->end_subject) break;
3376 GETCHARLEN(c, eptr, len);
3377 if (c == 0x000d)
3378 {
3379 if (++eptr >= md->end_subject) break;
3380 if (*eptr == 0x000a) eptr++;
3381 }
3382 else
3383 {
3384 if (c != 0x000a && c != 0x000b && c != 0x000c &&
3385 c != 0x0085 && c != 0x2028 && c != 0x2029)
3386 break;
3387 eptr += len;
3388 }
3389 }
3390 break;
3391
3392 case OP_NOT_DIGIT:
3393 for (i = min; i < max; i++)
3394 {
3395 int len = 1;
3396 if (eptr >= md->end_subject) break;
3397 GETCHARLEN(c, eptr, len);
3398 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3399 eptr+= len;
3400 }
3401 break;
3402
3403 case OP_DIGIT:
3404 for (i = min; i < max; i++)
3405 {
3406 int len = 1;
3407 if (eptr >= md->end_subject) break;
3408 GETCHARLEN(c, eptr, len);
3409 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3410 eptr+= len;
3411 }
3412 break;
3413
3414 case OP_NOT_WHITESPACE:
3415 for (i = min; i < max; i++)
3416 {
3417 int len = 1;
3418 if (eptr >= md->end_subject) break;
3419 GETCHARLEN(c, eptr, len);
3420 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3421 eptr+= len;
3422 }
3423 break;
3424
3425 case OP_WHITESPACE:
3426 for (i = min; i < max; i++)
3427 {
3428 int len = 1;
3429 if (eptr >= md->end_subject) break;
3430 GETCHARLEN(c, eptr, len);
3431 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3432 eptr+= len;
3433 }
3434 break;
3435
3436 case OP_NOT_WORDCHAR:
3437 for (i = min; i < max; i++)
3438 {
3439 int len = 1;
3440 if (eptr >= md->end_subject) break;
3441 GETCHARLEN(c, eptr, len);
3442 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3443 eptr+= len;
3444 }
3445 break;
3446
3447 case OP_WORDCHAR:
3448 for (i = min; i < max; i++)
3449 {
3450 int len = 1;
3451 if (eptr >= md->end_subject) break;
3452 GETCHARLEN(c, eptr, len);
3453 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3454 eptr+= len;
3455 }
3456 break;
3457
3458 default:
3459 RRETURN(PCRE_ERROR_INTERNAL);
3460 }
3461
3462 /* eptr is now past the end of the maximum run */
3463
3464 if (possessive) continue;
3465 for(;;)
3466 {
3467 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3468 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3469 if (eptr-- == pp) break; /* Stop if tried at original pos */
3470 BACKCHAR(eptr);
3471 }
3472 }
3473 else
3474 #endif
3475
3476 /* Not UTF-8 mode */
3477 {
3478 switch(ctype)
3479 {
3480 case OP_ANY:
3481 if ((ims & PCRE_DOTALL) == 0)
3482 {
3483 for (i = min; i < max; i++)
3484 {
3485 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3486 eptr++;
3487 }
3488 break;
3489 }
3490 /* For DOTALL case, fall through and treat as \C */
3491
3492 case OP_ANYBYTE:
3493 c = max - min;
3494 if (c > (unsigned int)(md->end_subject - eptr))
3495 c = md->end_subject - eptr;
3496 eptr += c;
3497 break;
3498
3499 case OP_ANYNL:
3500 for (i = min; i < max; i++)
3501 {
3502 if (eptr >= md->end_subject) break;
3503 c = *eptr;
3504 if (c == 0x000d)
3505 {
3506 if (++eptr >= md->end_subject) break;
3507 if (*eptr == 0x000a) eptr++;
3508 }
3509 else
3510 {
3511 if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3512 break;
3513 eptr++;
3514 }
3515 }
3516 break;
3517
3518 case OP_NOT_DIGIT:
3519 for (i = min; i < max; i++)
3520 {
3521 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3522 break;
3523 eptr++;
3524 }
3525 break;
3526
3527 case OP_DIGIT:
3528 for (i = min; i < max; i++)
3529 {
3530 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3531 break;
3532 eptr++;
3533 }
3534 break;
3535
3536 case OP_NOT_WHITESPACE:
3537 for (i = min; i < max; i++)
3538 {
3539 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3540 break;
3541 eptr++;
3542 }
3543 break;
3544
3545 case OP_WHITESPACE:
3546 for (i = min; i < max; i++)
3547 {
3548 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3549 break;
3550 eptr++;
3551 }
3552 break;
3553
3554 case OP_NOT_WORDCHAR:
3555 for (i = min; i < max; i++)
3556 {
3557 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3558 break;
3559 eptr++;
3560 }
3561 break;
3562
3563 case OP_WORDCHAR:
3564 for (i = min; i < max; i++)
3565 {
3566 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3567 break;
3568 eptr++;
3569 }
3570 break;
3571
3572 default:
3573 RRETURN(PCRE_ERROR_INTERNAL);
3574 }
3575
3576 /* eptr is now past the end of the maximum run */
3577
3578 if (possessive) continue;
3579 while (eptr >= pp)
3580 {
3581 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3582 eptr--;
3583 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3584 }
3585 }
3586
3587 /* Get here if we can't make it match with any permitted repetitions */
3588
3589 RRETURN(MATCH_NOMATCH);
3590 }
3591 /* Control never gets here */
3592
3593 /* There's been some horrible disaster. Arrival here can only mean there is
3594 something seriously wrong in the code above or the OP_xxx definitions. */
3595
3596 default:
3597 DPRINTF(("Unknown opcode %d\n", *ecode));
3598 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3599 }
3600
3601 /* Do not stick any code in here without much thought; it is assumed
3602 that "continue" in the code above comes out to here to repeat the main
3603 loop. */
3604
3605 } /* End of main loop */
3606 /* Control never reaches here */
3607 }
3608
3609
3610 /***************************************************************************
3611 ****************************************************************************
3612 RECURSION IN THE match() FUNCTION
3613
3614 Undefine all the macros that were defined above to handle this. */
3615
3616 #ifdef NO_RECURSE
3617 #undef eptr
3618 #undef ecode
3619 #undef offset_top
3620 #undef ims
3621 #undef eptrb
3622 #undef flags
3623
3624 #undef callpat
3625 #undef charptr
3626 #undef data
3627 #undef next
3628 #undef pp
3629 #undef prev
3630 #undef saved_eptr
3631
3632 #undef new_recursive
3633
3634 #undef cur_is_word
3635 #undef condition
3636 #undef prev_is_word
3637
3638 #undef original_ims
3639
3640 #undef ctype
3641 #undef length
3642 #undef max
3643 #undef min
3644 #undef number
3645 #undef offset
3646 #undef op
3647 #undef save_capture_last
3648 #undef save_offset1
3649 #undef save_offset2
3650 #undef save_offset3
3651 #undef stacksave
3652
3653 #undef newptrb
3654
3655 #endif
3656
3657 /* These two are defined as macros in both cases */
3658
3659 #undef fc
3660 #undef fi
3661
3662 /***************************************************************************
3663 ***************************************************************************/
3664
3665
3666
3667 /*************************************************
3668 * Execute a Regular Expression *
3669 *************************************************/
3670
3671 /* This function applies a compiled re to a subject string and picks out
3672 portions of the string if it matches. Two elements in the vector are set for
3673 each substring: the offsets to the start and end of the substring.
3674
3675 Arguments:
3676 argument_re points to the compiled expression
3677 extra_data points to extra data or is NULL
3678 subject points to the subject string
3679 length length of subject string (may contain binary zeros)
3680 start_offset where to start in the subject string
3681 options option bits
3682 offsets points to a vector of ints to be filled in with offsets
3683 offsetcount the number of elements in the vector
3684
3685 Returns: > 0 => success; value is the number of elements filled in
3686 = 0 => success, but offsets is not big enough
3687 -1 => failed to match
3688 < -1 => some kind of unexpected problem
3689 */
3690
3691 PCRE_DATA_SCOPE int
3692 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3693 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3694 int offsetcount)
3695 {
3696 int rc, resetcount, ocount;
3697 int first_byte = -1;
3698 int req_byte = -1;
3699 int req_byte2 = -1;
3700 int newline;
3701 unsigned long int ims;
3702 BOOL using_temporary_offsets = FALSE;
3703 BOOL anchored;
3704 BOOL startline;
3705 BOOL firstline;
3706 BOOL first_byte_caseless = FALSE;
3707 BOOL req_byte_caseless = FALSE;
3708 BOOL utf8;
3709 match_data match_block;
3710 match_data *md = &match_block;
3711 const uschar *tables;
3712 const uschar *start_bits = NULL;
3713 USPTR start_match = (USPTR)subject + start_offset;
3714 USPTR end_subject;
3715 USPTR req_byte_ptr = start_match - 1;
3716 eptrblock eptrchain[EPTR_WORK_SIZE];
3717
3718 pcre_study_data internal_study;
3719 const pcre_study_data *study;
3720
3721 real_pcre internal_re;
3722 const real_pcre *external_re = (const real_pcre *)argument_re;
3723 const real_pcre *re = external_re;
3724
3725 /* Plausibility checks */
3726
3727 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3728 if (re == NULL || subject == NULL ||
3729 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3730 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3731
3732 /* Fish out the optional data from the extra_data structure, first setting
3733 the default values. */
3734
3735 study = NULL;
3736 md->match_limit = MATCH_LIMIT;
3737 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3738 md->callout_data = NULL;
3739
3740 /* The table pointer is always in native byte order. */
3741
3742 tables = external_re->tables;
3743
3744 if (extra_data != NULL)
3745 {
3746 register unsigned int flags = extra_data->flags;
3747 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3748 study = (const pcre_study_data *)extra_data->study_data;
3749 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3750 md->match_limit = extra_data->match_limit;
3751 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3752 md->match_limit_recursion = extra_data->match_limit_recursion;
3753 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3754 md->callout_data = extra_data->callout_data;
3755 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3756 }
3757
3758 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3759 is a feature that makes it possible to save compiled regex and re-use them
3760 in other programs later. */
3761
3762 if (tables == NULL) tables = _pcre_default_tables;
3763
3764 /* Check that the first field in the block is the magic number. If it is not,
3765 test for a regex that was compiled on a host of opposite endianness. If this is
3766 the case, flipped values are put in internal_re and internal_study if there was
3767 study data too. */
3768
3769 if (re->magic_number != MAGIC_NUMBER)
3770 {
3771 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
3772 if (re == NULL) return PCRE_ERROR_BADMAGIC;
3773 if (study != NULL) study = &internal_study;
3774 }
3775
3776 /* Set up other data */
3777
3778 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3779 startline = (re->options & PCRE_STARTLINE) != 0;
3780 firstline = (re->options & PCRE_FIRSTLINE) != 0;
3781
3782 /* The code starts after the real_pcre block and the capture name table. */
3783
3784 md->start_code = (const uschar *)external_re + re->name_table_offset +
3785 re->name_count * re->name_entry_size;
3786
3787 md->start_subject = (USPTR)subject;
3788 md->start_offset = start_offset;
3789 md->end_subject = md->start_subject + length;
3790 end_subject = md->end_subject;
3791
3792 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3793 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3794
3795 md->notbol = (options & PCRE_NOTBOL) != 0;
3796 md->noteol = (options & PCRE_NOTEOL) != 0;
3797 md->notempty = (options & PCRE_NOTEMPTY) != 0;
3798 md->partial = (options & PCRE_PARTIAL) != 0;
3799 md->hitend = FALSE;
3800
3801 md->recursive = NULL; /* No recursion at top level */
3802 md->eptrchain = eptrchain; /* Make workspace generally available */
3803
3804 md->lcc = tables + lcc_offset;
3805 md->ctypes = tables + ctypes_offset;
3806
3807 /* Handle different types of newline. The two bits give four cases. If nothing
3808 is set at run time, whatever was used at compile time applies. */
3809
3810 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3811 PCRE_NEWLINE_BITS)
3812 {
3813 case 0: newline = NEWLINE; break; /* Compile-time default */
3814 case PCRE_NEWLINE_CR: newline = '\r'; break;
3815 case PCRE_NEWLINE_LF: newline = '\n'; break;
3816 case PCRE_NEWLINE_CR+
3817 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3818 case PCRE_NEWLINE_ANY: newline = -1; break;
3819 default: return PCRE_ERROR_BADNEWLINE;
3820 }
3821
3822 if (newline < 0)
3823 {
3824 md->nltype = NLTYPE_ANY;
3825 }
3826 else
3827 {
3828 md->nltype = NLTYPE_FIXED;
3829 if (newline > 255)
3830 {
3831 md->nllen = 2;
3832 md->nl[0] = (newline >> 8) & 255;
3833 md->nl[1] = newline & 255;
3834 }
3835 else
3836 {
3837 md->nllen = 1;
3838 md->nl[0] = newline;
3839 }
3840 }
3841
3842 /* Partial matching is supported only for a restricted set of regexes at the
3843 moment. */
3844
3845 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3846 return PCRE_ERROR_BADPARTIAL;
3847
3848 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3849 back the character offset. */
3850
3851 #ifdef SUPPORT_UTF8
3852 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3853 {
3854 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3855 return PCRE_ERROR_BADUTF8;
3856 if (start_offset > 0 && start_offset < length)
3857 {
3858 int tb = ((uschar *)subject)[start_offset];
3859 if (tb > 127)
3860 {
3861 tb &= 0xc0;
3862 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
3863 }
3864 }
3865 }
3866 #endif
3867
3868 /* The ims options can vary during the matching as a result of the presence
3869 of (?ims) items in the pattern. They are kept in a local variable so that
3870 restoring at the exit of a group is easy. */
3871
3872 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
3873
3874 /* If the expression has got more back references than the offsets supplied can
3875 hold, we get a temporary chunk of working store to use during the matching.
3876 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3877 of 3. */
3878
3879 ocount = offsetcount - (offsetcount % 3);
3880
3881 if (re->top_backref > 0 && re->top_backref >= ocount/3)
3882 {
3883 ocount = re->top_backref * 3 + 3;
3884 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3885 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3886 using_temporary_offsets = TRUE;
3887 DPRINTF(("Got memory to hold back references\n"));
3888 }
3889 else md->offset_vector = offsets;
3890
3891 md->offset_end = ocount;
3892 md->offset_max = (2*ocount)/3;
3893 md->offset_overflow = FALSE;
3894 md->capture_last = -1;
3895
3896 /* Compute the minimum number of offsets that we need to reset each time. Doing
3897 this makes a huge difference to execution time when there aren't many brackets
3898 in the pattern. */
3899
3900 resetcount = 2 + re->top_bracket * 2;
3901 if (resetcount > offsetcount) resetcount = ocount;
3902
3903 /* Reset the working variable associated with each extraction. These should
3904 never be used unless previously set, but they get saved and restored, and so we
3905 initialize them to avoid reading uninitialized locations. */
3906
3907 if (md->offset_vector != NULL)
3908 {
3909 register int *iptr = md->offset_vector + ocount;
3910 register int *iend = iptr - resetcount/2 + 1;
3911 while (--iptr >= iend) *iptr = -1;
3912 }
3913
3914 /* Set up the first character to match, if available. The first_byte value is
3915 never set for an anchored regular expression, but the anchoring may be forced
3916 at run time, so we have to test for anchoring. The first char may be unset for
3917 an unanchored pattern, of course. If there's no first char and the pattern was
3918 studied, there may be a bitmap of possible first characters. */
3919
3920 if (!anchored)
3921 {
3922 if ((re->options & PCRE_FIRSTSET) != 0)
3923 {
3924 first_byte = re->first_byte & 255;
3925 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3926 first_byte = md->lcc[first_byte];
3927 }
3928 else
3929 if (!startline && study != NULL &&
3930 (study->options & PCRE_STUDY_MAPPED) != 0)
3931 start_bits = study->start_bits;
3932 }
3933
3934 /* For anchored or unanchored matches, there may be a "last known required
3935 character" set. */
3936
3937 if ((re->options & PCRE_REQCHSET) != 0)
3938 {
3939 req_byte = re->req_byte & 255;
3940 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
3941 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
3942 }
3943
3944
3945 /* ==========================================================================*/
3946
3947 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3948 the loop runs just once. */
3949
3950 for(;;)
3951 {
3952 USPTR save_end_subject = end_subject;
3953
3954 /* Reset the maximum number of extractions we might see. */
3955
3956 if (md->offset_vector != NULL)
3957 {
3958 register int *iptr = md->offset_vector;
3959 register int *iend = iptr + resetcount;
3960 while (iptr < iend) *iptr++ = -1;
3961 }
3962
3963 /* Advance to a unique first char if possible. If firstline is TRUE, the
3964 start of the match is constrained to the first line of a multiline string.
3965 That is, the match must be before or at the first newline. Implement this by
3966 temporarily adjusting end_subject so that we stop scanning at a newline. If
3967 the match fails at the newline, later code breaks this loop. */
3968
3969 if (firstline)
3970 {
3971 USPTR t = start_match;
3972 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3973 end_subject = t;
3974 }
3975
3976 /* Now test for a unique first byte */
3977
3978 if (first_byte >= 0)
3979 {
3980 if (first_byte_caseless)
3981 while (start_match < end_subject &&
3982 md->lcc[*start_match] != first_byte)
3983 start_match++;
3984 else
3985 while (start_match < end_subject && *start_match != first_byte)
3986 start_match++;
3987 }
3988
3989 /* Or to just after a linebreak for a multiline match if possible */
3990
3991 else if (startline)
3992 {
3993 if (start_match > md->start_subject + start_offset)
3994 {
3995 while (start_match <= end_subject && !WAS_NEWLINE(start_match))
3996 start_match++;
3997 }
3998 }
3999
4000 /* Or to a non-unique first char after study */
4001
4002 else if (start_bits != NULL)
4003 {
4004 while (start_match < end_subject)
4005 {
4006 register unsigned int c = *start_match;
4007 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4008 }
4009 }
4010
4011 /* Restore fudged end_subject */
4012
4013 end_subject = save_end_subject;
4014
4015 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4016 printf(">>>> Match against: ");
4017 pchars(start_match, end_subject - start_match, TRUE, md);
4018 printf("\n");
4019 #endif
4020
4021 /* If req_byte is set, we know that that character must appear in the subject
4022 for the match to succeed. If the first character is set, req_byte must be
4023 later in the subject; otherwise the test starts at the match point. This
4024 optimization can save a huge amount of backtracking in patterns with nested
4025 unlimited repeats that aren't going to match. Writing separate code for
4026 cased/caseless versions makes it go faster, as does using an autoincrement
4027 and backing off on a match.
4028
4029 HOWEVER: when the subject string is very, very long, searching to its end can
4030 take a long time, and give bad performance on quite ordinary patterns. This
4031 showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4032 string... so we don't do this when the string is sufficiently long.
4033
4034 ALSO: this processing is disabled when partial matching is requested.
4035 */
4036
4037 if (req_byte >= 0 &&
4038 end_subject - start_match < REQ_BYTE_MAX &&
4039 !md->partial)
4040 {
4041 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4042
4043 /* We don't need to repeat the search if we haven't yet reached the
4044 place we found it at last time. */
4045
4046 if (p > req_byte_ptr)
4047 {
4048 if (req_byte_caseless)
4049 {
4050 while (p < end_subject)
4051 {
4052 register int pp = *p++;
4053 if (pp == req_byte || pp == req_byte2) { p--; break; }
4054 }
4055 }
4056 else
4057 {
4058 while (p < end_subject)
4059 {
4060 if (*p++ == req_byte) { p--; break; }
4061 }
4062 }
4063
4064 /* If we can't find the required character, break the matching loop,
4065 forcing a match failure. */
4066
4067 if (p >= end_subject)
4068 {
4069 rc = MATCH_NOMATCH;
4070 break;
4071 }
4072
4073 /* If we have found the required character, save the point where we
4074 found it, so that we don't search again next time round the loop if
4075 the start hasn't passed this character yet. */
4076
4077 req_byte_ptr = p;
4078 }
4079 }
4080
4081 /* OK, we can now run the match. */
4082
4083 md->start_match = start_match;
4084 md->match_call_count = 0;
4085 md->eptrn = 0; /* Next free eptrchain slot */
4086 rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4087
4088 /* Any return other than MATCH_NOMATCH breaks the loop. */
4089
4090 if (rc != MATCH_NOMATCH) break;
4091
4092 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4093 newline in the subject (though it may continue over the newline). Therefore,
4094 if we have just failed to match, starting at a newline, do not continue. */
4095
4096 if (firstline && IS_NEWLINE(start_match)) break;
4097
4098 /* Advance the match position by one character. */
4099
4100 start_match++;
4101 #ifdef SUPPORT_UTF8
4102 if (utf8)
4103 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4104 start_match++;
4105 #endif
4106
4107 /* Break the loop if the pattern is anchored or if we have passed the end of
4108 the subject. */
4109
4110 if (anchored || start_match > end_subject) break;
4111
4112 /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4113 are now at a LF, advance the match position by one more character. */
4114
4115 if (start_match[-1] == '\r' &&
4116 (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4117 start_match < end_subject &&
4118 *start_match == '\n')
4119 start_match++;
4120
4121 } /* End of for(;;) "bumpalong" loop */
4122
4123 /* ==========================================================================*/
4124
4125 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4126 conditions is true:
4127
4128 (1) The pattern is anchored;
4129
4130 (2) We are past the end of the subject;
4131
4132 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4133 this option requests that a match occur at or before the first newline in
4134 the subject.
4135
4136 When we have a match and the offset vector is big enough to deal with any
4137 backreferences, captured substring offsets will already be set up. In the case
4138 where we had to get some local store to hold offsets for backreference
4139 processing, copy those that we can. In this case there need not be overflow if
4140 certain parts of the pattern were not used, even though there are more
4141 capturing parentheses than vector slots. */
4142
4143 if (rc == MATCH_MATCH)
4144 {
4145 if (using_temporary_offsets)
4146 {
4147 if (offsetcount >= 4)
4148 {
4149 memcpy(offsets + 2, md->offset_vector + 2,
4150 (offsetcount - 2) * sizeof(int));
4151 DPRINTF(("Copied offsets from temporary memory\n"));
4152 }
4153 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4154 DPRINTF(("Freeing temporary memory\n"));
4155 (pcre_free)(md->offset_vector);
4156 }
4157
4158 /* Set the return code to the number of captured strings, or 0 if there are
4159 too many to fit into the vector. */
4160
4161 rc = md->offset_overflow? 0 : md->end_offset_top/2;
4162
4163 /* If there is space, set up the whole thing as substring 0. */
4164
4165 if (offsetcount < 2) rc = 0; else
4166 {
4167 offsets[0] = start_match - md->start_subject;
4168 offsets[1] = md->end_match_ptr - md->start_subject;
4169 }
4170
4171 DPRINTF((">>>> returning %d\n", rc));
4172 return rc;
4173 }
4174
4175 /* Control gets here if there has been an error, or if the overall match
4176 attempt has failed at all permitted starting positions. */
4177
4178 if (using_temporary_offsets)
4179 {
4180 DPRINTF(("Freeing temporary memory\n"));
4181 (pcre_free)(md->offset_vector);
4182 }
4183
4184 if (rc != MATCH_NOMATCH)
4185 {
4186 DPRINTF((">>>> error: returning %d\n", rc));
4187 return rc;
4188 }
4189 else if (md->partial && md->hitend)
4190 {
4191 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4192 return PCRE_ERROR_PARTIAL;
4193 }
4194 else
4195 {
4196 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4197 return PCRE_ERROR_NOMATCH;
4198 }
4199 }
4200
4201 /* End of pcre_exec.c */