Updated embedded PCRE to version 7.4 to avoid 2 CVE issues:-
[exim.git] / src / src / pcre / pcretest.c
1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.8 2007/11/12 13:02:20 nm4 Exp $ */
2
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
6
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44
45 #include <ctype.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <time.h>
50 #include <locale.h>
51 #include <errno.h>
52
53
54 /* A number of things vary for Windows builds. Originally, pcretest opened its
55 input and output without "b"; then I was told that "b" was needed in some
56 environments, so it was added for release 5.0 to both the input and output. (It
57 makes no difference on Unix-like systems.) Later I was told that it is wrong
58 for the input on Windows. I've now abstracted the modes into two macros that
59 are set here, to make it easier to fiddle with them, and removed "b" from the
60 input mode under Windows. */
61
62 #if defined(_WIN32) || defined(WIN32)
63 #include <io.h> /* For _setmode() */
64 #include <fcntl.h> /* For _O_BINARY */
65 #define INPUT_MODE "r"
66 #define OUTPUT_MODE "wb"
67
68 #else
69 #include <sys/time.h> /* These two includes are needed */
70 #include <sys/resource.h> /* for setrlimit(). */
71 #define INPUT_MODE "rb"
72 #define OUTPUT_MODE "wb"
73 #endif
74
75
76 /* We have to include pcre_internal.h because we need the internal info for
77 displaying the results of pcre_study() and we also need to know about the
78 internal macros, structures, and other internal data values; pcretest has
79 "inside information" compared to a program that strictly follows the PCRE API.
80
81 Although pcre_internal.h does itself include pcre.h, we explicitly include it
82 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
83 appropriately for an application, not for building PCRE. */
84
85 #include "pcre.h"
86 #include "pcre_internal.h"
87
88 /* We need access to the data tables that PCRE uses. So as not to have to keep
89 two copies, we include the source file here, changing the names of the external
90 symbols to prevent clashes. */
91
92 #define _pcre_utf8_table1 utf8_table1
93 #define _pcre_utf8_table1_size utf8_table1_size
94 #define _pcre_utf8_table2 utf8_table2
95 #define _pcre_utf8_table3 utf8_table3
96 #define _pcre_utf8_table4 utf8_table4
97 #define _pcre_utt utt
98 #define _pcre_utt_size utt_size
99 #define _pcre_utt_names utt_names
100 #define _pcre_OP_lengths OP_lengths
101
102 #include "pcre_tables.c"
103
104 /* We also need the pcre_printint() function for printing out compiled
105 patterns. This function is in a separate file so that it can be included in
106 pcre_compile.c when that module is compiled with debugging enabled.
107
108 The definition of the macro PRINTABLE, which determines whether to print an
109 output character as-is or as a hex value when showing compiled patterns, is
110 contained in this file. We use it here also, in cases when the locale has not
111 been explicitly changed, so as to get consistent output from systems that
112 differ in their output from isprint() even in the "C" locale. */
113
114 #include "pcre_printint.src"
115
116 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
117
118
119 /* It is possible to compile this test program without including support for
120 testing the POSIX interface, though this is not available via the standard
121 Makefile. */
122
123 #if !defined NOPOSIX
124 #include "pcreposix.h"
125 #endif
126
127 /* It is also possible, for the benefit of the version currently imported into
128 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
129 interface to the DFA matcher (NODFA), and without the doublecheck of the old
130 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
131 UTF8 support if PCRE is built without it. */
132
133 #ifndef SUPPORT_UTF8
134 #ifndef NOUTF8
135 #define NOUTF8
136 #endif
137 #endif
138
139
140 /* Other parameters */
141
142 #ifndef CLOCKS_PER_SEC
143 #ifdef CLK_TCK
144 #define CLOCKS_PER_SEC CLK_TCK
145 #else
146 #define CLOCKS_PER_SEC 100
147 #endif
148 #endif
149
150 /* This is the default loop count for timing. */
151
152 #define LOOPREPEAT 500000
153
154 /* Static variables */
155
156 static FILE *outfile;
157 static int log_store = 0;
158 static int callout_count;
159 static int callout_extra;
160 static int callout_fail_count;
161 static int callout_fail_id;
162 static int debug_lengths;
163 static int first_callout;
164 static int locale_set = 0;
165 static int show_malloc;
166 static int use_utf8;
167 static size_t gotten_store;
168
169 /* The buffers grow automatically if very long input lines are encountered. */
170
171 static int buffer_size = 50000;
172 static uschar *buffer = NULL;
173 static uschar *dbuffer = NULL;
174 static uschar *pbuffer = NULL;
175
176
177
178 /*************************************************
179 * Read or extend an input line *
180 *************************************************/
181
182 /* Input lines are read into buffer, but both patterns and data lines can be
183 continued over multiple input lines. In addition, if the buffer fills up, we
184 want to automatically expand it so as to be able to handle extremely large
185 lines that are needed for certain stress tests. When the input buffer is
186 expanded, the other two buffers must also be expanded likewise, and the
187 contents of pbuffer, which are a copy of the input for callouts, must be
188 preserved (for when expansion happens for a data line). This is not the most
189 optimal way of handling this, but hey, this is just a test program!
190
191 Arguments:
192 f the file to read
193 start where in buffer to start (this *must* be within buffer)
194
195 Returns: pointer to the start of new data
196 could be a copy of start, or could be moved
197 NULL if no data read and EOF reached
198 */
199
200 static uschar *
201 extend_inputline(FILE *f, uschar *start)
202 {
203 uschar *here = start;
204
205 for (;;)
206 {
207 int rlen = buffer_size - (here - buffer);
208
209 if (rlen > 1000)
210 {
211 int dlen;
212 if (fgets((char *)here, rlen, f) == NULL)
213 return (here == start)? NULL : start;
214 dlen = (int)strlen((char *)here);
215 if (dlen > 0 && here[dlen - 1] == '\n') return start;
216 here += dlen;
217 }
218
219 else
220 {
221 int new_buffer_size = 2*buffer_size;
222 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
223 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
224 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
225
226 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
227 {
228 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
229 exit(1);
230 }
231
232 memcpy(new_buffer, buffer, buffer_size);
233 memcpy(new_pbuffer, pbuffer, buffer_size);
234
235 buffer_size = new_buffer_size;
236
237 start = new_buffer + (start - buffer);
238 here = new_buffer + (here - buffer);
239
240 free(buffer);
241 free(dbuffer);
242 free(pbuffer);
243
244 buffer = new_buffer;
245 dbuffer = new_dbuffer;
246 pbuffer = new_pbuffer;
247 }
248 }
249
250 return NULL; /* Control never gets here */
251 }
252
253
254
255
256
257
258
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, the job is done by hand. It is only used
for unpicking arguments, so it is kept simple: leading white space is skipped,
then a run of decimal digits is accumulated. No sign is accepted and no
overflow check is made.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer; it is set to the first character
               that is not part of the number (if there are no digits, the
               first character after any leading white space)

Returns:     the value as an int (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  unsigned char *cursor = str;
  int total = 0;

  while (*cursor != 0 && isspace(*cursor))
    cursor++;

  for (; isdigit(*cursor); cursor++)
    total = total * 10 + (int)(*cursor - '0');

  *endptr = cursor;
  return total;
}
283
284
285
286
287 /*************************************************
288 * Convert UTF-8 string to value *
289 *************************************************/
290
291 /* This function takes one or more bytes that represents a UTF-8 character,
292 and returns the value of the character.
293
294 Argument:
295 utf8bytes a pointer to the byte vector
296 vptr a pointer to an int to receive the value
297
298 Returns: > 0 => the number of bytes consumed
299 -6 to 0 => malformed UTF-8 character at offset = (-return)
300 */
301
302 #if !defined NOUTF8
303
304 static int
305 utf82ord(unsigned char *utf8bytes, int *vptr)
306 {
307 int c = *utf8bytes++;
308 int d = c;
309 int i, j, s;
310
311 for (i = -1; i < 6; i++) /* i is number of additional bytes */
312 {
313 if ((d & 0x80) == 0) break;
314 d <<= 1;
315 }
316
317 if (i == -1) { *vptr = c; return 1; } /* ascii character */
318 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
319
320 /* i now has a value in the range 1-5 */
321
322 s = 6*i;
323 d = (c & utf8_table3[i]) << s;
324
325 for (j = 0; j < i; j++)
326 {
327 c = *utf8bytes++;
328 if ((c & 0xc0) != 0x80) return -(j+1);
329 s -= 6;
330 d |= (c & 0x3f) << s;
331 }
332
333 /* Check that encoding was the correct unique one */
334
335 for (j = 0; j < utf8_table1_size; j++)
336 if (d <= utf8_table1[j]) break;
337 if (j != i) return -(i+1);
338
339 /* Valid value */
340
341 *vptr = d;
342 return i+1;
343 }
344
345 #endif
346
347
348
349 /*************************************************
350 * Convert character value to UTF-8 *
351 *************************************************/
352
353 /* This function takes an integer value in the range 0 - 0x7fffffff
354 and encodes it as a UTF-8 character in 0 to 6 bytes.
355
356 Arguments:
357 cvalue the character value
358 utf8bytes pointer to buffer for result - at least 6 bytes long
359
360 Returns: number of characters placed in the buffer
361 */
362
363 #if !defined NOUTF8
364
365 static int
366 ord2utf8(int cvalue, uschar *utf8bytes)
367 {
368 register int i, j;
369 for (i = 0; i < utf8_table1_size; i++)
370 if (cvalue <= utf8_table1[i]) break;
371 utf8bytes += i;
372 for (j = i; j > 0; j--)
373 {
374 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
375 cvalue >>= 6;
376 }
377 *utf8bytes = utf8_table2[i] | cvalue;
378 return i + 1;
379 }
380
381 #endif
382
383
384
385 /*************************************************
386 * Print character string *
387 *************************************************/
388
389 /* Character string printing function. Must handle UTF-8 strings in utf8
390 mode. Yields number of characters printed. If handed a NULL file, just counts
391 chars without printing. */
392
393 static int pchars(unsigned char *p, int length, FILE *f)
394 {
395 int c = 0;
396 int yield = 0;
397
398 while (length-- > 0)
399 {
400 #if !defined NOUTF8
401 if (use_utf8)
402 {
403 int rc = utf82ord(p, &c);
404
405 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
406 {
407 length -= rc - 1;
408 p += rc;
409 if (PRINTHEX(c))
410 {
411 if (f != NULL) fprintf(f, "%c", c);
412 yield++;
413 }
414 else
415 {
416 int n = 4;
417 if (f != NULL) fprintf(f, "\\x{%02x}", c);
418 yield += (n <= 0x000000ff)? 2 :
419 (n <= 0x00000fff)? 3 :
420 (n <= 0x0000ffff)? 4 :
421 (n <= 0x000fffff)? 5 : 6;
422 }
423 continue;
424 }
425 }
426 #endif
427
428 /* Not UTF-8, or malformed UTF-8 */
429
430 c = *p++;
431 if (PRINTHEX(c))
432 {
433 if (f != NULL) fprintf(f, "%c", c);
434 yield++;
435 }
436 else
437 {
438 if (f != NULL) fprintf(f, "\\x%02x", c);
439 yield += 4;
440 }
441 }
442
443 return yield;
444 }
445
446
447
448 /*************************************************
449 * Callout function *
450 *************************************************/
451
452 /* Called from PCRE as a result of the (?C) item. We print out where we are in
453 the match. Yield zero unless more callouts than the fail count, or the callout
454 data is not zero. */
455
456 static int callout(pcre_callout_block *cb)
457 {
458 FILE *f = (first_callout | callout_extra)? outfile : NULL;
459 int i, pre_start, post_start, subject_length;
460
461 if (callout_extra)
462 {
463 fprintf(f, "Callout %d: last capture = %d\n",
464 cb->callout_number, cb->capture_last);
465
466 for (i = 0; i < cb->capture_top * 2; i += 2)
467 {
468 if (cb->offset_vector[i] < 0)
469 fprintf(f, "%2d: <unset>\n", i/2);
470 else
471 {
472 fprintf(f, "%2d: ", i/2);
473 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
474 cb->offset_vector[i+1] - cb->offset_vector[i], f);
475 fprintf(f, "\n");
476 }
477 }
478 }
479
480 /* Re-print the subject in canonical form, the first time or if giving full
481 datails. On subsequent calls in the same match, we use pchars just to find the
482 printed lengths of the substrings. */
483
484 if (f != NULL) fprintf(f, "--->");
485
486 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
487 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
488 cb->current_position - cb->start_match, f);
489
490 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
491
492 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
493 cb->subject_length - cb->current_position, f);
494
495 if (f != NULL) fprintf(f, "\n");
496
497 /* Always print appropriate indicators, with callout number if not already
498 shown. For automatic callouts, show the pattern offset. */
499
500 if (cb->callout_number == 255)
501 {
502 fprintf(outfile, "%+3d ", cb->pattern_position);
503 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
504 }
505 else
506 {
507 if (callout_extra) fprintf(outfile, " ");
508 else fprintf(outfile, "%3d ", cb->callout_number);
509 }
510
511 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
512 fprintf(outfile, "^");
513
514 if (post_start > 0)
515 {
516 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
517 fprintf(outfile, "^");
518 }
519
520 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
521 fprintf(outfile, " ");
522
523 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
524 pbuffer + cb->pattern_position);
525
526 fprintf(outfile, "\n");
527 first_callout = 0;
528
529 if (cb->callout_data != NULL)
530 {
531 int callout_data = *((int *)(cb->callout_data));
532 if (callout_data != 0)
533 {
534 fprintf(outfile, "Callout data = %d\n", callout_data);
535 return callout_data;
536 }
537 }
538
539 return (cb->callout_number != callout_fail_id)? 0 :
540 (++callout_count >= callout_fail_count)? 1 : 0;
541 }
542
543
544 /*************************************************
545 * Local malloc functions *
546 *************************************************/
547
548 /* Alternative malloc function, to test functionality and show the size of the
549 compiled re. */
550
551 static void *new_malloc(size_t size)
552 {
553 void *block = malloc(size);
554 gotten_store = size;
555 if (show_malloc)
556 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
557 return block;
558 }
559
560 static void new_free(void *block)
561 {
562 if (show_malloc)
563 fprintf(outfile, "free %p\n", block);
564 free(block);
565 }
566
567
568 /* For recursion malloc/free, to test stacking calls */
569
570 static void *stack_malloc(size_t size)
571 {
572 void *block = malloc(size);
573 if (show_malloc)
574 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
575 return block;
576 }
577
578 static void stack_free(void *block)
579 {
580 if (show_malloc)
581 fprintf(outfile, "stack_free %p\n", block);
582 free(block);
583 }
584
585
586 /*************************************************
587 * Call pcre_fullinfo() *
588 *************************************************/
589
590 /* Get one piece of information from the pcre_fullinfo() function */
591
592 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
593 {
594 int rc;
595 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
596 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
597 }
598
599
600
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverses the order of the bytes in a 2-byte or a 4-byte quantity.

Arguments:
  value      the value whose bytes are to be reversed
  n          2 to swap the two low-order bytes; any other value causes the
               four low-order bytes to be reversed

Returns:     the flipped value; bits above those taken part in the flip
               are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int b0 = value & 0xff;
  unsigned long int b1 = (value >> 8) & 0xff;
  unsigned long int b2, b3;

  if (n == 2)
    return (b0 << 8) | b1;

  b2 = (value >> 16) & 0xff;
  b3 = (value >> 24) & 0xff;

  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
614
615
616
617
618 /*************************************************
619 * Check match or recursion limit *
620 *************************************************/
621
622 static int
623 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
624 int start_offset, int options, int *use_offsets, int use_size_offsets,
625 int flag, unsigned long int *limit, int errnumber, const char *msg)
626 {
627 int count;
628 int min = 0;
629 int mid = 64;
630 int max = -1;
631
632 extra->flags |= flag;
633
634 for (;;)
635 {
636 *limit = mid;
637
638 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
639 use_offsets, use_size_offsets);
640
641 if (count == errnumber)
642 {
643 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
644 min = mid;
645 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
646 }
647
648 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
649 count == PCRE_ERROR_PARTIAL)
650 {
651 if (mid == min + 1)
652 {
653 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
654 break;
655 }
656 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
657 max = mid;
658 mid = (min + mid)/2;
659 }
660 else break; /* Some other error */
661 }
662
663 extra->flags &= ~flag;
664 return count;
665 }
666
667
668
/*************************************************
*         Case-independent strncmp() function    *
*************************************************/

/* Compares at most n characters of two strings, ignoring case (as defined by
tolower()). Like strncmp(), the comparison stops at the first difference or
when the end of the strings is reached, whichever comes first, so strings
shorter than n are never read beyond their terminating zero.

Arguments:
  s             first string
  t             second string
  n             number of characters to compare; if zero or negative,
                  nothing is compared and the strings count as equal

Returns:        < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(const unsigned char *s, const unsigned char *t, int n)
{
  while (n-- > 0)
  {
    int cs = tolower(*s);
    int ct = tolower(*t);

    if (cs != ct) return cs - ct;

    /* The characters match, so if this one is the terminator, both strings
    have ended and there is nothing more to compare. */

    if (*s == 0) break;

    s++;
    t++;
  }

  return 0;
}
692
693
694
695 /*************************************************
696 * Check newline indicator *
697 *************************************************/
698
699 /* This is used both at compile and run-time to check for <xxx> escapes, where
700 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
701 no match.
702
703 Arguments:
704 p points after the leading '<'
705 f file for error message
706
707 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
708 */
709
710 static int
711 check_newline(uschar *p, FILE *f)
712 {
713 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
714 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
715 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
716 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
717 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
718 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
719 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
720 fprintf(f, "Unknown newline type at: <%s\n", p);
721 return 0;
722 }
723
724
725
/*************************************************
*         Usage function                         *
*************************************************/

/* Writes a summary of the command line syntax and options to stdout. The
lines for -dfa and -p are left out when pcretest is built with NODFA or
NOPOSIX defined, respectively. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
  fputs(" -b show compiled code (bytecode)\n", stdout);
  fputs(" -C show PCRE compile-time options and exit\n", stdout);
  fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n", stdout);
  fputs(" -i show information about compiled patterns\n", stdout);
  fputs(" -m output memory used information\n", stdout);
  fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
  fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
  fputs(" -s output store (memory) used information\n", stdout);
  fputs(" -t time compilation and execution\n", stdout);
  fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
  fputs(" -tm time execution (matching) only\n", stdout);
  fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
755
756
757
758 /*************************************************
759 * Main Program *
760 *************************************************/
761
762 /* Read lines from named file or stdin and write to named file or stdout; lines
763 consist of a regular expression, in delimiters and optionally followed by
764 options, followed by a set of test data, terminated by an empty line. */
765
766 int main(int argc, char **argv)
767 {
768 FILE *infile = stdin;
769 int options = 0;
770 int study_options = 0;
771 int op = 1;
772 int timeit = 0;
773 int timeitm = 0;
774 int showinfo = 0;
775 int showstore = 0;
776 int quiet = 0;
777 int size_offsets = 45;
778 int size_offsets_max;
779 int *offsets = NULL;
780 #if !defined NOPOSIX
781 int posix = 0;
782 #endif
783 int debug = 0;
784 int done = 0;
785 int all_use_dfa = 0;
786 int yield = 0;
787 int stack_size;
788
789 /* These vectors store, end-to-end, a list of captured substring names. Assume
790 that 1024 is plenty long enough for the few names we'll be testing. */
791
792 uschar copynames[1024];
793 uschar getnames[1024];
794
795 uschar *copynamesptr;
796 uschar *getnamesptr;
797
798 /* Get buffers from malloc() so that Electric Fence will check their misuse
799 when I am debugging. They grow automatically when very long lines are read. */
800
801 buffer = (unsigned char *)malloc(buffer_size);
802 dbuffer = (unsigned char *)malloc(buffer_size);
803 pbuffer = (unsigned char *)malloc(buffer_size);
804
805 /* The outfile variable is static so that new_malloc can use it. */
806
807 outfile = stdout;
808
809 /* The following _setmode() stuff is some Windows magic that tells its runtime
810 library to translate CRLF into a single LF character. At least, that's what
811 I've been told: never having used Windows I take this all on trust. Originally
812 it set 0x8000, but then I was advised that _O_BINARY was better. */
813
814 #if defined(_WIN32) || defined(WIN32)
815 _setmode( _fileno( stdout ), _O_BINARY );
816 #endif
817
818 /* Scan options */
819
820 while (argc > 1 && argv[op][0] == '-')
821 {
822 unsigned char *endptr;
823
824 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
825 showstore = 1;
826 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
827 else if (strcmp(argv[op], "-b") == 0) debug = 1;
828 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
829 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
830 #if !defined NODFA
831 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
832 #endif
833 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
834 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
835 *endptr == 0))
836 {
837 op++;
838 argc--;
839 }
840 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
841 {
842 int both = argv[op][2] == 0;
843 int temp;
844 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
845 *endptr == 0))
846 {
847 timeitm = temp;
848 op++;
849 argc--;
850 }
851 else timeitm = LOOPREPEAT;
852 if (both) timeit = timeitm;
853 }
854 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
855 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
856 *endptr == 0))
857 {
858 #if defined(_WIN32) || defined(WIN32)
859 printf("PCRE: -S not supported on this OS\n");
860 exit(1);
861 #else
862 int rc;
863 struct rlimit rlim;
864 getrlimit(RLIMIT_STACK, &rlim);
865 rlim.rlim_cur = stack_size * 1024 * 1024;
866 rc = setrlimit(RLIMIT_STACK, &rlim);
867 if (rc != 0)
868 {
869 printf("PCRE: setrlimit() failed with error %d\n", rc);
870 exit(1);
871 }
872 op++;
873 argc--;
874 #endif
875 }
876 #if !defined NOPOSIX
877 else if (strcmp(argv[op], "-p") == 0) posix = 1;
878 #endif
879 else if (strcmp(argv[op], "-C") == 0)
880 {
881 int rc;
882 printf("PCRE version %s\n", pcre_version());
883 printf("Compiled with\n");
884 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
885 printf(" %sUTF-8 support\n", rc? "" : "No ");
886 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
887 printf(" %sUnicode properties support\n", rc? "" : "No ");
888 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
889 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
890 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
891 (rc == -2)? "ANYCRLF" :
892 (rc == -1)? "ANY" : "???");
893 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
894 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
895 "all Unicode newlines");
896 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
897 printf(" Internal link size = %d\n", rc);
898 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
899 printf(" POSIX malloc threshold = %d\n", rc);
900 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
901 printf(" Default match limit = %d\n", rc);
902 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
903 printf(" Default recursion depth limit = %d\n", rc);
904 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
905 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
906 goto EXIT;
907 }
908 else if (strcmp(argv[op], "-help") == 0 ||
909 strcmp(argv[op], "--help") == 0)
910 {
911 usage();
912 goto EXIT;
913 }
914 else
915 {
916 printf("** Unknown or malformed option %s\n", argv[op]);
917 usage();
918 yield = 1;
919 goto EXIT;
920 }
921 op++;
922 argc--;
923 }
924
925 /* Get the store for the offsets vector, and remember what it was */
926
927 size_offsets_max = size_offsets;
928 offsets = (int *)malloc(size_offsets_max * sizeof(int));
929 if (offsets == NULL)
930 {
931 printf("** Failed to get %d bytes of memory for offsets vector\n",
932 (int)(size_offsets_max * sizeof(int)));
933 yield = 1;
934 goto EXIT;
935 }
936
937 /* Sort out the input and output files */
938
939 if (argc > 1)
940 {
941 infile = fopen(argv[op], INPUT_MODE);
942 if (infile == NULL)
943 {
944 printf("** Failed to open %s\n", argv[op]);
945 yield = 1;
946 goto EXIT;
947 }
948 }
949
950 if (argc > 2)
951 {
952 outfile = fopen(argv[op+1], OUTPUT_MODE);
953 if (outfile == NULL)
954 {
955 printf("** Failed to open %s\n", argv[op+1]);
956 yield = 1;
957 goto EXIT;
958 }
959 }
960
961 /* Set alternative malloc function */
962
963 pcre_malloc = new_malloc;
964 pcre_free = new_free;
965 pcre_stack_malloc = stack_malloc;
966 pcre_stack_free = stack_free;
967
968 /* Heading line unless quiet, then prompt for first regex if stdin */
969
970 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
971
972 /* Main loop */
973
974 while (!done)
975 {
976 pcre *re = NULL;
977 pcre_extra *extra = NULL;
978
979 #if !defined NOPOSIX /* There are still compilers that require no indent */
980 regex_t preg;
981 int do_posix = 0;
982 #endif
983
984 const char *error;
985 unsigned char *p, *pp, *ppp;
986 unsigned char *to_file = NULL;
987 const unsigned char *tables = NULL;
988 unsigned long int true_size, true_study_size = 0;
989 size_t size, regex_gotten_store;
990 int do_study = 0;
991 int do_debug = debug;
992 int do_G = 0;
993 int do_g = 0;
994 int do_showinfo = showinfo;
995 int do_showrest = 0;
996 int do_flip = 0;
997 int erroroffset, len, delimiter, poffset;
998
999 use_utf8 = 0;
1000 debug_lengths = 1;
1001
1002 if (infile == stdin) printf(" re> ");
1003 if (extend_inputline(infile, buffer) == NULL) break;
1004 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1005 fflush(outfile);
1006
1007 p = buffer;
1008 while (isspace(*p)) p++;
1009 if (*p == 0) continue;
1010
1011 /* See if the pattern is to be loaded pre-compiled from a file. */
1012
1013 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1014 {
1015 unsigned long int magic, get_options;
1016 uschar sbuf[8];
1017 FILE *f;
1018
1019 p++;
1020 pp = p + (int)strlen((char *)p);
1021 while (isspace(pp[-1])) pp--;
1022 *pp = 0;
1023
1024 f = fopen((char *)p, "rb");
1025 if (f == NULL)
1026 {
1027 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1028 continue;
1029 }
1030
1031 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1032
1033 true_size =
1034 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1035 true_study_size =
1036 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1037
1038 re = (real_pcre *)new_malloc(true_size);
1039 regex_gotten_store = gotten_store;
1040
1041 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1042
1043 magic = ((real_pcre *)re)->magic_number;
1044 if (magic != MAGIC_NUMBER)
1045 {
1046 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1047 {
1048 do_flip = 1;
1049 }
1050 else
1051 {
1052 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1053 fclose(f);
1054 continue;
1055 }
1056 }
1057
1058 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1059 do_flip? " (byte-inverted)" : "", p);
1060
1061 /* Need to know if UTF-8 for printing data strings */
1062
1063 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1064 use_utf8 = (get_options & PCRE_UTF8) != 0;
1065
1066 /* Now see if there is any following study data */
1067
1068 if (true_study_size != 0)
1069 {
1070 pcre_study_data *psd;
1071
1072 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1073 extra->flags = PCRE_EXTRA_STUDY_DATA;
1074
1075 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1076 extra->study_data = psd;
1077
1078 if (fread(psd, 1, true_study_size, f) != true_study_size)
1079 {
1080 FAIL_READ:
1081 fprintf(outfile, "Failed to read data from %s\n", p);
1082 if (extra != NULL) new_free(extra);
1083 if (re != NULL) new_free(re);
1084 fclose(f);
1085 continue;
1086 }
1087 fprintf(outfile, "Study data loaded from %s\n", p);
1088 do_study = 1; /* To get the data output if requested */
1089 }
1090 else fprintf(outfile, "No study data\n");
1091
1092 fclose(f);
1093 goto SHOW_INFO;
1094 }
1095
1096 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1097 the pattern; if it isn't complete, read more. */
1098
1099 delimiter = *p++;
1100
1101 if (isalnum(delimiter) || delimiter == '\\')
1102 {
1103 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1104 goto SKIP_DATA;
1105 }
1106
1107 pp = p;
1108 poffset = p - buffer;
1109
1110 for(;;)
1111 {
1112 while (*pp != 0)
1113 {
1114 if (*pp == '\\' && pp[1] != 0) pp++;
1115 else if (*pp == delimiter) break;
1116 pp++;
1117 }
1118 if (*pp != 0) break;
1119 if (infile == stdin) printf(" > ");
1120 if ((pp = extend_inputline(infile, pp)) == NULL)
1121 {
1122 fprintf(outfile, "** Unexpected EOF\n");
1123 done = 1;
1124 goto CONTINUE;
1125 }
1126 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1127 }
1128
1129 /* The buffer may have moved while being extended; reset the start of data
1130 pointer to the correct relative point in the buffer. */
1131
1132 p = buffer + poffset;
1133
1134 /* If the first character after the delimiter is backslash, make
1135 the pattern end with backslash. This is purely to provide a way
1136 of testing for the error message when a pattern ends with backslash. */
1137
1138 if (pp[1] == '\\') *pp++ = '\\';
1139
1140 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1141 for callouts. */
1142
1143 *pp++ = 0;
1144 strcpy((char *)pbuffer, (char *)p);
1145
1146 /* Look for options after final delimiter */
1147
1148 options = 0;
1149 study_options = 0;
1150 log_store = showstore; /* default from command line */
1151
1152 while (*pp != 0)
1153 {
1154 switch (*pp++)
1155 {
1156 case 'f': options |= PCRE_FIRSTLINE; break;
1157 case 'g': do_g = 1; break;
1158 case 'i': options |= PCRE_CASELESS; break;
1159 case 'm': options |= PCRE_MULTILINE; break;
1160 case 's': options |= PCRE_DOTALL; break;
1161 case 'x': options |= PCRE_EXTENDED; break;
1162
1163 case '+': do_showrest = 1; break;
1164 case 'A': options |= PCRE_ANCHORED; break;
1165 case 'B': do_debug = 1; break;
1166 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1167 case 'D': do_debug = do_showinfo = 1; break;
1168 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1169 case 'F': do_flip = 1; break;
1170 case 'G': do_G = 1; break;
1171 case 'I': do_showinfo = 1; break;
1172 case 'J': options |= PCRE_DUPNAMES; break;
1173 case 'M': log_store = 1; break;
1174 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1175
1176 #if !defined NOPOSIX
1177 case 'P': do_posix = 1; break;
1178 #endif
1179
1180 case 'S': do_study = 1; break;
1181 case 'U': options |= PCRE_UNGREEDY; break;
1182 case 'X': options |= PCRE_EXTRA; break;
1183 case 'Z': debug_lengths = 0; break;
1184 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1185 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1186
1187 case 'L':
1188 ppp = pp;
1189 /* The '\r' test here is so that it works on Windows. */
1190 /* The test for a zero (NUL) byte is just in case this is an unterminated line. */
1191 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1192 *ppp = 0;
1193 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1194 {
1195 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1196 goto SKIP_DATA;
1197 }
1198 locale_set = 1;
1199 tables = pcre_maketables();
1200 pp = ppp;
1201 break;
1202
1203 case '>':
1204 to_file = pp;
1205 while (*pp != 0) pp++;
1206 while (isspace(pp[-1])) pp--;
1207 *pp = 0;
1208 break;
1209
1210 case '<':
1211 {
1212 int x = check_newline(pp, outfile);
1213 if (x == 0) goto SKIP_DATA;
1214 options |= x;
1215 while (*pp++ != '>');
1216 }
1217 break;
1218
1219 case '\r': /* So that it works in Windows */
1220 case '\n':
1221 case ' ':
1222 break;
1223
1224 default:
1225 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1226 goto SKIP_DATA;
1227 }
1228 }
1229
1230 /* Handle compiling via the POSIX interface, which doesn't support the
1231 timing, showing, or debugging options, nor the ability to pass over
1232 local character tables. */
1233
1234 #if !defined NOPOSIX
1235 if (posix || do_posix)
1236 {
1237 int rc;
1238 int cflags = 0;
1239
1240 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1241 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1242 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1243 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1244 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1245
1246 rc = regcomp(&preg, (char *)p, cflags);
1247
1248 /* Compilation failed; go back for another re, skipping to blank line
1249 if non-interactive. */
1250
1251 if (rc != 0)
1252 {
1253 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1254 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1255 goto SKIP_DATA;
1256 }
1257 }
1258
1259 /* Handle compiling via the native interface */
1260
1261 else
1262 #endif /* !defined NOPOSIX */
1263
1264 {
1265 if (timeit > 0)
1266 {
1267 register int i;
1268 clock_t time_taken;
1269 clock_t start_time = clock();
1270 for (i = 0; i < timeit; i++)
1271 {
1272 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1273 if (re != NULL) free(re);
1274 }
1275 time_taken = clock() - start_time;
1276 fprintf(outfile, "Compile time %.4f milliseconds\n",
1277 (((double)time_taken * 1000.0) / (double)timeit) /
1278 (double)CLOCKS_PER_SEC);
1279 }
1280
1281 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1282
1283 /* Compilation failed; go back for another re, skipping to blank line
1284 if non-interactive. */
1285
1286 if (re == NULL)
1287 {
1288 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1289 SKIP_DATA:
1290 if (infile != stdin)
1291 {
1292 for (;;)
1293 {
1294 if (extend_inputline(infile, buffer) == NULL)
1295 {
1296 done = 1;
1297 goto CONTINUE;
1298 }
1299 len = (int)strlen((char *)buffer);
1300 while (len > 0 && isspace(buffer[len-1])) len--;
1301 if (len == 0) break;
1302 }
1303 fprintf(outfile, "\n");
1304 }
1305 goto CONTINUE;
1306 }
1307
1308 /* Compilation succeeded; print data if required. There are now two
1309 info-returning functions. The old one has a limited interface and
1310 returns only limited data. Check that it agrees with the newer one. */
1311
1312 if (log_store)
1313 fprintf(outfile, "Memory allocation (code space): %d\n",
1314 (int)(gotten_store -
1315 sizeof(real_pcre) -
1316 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1317
1318 /* Extract the size for possible writing before possibly flipping it,
1319 and remember the store that was got. */
1320
1321 true_size = ((real_pcre *)re)->size;
1322 regex_gotten_store = gotten_store;
1323
1324 /* If /S was present, study the regexp to generate additional info to
1325 help with the matching. */
1326
1327 if (do_study)
1328 {
1329 if (timeit > 0)
1330 {
1331 register int i;
1332 clock_t time_taken;
1333 clock_t start_time = clock();
1334 for (i = 0; i < timeit; i++)
1335 extra = pcre_study(re, study_options, &error);
1336 time_taken = clock() - start_time;
1337 if (extra != NULL) free(extra);
1338 fprintf(outfile, " Study time %.4f milliseconds\n",
1339 (((double)time_taken * 1000.0) / (double)timeit) /
1340 (double)CLOCKS_PER_SEC);
1341 }
1342 extra = pcre_study(re, study_options, &error);
1343 if (error != NULL)
1344 fprintf(outfile, "Failed to study: %s\n", error);
1345 else if (extra != NULL)
1346 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1347 }
1348
1349 /* If the 'F' option was present, we flip the bytes of all the integer
1350 fields in the regex data block and the study block. This is to make it
1351 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1352 compiled on a different architecture. */
1353
1354 if (do_flip)
1355 {
1356 real_pcre *rre = (real_pcre *)re;
1357 rre->magic_number =
1358 byteflip(rre->magic_number, sizeof(rre->magic_number));
1359 rre->size = byteflip(rre->size, sizeof(rre->size));
1360 rre->options = byteflip(rre->options, sizeof(rre->options));
1361 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1362 rre->top_bracket =
1363 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1364 rre->top_backref =
1365 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1366 rre->first_byte =
1367 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1368 rre->req_byte =
1369 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1370 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1371 sizeof(rre->name_table_offset));
1372 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1373 sizeof(rre->name_entry_size));
1374 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1375 sizeof(rre->name_count));
1376
1377 if (extra != NULL)
1378 {
1379 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1380 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1381 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1382 }
1383 }
1384
1385 /* Extract information from the compiled data if required */
1386
1387 SHOW_INFO:
1388
1389 if (do_debug)
1390 {
1391 fprintf(outfile, "------------------------------------------------------------------\n");
1392 pcre_printint(re, outfile, debug_lengths);
1393 }
1394
1395 if (do_showinfo)
1396 {
1397 unsigned long int get_options, all_options;
1398 #if !defined NOINFOCHECK
1399 int old_first_char, old_options, old_count;
1400 #endif
1401 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1402 hascrorlf;
1403 int nameentrysize, namecount;
1404 const uschar *nametable;
1405
1406 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1407 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1408 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1409 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1410 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1411 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1412 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1413 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1414 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1415 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1416 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1417 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1418
1419 #if !defined NOINFOCHECK
1420 old_count = pcre_info(re, &old_options, &old_first_char);
1421 if (count < 0) fprintf(outfile,
1422 "Error %d from pcre_info()\n", count);
1423 else
1424 {
1425 if (old_count != count) fprintf(outfile,
1426 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1427 old_count);
1428
1429 if (old_first_char != first_char) fprintf(outfile,
1430 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1431 first_char, old_first_char);
1432
1433 if (old_options != (int)get_options) fprintf(outfile,
1434 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1435 get_options, old_options);
1436 }
1437 #endif
1438
1439 if (size != regex_gotten_store) fprintf(outfile,
1440 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1441 (int)size, (int)regex_gotten_store);
1442
1443 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1444 if (backrefmax > 0)
1445 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1446
1447 if (namecount > 0)
1448 {
1449 fprintf(outfile, "Named capturing subpatterns:\n");
1450 while (namecount-- > 0)
1451 {
1452 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1453 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1454 GET2(nametable, 0));
1455 nametable += nameentrysize;
1456 }
1457 }
1458
1459 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1460 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1461
1462 all_options = ((real_pcre *)re)->options;
1463 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1464
1465 if (get_options == 0) fprintf(outfile, "No options\n");
1466 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1467 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1468 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1469 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1470 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1471 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1472 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1473 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1474 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1475 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1476 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1477 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1478 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1479 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1480 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1481 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1482
1483 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1484
1485 switch (get_options & PCRE_NEWLINE_BITS)
1486 {
1487 case PCRE_NEWLINE_CR:
1488 fprintf(outfile, "Forced newline sequence: CR\n");
1489 break;
1490
1491 case PCRE_NEWLINE_LF:
1492 fprintf(outfile, "Forced newline sequence: LF\n");
1493 break;
1494
1495 case PCRE_NEWLINE_CRLF:
1496 fprintf(outfile, "Forced newline sequence: CRLF\n");
1497 break;
1498
1499 case PCRE_NEWLINE_ANYCRLF:
1500 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1501 break;
1502
1503 case PCRE_NEWLINE_ANY:
1504 fprintf(outfile, "Forced newline sequence: ANY\n");
1505 break;
1506
1507 default:
1508 break;
1509 }
1510
1511 if (first_char == -1)
1512 {
1513 fprintf(outfile, "First char at start or follows newline\n");
1514 }
1515 else if (first_char < 0)
1516 {
1517 fprintf(outfile, "No first char\n");
1518 }
1519 else
1520 {
1521 int ch = first_char & 255;
1522 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1523 "" : " (caseless)";
1524 if (PRINTHEX(ch))
1525 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1526 else
1527 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1528 }
1529
1530 if (need_char < 0)
1531 {
1532 fprintf(outfile, "No need char\n");
1533 }
1534 else
1535 {
1536 int ch = need_char & 255;
1537 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1538 "" : " (caseless)";
1539 if (PRINTHEX(ch))
1540 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1541 else
1542 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1543 }
1544
1545 /* Don't output study size; at present it is in any case a fixed
1546 value, but it varies, depending on the computer architecture, and
1547 so messes up the test suite. (And with the /F option, it might be
1548 flipped.) */
1549
1550 if (do_study)
1551 {
1552 if (extra == NULL)
1553 fprintf(outfile, "Study returned NULL\n");
1554 else
1555 {
1556 uschar *start_bits = NULL;
1557 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1558
1559 if (start_bits == NULL)
1560 fprintf(outfile, "No starting byte set\n");
1561 else
1562 {
1563 int i;
1564 int c = 24;
1565 fprintf(outfile, "Starting byte set: ");
1566 for (i = 0; i < 256; i++)
1567 {
1568 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1569 {
1570 if (c > 75)
1571 {
1572 fprintf(outfile, "\n ");
1573 c = 2;
1574 }
1575 if (PRINTHEX(i) && i != ' ')
1576 {
1577 fprintf(outfile, "%c ", i);
1578 c += 2;
1579 }
1580 else
1581 {
1582 fprintf(outfile, "\\x%02x ", i);
1583 c += 5;
1584 }
1585 }
1586 }
1587 fprintf(outfile, "\n");
1588 }
1589 }
1590 }
1591 }
1592
1593 /* If the '>' option was present, we write out the regex to a file, and
1594 that is all. The first 8 bytes of the file are the regex length and then
1595 the study length, in big-endian order. */
1596
1597 if (to_file != NULL)
1598 {
1599 FILE *f = fopen((char *)to_file, "wb");
1600 if (f == NULL)
1601 {
1602 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1603 }
1604 else
1605 {
1606 uschar sbuf[8];
1607 sbuf[0] = (uschar)((true_size >> 24) & 255);
1608 sbuf[1] = (uschar)((true_size >> 16) & 255);
1609 sbuf[2] = (uschar)((true_size >> 8) & 255);
1610 sbuf[3] = (uschar)((true_size) & 255);
1611
1612 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1613 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1614 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1615 sbuf[7] = (uschar)((true_study_size) & 255);
1616
1617 if (fwrite(sbuf, 1, 8, f) < 8 ||
1618 fwrite(re, 1, true_size, f) < true_size)
1619 {
1620 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1621 }
1622 else
1623 {
1624 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1625 if (extra != NULL)
1626 {
1627 if (fwrite(extra->study_data, 1, true_study_size, f) <
1628 true_study_size)
1629 {
1630 fprintf(outfile, "Write error on %s: %s\n", to_file,
1631 strerror(errno));
1632 }
1633 else fprintf(outfile, "Study data written to %s\n", to_file);
1634
1635 }
1636 }
1637 fclose(f);
1638 }
1639
1640 new_free(re);
1641 if (extra != NULL) new_free(extra);
1642 if (tables != NULL) new_free((void *)tables);
1643 continue; /* With next regex */
1644 }
1645 } /* End of non-POSIX compile */
1646
1647 /* Read data lines and test them */
1648
1649 for (;;)
1650 {
1651 uschar *q;
1652 uschar *bptr;
1653 int *use_offsets = offsets;
1654 int use_size_offsets = size_offsets;
1655 int callout_data = 0;
1656 int callout_data_set = 0;
1657 int count, c;
1658 int copystrings = 0;
1659 int find_match_limit = 0;
1660 int getstrings = 0;
1661 int getlist = 0;
1662 int gmatched = 0;
1663 int start_offset = 0;
1664 int g_notempty = 0;
1665 int use_dfa = 0;
1666
1667 options = 0;
1668
1669 *copynames = 0;
1670 *getnames = 0;
1671
1672 copynamesptr = copynames;
1673 getnamesptr = getnames;
1674
1675 pcre_callout = callout;
1676 first_callout = 1;
1677 callout_extra = 0;
1678 callout_count = 0;
1679 callout_fail_count = 999999;
1680 callout_fail_id = -1;
1681 show_malloc = 0;
1682
1683 if (extra != NULL) extra->flags &=
1684 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1685
1686 len = 0;
1687 for (;;)
1688 {
1689 if (infile == stdin) printf("data> ");
1690 if (extend_inputline(infile, buffer + len) == NULL)
1691 {
1692 if (len > 0) break;
1693 done = 1;
1694 goto CONTINUE;
1695 }
1696 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1697 len = (int)strlen((char *)buffer);
1698 if (buffer[len-1] == '\n') break;
1699 }
1700
1701 while (len > 0 && isspace(buffer[len-1])) len--;
1702 buffer[len] = 0;
1703 if (len == 0) break;
1704
1705 p = buffer;
1706 while (isspace(*p)) p++;
1707
1708 bptr = q = dbuffer;
1709 while ((c = *p++) != 0)
1710 {
1711 int i = 0;
1712 int n = 0;
1713
1714 if (c == '\\') switch ((c = *p++))
1715 {
1716 case 'a': c = 7; break;
1717 case 'b': c = '\b'; break;
1718 case 'e': c = 27; break;
1719 case 'f': c = '\f'; break;
1720 case 'n': c = '\n'; break;
1721 case 'r': c = '\r'; break;
1722 case 't': c = '\t'; break;
1723 case 'v': c = '\v'; break;
1724
1725 case '0': case '1': case '2': case '3':
1726 case '4': case '5': case '6': case '7':
1727 c -= '0';
1728 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1729 c = c * 8 + *p++ - '0';
1730
1731 #if !defined NOUTF8
1732 if (use_utf8 && c > 255)
1733 {
1734 unsigned char buff8[8];
1735 int ii, utn;
1736 utn = ord2utf8(c, buff8);
1737 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1738 c = buff8[ii]; /* Last byte */
1739 }
1740 #endif
1741 break;
1742
1743 case 'x':
1744
1745 /* Handle \x{..} specially - new Perl thing for utf8 */
1746
1747 #if !defined NOUTF8
1748 if (*p == '{')
1749 {
1750 unsigned char *pt = p;
1751 c = 0;
1752 while (isxdigit(*(++pt)))
1753 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1754 if (*pt == '}')
1755 {
1756 unsigned char buff8[8];
1757 int ii, utn;
1758 utn = ord2utf8(c, buff8);
1759 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1760 c = buff8[ii]; /* Last byte */
1761 p = pt + 1;
1762 break;
1763 }
1764 /* Not correct form; fall through */
1765 }
1766 #endif
1767
1768 /* Ordinary \x */
1769
1770 c = 0;
1771 while (i++ < 2 && isxdigit(*p))
1772 {
1773 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1774 p++;
1775 }
1776 break;
1777
1778 case 0: /* \ followed by EOF allows for an empty line */
1779 p--;
1780 continue;
1781
1782 case '>':
1783 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1784 continue;
1785
1786 case 'A': /* Option setting */
1787 options |= PCRE_ANCHORED;
1788 continue;
1789
1790 case 'B':
1791 options |= PCRE_NOTBOL;
1792 continue;
1793
1794 case 'C':
1795 if (isdigit(*p)) /* Set copy string */
1796 {
1797 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1798 copystrings |= 1 << n;
1799 }
1800 else if (isalnum(*p))
1801 {
1802 uschar *npp = copynamesptr;
1803 while (isalnum(*p)) *npp++ = *p++;
1804 *npp++ = 0;
1805 *npp = 0;
1806 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1807 if (n < 0)
1808 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1809 copynamesptr = npp;
1810 }
1811 else if (*p == '+')
1812 {
1813 callout_extra = 1;
1814 p++;
1815 }
1816 else if (*p == '-')
1817 {
1818 pcre_callout = NULL;
1819 p++;
1820 }
1821 else if (*p == '!')
1822 {
1823 callout_fail_id = 0;
1824 p++;
1825 while(isdigit(*p))
1826 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1827 callout_fail_count = 0;
1828 if (*p == '!')
1829 {
1830 p++;
1831 while(isdigit(*p))
1832 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1833 }
1834 }
1835 else if (*p == '*')
1836 {
1837 int sign = 1;
1838 callout_data = 0;
1839 if (*(++p) == '-') { sign = -1; p++; }
1840 while(isdigit(*p))
1841 callout_data = callout_data * 10 + *p++ - '0';
1842 callout_data *= sign;
1843 callout_data_set = 1;
1844 }
1845 continue;
1846
1847 #if !defined NODFA
1848 case 'D':
1849 #if !defined NOPOSIX
1850 if (posix || do_posix)
1851 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1852 else
1853 #endif
1854 use_dfa = 1;
1855 continue;
1856
1857 case 'F':
1858 options |= PCRE_DFA_SHORTEST;
1859 continue;
1860 #endif
1861
1862 case 'G':
1863 if (isdigit(*p))
1864 {
1865 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1866 getstrings |= 1 << n;
1867 }
1868 else if (isalnum(*p))
1869 {
1870 uschar *npp = getnamesptr;
1871 while (isalnum(*p)) *npp++ = *p++;
1872 *npp++ = 0;
1873 *npp = 0;
1874 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1875 if (n < 0)
1876 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1877 getnamesptr = npp;
1878 }
1879 continue;
1880
1881 case 'L':
1882 getlist = 1;
1883 continue;
1884
1885 case 'M':
1886 find_match_limit = 1;
1887 continue;
1888
1889 case 'N':
1890 options |= PCRE_NOTEMPTY;
1891 continue;
1892
1893 case 'O':
1894 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1895 if (n > size_offsets_max)
1896 {
1897 size_offsets_max = n;
1898 free(offsets);
1899 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1900 if (offsets == NULL)
1901 {
1902 printf("** Failed to get %d bytes of memory for offsets vector\n",
1903 (int)(size_offsets_max * sizeof(int)));
1904 yield = 1;
1905 goto EXIT;
1906 }
1907 }
1908 use_size_offsets = n;
1909 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1910 continue;
1911
1912 case 'P':
1913 options |= PCRE_PARTIAL;
1914 continue;
1915
1916 case 'Q':
1917 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1918 if (extra == NULL)
1919 {
1920 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1921 extra->flags = 0;
1922 }
1923 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1924 extra->match_limit_recursion = n;
1925 continue;
1926
1927 case 'q':
1928 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1929 if (extra == NULL)
1930 {
1931 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1932 extra->flags = 0;
1933 }
1934 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1935 extra->match_limit = n;
1936 continue;
1937
1938 #if !defined NODFA
1939 case 'R':
1940 options |= PCRE_DFA_RESTART;
1941 continue;
1942 #endif
1943
1944 case 'S':
1945 show_malloc = 1;
1946 continue;
1947
1948 case 'Z':
1949 options |= PCRE_NOTEOL;
1950 continue;
1951
1952 case '?':
1953 options |= PCRE_NO_UTF8_CHECK;
1954 continue;
1955
1956 case '<':
1957 {
1958 int x = check_newline(p, outfile);
1959 if (x == 0) goto NEXT_DATA;
1960 options |= x;
1961 while (*p++ != '>');
1962 }
1963 continue;
1964 }
1965 *q++ = c;
1966 }
1967 *q = 0;
1968 len = q - dbuffer;
1969
1970 if ((all_use_dfa || use_dfa) && find_match_limit)
1971 {
1972 printf("**Match limit not relevant for DFA matching: ignored\n");
1973 find_match_limit = 0;
1974 }
1975
1976 /* Handle matching via the POSIX interface, which does not
1977 support timing or playing with the match limit or callout data. */
1978
1979 #if !defined NOPOSIX
1980 if (posix || do_posix)
1981 {
1982 int rc;
1983 int eflags = 0;
1984 regmatch_t *pmatch = NULL;
1985 if (use_size_offsets > 0)
1986 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1987 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1988 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1989
1990 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1991
1992 if (rc != 0)
1993 {
1994 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1995 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1996 }
1997 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1998 != 0)
1999 {
2000 fprintf(outfile, "Matched with REG_NOSUB\n");
2001 }
2002 else
2003 {
2004 size_t i;
2005 for (i = 0; i < (size_t)use_size_offsets; i++)
2006 {
2007 if (pmatch[i].rm_so >= 0)
2008 {
2009 fprintf(outfile, "%2d: ", (int)i);
2010 (void)pchars(dbuffer + pmatch[i].rm_so,
2011 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2012 fprintf(outfile, "\n");
2013 if (i == 0 && do_showrest)
2014 {
2015 fprintf(outfile, " 0+ ");
2016 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2017 outfile);
2018 fprintf(outfile, "\n");
2019 }
2020 }
2021 }
2022 }
2023 free(pmatch);
2024 }
2025
2026 /* Handle matching via the native interface - repeats for /g and /G */
2027
2028 else
2029 #endif /* !defined NOPOSIX */
2030
2031 for (;; gmatched++) /* Loop for /g or /G */
2032 {
2033 if (timeitm > 0)
2034 {
2035 register int i;
2036 clock_t time_taken;
2037 clock_t start_time = clock();
2038
2039 #if !defined NODFA
2040 if (all_use_dfa || use_dfa)
2041 {
2042 int workspace[1000];
2043 for (i = 0; i < timeitm; i++)
2044 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2045 options | g_notempty, use_offsets, use_size_offsets, workspace,
2046 sizeof(workspace)/sizeof(int));
2047 }
2048 else
2049 #endif
2050
2051 for (i = 0; i < timeitm; i++)
2052 count = pcre_exec(re, extra, (char *)bptr, len,
2053 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2054
2055 time_taken = clock() - start_time;
2056 fprintf(outfile, "Execute time %.4f milliseconds\n",
2057 (((double)time_taken * 1000.0) / (double)timeitm) /
2058 (double)CLOCKS_PER_SEC);
2059 }
2060
2061 /* If find_match_limit is set, we want to do repeated matches with
2062 varying limits in order to find the minimum value for the match limit and
2063 for the recursion limit. */
2064
2065 if (find_match_limit)
2066 {
2067 if (extra == NULL)
2068 {
2069 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2070 extra->flags = 0;
2071 }
2072
2073 (void)check_match_limit(re, extra, bptr, len, start_offset,
2074 options|g_notempty, use_offsets, use_size_offsets,
2075 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2076 PCRE_ERROR_MATCHLIMIT, "match()");
2077
2078 count = check_match_limit(re, extra, bptr, len, start_offset,
2079 options|g_notempty, use_offsets, use_size_offsets,
2080 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2081 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2082 }
2083
2084 /* If callout_data is set, use the interface with additional data */
2085
2086 else if (callout_data_set)
2087 {
2088 if (extra == NULL)
2089 {
2090 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2091 extra->flags = 0;
2092 }
2093 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2094 extra->callout_data = &callout_data;
2095 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2096 options | g_notempty, use_offsets, use_size_offsets);
2097 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2098 }
2099
2100 /* The normal case is just to do the match once, with the default
2101 value of match_limit. */
2102
2103 #if !defined NODFA
2104 else if (all_use_dfa || use_dfa)
2105 {
2106 int workspace[1000];
2107 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2108 options | g_notempty, use_offsets, use_size_offsets, workspace,
2109 sizeof(workspace)/sizeof(int));
2110 if (count == 0)
2111 {
2112 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2113 count = use_size_offsets/2;
2114 }
2115 }
2116 #endif
2117
2118 else
2119 {
2120 count = pcre_exec(re, extra, (char *)bptr, len,
2121 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2122 if (count == 0)
2123 {
2124 fprintf(outfile, "Matched, but too many substrings\n");
2125 count = use_size_offsets/3;
2126 }
2127 }
2128
2129 /* Matched */
2130
2131 if (count >= 0)
2132 {
2133 int i, maxcount;
2134
2135 #if !defined NODFA
2136 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2137 #endif
2138 maxcount = use_size_offsets/3;
2139
2140 /* This is a check against a lunatic return value. */
2141
2142 if (count > maxcount)
2143 {
2144 fprintf(outfile,
2145 "** PCRE error: returned count %d is too big for offset size %d\n",
2146 count, use_size_offsets);
2147 count = use_size_offsets/3;
2148 if (do_g || do_G)
2149 {
2150 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2151 do_g = do_G = FALSE; /* Break g/G loop */
2152 }
2153 }
2154
2155 for (i = 0; i < count * 2; i += 2)
2156 {
2157 if (use_offsets[i] < 0)
2158 fprintf(outfile, "%2d: <unset>\n", i/2);
2159 else
2160 {
2161 fprintf(outfile, "%2d: ", i/2);
2162 (void)pchars(bptr + use_offsets[i],
2163 use_offsets[i+1] - use_offsets[i], outfile);
2164 fprintf(outfile, "\n");
2165 if (i == 0)
2166 {
2167 if (do_showrest)
2168 {
2169 fprintf(outfile, " 0+ ");
2170 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2171 outfile);
2172 fprintf(outfile, "\n");
2173 }
2174 }
2175 }
2176 }
2177
2178 for (i = 0; i < 32; i++)
2179 {
2180 if ((copystrings & (1 << i)) != 0)
2181 {
2182 char copybuffer[256];
2183 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2184 i, copybuffer, sizeof(copybuffer));
2185 if (rc < 0)
2186 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2187 else
2188 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2189 }
2190 }
2191
2192 for (copynamesptr = copynames;
2193 *copynamesptr != 0;
2194 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2195 {
2196 char copybuffer[256];
2197 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2198 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2199 if (rc < 0)
2200 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2201 else
2202 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2203 }
2204
2205 for (i = 0; i < 32; i++)
2206 {
2207 if ((getstrings & (1 << i)) != 0)
2208 {
2209 const char *substring;
2210 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2211 i, &substring);
2212 if (rc < 0)
2213 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2214 else
2215 {
2216 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2217 pcre_free_substring(substring);
2218 }
2219 }
2220 }
2221
2222 for (getnamesptr = getnames;
2223 *getnamesptr != 0;
2224 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2225 {
2226 const char *substring;
2227 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2228 count, (char *)getnamesptr, &substring);
2229 if (rc < 0)
2230 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2231 else
2232 {
2233 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2234 pcre_free_substring(substring);
2235 }
2236 }
2237
2238 if (getlist)
2239 {
2240 const char **stringlist;
2241 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2242 &stringlist);
2243 if (rc < 0)
2244 fprintf(outfile, "get substring list failed %d\n", rc);
2245 else
2246 {
2247 for (i = 0; i < count; i++)
2248 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2249 if (stringlist[i] != NULL)
2250 fprintf(outfile, "string list not terminated by NULL\n");
2251 /* free((void *)stringlist); */
2252 pcre_free_substring_list(stringlist);
2253 }
2254 }
2255 }
2256
2257 /* There was a partial match */
2258
2259 else if (count == PCRE_ERROR_PARTIAL)
2260 {
2261 fprintf(outfile, "Partial match");
2262 #if !defined NODFA
2263 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2264 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2265 bptr + use_offsets[0]);
2266 #endif
2267 fprintf(outfile, "\n");
2268 break; /* Out of the /g loop */
2269 }
2270
2271 /* Failed to match. If this is a /g or /G loop and we previously set
2272 g_notempty after a null match, this is not necessarily the end. We want
2273 to advance the start offset, and continue. We won't be at the end of the
2274 string - that was checked before setting g_notempty.
2275
2276 Complication arises in the case when the newline option is "any" or
2277 "anycrlf". If the previous match was at the end of a line terminated by
2278 CRLF, an advance of one character just passes the \r, whereas we should
2279 prefer the longer newline sequence, as does the code in pcre_exec().
2280 Fudge the offset value to achieve this.
2281
2282 Otherwise, in the case of UTF-8 matching, the advance must be one
2283 character, not one byte. */
2284
2285 else
2286 {
2287 if (g_notempty != 0)
2288 {
2289 int onechar = 1;
2290 unsigned int obits = ((real_pcre *)re)->options;
2291 use_offsets[0] = start_offset;
2292 if ((obits & PCRE_NEWLINE_BITS) == 0)
2293 {
2294 int d;
2295 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2296 obits = (d == '\r')? PCRE_NEWLINE_CR :
2297 (d == '\n')? PCRE_NEWLINE_LF :
2298 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2299 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2300 (d == -1)? PCRE_NEWLINE_ANY : 0;
2301 }
2302 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2303 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2304 &&
2305 start_offset < len - 1 &&
2306 bptr[start_offset] == '\r' &&
2307 bptr[start_offset+1] == '\n')
2308 onechar++;
2309 else if (use_utf8)
2310 {
2311 while (start_offset + onechar < len)
2312 {
2313 int tb = bptr[start_offset+onechar];
2314 if (tb <= 127) break;
2315 tb &= 0xc0;
2316 if (tb != 0 && tb != 0xc0) onechar++;
2317 }
2318 }
2319 use_offsets[1] = start_offset + onechar;
2320 }
2321 else
2322 {
2323 if (count == PCRE_ERROR_NOMATCH)
2324 {
2325 if (gmatched == 0) fprintf(outfile, "No match\n");
2326 }
2327 else fprintf(outfile, "Error %d\n", count);
2328 break; /* Out of the /g loop */
2329 }
2330 }
2331
2332 /* If not /g or /G we are done */
2333
2334 if (!do_g && !do_G) break;
2335
2336 /* If we have matched an empty string, first check to see if we are at
2337 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2338 what Perl's /g option does. This turns out to be rather cunning. First
2339 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2340 same point. If this fails (picked up above) we advance to the next
2341 character. */
2342
2343 g_notempty = 0;
2344
2345 if (use_offsets[0] == use_offsets[1])
2346 {
2347 if (use_offsets[0] == len) break;
2348 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2349 }
2350
2351 /* For /g, update the start offset, leaving the rest alone */
2352
2353 if (do_g) start_offset = use_offsets[1];
2354
2355 /* For /G, update the pointer and length */
2356
2357 else
2358 {
2359 bptr += use_offsets[1];
2360 len -= use_offsets[1];
2361 }
2362 } /* End of loop for /g and /G */
2363
2364 NEXT_DATA: continue;
2365 } /* End of loop for data lines */
2366
2367 CONTINUE:
2368
2369 #if !defined NOPOSIX
2370 if (posix || do_posix) regfree(&preg);
2371 #endif
2372
2373 if (re != NULL) new_free(re);
2374 if (extra != NULL) new_free(extra);
2375 if (tables != NULL)
2376 {
2377 new_free((void *)tables);
2378 setlocale(LC_CTYPE, "C");
2379 locale_set = 0;
2380 }
2381 }
2382
2383 if (infile == stdin) fprintf(outfile, "\n");
2384
2385 EXIT:
2386
2387 if (infile != NULL && infile != stdin) fclose(infile);
2388 if (outfile != NULL && outfile != stdout) fclose(outfile);
2389
2390 free(buffer);
2391 free(dbuffer);
2392 free(pbuffer);
2393 free(offsets);
2394
2395 return yield;
2396 }
2397
2398 /* End of pcretest.c */