Allow only accept and warn in the not-QUIT ACL.
[exim.git] / src / src / pcre / pcretest.c
1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.7 2007/06/26 11:16:54 ph10 Exp $ */
2
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
6
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
48
49
50 /* A number of things vary for Windows builds. Originally, pcretest opened its
51 input and output without "b"; then I was told that "b" was needed in some
52 environments, so it was added for release 5.0 to both the input and output. (It
53 makes no difference on Unix-like systems.) Later I was told that it is wrong
54 for the input on Windows. I've now abstracted the modes into two macros that
55 are set here, to make it easier to fiddle with them, and removed "b" from the
56 input mode under Windows. */
57
58 #if defined(_WIN32) || defined(WIN32)
59 #include <io.h> /* For _setmode() */
60 #include <fcntl.h> /* For _O_BINARY */
61 #define INPUT_MODE "r"
62 #define OUTPUT_MODE "wb"
63
64 #else
65 #include <sys/time.h> /* These two includes are needed */
66 #include <sys/resource.h> /* for setrlimit(). */
67 #define INPUT_MODE "rb"
68 #define OUTPUT_MODE "wb"
69 #endif
70
71
72 /* We have to include pcre_internal.h because we need the internal info for
73 displaying the results of pcre_study() and we also need to know about the
74 internal macros, structures, and other internal data values; pcretest has
75 "inside information" compared to a program that strictly follows the PCRE API.
76
77 Although pcre_internal.h does itself include pcre.h, we explicitly include it
78 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
79 appropriately for an application, not for building PCRE. */
80
81 #include "pcre.h"
82 #include "pcre_internal.h"
83
84 /* We need access to the data tables that PCRE uses. So as not to have to keep
85 two copies, we include the source file here, changing the names of the external
86 symbols to prevent clashes. */
87
88 #define _pcre_utf8_table1 utf8_table1
89 #define _pcre_utf8_table1_size utf8_table1_size
90 #define _pcre_utf8_table2 utf8_table2
91 #define _pcre_utf8_table3 utf8_table3
92 #define _pcre_utf8_table4 utf8_table4
93 #define _pcre_utt utt
94 #define _pcre_utt_size utt_size
95 #define _pcre_OP_lengths OP_lengths
96
97 #include "pcre_tables.c"
98
99 /* We also need the pcre_printint() function for printing out compiled
100 patterns. This function is in a separate file so that it can be included in
101 pcre_compile.c when that module is compiled with debugging enabled.
102
103 The definition of the macro PRINTABLE, which determines whether to print an
104 output character as-is or as a hex value when showing compiled patterns, is
105 contained in this file. We use it here also, in cases when the locale has not
106 been explicitly changed, so as to get consistent output from systems that
107 differ in their output from isprint() even in the "C" locale. */
108
109 #include "pcre_printint.src"
110
111 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
112
113
114 /* It is possible to compile this test program without including support for
115 testing the POSIX interface, though this is not available via the standard
116 Makefile. */
117
118 #if !defined NOPOSIX
119 #include "pcreposix.h"
120 #endif
121
122 /* It is also possible, for the benefit of the version currently imported into
123 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
124 interface to the DFA matcher (NODFA), and without the doublecheck of the old
125 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
126 UTF8 support if PCRE is built without it. */
127
128 #ifndef SUPPORT_UTF8
129 #ifndef NOUTF8
130 #define NOUTF8
131 #endif
132 #endif
133
134
135 /* Other parameters */
136
137 #ifndef CLOCKS_PER_SEC
138 #ifdef CLK_TCK
139 #define CLOCKS_PER_SEC CLK_TCK
140 #else
141 #define CLOCKS_PER_SEC 100
142 #endif
143 #endif
144
145 /* This is the default loop count for timing. */
146
147 #define LOOPREPEAT 500000
148
149 /* Static variables */
150
151 static FILE *outfile;
152 static int log_store = 0;
153 static int callout_count;
154 static int callout_extra;
155 static int callout_fail_count;
156 static int callout_fail_id;
157 static int first_callout;
158 static int locale_set = 0;
159 static int show_malloc;
160 static int use_utf8;
161 static size_t gotten_store;
162
163 /* The buffers grow automatically if very long input lines are encountered. */
164
165 static int buffer_size = 50000;
166 static uschar *buffer = NULL;
167 static uschar *dbuffer = NULL;
168 static uschar *pbuffer = NULL;
169
170
171
172 /*************************************************
173 * Read or extend an input line *
174 *************************************************/
175
176 /* Input lines are read into buffer, but both patterns and data lines can be
177 continued over multiple input lines. In addition, if the buffer fills up, we
178 want to automatically expand it so as to be able to handle extremely large
179 lines that are needed for certain stress tests. When the input buffer is
180 expanded, the other two buffers must also be expanded likewise, and the
181 contents of pbuffer, which are a copy of the input for callouts, must be
182 preserved (for when expansion happens for a data line). This is not the most
183 optimal way of handling this, but hey, this is just a test program!
184
185 Arguments:
186 f the file to read
187 start where in buffer to start (this *must* be within buffer)
188
189 Returns: pointer to the start of new data
190 could be a copy of start, or could be moved
191 NULL if no data read and EOF reached
192 */
193
194 static uschar *
195 extend_inputline(FILE *f, uschar *start)
196 {
197 uschar *here = start;
198
199 for (;;)
200 {
201 int rlen = buffer_size - (here - buffer);
202
203 if (rlen > 1000)
204 {
205 int dlen;
206 if (fgets((char *)here, rlen, f) == NULL)
207 return (here == start)? NULL : start;
208 dlen = (int)strlen((char *)here);
209 if (dlen > 0 && here[dlen - 1] == '\n') return start;
210 here += dlen;
211 }
212
213 else
214 {
215 int new_buffer_size = 2*buffer_size;
216 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
217 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
218 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
219
220 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
221 {
222 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
223 exit(1);
224 }
225
226 memcpy(new_buffer, buffer, buffer_size);
227 memcpy(new_pbuffer, pbuffer, buffer_size);
228
229 buffer_size = new_buffer_size;
230
231 start = new_buffer + (start - buffer);
232 here = new_buffer + (here - buffer);
233
234 free(buffer);
235 free(dbuffer);
236 free(pbuffer);
237
238 buffer = new_buffer;
239 dbuffer = new_dbuffer;
240 pbuffer = new_pbuffer;
241 }
242 }
243
244 return NULL; /* Control never gets here */
245 }
246
247
248
249
250
251
252
253 /*************************************************
254 * Read number from string *
255 *************************************************/
256
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. A
value too large for an int is clamped to INT_MAX instead of overflowing
(signed overflow is undefined behaviour); the remaining digits are still
consumed, so the end pointer always addresses the first non-digit.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer

Returns:      the value as a non-negative int (0 if there are no digits,
                INT_MAX if the number does not fit)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
  int result = 0;

  while (*str != 0 && isspace(*str)) str++;

  while (isdigit(*str))
  {
    int digit = (int)(*str++ - '0');

    /* result*10 + digit fits exactly when result <= (INT_MAX - digit)/10. */

    if (result > (INT_MAX - digit) / 10)
      result = INT_MAX;
    else
      result = result * 10 + digit;
  }

  *endptr = str;
  return result;
}
277
278
279
280
281 /*************************************************
282 * Convert UTF-8 string to value *
283 *************************************************/
284
285 /* This function takes one or more bytes that represents a UTF-8 character,
286 and returns the value of the character.
287
288 Argument:
289 utf8bytes a pointer to the byte vector
290 vptr a pointer to an int to receive the value
291
292 Returns: > 0 => the number of bytes consumed
293 -6 to 0 => malformed UTF-8 character at offset = (-return)
294 */
295
296 #if !defined NOUTF8
297
298 static int
299 utf82ord(unsigned char *utf8bytes, int *vptr)
300 {
301 int c = *utf8bytes++;
302 int d = c;
303 int i, j, s;
304
305 for (i = -1; i < 6; i++) /* i is number of additional bytes */
306 {
307 if ((d & 0x80) == 0) break;
308 d <<= 1;
309 }
310
311 if (i == -1) { *vptr = c; return 1; } /* ascii character */
312 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
313
314 /* i now has a value in the range 1-5 */
315
316 s = 6*i;
317 d = (c & utf8_table3[i]) << s;
318
319 for (j = 0; j < i; j++)
320 {
321 c = *utf8bytes++;
322 if ((c & 0xc0) != 0x80) return -(j+1);
323 s -= 6;
324 d |= (c & 0x3f) << s;
325 }
326
327 /* Check that encoding was the correct unique one */
328
329 for (j = 0; j < utf8_table1_size; j++)
330 if (d <= utf8_table1[j]) break;
331 if (j != i) return -(i+1);
332
333 /* Valid value */
334
335 *vptr = d;
336 return i+1;
337 }
338
339 #endif
340
341
342
343 /*************************************************
344 * Convert character value to UTF-8 *
345 *************************************************/
346
347 /* This function takes an integer value in the range 0 - 0x7fffffff
348 and encodes it as a UTF-8 character in 0 to 6 bytes.
349
350 Arguments:
351 cvalue the character value
352 utf8bytes pointer to buffer for result - at least 6 bytes long
353
354 Returns: number of characters placed in the buffer
355 */
356
357 #if !defined NOUTF8
358
359 static int
360 ord2utf8(int cvalue, uschar *utf8bytes)
361 {
362 register int i, j;
363 for (i = 0; i < utf8_table1_size; i++)
364 if (cvalue <= utf8_table1[i]) break;
365 utf8bytes += i;
366 for (j = i; j > 0; j--)
367 {
368 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
369 cvalue >>= 6;
370 }
371 *utf8bytes = utf8_table2[i] | cvalue;
372 return i + 1;
373 }
374
375 #endif
376
377
378
379 /*************************************************
380 * Print character string *
381 *************************************************/
382
383 /* Character string printing function. Must handle UTF-8 strings in utf8
384 mode. Yields number of characters printed. If handed a NULL file, just counts
385 chars without printing. */
386
387 static int pchars(unsigned char *p, int length, FILE *f)
388 {
389 int c = 0;
390 int yield = 0;
391
392 while (length-- > 0)
393 {
394 #if !defined NOUTF8
395 if (use_utf8)
396 {
397 int rc = utf82ord(p, &c);
398
399 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
400 {
401 length -= rc - 1;
402 p += rc;
403 if (PRINTHEX(c))
404 {
405 if (f != NULL) fprintf(f, "%c", c);
406 yield++;
407 }
408 else
409 {
410 int n = 4;
411 if (f != NULL) fprintf(f, "\\x{%02x}", c);
412 yield += (n <= 0x000000ff)? 2 :
413 (n <= 0x00000fff)? 3 :
414 (n <= 0x0000ffff)? 4 :
415 (n <= 0x000fffff)? 5 : 6;
416 }
417 continue;
418 }
419 }
420 #endif
421
422 /* Not UTF-8, or malformed UTF-8 */
423
424 c = *p++;
425 if (PRINTHEX(c))
426 {
427 if (f != NULL) fprintf(f, "%c", c);
428 yield++;
429 }
430 else
431 {
432 if (f != NULL) fprintf(f, "\\x%02x", c);
433 yield += 4;
434 }
435 }
436
437 return yield;
438 }
439
440
441
442 /*************************************************
443 * Callout function *
444 *************************************************/
445
446 /* Called from PCRE as a result of the (?C) item. We print out where we are in
447 the match. Yield zero unless more callouts than the fail count, or the callout
448 data is not zero. */
449
450 static int callout(pcre_callout_block *cb)
451 {
452 FILE *f = (first_callout | callout_extra)? outfile : NULL;
453 int i, pre_start, post_start, subject_length;
454
455 if (callout_extra)
456 {
457 fprintf(f, "Callout %d: last capture = %d\n",
458 cb->callout_number, cb->capture_last);
459
460 for (i = 0; i < cb->capture_top * 2; i += 2)
461 {
462 if (cb->offset_vector[i] < 0)
463 fprintf(f, "%2d: <unset>\n", i/2);
464 else
465 {
466 fprintf(f, "%2d: ", i/2);
467 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
468 cb->offset_vector[i+1] - cb->offset_vector[i], f);
469 fprintf(f, "\n");
470 }
471 }
472 }
473
474 /* Re-print the subject in canonical form, the first time or if giving full
475 datails. On subsequent calls in the same match, we use pchars just to find the
476 printed lengths of the substrings. */
477
478 if (f != NULL) fprintf(f, "--->");
479
480 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
481 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
482 cb->current_position - cb->start_match, f);
483
484 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
485
486 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
487 cb->subject_length - cb->current_position, f);
488
489 if (f != NULL) fprintf(f, "\n");
490
491 /* Always print appropriate indicators, with callout number if not already
492 shown. For automatic callouts, show the pattern offset. */
493
494 if (cb->callout_number == 255)
495 {
496 fprintf(outfile, "%+3d ", cb->pattern_position);
497 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
498 }
499 else
500 {
501 if (callout_extra) fprintf(outfile, " ");
502 else fprintf(outfile, "%3d ", cb->callout_number);
503 }
504
505 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
506 fprintf(outfile, "^");
507
508 if (post_start > 0)
509 {
510 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
511 fprintf(outfile, "^");
512 }
513
514 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
515 fprintf(outfile, " ");
516
517 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
518 pbuffer + cb->pattern_position);
519
520 fprintf(outfile, "\n");
521 first_callout = 0;
522
523 if (cb->callout_data != NULL)
524 {
525 int callout_data = *((int *)(cb->callout_data));
526 if (callout_data != 0)
527 {
528 fprintf(outfile, "Callout data = %d\n", callout_data);
529 return callout_data;
530 }
531 }
532
533 return (cb->callout_number != callout_fail_id)? 0 :
534 (++callout_count >= callout_fail_count)? 1 : 0;
535 }
536
537
538 /*************************************************
539 * Local malloc functions *
540 *************************************************/
541
542 /* Alternative malloc function, to test functionality and show the size of the
543 compiled re. */
544
545 static void *new_malloc(size_t size)
546 {
547 void *block = malloc(size);
548 gotten_store = size;
549 if (show_malloc)
550 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
551 return block;
552 }
553
554 static void new_free(void *block)
555 {
556 if (show_malloc)
557 fprintf(outfile, "free %p\n", block);
558 free(block);
559 }
560
561
562 /* For recursion malloc/free, to test stacking calls */
563
564 static void *stack_malloc(size_t size)
565 {
566 void *block = malloc(size);
567 if (show_malloc)
568 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
569 return block;
570 }
571
572 static void stack_free(void *block)
573 {
574 if (show_malloc)
575 fprintf(outfile, "stack_free %p\n", block);
576 free(block);
577 }
578
579
580 /*************************************************
581 * Call pcre_fullinfo() *
582 *************************************************/
583
584 /* Get one piece of information from the pcre_fullinfo() function */
585
586 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
587 {
588 int rc;
589 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
590 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
591 }
592
593
594
595 /*************************************************
596 * Byte flipping function *
597 *************************************************/
598
/* Reverse the byte order of a 2-byte or 4-byte quantity held in the low-order
bits of an unsigned long. Bits above the bytes being swapped are discarded.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int byte0 = value & 0xff;
  unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2, byte3;

  if (n == 2)
  {
    return (byte0 << 8) | byte1;
  }

  byte2 = (value >> 16) & 0xff;
  byte3 = (value >> 24) & 0xff;

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
608
609
610
611
612 /*************************************************
613 * Check match or recursion limit *
614 *************************************************/
615
616 static int
617 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
618 int start_offset, int options, int *use_offsets, int use_size_offsets,
619 int flag, unsigned long int *limit, int errnumber, const char *msg)
620 {
621 int count;
622 int min = 0;
623 int mid = 64;
624 int max = -1;
625
626 extra->flags |= flag;
627
628 for (;;)
629 {
630 *limit = mid;
631
632 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
633 use_offsets, use_size_offsets);
634
635 if (count == errnumber)
636 {
637 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
638 min = mid;
639 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
640 }
641
642 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
643 count == PCRE_ERROR_PARTIAL)
644 {
645 if (mid == min + 1)
646 {
647 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
648 break;
649 }
650 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
651 max = mid;
652 mid = (min + mid)/2;
653 }
654 else break; /* Some other error */
655 }
656
657 extra->flags &= ~flag;
658 return count;
659 }
660
661
662
663 /*************************************************
664 * Check newline indicator *
665 *************************************************/
666
667 /* This is used both at compile and run-time to check for <xxx> escapes, where
668 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
669 no match.
670
671 Arguments:
672 p points after the leading '<'
673 f file for error message
674
675 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
676 */
677
678 static int
679 check_newline(uschar *p, FILE *f)
680 {
681 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
682 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
683 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
684 if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
685 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
686 fprintf(f, "Unknown newline type at: <%s\n", p);
687 return 0;
688 }
689
690
691
692 /*************************************************
693 * Usage function *
694 *************************************************/
695
/* Write the command-line summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in (NODFA and NOPOSIX not
defined). */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input> [<output>]]\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n",
        stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
721
722
723
724 /*************************************************
725 * Main Program *
726 *************************************************/
727
728 /* Read lines from named file or stdin and write to named file or stdout; lines
729 consist of a regular expression, in delimiters and optionally followed by
730 options, followed by a set of test data, terminated by an empty line. */
731
732 int main(int argc, char **argv)
733 {
734 FILE *infile = stdin;
735 int options = 0;
736 int study_options = 0;
737 int op = 1;
738 int timeit = 0;
739 int timeitm = 0;
740 int showinfo = 0;
741 int showstore = 0;
742 int quiet = 0;
743 int size_offsets = 45;
744 int size_offsets_max;
745 int *offsets = NULL;
746 #if !defined NOPOSIX
747 int posix = 0;
748 #endif
749 int debug = 0;
750 int done = 0;
751 int all_use_dfa = 0;
752 int yield = 0;
753 int stack_size;
754
755 /* These vectors store, end-to-end, a list of captured substring names. Assume
756 that 1024 is plenty long enough for the few names we'll be testing. */
757
758 uschar copynames[1024];
759 uschar getnames[1024];
760
761 uschar *copynamesptr;
762 uschar *getnamesptr;
763
764 /* Get buffers from malloc() so that Electric Fence will check their misuse
765 when I am debugging. They grow automatically when very long lines are read. */
766
767 buffer = (unsigned char *)malloc(buffer_size);
768 dbuffer = (unsigned char *)malloc(buffer_size);
769 pbuffer = (unsigned char *)malloc(buffer_size);
770
771 /* The outfile variable is static so that new_malloc can use it. */
772
773 outfile = stdout;
774
775 /* The following _setmode() stuff is some Windows magic that tells its runtime
776 library to translate CRLF into a single LF character. At least, that's what
777 I've been told: never having used Windows I take this all on trust. Originally
778 it set 0x8000, but then I was advised that _O_BINARY was better. */
779
780 #if defined(_WIN32) || defined(WIN32)
781 _setmode( _fileno( stdout ), _O_BINARY );
782 #endif
783
784 /* Scan options */
785
786 while (argc > 1 && argv[op][0] == '-')
787 {
788 unsigned char *endptr;
789
790 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
791 showstore = 1;
792 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
793 else if (strcmp(argv[op], "-b") == 0) debug = 1;
794 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
795 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
796 #if !defined NODFA
797 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
798 #endif
799 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
800 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
801 *endptr == 0))
802 {
803 op++;
804 argc--;
805 }
806 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
807 {
808 int both = argv[op][2] == 0;
809 int temp;
810 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
811 *endptr == 0))
812 {
813 timeitm = temp;
814 op++;
815 argc--;
816 }
817 else timeitm = LOOPREPEAT;
818 if (both) timeit = timeitm;
819 }
820 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
821 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
822 *endptr == 0))
823 {
824 #if defined(_WIN32) || defined(WIN32)
825 printf("PCRE: -S not supported on this OS\n");
826 exit(1);
827 #else
828 int rc;
829 struct rlimit rlim;
830 getrlimit(RLIMIT_STACK, &rlim);
831 rlim.rlim_cur = stack_size * 1024 * 1024;
832 rc = setrlimit(RLIMIT_STACK, &rlim);
833 if (rc != 0)
834 {
835 printf("PCRE: setrlimit() failed with error %d\n", rc);
836 exit(1);
837 }
838 op++;
839 argc--;
840 #endif
841 }
842 #if !defined NOPOSIX
843 else if (strcmp(argv[op], "-p") == 0) posix = 1;
844 #endif
845 else if (strcmp(argv[op], "-C") == 0)
846 {
847 int rc;
848 printf("PCRE version %s\n", pcre_version());
849 printf("Compiled with\n");
850 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
851 printf(" %sUTF-8 support\n", rc? "" : "No ");
852 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
853 printf(" %sUnicode properties support\n", rc? "" : "No ");
854 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
855 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
856 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
857 (rc == -2)? "ANYCRLF" :
858 (rc == -1)? "ANY" : "???");
859 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
860 printf(" Internal link size = %d\n", rc);
861 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
862 printf(" POSIX malloc threshold = %d\n", rc);
863 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
864 printf(" Default match limit = %d\n", rc);
865 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
866 printf(" Default recursion depth limit = %d\n", rc);
867 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
868 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
869 goto EXIT;
870 }
871 else if (strcmp(argv[op], "-help") == 0 ||
872 strcmp(argv[op], "--help") == 0)
873 {
874 usage();
875 goto EXIT;
876 }
877 else
878 {
879 printf("** Unknown or malformed option %s\n", argv[op]);
880 usage();
881 yield = 1;
882 goto EXIT;
883 }
884 op++;
885 argc--;
886 }
887
888 /* Get the store for the offsets vector, and remember what it was */
889
890 size_offsets_max = size_offsets;
891 offsets = (int *)malloc(size_offsets_max * sizeof(int));
892 if (offsets == NULL)
893 {
894 printf("** Failed to get %d bytes of memory for offsets vector\n",
895 (int)(size_offsets_max * sizeof(int)));
896 yield = 1;
897 goto EXIT;
898 }
899
900 /* Sort out the input and output files */
901
902 if (argc > 1)
903 {
904 infile = fopen(argv[op], INPUT_MODE);
905 if (infile == NULL)
906 {
907 printf("** Failed to open %s\n", argv[op]);
908 yield = 1;
909 goto EXIT;
910 }
911 }
912
913 if (argc > 2)
914 {
915 outfile = fopen(argv[op+1], OUTPUT_MODE);
916 if (outfile == NULL)
917 {
918 printf("** Failed to open %s\n", argv[op+1]);
919 yield = 1;
920 goto EXIT;
921 }
922 }
923
924 /* Set alternative malloc function */
925
926 pcre_malloc = new_malloc;
927 pcre_free = new_free;
928 pcre_stack_malloc = stack_malloc;
929 pcre_stack_free = stack_free;
930
931 /* Heading line unless quiet, then prompt for first regex if stdin */
932
933 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
934
935 /* Main loop */
936
937 while (!done)
938 {
939 pcre *re = NULL;
940 pcre_extra *extra = NULL;
941
942 #if !defined NOPOSIX /* There are still compilers that require no indent */
943 regex_t preg;
944 int do_posix = 0;
945 #endif
946
947 const char *error;
948 unsigned char *p, *pp, *ppp;
949 unsigned char *to_file = NULL;
950 const unsigned char *tables = NULL;
951 unsigned long int true_size, true_study_size = 0;
952 size_t size, regex_gotten_store;
953 int do_study = 0;
954 int do_debug = debug;
955 int debug_lengths = 1;
956 int do_G = 0;
957 int do_g = 0;
958 int do_showinfo = showinfo;
959 int do_showrest = 0;
960 int do_flip = 0;
961 int erroroffset, len, delimiter, poffset;
962
963 use_utf8 = 0;
964
965 if (infile == stdin) printf(" re> ");
966 if (extend_inputline(infile, buffer) == NULL) break;
967 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
968 fflush(outfile);
969
970 p = buffer;
971 while (isspace(*p)) p++;
972 if (*p == 0) continue;
973
974 /* See if the pattern is to be loaded pre-compiled from a file. */
975
976 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
977 {
978 unsigned long int magic, get_options;
979 uschar sbuf[8];
980 FILE *f;
981
982 p++;
983 pp = p + (int)strlen((char *)p);
984 while (isspace(pp[-1])) pp--;
985 *pp = 0;
986
987 f = fopen((char *)p, "rb");
988 if (f == NULL)
989 {
990 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
991 continue;
992 }
993
994 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
995
996 true_size =
997 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
998 true_study_size =
999 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1000
1001 re = (real_pcre *)new_malloc(true_size);
1002 regex_gotten_store = gotten_store;
1003
1004 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1005
1006 magic = ((real_pcre *)re)->magic_number;
1007 if (magic != MAGIC_NUMBER)
1008 {
1009 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1010 {
1011 do_flip = 1;
1012 }
1013 else
1014 {
1015 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1016 fclose(f);
1017 continue;
1018 }
1019 }
1020
1021 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1022 do_flip? " (byte-inverted)" : "", p);
1023
1024 /* Need to know if UTF-8 for printing data strings */
1025
1026 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1027 use_utf8 = (get_options & PCRE_UTF8) != 0;
1028
1029 /* Now see if there is any following study data */
1030
1031 if (true_study_size != 0)
1032 {
1033 pcre_study_data *psd;
1034
1035 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1036 extra->flags = PCRE_EXTRA_STUDY_DATA;
1037
1038 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1039 extra->study_data = psd;
1040
1041 if (fread(psd, 1, true_study_size, f) != true_study_size)
1042 {
1043 FAIL_READ:
1044 fprintf(outfile, "Failed to read data from %s\n", p);
1045 if (extra != NULL) new_free(extra);
1046 if (re != NULL) new_free(re);
1047 fclose(f);
1048 continue;
1049 }
1050 fprintf(outfile, "Study data loaded from %s\n", p);
1051 do_study = 1; /* To get the data output if requested */
1052 }
1053 else fprintf(outfile, "No study data\n");
1054
1055 fclose(f);
1056 goto SHOW_INFO;
1057 }
1058
1059 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1060 the pattern; if it isn't complete, read more. */
1061
1062 delimiter = *p++;
1063
1064 if (isalnum(delimiter) || delimiter == '\\')
1065 {
1066 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1067 goto SKIP_DATA;
1068 }
1069
1070 pp = p;
1071 poffset = p - buffer;
1072
1073 for(;;)
1074 {
1075 while (*pp != 0)
1076 {
1077 if (*pp == '\\' && pp[1] != 0) pp++;
1078 else if (*pp == delimiter) break;
1079 pp++;
1080 }
1081 if (*pp != 0) break;
1082 if (infile == stdin) printf(" > ");
1083 if ((pp = extend_inputline(infile, pp)) == NULL)
1084 {
1085 fprintf(outfile, "** Unexpected EOF\n");
1086 done = 1;
1087 goto CONTINUE;
1088 }
1089 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1090 }
1091
1092 /* The buffer may have moved while being extended; reset the start of data
1093 pointer to the correct relative point in the buffer. */
1094
1095 p = buffer + poffset;
1096
1097 /* If the first character after the delimiter is backslash, make
1098 the pattern end with backslash. This is purely to provide a way
1099 of testing for the error message when a pattern ends with backslash. */
1100
1101 if (pp[1] == '\\') *pp++ = '\\';
1102
1103 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1104 for callouts. */
1105
1106 *pp++ = 0;
1107 strcpy((char *)pbuffer, (char *)p);
1108
1109 /* Look for options after final delimiter */
1110
1111 options = 0;
1112 study_options = 0;
1113 log_store = showstore; /* default from command line */
1114
1115 while (*pp != 0)
1116 {
1117 switch (*pp++)
1118 {
1119 case 'f': options |= PCRE_FIRSTLINE; break;
1120 case 'g': do_g = 1; break;
1121 case 'i': options |= PCRE_CASELESS; break;
1122 case 'm': options |= PCRE_MULTILINE; break;
1123 case 's': options |= PCRE_DOTALL; break;
1124 case 'x': options |= PCRE_EXTENDED; break;
1125
1126 case '+': do_showrest = 1; break;
1127 case 'A': options |= PCRE_ANCHORED; break;
1128 case 'B': do_debug = 1; break;
1129 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1130 case 'D': do_debug = do_showinfo = 1; break;
1131 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1132 case 'F': do_flip = 1; break;
1133 case 'G': do_G = 1; break;
1134 case 'I': do_showinfo = 1; break;
1135 case 'J': options |= PCRE_DUPNAMES; break;
1136 case 'M': log_store = 1; break;
1137 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1138
1139 #if !defined NOPOSIX
1140 case 'P': do_posix = 1; break;
1141 #endif
1142
1143 case 'S': do_study = 1; break;
1144 case 'U': options |= PCRE_UNGREEDY; break;
1145 case 'X': options |= PCRE_EXTRA; break;
1146 case 'Z': debug_lengths = 0; break;
1147 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1148 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1149
1150 case 'L':
1151 ppp = pp;
1152 /* The '\r' test here is so that it works on Windows. */
1153 /* The '0' test is just in case this is an unterminated line. */
1154 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1155 *ppp = 0;
1156 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1157 {
1158 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1159 goto SKIP_DATA;
1160 }
1161 locale_set = 1;
1162 tables = pcre_maketables();
1163 pp = ppp;
1164 break;
1165
1166 case '>':
1167 to_file = pp;
1168 while (*pp != 0) pp++;
1169 while (isspace(pp[-1])) pp--;
1170 *pp = 0;
1171 break;
1172
1173 case '<':
1174 {
1175 int x = check_newline(pp, outfile);
1176 if (x == 0) goto SKIP_DATA;
1177 options |= x;
1178 while (*pp++ != '>');
1179 }
1180 break;
1181
1182 case '\r': /* So that it works in Windows */
1183 case '\n':
1184 case ' ':
1185 break;
1186
1187 default:
1188 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1189 goto SKIP_DATA;
1190 }
1191 }
1192
1193 /* Handle compiling via the POSIX interface, which doesn't support the
1194 timing, showing, or debugging options, nor the ability to pass over
1195 local character tables. */
1196
1197 #if !defined NOPOSIX
1198 if (posix || do_posix)
1199 {
1200 int rc;
1201 int cflags = 0;
1202
1203 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1204 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1205 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1206 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1207 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1208
1209 rc = regcomp(&preg, (char *)p, cflags);
1210
1211 /* Compilation failed; go back for another re, skipping to blank line
1212 if non-interactive. */
1213
1214 if (rc != 0)
1215 {
1216 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1217 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1218 goto SKIP_DATA;
1219 }
1220 }
1221
1222 /* Handle compiling via the native interface */
1223
1224 else
1225 #endif /* !defined NOPOSIX */
1226
1227 {
1228 if (timeit > 0)
1229 {
1230 register int i;
1231 clock_t time_taken;
1232 clock_t start_time = clock();
1233 for (i = 0; i < timeit; i++)
1234 {
1235 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1236 if (re != NULL) free(re);
1237 }
1238 time_taken = clock() - start_time;
1239 fprintf(outfile, "Compile time %.4f milliseconds\n",
1240 (((double)time_taken * 1000.0) / (double)timeit) /
1241 (double)CLOCKS_PER_SEC);
1242 }
1243
1244 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1245
1246 /* Compilation failed; go back for another re, skipping to blank line
1247 if non-interactive. */
1248
1249 if (re == NULL)
1250 {
1251 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1252 SKIP_DATA:
1253 if (infile != stdin)
1254 {
1255 for (;;)
1256 {
1257 if (extend_inputline(infile, buffer) == NULL)
1258 {
1259 done = 1;
1260 goto CONTINUE;
1261 }
1262 len = (int)strlen((char *)buffer);
1263 while (len > 0 && isspace(buffer[len-1])) len--;
1264 if (len == 0) break;
1265 }
1266 fprintf(outfile, "\n");
1267 }
1268 goto CONTINUE;
1269 }
1270
1271 /* Compilation succeeded; print data if required. There are now two
1272 info-returning functions. The old one has a limited interface and
1273 returns only limited data. Check that it agrees with the newer one. */
1274
1275 if (log_store)
1276 fprintf(outfile, "Memory allocation (code space): %d\n",
1277 (int)(gotten_store -
1278 sizeof(real_pcre) -
1279 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1280
1281 /* Extract the size for possible writing before possibly flipping it,
1282 and remember the store that was got. */
1283
1284 true_size = ((real_pcre *)re)->size;
1285 regex_gotten_store = gotten_store;
1286
1287 /* If /S was present, study the regexp to generate additional info to
1288 help with the matching. */
1289
1290 if (do_study)
1291 {
1292 if (timeit > 0)
1293 {
1294 register int i;
1295 clock_t time_taken;
1296 clock_t start_time = clock();
1297 for (i = 0; i < timeit; i++)
1298 extra = pcre_study(re, study_options, &error);
1299 time_taken = clock() - start_time;
1300 if (extra != NULL) free(extra);
1301 fprintf(outfile, " Study time %.4f milliseconds\n",
1302 (((double)time_taken * 1000.0) / (double)timeit) /
1303 (double)CLOCKS_PER_SEC);
1304 }
1305 extra = pcre_study(re, study_options, &error);
1306 if (error != NULL)
1307 fprintf(outfile, "Failed to study: %s\n", error);
1308 else if (extra != NULL)
1309 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1310 }
1311
1312 /* If the 'F' option was present, we flip the bytes of all the integer
1313 fields in the regex data block and the study block. This is to make it
1314 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1315 compiled on a different architecture. */
1316
1317 if (do_flip)
1318 {
1319 real_pcre *rre = (real_pcre *)re;
1320 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1321 rre->size = byteflip(rre->size, sizeof(rre->size));
1322 rre->options = byteflip(rre->options, sizeof(rre->options));
1323 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1324 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1325 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1326 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1327 rre->name_table_offset = byteflip(rre->name_table_offset,
1328 sizeof(rre->name_table_offset));
1329 rre->name_entry_size = byteflip(rre->name_entry_size,
1330 sizeof(rre->name_entry_size));
1331 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1332
1333 if (extra != NULL)
1334 {
1335 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1336 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1337 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1338 }
1339 }
1340
1341 /* Extract information from the compiled data if required */
1342
1343 SHOW_INFO:
1344
1345 if (do_debug)
1346 {
1347 fprintf(outfile, "------------------------------------------------------------------\n");
1348 pcre_printint(re, outfile, debug_lengths);
1349 }
1350
1351 if (do_showinfo)
1352 {
1353 unsigned long int get_options, all_options;
1354 #if !defined NOINFOCHECK
1355 int old_first_char, old_options, old_count;
1356 #endif
1357 int count, backrefmax, first_char, need_char, okpartial, jchanged;
1358 int nameentrysize, namecount;
1359 const uschar *nametable;
1360
1361 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1362 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1363 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1364 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1365 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1366 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1367 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1368 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1369 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1370 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1371 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1372
1373 #if !defined NOINFOCHECK
1374 old_count = pcre_info(re, &old_options, &old_first_char);
1375 if (count < 0) fprintf(outfile,
1376 "Error %d from pcre_info()\n", count);
1377 else
1378 {
1379 if (old_count != count) fprintf(outfile,
1380 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1381 old_count);
1382
1383 if (old_first_char != first_char) fprintf(outfile,
1384 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1385 first_char, old_first_char);
1386
1387 if (old_options != (int)get_options) fprintf(outfile,
1388 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1389 get_options, old_options);
1390 }
1391 #endif
1392
1393 if (size != regex_gotten_store) fprintf(outfile,
1394 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1395 (int)size, (int)regex_gotten_store);
1396
1397 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1398 if (backrefmax > 0)
1399 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1400
1401 if (namecount > 0)
1402 {
1403 fprintf(outfile, "Named capturing subpatterns:\n");
1404 while (namecount-- > 0)
1405 {
1406 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1407 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1408 GET2(nametable, 0));
1409 nametable += nameentrysize;
1410 }
1411 }
1412
1413 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1414
1415 all_options = ((real_pcre *)re)->options;
1416 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1417
1418 if (get_options == 0) fprintf(outfile, "No options\n");
1419 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1420 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1421 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1422 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1423 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1424 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1425 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1426 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1427 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1428 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1429 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1430 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1431 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1432 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1433
1434 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1435
1436 switch (get_options & PCRE_NEWLINE_BITS)
1437 {
1438 case PCRE_NEWLINE_CR:
1439 fprintf(outfile, "Forced newline sequence: CR\n");
1440 break;
1441
1442 case PCRE_NEWLINE_LF:
1443 fprintf(outfile, "Forced newline sequence: LF\n");
1444 break;
1445
1446 case PCRE_NEWLINE_CRLF:
1447 fprintf(outfile, "Forced newline sequence: CRLF\n");
1448 break;
1449
1450 case PCRE_NEWLINE_ANYCRLF:
1451 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1452 break;
1453
1454 case PCRE_NEWLINE_ANY:
1455 fprintf(outfile, "Forced newline sequence: ANY\n");
1456 break;
1457
1458 default:
1459 break;
1460 }
1461
1462 if (first_char == -1)
1463 {
1464 fprintf(outfile, "First char at start or follows newline\n");
1465 }
1466 else if (first_char < 0)
1467 {
1468 fprintf(outfile, "No first char\n");
1469 }
1470 else
1471 {
1472 int ch = first_char & 255;
1473 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1474 "" : " (caseless)";
1475 if (PRINTHEX(ch))
1476 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1477 else
1478 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1479 }
1480
1481 if (need_char < 0)
1482 {
1483 fprintf(outfile, "No need char\n");
1484 }
1485 else
1486 {
1487 int ch = need_char & 255;
1488 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1489 "" : " (caseless)";
1490 if (PRINTHEX(ch))
1491 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1492 else
1493 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1494 }
1495
1496 /* Don't output study size; at present it is in any case a fixed
1497 value, but it varies, depending on the computer architecture, and
1498 so messes up the test suite. (And with the /F option, it might be
1499 flipped.) */
1500
1501 if (do_study)
1502 {
1503 if (extra == NULL)
1504 fprintf(outfile, "Study returned NULL\n");
1505 else
1506 {
1507 uschar *start_bits = NULL;
1508 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1509
1510 if (start_bits == NULL)
1511 fprintf(outfile, "No starting byte set\n");
1512 else
1513 {
1514 int i;
1515 int c = 24;
1516 fprintf(outfile, "Starting byte set: ");
1517 for (i = 0; i < 256; i++)
1518 {
1519 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1520 {
1521 if (c > 75)
1522 {
1523 fprintf(outfile, "\n ");
1524 c = 2;
1525 }
1526 if (PRINTHEX(i) && i != ' ')
1527 {
1528 fprintf(outfile, "%c ", i);
1529 c += 2;
1530 }
1531 else
1532 {
1533 fprintf(outfile, "\\x%02x ", i);
1534 c += 5;
1535 }
1536 }
1537 }
1538 fprintf(outfile, "\n");
1539 }
1540 }
1541 }
1542 }
1543
1544 /* If the '>' option was present, we write out the regex to a file, and
1545 that is all. The first 8 bytes of the file are the regex length and then
1546 the study length, in big-endian order. */
1547
1548 if (to_file != NULL)
1549 {
1550 FILE *f = fopen((char *)to_file, "wb");
1551 if (f == NULL)
1552 {
1553 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1554 }
1555 else
1556 {
1557 uschar sbuf[8];
1558 sbuf[0] = (true_size >> 24) & 255;
1559 sbuf[1] = (true_size >> 16) & 255;
1560 sbuf[2] = (true_size >> 8) & 255;
1561 sbuf[3] = (true_size) & 255;
1562
1563 sbuf[4] = (true_study_size >> 24) & 255;
1564 sbuf[5] = (true_study_size >> 16) & 255;
1565 sbuf[6] = (true_study_size >> 8) & 255;
1566 sbuf[7] = (true_study_size) & 255;
1567
1568 if (fwrite(sbuf, 1, 8, f) < 8 ||
1569 fwrite(re, 1, true_size, f) < true_size)
1570 {
1571 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1572 }
1573 else
1574 {
1575 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1576 if (extra != NULL)
1577 {
1578 if (fwrite(extra->study_data, 1, true_study_size, f) <
1579 true_study_size)
1580 {
1581 fprintf(outfile, "Write error on %s: %s\n", to_file,
1582 strerror(errno));
1583 }
1584 else fprintf(outfile, "Study data written to %s\n", to_file);
1585
1586 }
1587 }
1588 fclose(f);
1589 }
1590
1591 new_free(re);
1592 if (extra != NULL) new_free(extra);
1593 if (tables != NULL) new_free((void *)tables);
1594 continue; /* With next regex */
1595 }
1596 } /* End of non-POSIX compile */
1597
1598 /* Read data lines and test them */
1599
1600 for (;;)
1601 {
1602 uschar *q;
1603 uschar *bptr;
1604 int *use_offsets = offsets;
1605 int use_size_offsets = size_offsets;
1606 int callout_data = 0;
1607 int callout_data_set = 0;
1608 int count, c;
1609 int copystrings = 0;
1610 int find_match_limit = 0;
1611 int getstrings = 0;
1612 int getlist = 0;
1613 int gmatched = 0;
1614 int start_offset = 0;
1615 int g_notempty = 0;
1616 int use_dfa = 0;
1617
1618 options = 0;
1619
1620 *copynames = 0;
1621 *getnames = 0;
1622
1623 copynamesptr = copynames;
1624 getnamesptr = getnames;
1625
1626 pcre_callout = callout;
1627 first_callout = 1;
1628 callout_extra = 0;
1629 callout_count = 0;
1630 callout_fail_count = 999999;
1631 callout_fail_id = -1;
1632 show_malloc = 0;
1633
1634 if (extra != NULL) extra->flags &=
1635 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1636
1637 len = 0;
1638 for (;;)
1639 {
1640 if (infile == stdin) printf("data> ");
1641 if (extend_inputline(infile, buffer + len) == NULL)
1642 {
1643 if (len > 0) break;
1644 done = 1;
1645 goto CONTINUE;
1646 }
1647 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1648 len = (int)strlen((char *)buffer);
1649 if (buffer[len-1] == '\n') break;
1650 }
1651
1652 while (len > 0 && isspace(buffer[len-1])) len--;
1653 buffer[len] = 0;
1654 if (len == 0) break;
1655
1656 p = buffer;
1657 while (isspace(*p)) p++;
1658
1659 bptr = q = dbuffer;
1660 while ((c = *p++) != 0)
1661 {
1662 int i = 0;
1663 int n = 0;
1664
1665 if (c == '\\') switch ((c = *p++))
1666 {
1667 case 'a': c = 7; break;
1668 case 'b': c = '\b'; break;
1669 case 'e': c = 27; break;
1670 case 'f': c = '\f'; break;
1671 case 'n': c = '\n'; break;
1672 case 'r': c = '\r'; break;
1673 case 't': c = '\t'; break;
1674 case 'v': c = '\v'; break;
1675
1676 case '0': case '1': case '2': case '3':
1677 case '4': case '5': case '6': case '7':
1678 c -= '0';
1679 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1680 c = c * 8 + *p++ - '0';
1681
1682 #if !defined NOUTF8
1683 if (use_utf8 && c > 255)
1684 {
1685 unsigned char buff8[8];
1686 int ii, utn;
1687 utn = ord2utf8(c, buff8);
1688 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1689 c = buff8[ii]; /* Last byte */
1690 }
1691 #endif
1692 break;
1693
1694 case 'x':
1695
1696 /* Handle \x{..} specially - new Perl thing for utf8 */
1697
1698 #if !defined NOUTF8
1699 if (*p == '{')
1700 {
1701 unsigned char *pt = p;
1702 c = 0;
1703 while (isxdigit(*(++pt)))
1704 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1705 if (*pt == '}')
1706 {
1707 unsigned char buff8[8];
1708 int ii, utn;
1709 utn = ord2utf8(c, buff8);
1710 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1711 c = buff8[ii]; /* Last byte */
1712 p = pt + 1;
1713 break;
1714 }
1715 /* Not correct form; fall through */
1716 }
1717 #endif
1718
1719 /* Ordinary \x */
1720
1721 c = 0;
1722 while (i++ < 2 && isxdigit(*p))
1723 {
1724 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1725 p++;
1726 }
1727 break;
1728
1729 case 0: /* \ followed by EOF allows for an empty line */
1730 p--;
1731 continue;
1732
1733 case '>':
1734 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1735 continue;
1736
1737 case 'A': /* Option setting */
1738 options |= PCRE_ANCHORED;
1739 continue;
1740
1741 case 'B':
1742 options |= PCRE_NOTBOL;
1743 continue;
1744
1745 case 'C':
1746 if (isdigit(*p)) /* Set copy string */
1747 {
1748 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1749 copystrings |= 1 << n;
1750 }
1751 else if (isalnum(*p))
1752 {
1753 uschar *npp = copynamesptr;
1754 while (isalnum(*p)) *npp++ = *p++;
1755 *npp++ = 0;
1756 *npp = 0;
1757 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1758 if (n < 0)
1759 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1760 copynamesptr = npp;
1761 }
1762 else if (*p == '+')
1763 {
1764 callout_extra = 1;
1765 p++;
1766 }
1767 else if (*p == '-')
1768 {
1769 pcre_callout = NULL;
1770 p++;
1771 }
1772 else if (*p == '!')
1773 {
1774 callout_fail_id = 0;
1775 p++;
1776 while(isdigit(*p))
1777 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1778 callout_fail_count = 0;
1779 if (*p == '!')
1780 {
1781 p++;
1782 while(isdigit(*p))
1783 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1784 }
1785 }
1786 else if (*p == '*')
1787 {
1788 int sign = 1;
1789 callout_data = 0;
1790 if (*(++p) == '-') { sign = -1; p++; }
1791 while(isdigit(*p))
1792 callout_data = callout_data * 10 + *p++ - '0';
1793 callout_data *= sign;
1794 callout_data_set = 1;
1795 }
1796 continue;
1797
1798 #if !defined NODFA
1799 case 'D':
1800 #if !defined NOPOSIX
1801 if (posix || do_posix)
1802 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1803 else
1804 #endif
1805 use_dfa = 1;
1806 continue;
1807
1808 case 'F':
1809 options |= PCRE_DFA_SHORTEST;
1810 continue;
1811 #endif
1812
1813 case 'G':
1814 if (isdigit(*p))
1815 {
1816 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1817 getstrings |= 1 << n;
1818 }
1819 else if (isalnum(*p))
1820 {
1821 uschar *npp = getnamesptr;
1822 while (isalnum(*p)) *npp++ = *p++;
1823 *npp++ = 0;
1824 *npp = 0;
1825 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1826 if (n < 0)
1827 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1828 getnamesptr = npp;
1829 }
1830 continue;
1831
1832 case 'L':
1833 getlist = 1;
1834 continue;
1835
1836 case 'M':
1837 find_match_limit = 1;
1838 continue;
1839
1840 case 'N':
1841 options |= PCRE_NOTEMPTY;
1842 continue;
1843
1844 case 'O':
1845 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1846 if (n > size_offsets_max)
1847 {
1848 size_offsets_max = n;
1849 free(offsets);
1850 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1851 if (offsets == NULL)
1852 {
1853 printf("** Failed to get %d bytes of memory for offsets vector\n",
1854 (int)(size_offsets_max * sizeof(int)));
1855 yield = 1;
1856 goto EXIT;
1857 }
1858 }
1859 use_size_offsets = n;
1860 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1861 continue;
1862
1863 case 'P':
1864 options |= PCRE_PARTIAL;
1865 continue;
1866
1867 case 'Q':
1868 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1869 if (extra == NULL)
1870 {
1871 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1872 extra->flags = 0;
1873 }
1874 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1875 extra->match_limit_recursion = n;
1876 continue;
1877
1878 case 'q':
1879 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1880 if (extra == NULL)
1881 {
1882 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1883 extra->flags = 0;
1884 }
1885 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1886 extra->match_limit = n;
1887 continue;
1888
1889 #if !defined NODFA
1890 case 'R':
1891 options |= PCRE_DFA_RESTART;
1892 continue;
1893 #endif
1894
1895 case 'S':
1896 show_malloc = 1;
1897 continue;
1898
1899 case 'Z':
1900 options |= PCRE_NOTEOL;
1901 continue;
1902
1903 case '?':
1904 options |= PCRE_NO_UTF8_CHECK;
1905 continue;
1906
1907 case '<':
1908 {
1909 int x = check_newline(p, outfile);
1910 if (x == 0) goto NEXT_DATA;
1911 options |= x;
1912 while (*p++ != '>');
1913 }
1914 continue;
1915 }
1916 *q++ = c;
1917 }
1918 *q = 0;
1919 len = q - dbuffer;
1920
1921 if ((all_use_dfa || use_dfa) && find_match_limit)
1922 {
1923 printf("**Match limit not relevant for DFA matching: ignored\n");
1924 find_match_limit = 0;
1925 }
1926
1927 /* Handle matching via the POSIX interface, which does not
1928 support timing or playing with the match limit or callout data. */
1929
1930 #if !defined NOPOSIX
1931 if (posix || do_posix)
1932 {
1933 int rc;
1934 int eflags = 0;
1935 regmatch_t *pmatch = NULL;
1936 if (use_size_offsets > 0)
1937 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1938 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1939 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1940
1941 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1942
1943 if (rc != 0)
1944 {
1945 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1946 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1947 }
1948 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1949 != 0)
1950 {
1951 fprintf(outfile, "Matched with REG_NOSUB\n");
1952 }
1953 else
1954 {
1955 size_t i;
1956 for (i = 0; i < (size_t)use_size_offsets; i++)
1957 {
1958 if (pmatch[i].rm_so >= 0)
1959 {
1960 fprintf(outfile, "%2d: ", (int)i);
1961 (void)pchars(dbuffer + pmatch[i].rm_so,
1962 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1963 fprintf(outfile, "\n");
1964 if (i == 0 && do_showrest)
1965 {
1966 fprintf(outfile, " 0+ ");
1967 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1968 outfile);
1969 fprintf(outfile, "\n");
1970 }
1971 }
1972 }
1973 }
1974 free(pmatch);
1975 }
1976
1977 /* Handle matching via the native interface - repeats for /g and /G */
1978
1979 else
1980 #endif /* !defined NOPOSIX */
1981
1982 for (;; gmatched++) /* Loop for /g or /G */
1983 {
1984 if (timeitm > 0)
1985 {
1986 register int i;
1987 clock_t time_taken;
1988 clock_t start_time = clock();
1989
1990 #if !defined NODFA
1991 if (all_use_dfa || use_dfa)
1992 {
1993 int workspace[1000];
1994 for (i = 0; i < timeitm; i++)
1995 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1996 options | g_notempty, use_offsets, use_size_offsets, workspace,
1997 sizeof(workspace)/sizeof(int));
1998 }
1999 else
2000 #endif
2001
2002 for (i = 0; i < timeitm; i++)
2003 count = pcre_exec(re, extra, (char *)bptr, len,
2004 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2005
2006 time_taken = clock() - start_time;
2007 fprintf(outfile, "Execute time %.4f milliseconds\n",
2008 (((double)time_taken * 1000.0) / (double)timeitm) /
2009 (double)CLOCKS_PER_SEC);
2010 }
2011
2012 /* If find_match_limit is set, we want to do repeated matches with
2013 varying limits in order to find the minimum value for the match limit and
2014 for the recursion limit. */
2015
2016 if (find_match_limit)
2017 {
2018 if (extra == NULL)
2019 {
2020 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2021 extra->flags = 0;
2022 }
2023
2024 (void)check_match_limit(re, extra, bptr, len, start_offset,
2025 options|g_notempty, use_offsets, use_size_offsets,
2026 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2027 PCRE_ERROR_MATCHLIMIT, "match()");
2028
2029 count = check_match_limit(re, extra, bptr, len, start_offset,
2030 options|g_notempty, use_offsets, use_size_offsets,
2031 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2032 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2033 }
2034
2035 /* If callout_data is set, use the interface with additional data */
2036
2037 else if (callout_data_set)
2038 {
2039 if (extra == NULL)
2040 {
2041 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2042 extra->flags = 0;
2043 }
2044 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2045 extra->callout_data = &callout_data;
2046 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2047 options | g_notempty, use_offsets, use_size_offsets);
2048 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2049 }
2050
2051 /* The normal case is just to do the match once, with the default
2052 value of match_limit. */
2053
2054 #if !defined NODFA
2055 else if (all_use_dfa || use_dfa)
2056 {
2057 int workspace[1000];
2058 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2059 options | g_notempty, use_offsets, use_size_offsets, workspace,
2060 sizeof(workspace)/sizeof(int));
2061 if (count == 0)
2062 {
2063 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2064 count = use_size_offsets/2;
2065 }
2066 }
2067 #endif
2068
2069 else
2070 {
2071 count = pcre_exec(re, extra, (char *)bptr, len,
2072 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2073 if (count == 0)
2074 {
2075 fprintf(outfile, "Matched, but too many substrings\n");
2076 count = use_size_offsets/3;
2077 }
2078 }
2079
2080 /* Matched */
2081
2082 if (count >= 0)
2083 {
2084 int i, maxcount;
2085
2086 #if !defined NODFA
2087 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2088 #endif
2089 maxcount = use_size_offsets/3;
2090
2091 /* This is a check against a lunatic return value. */
2092
2093 if (count > maxcount)
2094 {
2095 fprintf(outfile,
2096 "** PCRE error: returned count %d is too big for offset size %d\n",
2097 count, use_size_offsets);
2098 count = use_size_offsets/3;
2099 if (do_g || do_G)
2100 {
2101 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2102 do_g = do_G = FALSE; /* Break g/G loop */
2103 }
2104 }
2105
2106 for (i = 0; i < count * 2; i += 2)
2107 {
2108 if (use_offsets[i] < 0)
2109 fprintf(outfile, "%2d: <unset>\n", i/2);
2110 else
2111 {
2112 fprintf(outfile, "%2d: ", i/2);
2113 (void)pchars(bptr + use_offsets[i],
2114 use_offsets[i+1] - use_offsets[i], outfile);
2115 fprintf(outfile, "\n");
2116 if (i == 0)
2117 {
2118 if (do_showrest)
2119 {
2120 fprintf(outfile, " 0+ ");
2121 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2122 outfile);
2123 fprintf(outfile, "\n");
2124 }
2125 }
2126 }
2127 }
2128
2129 for (i = 0; i < 32; i++)
2130 {
2131 if ((copystrings & (1 << i)) != 0)
2132 {
2133 char copybuffer[256];
2134 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2135 i, copybuffer, sizeof(copybuffer));
2136 if (rc < 0)
2137 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2138 else
2139 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2140 }
2141 }
2142
2143 for (copynamesptr = copynames;
2144 *copynamesptr != 0;
2145 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2146 {
2147 char copybuffer[256];
2148 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2149 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2150 if (rc < 0)
2151 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2152 else
2153 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2154 }
2155
2156 for (i = 0; i < 32; i++)
2157 {
2158 if ((getstrings & (1 << i)) != 0)
2159 {
2160 const char *substring;
2161 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2162 i, &substring);
2163 if (rc < 0)
2164 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2165 else
2166 {
2167 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2168 pcre_free_substring(substring);
2169 }
2170 }
2171 }
2172
2173 for (getnamesptr = getnames;
2174 *getnamesptr != 0;
2175 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2176 {
2177 const char *substring;
2178 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2179 count, (char *)getnamesptr, &substring);
2180 if (rc < 0)
2181 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2182 else
2183 {
2184 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2185 pcre_free_substring(substring);
2186 }
2187 }
2188
2189 if (getlist)
2190 {
2191 const char **stringlist;
2192 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2193 &stringlist);
2194 if (rc < 0)
2195 fprintf(outfile, "get substring list failed %d\n", rc);
2196 else
2197 {
2198 for (i = 0; i < count; i++)
2199 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2200 if (stringlist[i] != NULL)
2201 fprintf(outfile, "string list not terminated by NULL\n");
2202 /* free((void *)stringlist); */
2203 pcre_free_substring_list(stringlist);
2204 }
2205 }
2206 }
2207
2208 /* There was a partial match */
2209
2210 else if (count == PCRE_ERROR_PARTIAL)
2211 {
2212 fprintf(outfile, "Partial match");
2213 #if !defined NODFA
2214 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2215 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2216 bptr + use_offsets[0]);
2217 #endif
2218 fprintf(outfile, "\n");
2219 break; /* Out of the /g loop */
2220 }
2221
2222 /* Failed to match. If this is a /g or /G loop and we previously set
2223 g_notempty after a null match, this is not necessarily the end. We want
2224 to advance the start offset, and continue. We won't be at the end of the
2225 string - that was checked before setting g_notempty.
2226
2227 Complication arises in the case when the newline option is "any" or
2228 "anycrlf". If the previous match was at the end of a line terminated by
2229 CRLF, an advance of one character just passes the \r, whereas we should
2230 prefer the longer newline sequence, as does the code in pcre_exec().
2231 Fudge the offset value to achieve this.
2232
2233 Otherwise, in the case of UTF-8 matching, the advance must be one
2234 character, not one byte. */
2235
2236 else
2237 {
2238 if (g_notempty != 0)
2239 {
2240 int onechar = 1;
2241 unsigned int obits = ((real_pcre *)re)->options;
2242 use_offsets[0] = start_offset;
2243 if ((obits & PCRE_NEWLINE_BITS) == 0)
2244 {
2245 int d;
2246 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2247 obits = (d == '\r')? PCRE_NEWLINE_CR :
2248 (d == '\n')? PCRE_NEWLINE_LF :
2249 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2250 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2251 (d == -1)? PCRE_NEWLINE_ANY : 0;
2252 }
2253 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2254 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2255 &&
2256 start_offset < len - 1 &&
2257 bptr[start_offset] == '\r' &&
2258 bptr[start_offset+1] == '\n')
2259 onechar++;
2260 else if (use_utf8)
2261 {
2262 while (start_offset + onechar < len)
2263 {
2264 int tb = bptr[start_offset+onechar];
2265 if (tb <= 127) break;
2266 tb &= 0xc0;
2267 if (tb != 0 && tb != 0xc0) onechar++;
2268 }
2269 }
2270 use_offsets[1] = start_offset + onechar;
2271 }
2272 else
2273 {
2274 if (count == PCRE_ERROR_NOMATCH)
2275 {
2276 if (gmatched == 0) fprintf(outfile, "No match\n");
2277 }
2278 else fprintf(outfile, "Error %d\n", count);
2279 break; /* Out of the /g loop */
2280 }
2281 }
2282
2283 /* If not /g or /G we are done */
2284
2285 if (!do_g && !do_G) break;
2286
2287 /* If we have matched an empty string, first check to see if we are at
2288 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2289 what Perl's /g option does. This turns out to be rather cunning. First
2290 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2291 same point. If this fails (picked up above) we advance to the next
2292 character. */
2293
2294 g_notempty = 0;
2295
2296 if (use_offsets[0] == use_offsets[1])
2297 {
2298 if (use_offsets[0] == len) break;
2299 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2300 }
2301
2302 /* For /g, update the start offset, leaving the rest alone */
2303
2304 if (do_g) start_offset = use_offsets[1];
2305
2306 /* For /G, update the pointer and length */
2307
2308 else
2309 {
2310 bptr += use_offsets[1];
2311 len -= use_offsets[1];
2312 }
2313 } /* End of loop for /g and /G */
2314
2315 NEXT_DATA: continue;
2316 } /* End of loop for data lines */
2317
2318 CONTINUE:
2319
2320 #if !defined NOPOSIX
2321 if (posix || do_posix) regfree(&preg);
2322 #endif
2323
2324 if (re != NULL) new_free(re);
2325 if (extra != NULL) new_free(extra);
2326 if (tables != NULL)
2327 {
2328 new_free((void *)tables);
2329 setlocale(LC_CTYPE, "C");
2330 locale_set = 0;
2331 }
2332 }
2333
2334 if (infile == stdin) fprintf(outfile, "\n");
2335
2336 EXIT:
2337
2338 if (infile != NULL && infile != stdin) fclose(infile);
2339 if (outfile != NULL && outfile != stdout) fclose(outfile);
2340
2341 free(buffer);
2342 free(dbuffer);
2343 free(pbuffer);
2344 free(offsets);
2345
2346 return yield;
2347 }
2348
2349 /* End of pcretest.c */