Install PCRE 6.2.
[exim.git] / src / src / pcre / pcretest.c
CommitLineData
92e772ff 1/* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.3 2005/08/08 10:22:14 ph10 Exp $ */
8ac170f3 2
c86f6258
PH
3/*************************************************
4* PCRE testing program *
5*************************************************/
6
7/* This program was hacked up as a tester for PCRE. I really should have
8written it more tidily in the first place. Will I ever learn? It has grown and
8ac170f3 9been extended and consequently is now rather, er, *very* untidy in places.
c86f6258
PH
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
48
c86f6258 49#define PCRE_SPY /* For Win32 build, import data, not export */
8ac170f3
PH
50
51/* We need the internal info for displaying the results of pcre_study() and
52other internal data; pcretest also uses some of the fixed tables, and generally
53has "inside information" compared to a program that strictly follows the PCRE
54API. */
55
56#include "pcre_internal.h"
57
c86f6258
PH
58
59/* It is possible to compile this test program without including support for
60testing the POSIX interface, though this is not available via the standard
61Makefile. */
62
63#if !defined NOPOSIX
64#include "pcreposix.h"
65#endif
66
92e772ff
PH
67/* It is also possible, for the benefit of the version imported into Exim, to
68build pcretest without support for UTF8 (define NOUTF8), without the interface
8ac170f3
PH
69to the DFA matcher (NODFA), and without the doublecheck of the old "info"
70function (define NOINFOCHECK). */
71
72
c86f6258
PH
73#ifndef CLOCKS_PER_SEC
74#ifdef CLK_TCK
75#define CLOCKS_PER_SEC CLK_TCK
76#else
77#define CLOCKS_PER_SEC 100
78#endif
79#endif
80
81#define LOOPREPEAT 500000
82
83#define BUFFER_SIZE 30000
84#define PBUFFER_SIZE BUFFER_SIZE
85#define DBUFFER_SIZE BUFFER_SIZE
86
87
88static FILE *outfile;            /* where results go: main() sets it to stdout or an opened output file; also used by callout() and the malloc wrappers */
89static int log_store = 0;        /* per-pattern flag set in main() from -s/-m or /M; when set, the code-space size is printed */
90static int callout_count;        /* incremented by callout() each time the callout number equals callout_fail_id */
91static int callout_extra;        /* when non-zero, callout() also lists the captured substrings */
92static int callout_fail_count;   /* callout() returns 1 once callout_count reaches this value */
93static int callout_fail_id;      /* the callout number that callout() counts towards failure */
94static int first_callout;        /* non-zero makes callout() re-print the subject; callout() clears it on every call */
95static int show_malloc;          /* when non-zero, the malloc/free wrappers trace each call to outfile */
96static int use_utf8;             /* when non-zero, pchars() decodes its data as UTF-8 */
97static size_t gotten_store;      /* size passed to the most recent new_malloc() call */
98
99static uschar *pbuffer = NULL;   /* copy of the current pattern, made in main(); callout() prints the next item from it */
100
101
c86f6258
PH
102
103/*************************************************
104* Read number from string *
105*************************************************/
106
107/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
108around with conditional compilation, just do the job by hand. It is only used
109for unpicking the -o argument, so just keep it simple.
110
111Arguments:
112 str string to be converted
113 endptr where to put the end pointer
114
115Returns: the converted value, as an int
116*/
117
/* Convert a run of decimal digits, optionally preceded by white space, into
an int. The scan stops at the first character that is not a digit, or at the
first digit that would take the value beyond INT_MAX. In both cases *endptr is
left pointing at the character that stopped the scan, so a caller that insists
on *endptr being the terminating zero rejects an over-large number instead of
using a wrapped (and formally undefined) value.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer

Returns:     the converted value; never negative
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (isspace(*str)) str++;    /* isspace(0) is false, so this stops at the end */

while (isdigit(*str))
  {
  int digit = *str - '0';
  if (result > (INT_MAX - digit) / 10) break;   /* next step would overflow */
  result = result * 10 + digit;
  str++;
  }

*endptr = str;
return(result);
}
127
128
129
c86f6258
PH
130
131/*************************************************
132* Convert UTF-8 string to value *
133*************************************************/
134
135/* This function takes one or more bytes that represents a UTF-8 character,
136and returns the value of the character.
137
138Argument:
139 buffer a pointer to the byte vector
140 vptr a pointer to an int to receive the value
141
142Returns: > 0 => the number of bytes consumed
143 -6 to 0 => malformed UTF-8 character at offset = (-return)
144*/
145
8ac170f3
PH
146#if !defined NOUTF8
147
c86f6258
PH
148static int
149utf82ord(unsigned char *buffer, int *vptr)
150{
151int c = *buffer++;
152int d = c;
153int i, j, s;
154
155for (i = -1; i < 6; i++) /* i is number of additional bytes */
156 {
157 if ((d & 0x80) == 0) break;
158 d <<= 1;
159 }
160
161if (i == -1) { *vptr = c; return 1; } /* ascii character */
162if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
163
164/* i now has a value in the range 1-5 */
165
166s = 6*i;
8ac170f3 167d = (c & _pcre_utf8_table3[i]) << s;
c86f6258
PH
168
169for (j = 0; j < i; j++)
170 {
171 c = *buffer++;
172 if ((c & 0xc0) != 0x80) return -(j+1);
173 s -= 6;
174 d |= (c & 0x3f) << s;
175 }
176
177/* Check that encoding was the correct unique one */
178
8ac170f3
PH
179for (j = 0; j < _pcre_utf8_table1_size; j++)
180 if (d <= _pcre_utf8_table1[j]) break;
c86f6258
PH
181if (j != i) return -(i+1);
182
183/* Valid value */
184
185*vptr = d;
186return i+1;
187}
188
8ac170f3
PH
189#endif
190
c86f6258
PH
191
192
193/*************************************************
194* Print character string *
195*************************************************/
196
197/* Character string printing function. Must handle UTF-8 strings in utf8
198mode. Yields number of characters printed. If handed a NULL file, just counts
199chars without printing. */
200
201static int pchars(unsigned char *p, int length, FILE *f)
202{
203int c;
204int yield = 0;
205
206while (length-- > 0)
207 {
92e772ff 208#if !defined NOUTF8
c86f6258
PH
209 if (use_utf8)
210 {
211 int rc = utf82ord(p, &c);
212
213 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
214 {
215 length -= rc - 1;
216 p += rc;
217 if (c < 256 && isprint(c))
218 {
219 if (f != NULL) fprintf(f, "%c", c);
220 yield++;
221 }
222 else
223 {
224 int n;
225 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
226 yield += n;
227 }
228 continue;
229 }
230 }
8ac170f3 231#endif
c86f6258
PH
232
233 /* Not UTF-8, or malformed UTF-8 */
234
235 if (isprint(c = *(p++)))
236 {
237 if (f != NULL) fprintf(f, "%c", c);
238 yield++;
239 }
240 else
241 {
242 if (f != NULL) fprintf(f, "\\x%02x", c);
243 yield += 4;
244 }
245 }
246
247return yield;
248}
249
250
251
252/*************************************************
253* Callout function *
254*************************************************/
255
256/* Called from PCRE as a result of the (?C) item. We print out where we are in
257the match. Yield zero unless more callouts than the fail count, or the callout
258data is not zero. */
259
260static int callout(pcre_callout_block *cb)
261{
262FILE *f = (first_callout | callout_extra)? outfile : NULL;
263int i, pre_start, post_start, subject_length;
264
265if (callout_extra)
266 {
267 fprintf(f, "Callout %d: last capture = %d\n",
268 cb->callout_number, cb->capture_last);
269
270 for (i = 0; i < cb->capture_top * 2; i += 2)
271 {
272 if (cb->offset_vector[i] < 0)
273 fprintf(f, "%2d: <unset>\n", i/2);
274 else
275 {
276 fprintf(f, "%2d: ", i/2);
277 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
278 cb->offset_vector[i+1] - cb->offset_vector[i], f);
279 fprintf(f, "\n");
280 }
281 }
282 }
283
284/* Re-print the subject in canonical form, the first time or if giving full
285datails. On subsequent calls in the same match, we use pchars just to find the
286printed lengths of the substrings. */
287
288if (f != NULL) fprintf(f, "--->");
289
290pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
291post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
292 cb->current_position - cb->start_match, f);
293
294subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
295
296(void)pchars((unsigned char *)(cb->subject + cb->current_position),
297 cb->subject_length - cb->current_position, f);
298
299if (f != NULL) fprintf(f, "\n");
300
301/* Always print appropriate indicators, with callout number if not already
302shown. For automatic callouts, show the pattern offset. */
303
304if (cb->callout_number == 255)
305 {
306 fprintf(outfile, "%+3d ", cb->pattern_position);
307 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
308 }
309else
310 {
311 if (callout_extra) fprintf(outfile, " ");
312 else fprintf(outfile, "%3d ", cb->callout_number);
313 }
314
315for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
316fprintf(outfile, "^");
317
318if (post_start > 0)
319 {
320 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
321 fprintf(outfile, "^");
322 }
323
324for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
325 fprintf(outfile, " ");
326
327fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
328 pbuffer + cb->pattern_position);
329
330fprintf(outfile, "\n");
331first_callout = 0;
332
333if (cb->callout_data != NULL)
334 {
335 int callout_data = *((int *)(cb->callout_data));
336 if (callout_data != 0)
337 {
338 fprintf(outfile, "Callout data = %d\n", callout_data);
339 return callout_data;
340 }
341 }
342
343return (cb->callout_number != callout_fail_id)? 0 :
344 (++callout_count >= callout_fail_count)? 1 : 0;
345}
346
347
348/*************************************************
349* Local malloc functions *
350*************************************************/
351
352/* Alternative malloc function, to test functionality and show the size of the
353compiled re. */
354
355static void *new_malloc(size_t size)
356{
357void *block = malloc(size);
358gotten_store = size;
359if (show_malloc)
360 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
361return block;
362}
363
364static void new_free(void *block)
365{
366if (show_malloc)
367 fprintf(outfile, "free %p\n", block);
368free(block);
369}
370
371
372/* For recursion malloc/free, to test stacking calls */
373
374static void *stack_malloc(size_t size)
375{
376void *block = malloc(size);
377if (show_malloc)
378 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
379return block;
380}
381
382static void stack_free(void *block)
383{
384if (show_malloc)
385 fprintf(outfile, "stack_free %p\n", block);
386free(block);
387}
388
389
390/*************************************************
391* Call pcre_fullinfo() *
392*************************************************/
393
394/* Get one piece of information from the pcre_fullinfo() function */
395
396static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
397{
398int rc;
399if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
400 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
401}
402
403
404
405/*************************************************
406* Byte flipping function *
407*************************************************/
408
/* Reverse the byte order of a 2-byte or a 4-byte quantity held in a long.

The work is done on an unsigned copy of the value. Shifting a byte into the
top of a signed long is undefined where long is 32 bits wide, and unsigned
arithmetic gives the same bit pattern without that problem.

Arguments:
  value      the quantity to flip
  n          its size in bytes; 2 swaps the two low-order bytes, and any
             other value reverses the four low-order bytes

Returns:     the flipped quantity; bytes above those flipped are zero
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
418
419
420
421
422/*************************************************
423* Main Program *
424*************************************************/
425
426/* Read lines from named file or stdin and write to named file or stdout; lines
427consist of a regular expression, in delimiters and optionally followed by
428options, followed by a set of test data, terminated by an empty line. */
429
430int main(int argc, char **argv)
431{
432FILE *infile = stdin;
433int options = 0;
434int study_options = 0;
435int op = 1;
436int timeit = 0;
437int showinfo = 0;
438int showstore = 0;
439int size_offsets = 45;
440int size_offsets_max;
8ac170f3 441int *offsets = NULL;
c86f6258
PH
442#if !defined NOPOSIX
443int posix = 0;
444#endif
445int debug = 0;
446int done = 0;
8ac170f3
PH
447int all_use_dfa = 0;
448int yield = 0;
c86f6258
PH
449
450unsigned char *buffer;
451unsigned char *dbuffer;
452
453/* Get buffers from malloc() so that Electric Fence will check their misuse
454when I am debugging. */
455
456buffer = (unsigned char *)malloc(BUFFER_SIZE);
457dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
458pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
459
460/* The outfile variable is static so that new_malloc can use it. The _setmode()
461stuff is some magic that I don't understand, but which apparently does good
462things in Windows. It's related to line terminations. */
463
464#if defined(_WIN32) || defined(WIN32)
465_setmode( _fileno( stdout ), 0x8000 );
466#endif /* defined(_WIN32) || defined(WIN32) */
467
468outfile = stdout;
469
470/* Scan options */
471
472while (argc > 1 && argv[op][0] == '-')
473 {
474 unsigned char *endptr;
475
476 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
477 showstore = 1;
478 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
479 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
480 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
92e772ff 481#if !defined NODFA
8ac170f3
PH
482 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
483#endif
c86f6258
PH
484 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
485 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
486 *endptr == 0))
487 {
488 op++;
489 argc--;
490 }
491#if !defined NOPOSIX
492 else if (strcmp(argv[op], "-p") == 0) posix = 1;
493#endif
494 else if (strcmp(argv[op], "-C") == 0)
495 {
496 int rc;
497 printf("PCRE version %s\n", pcre_version());
498 printf("Compiled with\n");
499 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
500 printf(" %sUTF-8 support\n", rc? "" : "No ");
501 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
502 printf(" %sUnicode properties support\n", rc? "" : "No ");
503 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
504 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
505 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
506 printf(" Internal link size = %d\n", rc);
507 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
508 printf(" POSIX malloc threshold = %d\n", rc);
509 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
510 printf(" Default match limit = %d\n", rc);
511 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
512 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
513 exit(0);
514 }
515 else
516 {
517 printf("** Unknown or malformed option %s\n", argv[op]);
518 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
519 printf(" -C show PCRE compile-time options and exit\n");
8ac170f3
PH
520 printf(" -d debug: show compiled code; implies -i\n");
521#if !defined NODFA
522 printf(" -dfa force DFA matching for all subjects\n");
523#endif
524 printf(" -i show information about compiled pattern\n"
c86f6258
PH
525 " -m output memory used information\n"
526 " -o <n> set size of offsets vector to <n>\n");
527#if !defined NOPOSIX
528 printf(" -p use POSIX interface\n");
529#endif
530 printf(" -s output store (memory) used information\n"
531 " -t time compilation and execution\n");
8ac170f3
PH
532 yield = 1;
533 goto EXIT;
c86f6258
PH
534 }
535 op++;
536 argc--;
537 }
538
539/* Get the store for the offsets vector, and remember what it was */
540
541size_offsets_max = size_offsets;
542offsets = (int *)malloc(size_offsets_max * sizeof(int));
543if (offsets == NULL)
544 {
545 printf("** Failed to get %d bytes of memory for offsets vector\n",
546 size_offsets_max * sizeof(int));
8ac170f3
PH
547 yield = 1;
548 goto EXIT;
c86f6258
PH
549 }
550
551/* Sort out the input and output files */
552
553if (argc > 1)
554 {
555 infile = fopen(argv[op], "rb");
556 if (infile == NULL)
557 {
558 printf("** Failed to open %s\n", argv[op]);
8ac170f3
PH
559 yield = 1;
560 goto EXIT;
c86f6258
PH
561 }
562 }
563
564if (argc > 2)
565 {
566 outfile = fopen(argv[op+1], "wb");
567 if (outfile == NULL)
568 {
569 printf("** Failed to open %s\n", argv[op+1]);
8ac170f3
PH
570 yield = 1;
571 goto EXIT;
c86f6258
PH
572 }
573 }
574
575/* Set alternative malloc function */
576
577pcre_malloc = new_malloc;
578pcre_free = new_free;
579pcre_stack_malloc = stack_malloc;
580pcre_stack_free = stack_free;
581
582/* Heading line, then prompt for first regex if stdin */
583
584fprintf(outfile, "PCRE version %s\n\n", pcre_version());
585
586/* Main loop */
587
588while (!done)
589 {
590 pcre *re = NULL;
591 pcre_extra *extra = NULL;
592
593#if !defined NOPOSIX /* There are still compilers that require no indent */
594 regex_t preg;
595 int do_posix = 0;
596#endif
597
598 const char *error;
599 unsigned char *p, *pp, *ppp;
600 unsigned char *to_file = NULL;
601 const unsigned char *tables = NULL;
602 unsigned long int true_size, true_study_size = 0;
603 size_t size, regex_gotten_store;
604 int do_study = 0;
605 int do_debug = debug;
606 int do_G = 0;
607 int do_g = 0;
608 int do_showinfo = showinfo;
609 int do_showrest = 0;
610 int do_flip = 0;
611 int erroroffset, len, delimiter;
612
613 use_utf8 = 0;
614
615 if (infile == stdin) printf(" re> ");
616 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
617 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
618 fflush(outfile);
619
620 p = buffer;
621 while (isspace(*p)) p++;
622 if (*p == 0) continue;
623
624 /* See if the pattern is to be loaded pre-compiled from a file. */
625
626 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
627 {
628 unsigned long int magic;
629 uschar sbuf[8];
630 FILE *f;
631
632 p++;
633 pp = p + (int)strlen((char *)p);
634 while (isspace(pp[-1])) pp--;
635 *pp = 0;
636
637 f = fopen((char *)p, "rb");
638 if (f == NULL)
639 {
640 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
641 continue;
642 }
643
644 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
645
646 true_size =
647 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
648 true_study_size =
649 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
650
651 re = (real_pcre *)new_malloc(true_size);
652 regex_gotten_store = gotten_store;
653
654 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
655
656 magic = ((real_pcre *)re)->magic_number;
657 if (magic != MAGIC_NUMBER)
658 {
659 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
660 {
661 do_flip = 1;
662 }
663 else
664 {
665 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
666 fclose(f);
667 continue;
668 }
669 }
670
671 fprintf(outfile, "Compiled regex%s loaded from %s\n",
672 do_flip? " (byte-inverted)" : "", p);
673
674 /* Need to know if UTF-8 for printing data strings */
675
676 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
677 use_utf8 = (options & PCRE_UTF8) != 0;
678
679 /* Now see if there is any following study data */
680
681 if (true_study_size != 0)
682 {
683 pcre_study_data *psd;
684
685 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
686 extra->flags = PCRE_EXTRA_STUDY_DATA;
687
688 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
689 extra->study_data = psd;
690
691 if (fread(psd, 1, true_study_size, f) != true_study_size)
692 {
693 FAIL_READ:
694 fprintf(outfile, "Failed to read data from %s\n", p);
695 if (extra != NULL) new_free(extra);
696 if (re != NULL) new_free(re);
697 fclose(f);
698 continue;
699 }
700 fprintf(outfile, "Study data loaded from %s\n", p);
701 do_study = 1; /* To get the data output if requested */
702 }
703 else fprintf(outfile, "No study data\n");
704
705 fclose(f);
706 goto SHOW_INFO;
707 }
708
709 /* In-line pattern (the usual case). Get the delimiter and seek the end of
710 the pattern; if it isn't complete, read more. */
711
712 delimiter = *p++;
713
714 if (isalnum(delimiter) || delimiter == '\\')
715 {
716 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
717 goto SKIP_DATA;
718 }
719
720 pp = p;
721
722 for(;;)
723 {
724 while (*pp != 0)
725 {
726 if (*pp == '\\' && pp[1] != 0) pp++;
727 else if (*pp == delimiter) break;
728 pp++;
729 }
730 if (*pp != 0) break;
731
732 len = BUFFER_SIZE - (pp - buffer);
733 if (len < 256)
734 {
735 fprintf(outfile, "** Expression too long - missing delimiter?\n");
736 goto SKIP_DATA;
737 }
738
739 if (infile == stdin) printf(" > ");
740 if (fgets((char *)pp, len, infile) == NULL)
741 {
742 fprintf(outfile, "** Unexpected EOF\n");
743 done = 1;
744 goto CONTINUE;
745 }
746 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
747 }
748
749 /* If the first character after the delimiter is backslash, make
750 the pattern end with backslash. This is purely to provide a way
751 of testing for the error message when a pattern ends with backslash. */
752
753 if (pp[1] == '\\') *pp++ = '\\';
754
755 /* Terminate the pattern at the delimiter, and save a copy of the pattern
756 for callouts. */
757
758 *pp++ = 0;
759 strcpy((char *)pbuffer, (char *)p);
760
761 /* Look for options after final delimiter */
762
763 options = 0;
764 study_options = 0;
765 log_store = showstore; /* default from command line */
766
767 while (*pp != 0)
768 {
769 switch (*pp++)
770 {
8ac170f3 771 case 'f': options |= PCRE_FIRSTLINE; break;
c86f6258
PH
772 case 'g': do_g = 1; break;
773 case 'i': options |= PCRE_CASELESS; break;
774 case 'm': options |= PCRE_MULTILINE; break;
775 case 's': options |= PCRE_DOTALL; break;
776 case 'x': options |= PCRE_EXTENDED; break;
777
778 case '+': do_showrest = 1; break;
779 case 'A': options |= PCRE_ANCHORED; break;
780 case 'C': options |= PCRE_AUTO_CALLOUT; break;
781 case 'D': do_debug = do_showinfo = 1; break;
782 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
783 case 'F': do_flip = 1; break;
784 case 'G': do_G = 1; break;
785 case 'I': do_showinfo = 1; break;
786 case 'M': log_store = 1; break;
787 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
788
789#if !defined NOPOSIX
790 case 'P': do_posix = 1; break;
791#endif
792
793 case 'S': do_study = 1; break;
794 case 'U': options |= PCRE_UNGREEDY; break;
795 case 'X': options |= PCRE_EXTRA; break;
796 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
797 case '?': options |= PCRE_NO_UTF8_CHECK; break;
798
799 case 'L':
800 ppp = pp;
8ac170f3
PH
801 /* The '\r' test here is so that it works on Windows */
802 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
c86f6258
PH
803 *ppp = 0;
804 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
805 {
806 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
807 goto SKIP_DATA;
808 }
809 tables = pcre_maketables();
810 pp = ppp;
811 break;
812
813 case '>':
814 to_file = pp;
815 while (*pp != 0) pp++;
816 while (isspace(pp[-1])) pp--;
817 *pp = 0;
818 break;
819
8ac170f3
PH
820 case '\r': /* So that it works in Windows */
821 case '\n':
822 case ' ':
823 break;
c86f6258
PH
824
825 default:
826 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
827 goto SKIP_DATA;
828 }
829 }
830
831 /* Handle compiling via the POSIX interface, which doesn't support the
832 timing, showing, or debugging options, nor the ability to pass over
833 local character tables. */
834
835#if !defined NOPOSIX
836 if (posix || do_posix)
837 {
838 int rc;
839 int cflags = 0;
840
841 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
842 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
8ac170f3 843 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
c86f6258
PH
844 rc = regcomp(&preg, (char *)p, cflags);
845
846 /* Compilation failed; go back for another re, skipping to blank line
847 if non-interactive. */
848
849 if (rc != 0)
850 {
851 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
852 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
853 goto SKIP_DATA;
854 }
855 }
856
857 /* Handle compiling via the native interface */
858
859 else
860#endif /* !defined NOPOSIX */
861
862 {
863 if (timeit)
864 {
865 register int i;
866 clock_t time_taken;
867 clock_t start_time = clock();
868 for (i = 0; i < LOOPREPEAT; i++)
869 {
870 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
871 if (re != NULL) free(re);
872 }
873 time_taken = clock() - start_time;
874 fprintf(outfile, "Compile time %.3f milliseconds\n",
875 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
876 (double)CLOCKS_PER_SEC);
877 }
878
879 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
880
881 /* Compilation failed; go back for another re, skipping to blank line
882 if non-interactive. */
883
884 if (re == NULL)
885 {
886 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
887 SKIP_DATA:
888 if (infile != stdin)
889 {
890 for (;;)
891 {
892 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
893 {
894 done = 1;
895 goto CONTINUE;
896 }
897 len = (int)strlen((char *)buffer);
898 while (len > 0 && isspace(buffer[len-1])) len--;
899 if (len == 0) break;
900 }
901 fprintf(outfile, "\n");
902 }
903 goto CONTINUE;
904 }
905
906 /* Compilation succeeded; print data if required. There are now two
907 info-returning functions. The old one has a limited interface and
908 returns only limited data. Check that it agrees with the newer one. */
909
910 if (log_store)
911 fprintf(outfile, "Memory allocation (code space): %d\n",
912 (int)(gotten_store -
913 sizeof(real_pcre) -
914 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
915
916 /* Extract the size for possible writing before possibly flipping it,
917 and remember the store that was got. */
918
919 true_size = ((real_pcre *)re)->size;
920 regex_gotten_store = gotten_store;
921
922 /* If /S was present, study the regexp to generate additional info to
923 help with the matching. */
924
925 if (do_study)
926 {
927 if (timeit)
928 {
929 register int i;
930 clock_t time_taken;
931 clock_t start_time = clock();
932 for (i = 0; i < LOOPREPEAT; i++)
933 extra = pcre_study(re, study_options, &error);
934 time_taken = clock() - start_time;
935 if (extra != NULL) free(extra);
936 fprintf(outfile, " Study time %.3f milliseconds\n",
937 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
938 (double)CLOCKS_PER_SEC);
939 }
940 extra = pcre_study(re, study_options, &error);
941 if (error != NULL)
942 fprintf(outfile, "Failed to study: %s\n", error);
943 else if (extra != NULL)
944 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
945 }
946
947 /* If the 'F' option was present, we flip the bytes of all the integer
948 fields in the regex data block and the study block. This is to make it
949 possible to test PCRE's handling of byte-flipped patterns, e.g. those
950 compiled on a different architecture. */
951
952 if (do_flip)
953 {
954 real_pcre *rre = (real_pcre *)re;
955 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
956 rre->size = byteflip(rre->size, sizeof(rre->size));
957 rre->options = byteflip(rre->options, sizeof(rre->options));
958 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
959 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
960 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
961 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
962 rre->name_table_offset = byteflip(rre->name_table_offset,
963 sizeof(rre->name_table_offset));
964 rre->name_entry_size = byteflip(rre->name_entry_size,
965 sizeof(rre->name_entry_size));
966 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
967
968 if (extra != NULL)
969 {
970 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
971 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
972 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
973 }
974 }
975
976 /* Extract information from the compiled data if required */
977
978 SHOW_INFO:
979
980 if (do_showinfo)
981 {
982 unsigned long int get_options, all_options;
8ac170f3 983#if !defined NOINFOCHECK
c86f6258 984 int old_first_char, old_options, old_count;
8ac170f3 985#endif
c86f6258
PH
986 int count, backrefmax, first_char, need_char;
987 int nameentrysize, namecount;
988 const uschar *nametable;
989
990 if (do_debug)
991 {
992 fprintf(outfile, "------------------------------------------------------------------\n");
8ac170f3 993 _pcre_printint(re, outfile);
c86f6258
PH
994 }
995
996 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
997 new_info(re, NULL, PCRE_INFO_SIZE, &size);
998 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
999 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1000 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1001 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1002 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1003 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1004 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1005
8ac170f3 1006#if !defined NOINFOCHECK
c86f6258
PH
1007 old_count = pcre_info(re, &old_options, &old_first_char);
1008 if (count < 0) fprintf(outfile,
1009 "Error %d from pcre_info()\n", count);
1010 else
1011 {
1012 if (old_count != count) fprintf(outfile,
1013 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1014 old_count);
1015
1016 if (old_first_char != first_char) fprintf(outfile,
1017 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1018 first_char, old_first_char);
1019
1020 if (old_options != (int)get_options) fprintf(outfile,
1021 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1022 get_options, old_options);
1023 }
8ac170f3 1024#endif
c86f6258
PH
1025
1026 if (size != regex_gotten_store) fprintf(outfile,
1027 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1028 (int)size, (int)regex_gotten_store);
1029
1030 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1031 if (backrefmax > 0)
1032 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1033
1034 if (namecount > 0)
1035 {
1036 fprintf(outfile, "Named capturing subpatterns:\n");
1037 while (namecount-- > 0)
1038 {
1039 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1040 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1041 GET2(nametable, 0));
1042 nametable += nameentrysize;
1043 }
1044 }
1045
1046 /* The NOPARTIAL bit is a private bit in the options, so we have
1047 to fish it out via out back door */
1048
1049 all_options = ((real_pcre *)re)->options;
1050 if (do_flip)
1051 {
1052 all_options = byteflip(all_options, sizeof(all_options));
1053 }
1054
1055 if ((all_options & PCRE_NOPARTIAL) != 0)
1056 fprintf(outfile, "Partial matching not supported\n");
1057
1058 if (get_options == 0) fprintf(outfile, "No options\n");
8ac170f3 1059 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
c86f6258
PH
1060 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1061 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1062 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1063 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
8ac170f3 1064 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
c86f6258
PH
1065 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1066 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1067 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1068 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1069 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1070 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1071
1072 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1073 fprintf(outfile, "Case state changes\n");
1074
1075 if (first_char == -1)
1076 {
1077 fprintf(outfile, "First char at start or follows \\n\n");
1078 }
1079 else if (first_char < 0)
1080 {
1081 fprintf(outfile, "No first char\n");
1082 }
1083 else
1084 {
1085 int ch = first_char & 255;
1086 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1087 "" : " (caseless)";
1088 if (isprint(ch))
1089 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1090 else
1091 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1092 }
1093
1094 if (need_char < 0)
1095 {
1096 fprintf(outfile, "No need char\n");
1097 }
1098 else
1099 {
1100 int ch = need_char & 255;
1101 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1102 "" : " (caseless)";
1103 if (isprint(ch))
1104 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1105 else
1106 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1107 }
1108
1109 /* Don't output study size; at present it is in any case a fixed
1110 value, but it varies, depending on the computer architecture, and
1111 so messes up the test suite. (And with the /F option, it might be
1112 flipped.) */
1113
1114 if (do_study)
1115 {
1116 if (extra == NULL)
1117 fprintf(outfile, "Study returned NULL\n");
1118 else
1119 {
1120 uschar *start_bits = NULL;
1121 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1122
1123 if (start_bits == NULL)
1124 fprintf(outfile, "No starting byte set\n");
1125 else
1126 {
1127 int i;
1128 int c = 24;
1129 fprintf(outfile, "Starting byte set: ");
1130 for (i = 0; i < 256; i++)
1131 {
1132 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1133 {
1134 if (c > 75)
1135 {
1136 fprintf(outfile, "\n ");
1137 c = 2;
1138 }
1139 if (isprint(i) && i != ' ')
1140 {
1141 fprintf(outfile, "%c ", i);
1142 c += 2;
1143 }
1144 else
1145 {
1146 fprintf(outfile, "\\x%02x ", i);
1147 c += 5;
1148 }
1149 }
1150 }
1151 fprintf(outfile, "\n");
1152 }
1153 }
1154 }
1155 }
1156
1157 /* If the '>' option was present, we write out the regex to a file, and
1158 that is all. The first 8 bytes of the file are the regex length and then
1159 the study length, in big-endian order. */
1160
1161 if (to_file != NULL)
1162 {
1163 FILE *f = fopen((char *)to_file, "wb");
1164 if (f == NULL)
1165 {
1166 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1167 }
1168 else
1169 {
1170 uschar sbuf[8];
1171 sbuf[0] = (true_size >> 24) & 255;
1172 sbuf[1] = (true_size >> 16) & 255;
1173 sbuf[2] = (true_size >> 8) & 255;
1174 sbuf[3] = (true_size) & 255;
1175
1176 sbuf[4] = (true_study_size >> 24) & 255;
1177 sbuf[5] = (true_study_size >> 16) & 255;
1178 sbuf[6] = (true_study_size >> 8) & 255;
1179 sbuf[7] = (true_study_size) & 255;
1180
1181 if (fwrite(sbuf, 1, 8, f) < 8 ||
1182 fwrite(re, 1, true_size, f) < true_size)
1183 {
1184 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1185 }
1186 else
1187 {
1188 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1189 if (extra != NULL)
1190 {
1191 if (fwrite(extra->study_data, 1, true_study_size, f) <
1192 true_study_size)
1193 {
1194 fprintf(outfile, "Write error on %s: %s\n", to_file,
1195 strerror(errno));
1196 }
1197 else fprintf(outfile, "Study data written to %s\n", to_file);
1198 }
1199 }
1200 fclose(f);
1201 }
8ac170f3
PH
1202
1203 new_free(re);
1204 if (extra != NULL) new_free(extra);
1205 if (tables != NULL) new_free((void *)tables);
c86f6258
PH
1206 continue; /* With next regex */
1207 }
1208 } /* End of non-POSIX compile */
1209
1210 /* Read data lines and test them */
1211
1212 for (;;)
1213 {
1214 unsigned char *q;
1215 unsigned char *bptr = dbuffer;
1216 int *use_offsets = offsets;
1217 int use_size_offsets = size_offsets;
1218 int callout_data = 0;
1219 int callout_data_set = 0;
1220 int count, c;
1221 int copystrings = 0;
1222 int find_match_limit = 0;
1223 int getstrings = 0;
1224 int getlist = 0;
1225 int gmatched = 0;
1226 int start_offset = 0;
1227 int g_notempty = 0;
8ac170f3 1228 int use_dfa = 0;
c86f6258
PH
1229
1230 options = 0;
1231
1232 pcre_callout = callout;
1233 first_callout = 1;
1234 callout_extra = 0;
1235 callout_count = 0;
1236 callout_fail_count = 999999;
1237 callout_fail_id = -1;
1238 show_malloc = 0;
1239
1240 if (infile == stdin) printf("data> ");
1241 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1242 {
1243 done = 1;
1244 goto CONTINUE;
1245 }
1246 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1247
1248 len = (int)strlen((char *)buffer);
1249 while (len > 0 && isspace(buffer[len-1])) len--;
1250 buffer[len] = 0;
1251 if (len == 0) break;
1252
1253 p = buffer;
1254 while (isspace(*p)) p++;
1255
1256 q = dbuffer;
1257 while ((c = *p++) != 0)
1258 {
1259 int i = 0;
1260 int n = 0;
1261
1262 if (c == '\\') switch ((c = *p++))
1263 {
1264 case 'a': c = 7; break;
1265 case 'b': c = '\b'; break;
1266 case 'e': c = 27; break;
1267 case 'f': c = '\f'; break;
1268 case 'n': c = '\n'; break;
1269 case 'r': c = '\r'; break;
1270 case 't': c = '\t'; break;
1271 case 'v': c = '\v'; break;
1272
1273 case '0': case '1': case '2': case '3':
1274 case '4': case '5': case '6': case '7':
1275 c -= '0';
1276 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1277 c = c * 8 + *p++ - '0';
1278 break;
1279
1280 case 'x':
1281
1282 /* Handle \x{..} specially - new Perl thing for utf8 */
1283
8ac170f3 1284#if !defined NOUTF8
c86f6258
PH
1285 if (*p == '{')
1286 {
1287 unsigned char *pt = p;
1288 c = 0;
1289 while (isxdigit(*(++pt)))
1290 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1291 if (*pt == '}')
1292 {
1293 unsigned char buff8[8];
1294 int ii, utn;
8ac170f3 1295 utn = _pcre_ord2utf8(c, buff8);
c86f6258
PH
1296 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1297 c = buff8[ii]; /* Last byte */
1298 p = pt + 1;
1299 break;
1300 }
1301 /* Not correct form; fall through */
1302 }
8ac170f3 1303#endif
c86f6258
PH
1304
1305 /* Ordinary \x */
1306
1307 c = 0;
1308 while (i++ < 2 && isxdigit(*p))
1309 {
1310 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1311 p++;
1312 }
1313 break;
1314
1315 case 0: /* \ followed by EOF allows for an empty line */
1316 p--;
1317 continue;
1318
1319 case '>':
1320 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1321 continue;
1322
1323 case 'A': /* Option setting */
1324 options |= PCRE_ANCHORED;
1325 continue;
1326
1327 case 'B':
1328 options |= PCRE_NOTBOL;
1329 continue;
1330
1331 case 'C':
1332 if (isdigit(*p)) /* Set copy string */
1333 {
1334 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1335 copystrings |= 1 << n;
1336 }
1337 else if (isalnum(*p))
1338 {
1339 uschar name[256];
1340 uschar *npp = name;
1341 while (isalnum(*p)) *npp++ = *p++;
1342 *npp = 0;
1343 n = pcre_get_stringnumber(re, (char *)name);
1344 if (n < 0)
1345 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1346 else copystrings |= 1 << n;
1347 }
1348 else if (*p == '+')
1349 {
1350 callout_extra = 1;
1351 p++;
1352 }
1353 else if (*p == '-')
1354 {
1355 pcre_callout = NULL;
1356 p++;
1357 }
1358 else if (*p == '!')
1359 {
1360 callout_fail_id = 0;
1361 p++;
1362 while(isdigit(*p))
1363 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1364 callout_fail_count = 0;
1365 if (*p == '!')
1366 {
1367 p++;
1368 while(isdigit(*p))
1369 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1370 }
1371 }
1372 else if (*p == '*')
1373 {
1374 int sign = 1;
1375 callout_data = 0;
1376 if (*(++p) == '-') { sign = -1; p++; }
1377 while(isdigit(*p))
1378 callout_data = callout_data * 10 + *p++ - '0';
1379 callout_data *= sign;
1380 callout_data_set = 1;
1381 }
1382 continue;
1383
8ac170f3
PH
1384#if !defined NODFA
1385 case 'D':
1386#if !defined NOPOSIX
1387 if (posix || do_posix)
1388 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1389 else
1390#endif
1391 use_dfa = 1;
1392 continue;
1393
1394 case 'F':
1395 options |= PCRE_DFA_SHORTEST;
1396 continue;
1397#endif
1398
c86f6258
PH
1399 case 'G':
1400 if (isdigit(*p))
1401 {
1402 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1403 getstrings |= 1 << n;
1404 }
1405 else if (isalnum(*p))
1406 {
1407 uschar name[256];
1408 uschar *npp = name;
1409 while (isalnum(*p)) *npp++ = *p++;
1410 *npp = 0;
1411 n = pcre_get_stringnumber(re, (char *)name);
1412 if (n < 0)
1413 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1414 else getstrings |= 1 << n;
1415 }
1416 continue;
1417
1418 case 'L':
1419 getlist = 1;
1420 continue;
1421
1422 case 'M':
1423 find_match_limit = 1;
1424 continue;
1425
1426 case 'N':
1427 options |= PCRE_NOTEMPTY;
1428 continue;
1429
1430 case 'O':
1431 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1432 if (n > size_offsets_max)
1433 {
1434 size_offsets_max = n;
1435 free(offsets);
1436 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1437 if (offsets == NULL)
1438 {
1439 printf("** Failed to get %d bytes of memory for offsets vector\n",
1440 size_offsets_max * sizeof(int));
8ac170f3
PH
1441 yield = 1;
1442 goto EXIT;
c86f6258
PH
1443 }
1444 }
1445 use_size_offsets = n;
1446 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1447 continue;
1448
1449 case 'P':
1450 options |= PCRE_PARTIAL;
1451 continue;
1452
8ac170f3
PH
1453#if !defined NODFA
1454 case 'R':
1455 options |= PCRE_DFA_RESTART;
1456 continue;
1457#endif
1458
c86f6258
PH
1459 case 'S':
1460 show_malloc = 1;
1461 continue;
1462
1463 case 'Z':
1464 options |= PCRE_NOTEOL;
1465 continue;
1466
1467 case '?':
1468 options |= PCRE_NO_UTF8_CHECK;
1469 continue;
1470 }
1471 *q++ = c;
1472 }
1473 *q = 0;
1474 len = q - dbuffer;
1475
8ac170f3
PH
1476 if ((all_use_dfa || use_dfa) && find_match_limit)
1477 {
1478 printf("**Match limit not relevant for DFA matching: ignored\n");
1479 find_match_limit = 0;
1480 }
1481
c86f6258
PH
1482 /* Handle matching via the POSIX interface, which does not
1483 support timing or playing with the match limit or callout data. */
1484
1485#if !defined NOPOSIX
1486 if (posix || do_posix)
1487 {
1488 int rc;
1489 int eflags = 0;
1490 regmatch_t *pmatch = NULL;
1491 if (use_size_offsets > 0)
1492 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1493 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1494 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1495
1496 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1497
1498 if (rc != 0)
1499 {
1500 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1501 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1502 }
1503 else
1504 {
1505 size_t i;
1506 for (i = 0; i < (size_t)use_size_offsets; i++)
1507 {
1508 if (pmatch[i].rm_so >= 0)
1509 {
1510 fprintf(outfile, "%2d: ", (int)i);
1511 (void)pchars(dbuffer + pmatch[i].rm_so,
1512 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1513 fprintf(outfile, "\n");
1514 if (i == 0 && do_showrest)
1515 {
1516 fprintf(outfile, " 0+ ");
1517 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1518 outfile);
1519 fprintf(outfile, "\n");
1520 }
1521 }
1522 }
1523 }
1524 free(pmatch);
1525 }
1526
1527 /* Handle matching via the native interface - repeats for /g and /G */
1528
1529 else
1530#endif /* !defined NOPOSIX */
1531
1532 for (;; gmatched++) /* Loop for /g or /G */
1533 {
1534 if (timeit)
1535 {
1536 register int i;
1537 clock_t time_taken;
1538 clock_t start_time = clock();
8ac170f3
PH
1539
1540#if !defined NODFA
1541 if (all_use_dfa || use_dfa)
1542 {
1543 int workspace[1000];
1544 for (i = 0; i < LOOPREPEAT; i++)
1545 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1546 options | g_notempty, use_offsets, use_size_offsets, workspace,
1547 sizeof(workspace)/sizeof(int));
1548 }
1549 else
1550#endif
1551
c86f6258
PH
1552 for (i = 0; i < LOOPREPEAT; i++)
1553 count = pcre_exec(re, extra, (char *)bptr, len,
1554 start_offset, options | g_notempty, use_offsets, use_size_offsets);
8ac170f3 1555
c86f6258
PH
1556 time_taken = clock() - start_time;
1557 fprintf(outfile, "Execute time %.3f milliseconds\n",
1558 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1559 (double)CLOCKS_PER_SEC);
1560 }
1561
1562 /* If find_match_limit is set, we want to do repeated matches with
1563 varying limits in order to find the minimum value. */
1564
1565 if (find_match_limit)
1566 {
1567 int min = 0;
1568 int mid = 64;
1569 int max = -1;
1570
1571 if (extra == NULL)
1572 {
1573 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1574 extra->flags = 0;
1575 }
1576 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1577
1578 for (;;)
1579 {
1580 extra->match_limit = mid;
1581 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1582 options | g_notempty, use_offsets, use_size_offsets);
1583 if (count == PCRE_ERROR_MATCHLIMIT)
1584 {
1585 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1586 min = mid;
1587 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1588 }
1589 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1590 count == PCRE_ERROR_PARTIAL)
1591 {
1592 if (mid == min + 1)
1593 {
1594 fprintf(outfile, "Minimum match limit = %d\n", mid);
1595 break;
1596 }
1597 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1598 max = mid;
1599 mid = (min + mid)/2;
1600 }
1601 else break; /* Some other error */
1602 }
1603
1604 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1605 }
1606
1607 /* If callout_data is set, use the interface with additional data */
1608
1609 else if (callout_data_set)
1610 {
1611 if (extra == NULL)
1612 {
1613 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1614 extra->flags = 0;
1615 }
1616 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1617 extra->callout_data = &callout_data;
1618 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1619 options | g_notempty, use_offsets, use_size_offsets);
1620 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1621 }
1622
1623 /* The normal case is just to do the match once, with the default
1624 value of match_limit. */
1625
8ac170f3
PH
1626#if !defined NODFA
1627 else if (all_use_dfa || use_dfa)
c86f6258 1628 {
8ac170f3
PH
1629 int workspace[1000];
1630 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1631 options | g_notempty, use_offsets, use_size_offsets, workspace,
1632 sizeof(workspace)/sizeof(int));
1633 if (count == 0)
1634 {
1635 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1636 count = use_size_offsets/2;
1637 }
c86f6258 1638 }
8ac170f3 1639#endif
c86f6258 1640
8ac170f3 1641 else
c86f6258 1642 {
8ac170f3
PH
1643 count = pcre_exec(re, extra, (char *)bptr, len,
1644 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1645 if (count == 0)
1646 {
1647 fprintf(outfile, "Matched, but too many substrings\n");
1648 count = use_size_offsets/3;
1649 }
c86f6258
PH
1650 }
1651
1652 /* Matched */
1653
1654 if (count >= 0)
1655 {
1656 int i;
1657 for (i = 0; i < count * 2; i += 2)
1658 {
1659 if (use_offsets[i] < 0)
1660 fprintf(outfile, "%2d: <unset>\n", i/2);
1661 else
1662 {
1663 fprintf(outfile, "%2d: ", i/2);
1664 (void)pchars(bptr + use_offsets[i],
1665 use_offsets[i+1] - use_offsets[i], outfile);
1666 fprintf(outfile, "\n");
1667 if (i == 0)
1668 {
1669 if (do_showrest)
1670 {
1671 fprintf(outfile, " 0+ ");
1672 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1673 outfile);
1674 fprintf(outfile, "\n");
1675 }
1676 }
1677 }
1678 }
1679
1680 for (i = 0; i < 32; i++)
1681 {
1682 if ((copystrings & (1 << i)) != 0)
1683 {
1684 char copybuffer[16];
1685 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1686 i, copybuffer, sizeof(copybuffer));
1687 if (rc < 0)
1688 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1689 else
1690 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1691 }
1692 }
1693
1694 for (i = 0; i < 32; i++)
1695 {
1696 if ((getstrings & (1 << i)) != 0)
1697 {
1698 const char *substring;
1699 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1700 i, &substring);
1701 if (rc < 0)
1702 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1703 else
1704 {
1705 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1706 /* free((void *)substring); */
1707 pcre_free_substring(substring);
1708 }
1709 }
1710 }
1711
1712 if (getlist)
1713 {
1714 const char **stringlist;
1715 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1716 &stringlist);
1717 if (rc < 0)
1718 fprintf(outfile, "get substring list failed %d\n", rc);
1719 else
1720 {
1721 for (i = 0; i < count; i++)
1722 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1723 if (stringlist[i] != NULL)
1724 fprintf(outfile, "string list not terminated by NULL\n");
1725 /* free((void *)stringlist); */
1726 pcre_free_substring_list(stringlist);
1727 }
1728 }
1729 }
1730
1731 /* There was a partial match */
1732
1733 else if (count == PCRE_ERROR_PARTIAL)
1734 {
8ac170f3
PH
1735 fprintf(outfile, "Partial match");
1736#if !defined NODFA
1737 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1738 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1739 bptr + use_offsets[0]);
1740#endif
1741 fprintf(outfile, "\n");
c86f6258
PH
1742 break; /* Out of the /g loop */
1743 }
1744
1745 /* Failed to match. If this is a /g or /G loop and we previously set
1746 g_notempty after a null match, this is not necessarily the end.
1747 We want to advance the start offset, and continue. In the case of UTF-8
1748 matching, the advance must be one character, not one byte. Fudge the
1749 offset values to achieve this. We won't be at the end of the string -
1750 that was checked before setting g_notempty. */
1751
1752 else
1753 {
1754 if (g_notempty != 0)
1755 {
1756 int onechar = 1;
1757 use_offsets[0] = start_offset;
1758 if (use_utf8)
1759 {
1760 while (start_offset + onechar < len)
1761 {
1762 int tb = bptr[start_offset+onechar];
1763 if (tb <= 127) break;
1764 tb &= 0xc0;
1765 if (tb != 0 && tb != 0xc0) onechar++;
1766 }
1767 }
1768 use_offsets[1] = start_offset + onechar;
1769 }
1770 else
1771 {
1772 if (count == PCRE_ERROR_NOMATCH)
1773 {
1774 if (gmatched == 0) fprintf(outfile, "No match\n");
1775 }
1776 else fprintf(outfile, "Error %d\n", count);
1777 break; /* Out of the /g loop */
1778 }
1779 }
1780
1781 /* If not /g or /G we are done */
1782
1783 if (!do_g && !do_G) break;
1784
1785 /* If we have matched an empty string, first check to see if we are at
1786 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1787 what Perl's /g option does. This turns out to be rather cunning. First
1788 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1789 same point. If this fails (picked up above) we advance to the next
1790 character. */
1791
1792 g_notempty = 0;
1793 if (use_offsets[0] == use_offsets[1])
1794 {
1795 if (use_offsets[0] == len) break;
1796 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1797 }
1798
1799 /* For /g, update the start offset, leaving the rest alone */
1800
1801 if (do_g) start_offset = use_offsets[1];
1802
1803 /* For /G, update the pointer and length */
1804
1805 else
1806 {
1807 bptr += use_offsets[1];
1808 len -= use_offsets[1];
1809 }
1810 } /* End of loop for /g and /G */
1811 } /* End of loop for data lines */
1812
1813 CONTINUE:
1814
1815#if !defined NOPOSIX
1816 if (posix || do_posix) regfree(&preg);
1817#endif
1818
8ac170f3
PH
1819 if (re != NULL) new_free(re);
1820 if (extra != NULL) new_free(extra);
c86f6258
PH
1821 if (tables != NULL)
1822 {
8ac170f3 1823 new_free((void *)tables);
c86f6258
PH
1824 setlocale(LC_CTYPE, "C");
1825 }
1826 }
1827
1828if (infile == stdin) fprintf(outfile, "\n");
8ac170f3
PH
1829
1830EXIT:
1831
1832if (infile != NULL && infile != stdin) fclose(infile);
1833if (outfile != NULL && outfile != stdout) fclose(outfile);
1834
1835free(buffer);
1836free(dbuffer);
1837free(pbuffer);
1838free(offsets);
1839
1840return yield;
c86f6258
PH
1841}
1842
8ac170f3 1843/* End of pcretest.c */