Start
[exim.git] / src / src / pcre / pcretest.c
CommitLineData
c86f6258
PH
1/*************************************************
2* PCRE testing program *
3*************************************************/
4
5/* This program was hacked up as a tester for PCRE. I really should have
6written it more tidily in the first place. Will I ever learn? It has grown and
7been extended and consequently is now rather untidy in places.
8
9-----------------------------------------------------------------------------
10Redistribution and use in source and binary forms, with or without
11modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34POSSIBILITY OF SUCH DAMAGE.
35-----------------------------------------------------------------------------
36*/
37
38
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
46
47/* We need the internal info for displaying the results of pcre_study(). Also
48for getting the opcodes for showing compiled code. */
49
50#define PCRE_SPY /* For Win32 build, import data, not export */
51#include "internal.h"
52
53/* It is possible to compile this test program without including support for
54testing the POSIX interface, though this is not available via the standard
55Makefile. */
56
57#if !defined NOPOSIX
58#include "pcreposix.h"
59#endif
60
61#ifndef CLOCKS_PER_SEC
62#ifdef CLK_TCK
63#define CLOCKS_PER_SEC CLK_TCK
64#else
65#define CLOCKS_PER_SEC 100
66#endif
67#endif
68
69#define LOOPREPEAT 500000
70
71#define BUFFER_SIZE 30000
72#define PBUFFER_SIZE BUFFER_SIZE
73#define DBUFFER_SIZE BUFFER_SIZE
74
75
/* Stream that all result output is written to; main() points it at stdout or
at the output file named on the command line. */
76static FILE *outfile;
/* Non-zero when the code-space size is to be reported after a compile; main()
sets it from the command-line default and the pattern's M modifier. */
77static int log_store = 0;
/* State used by callout(): callout_count is incremented each time the callout
whose number equals callout_fail_id is reached, and callout() returns 1 once
the count reaches callout_fail_count. callout_extra makes callout() print the
captured substrings on every call. first_callout is cleared by callout() after
each call; while it (or callout_extra) is set the subject is echoed. */
78static int callout_count;
79static int callout_extra;
80static int callout_fail_count;
81static int callout_fail_id;
82static int first_callout;
/* Non-zero makes the malloc/free replacement functions log each call. */
83static int show_malloc;
/* Non-zero when subject strings are to be treated as UTF-8 by pchars(). */
84static int use_utf8;
/* Size passed to the most recent new_malloc() call. */
85static size_t gotten_store;
86
/* Buffer allocated in main() that receives a copy of the current pattern, so
that callout() can print the pattern item at the callout position. */
87static uschar *pbuffer = NULL;
88
89
/* Largest character value that can be encoded using index+1 bytes; used by
ord2utf8() to pick the encoded length and by utf82ord() to reject encodings
that are longer than necessary. */
90static const int utf8_table1[] = {
91 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
92
/* Marker bits that ord2utf8() ORs into the first byte of a sequence that has
index additional bytes. */
93static const int utf8_table2[] = {
94 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
95
/* Mask that utf82ord() applies to the first byte of a sequence that has index
additional bytes, leaving only its data bits. */
96static const int utf8_table3[] = {
97 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
98
99
100
101/*************************************************
102* Print compiled regex *
103*************************************************/
104
105/* The code for doing this is held in a separate file that is also included in
106pcre.c when it is compiled with the debug switch. It defines a function called
107print_internals(), which uses a table of opcode lengths defined by the macro
108OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
109Unicode property names to numbers; this is kept in a separate file. */
110
111static uschar OP_lengths[] = { OP_LENGTHS };
112
113#ifdef SUPPORT_UCP
114#include "ucp.h"
115#include "ucptypetable.c"
116#endif
117
118#include "printint.c"
119
120
121
/*************************************************
*        Read number from string                 *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is converted.
If there are no digits the result is zero and *endptr points at the first
non-space character. A digit string that is too big for an int gives INT_MAX
instead of overflowing (signed overflow is undefined behaviour); the remaining
digits are still consumed so that a caller's "*endptr == 0" test keeps working.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer

Returns:      the value as a non-negative int, capped at INT_MAX
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result * 10 + digit would exceed INT_MAX exactly when this holds */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
146
147
148
149/*************************************************
150* Convert character value to UTF-8 *
151*************************************************/
152
153/* This function takes an integer value in the range 0 - 0x7fffffff
154and encodes it as a UTF-8 character in 0 to 6 bytes.
155
156Arguments:
157 cvalue the character value
158 buffer pointer to buffer for result - at least 6 bytes long
159
160Returns: number of characters placed in the buffer
161 -1 if input character is negative
162 0 if input character is positive but too big (only when
163 int is longer than 32 bits)
164*/
165
166static int
167ord2utf8(int cvalue, unsigned char *buffer)
168{
169register int i, j;
170for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
171 if (cvalue <= utf8_table1[i]) break;
172if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
173if (cvalue < 0) return -1;
174
175buffer += i;
176for (j = i; j > 0; j--)
177 {
178 *buffer-- = 0x80 | (cvalue & 0x3f);
179 cvalue >>= 6;
180 }
181*buffer = utf8_table2[i] | cvalue;
182return i + 1;
183}
184
185
186/*************************************************
187* Convert UTF-8 string to value *
188*************************************************/
189
190/* This function takes one or more bytes that represents a UTF-8 character,
191and returns the value of the character.
192
193Argument:
194 buffer a pointer to the byte vector
195 vptr a pointer to an int to receive the value
196
197Returns: > 0 => the number of bytes consumed
198 -6 to 0 => malformed UTF-8 character at offset = (-return)
199*/
200
201static int
202utf82ord(unsigned char *buffer, int *vptr)
203{
204int c = *buffer++;
205int d = c;
206int i, j, s;
207
208for (i = -1; i < 6; i++) /* i is number of additional bytes */
209 {
210 if ((d & 0x80) == 0) break;
211 d <<= 1;
212 }
213
214if (i == -1) { *vptr = c; return 1; } /* ascii character */
215if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
216
217/* i now has a value in the range 1-5 */
218
219s = 6*i;
220d = (c & utf8_table3[i]) << s;
221
222for (j = 0; j < i; j++)
223 {
224 c = *buffer++;
225 if ((c & 0xc0) != 0x80) return -(j+1);
226 s -= 6;
227 d |= (c & 0x3f) << s;
228 }
229
230/* Check that encoding was the correct unique one */
231
232for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
233 if (d <= utf8_table1[j]) break;
234if (j != i) return -(i+1);
235
236/* Valid value */
237
238*vptr = d;
239return i+1;
240}
241
242
243
244/*************************************************
245* Print character string *
246*************************************************/
247
248/* Character string printing function. Must handle UTF-8 strings in utf8
249mode. Yields number of characters printed. If handed a NULL file, just counts
250chars without printing. */
251
252static int pchars(unsigned char *p, int length, FILE *f)
253{
254int c;
255int yield = 0;
256
257while (length-- > 0)
258 {
259 if (use_utf8)
260 {
261 int rc = utf82ord(p, &c);
262
263 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
264 {
265 length -= rc - 1;
266 p += rc;
267 if (c < 256 && isprint(c))
268 {
269 if (f != NULL) fprintf(f, "%c", c);
270 yield++;
271 }
272 else
273 {
274 int n;
275 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
276 yield += n;
277 }
278 continue;
279 }
280 }
281
282 /* Not UTF-8, or malformed UTF-8 */
283
284 if (isprint(c = *(p++)))
285 {
286 if (f != NULL) fprintf(f, "%c", c);
287 yield++;
288 }
289 else
290 {
291 if (f != NULL) fprintf(f, "\\x%02x", c);
292 yield += 4;
293 }
294 }
295
296return yield;
297}
298
299
300
301/*************************************************
302* Callout function *
303*************************************************/
304
305/* Called from PCRE as a result of the (?C) item. We print out where we are in
306the match. Yield zero unless more callouts than the fail count, or the callout
307data is not zero. */
308
309static int callout(pcre_callout_block *cb)
310{
311FILE *f = (first_callout | callout_extra)? outfile : NULL;
312int i, pre_start, post_start, subject_length;
313
314if (callout_extra)
315 {
316 fprintf(f, "Callout %d: last capture = %d\n",
317 cb->callout_number, cb->capture_last);
318
319 for (i = 0; i < cb->capture_top * 2; i += 2)
320 {
321 if (cb->offset_vector[i] < 0)
322 fprintf(f, "%2d: <unset>\n", i/2);
323 else
324 {
325 fprintf(f, "%2d: ", i/2);
326 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
327 cb->offset_vector[i+1] - cb->offset_vector[i], f);
328 fprintf(f, "\n");
329 }
330 }
331 }
332
333/* Re-print the subject in canonical form, the first time or if giving full
334datails. On subsequent calls in the same match, we use pchars just to find the
335printed lengths of the substrings. */
336
337if (f != NULL) fprintf(f, "--->");
338
339pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
340post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
341 cb->current_position - cb->start_match, f);
342
343subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
344
345(void)pchars((unsigned char *)(cb->subject + cb->current_position),
346 cb->subject_length - cb->current_position, f);
347
348if (f != NULL) fprintf(f, "\n");
349
350/* Always print appropriate indicators, with callout number if not already
351shown. For automatic callouts, show the pattern offset. */
352
353if (cb->callout_number == 255)
354 {
355 fprintf(outfile, "%+3d ", cb->pattern_position);
356 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
357 }
358else
359 {
360 if (callout_extra) fprintf(outfile, " ");
361 else fprintf(outfile, "%3d ", cb->callout_number);
362 }
363
364for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
365fprintf(outfile, "^");
366
367if (post_start > 0)
368 {
369 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
370 fprintf(outfile, "^");
371 }
372
373for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
374 fprintf(outfile, " ");
375
376fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
377 pbuffer + cb->pattern_position);
378
379fprintf(outfile, "\n");
380first_callout = 0;
381
382if (cb->callout_data != NULL)
383 {
384 int callout_data = *((int *)(cb->callout_data));
385 if (callout_data != 0)
386 {
387 fprintf(outfile, "Callout data = %d\n", callout_data);
388 return callout_data;
389 }
390 }
391
392return (cb->callout_number != callout_fail_id)? 0 :
393 (++callout_count >= callout_fail_count)? 1 : 0;
394}
395
396
397/*************************************************
398* Local malloc functions *
399*************************************************/
400
401/* Alternative malloc function, to test functionality and show the size of the
402compiled re. */
403
404static void *new_malloc(size_t size)
405{
406void *block = malloc(size);
407gotten_store = size;
408if (show_malloc)
409 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
410return block;
411}
412
413static void new_free(void *block)
414{
415if (show_malloc)
416 fprintf(outfile, "free %p\n", block);
417free(block);
418}
419
420
421/* For recursion malloc/free, to test stacking calls */
422
423static void *stack_malloc(size_t size)
424{
425void *block = malloc(size);
426if (show_malloc)
427 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
428return block;
429}
430
431static void stack_free(void *block)
432{
433if (show_malloc)
434 fprintf(outfile, "stack_free %p\n", block);
435free(block);
436}
437
438
439/*************************************************
440* Call pcre_fullinfo() *
441*************************************************/
442
443/* Get one piece of information from the pcre_fullinfo() function */
444
445static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
446{
447int rc;
448if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
449 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
450}
451
452
453
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or 4-byte quantity held in a long int.
The work is done on an unsigned copy so that moving the low-order byte up to
bits 24-31 cannot shift into the sign bit when long int is only 32 bits wide.

Arguments:
  value     the value whose bytes are to be swapped
  n         2 to swap the two low-order bytes; any other value swaps the
              four low-order bytes

Returns:    the value with the bytes reversed; bits above those that were
              swapped are zero
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
467
468
469
470
471/*************************************************
472* Main Program *
473*************************************************/
474
475/* Read lines from named file or stdin and write to named file or stdout; lines
476consist of a regular expression, in delimiters and optionally followed by
477options, followed by a set of test data, terminated by an empty line. */
478
479int main(int argc, char **argv)
480{
481FILE *infile = stdin;
482int options = 0;
483int study_options = 0;
484int op = 1;
485int timeit = 0;
486int showinfo = 0;
487int showstore = 0;
488int size_offsets = 45;
489int size_offsets_max;
490int *offsets;
491#if !defined NOPOSIX
492int posix = 0;
493#endif
494int debug = 0;
495int done = 0;
496
497unsigned char *buffer;
498unsigned char *dbuffer;
499
500/* Get buffers from malloc() so that Electric Fence will check their misuse
501when I am debugging. */
502
503buffer = (unsigned char *)malloc(BUFFER_SIZE);
504dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
505pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
506
507/* The outfile variable is static so that new_malloc can use it. The _setmode()
508stuff is some magic that I don't understand, but which apparently does good
509things in Windows. It's related to line terminations. */
510
511#if defined(_WIN32) || defined(WIN32)
512_setmode( _fileno( stdout ), 0x8000 );
513#endif /* defined(_WIN32) || defined(WIN32) */
514
515outfile = stdout;
516
517/* Scan options */
518
519while (argc > 1 && argv[op][0] == '-')
520 {
521 unsigned char *endptr;
522
523 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
524 showstore = 1;
525 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
526 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
527 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
528 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
529 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
530 *endptr == 0))
531 {
532 op++;
533 argc--;
534 }
535#if !defined NOPOSIX
536 else if (strcmp(argv[op], "-p") == 0) posix = 1;
537#endif
538 else if (strcmp(argv[op], "-C") == 0)
539 {
540 int rc;
541 printf("PCRE version %s\n", pcre_version());
542 printf("Compiled with\n");
543 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
544 printf(" %sUTF-8 support\n", rc? "" : "No ");
545 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
546 printf(" %sUnicode properties support\n", rc? "" : "No ");
547 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
548 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
549 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
550 printf(" Internal link size = %d\n", rc);
551 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
552 printf(" POSIX malloc threshold = %d\n", rc);
553 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
554 printf(" Default match limit = %d\n", rc);
555 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
556 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
557 exit(0);
558 }
559 else
560 {
561 printf("** Unknown or malformed option %s\n", argv[op]);
562 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
563 printf(" -C show PCRE compile-time options and exit\n");
564 printf(" -d debug: show compiled code; implies -i\n"
565 " -i show information about compiled pattern\n"
566 " -m output memory used information\n"
567 " -o <n> set size of offsets vector to <n>\n");
568#if !defined NOPOSIX
569 printf(" -p use POSIX interface\n");
570#endif
571 printf(" -s output store (memory) used information\n"
572 " -t time compilation and execution\n");
573 return 1;
574 }
575 op++;
576 argc--;
577 }
578
579/* Get the store for the offsets vector, and remember what it was */
580
581size_offsets_max = size_offsets;
582offsets = (int *)malloc(size_offsets_max * sizeof(int));
583if (offsets == NULL)
584 {
585 printf("** Failed to get %d bytes of memory for offsets vector\n",
586 size_offsets_max * sizeof(int));
587 return 1;
588 }
589
590/* Sort out the input and output files */
591
592if (argc > 1)
593 {
594 infile = fopen(argv[op], "rb");
595 if (infile == NULL)
596 {
597 printf("** Failed to open %s\n", argv[op]);
598 return 1;
599 }
600 }
601
602if (argc > 2)
603 {
604 outfile = fopen(argv[op+1], "wb");
605 if (outfile == NULL)
606 {
607 printf("** Failed to open %s\n", argv[op+1]);
608 return 1;
609 }
610 }
611
612/* Set alternative malloc function */
613
614pcre_malloc = new_malloc;
615pcre_free = new_free;
616pcre_stack_malloc = stack_malloc;
617pcre_stack_free = stack_free;
618
619/* Heading line, then prompt for first regex if stdin */
620
621fprintf(outfile, "PCRE version %s\n\n", pcre_version());
622
623/* Main loop */
624
625while (!done)
626 {
627 pcre *re = NULL;
628 pcre_extra *extra = NULL;
629
630#if !defined NOPOSIX /* There are still compilers that require no indent */
631 regex_t preg;
632 int do_posix = 0;
633#endif
634
635 const char *error;
636 unsigned char *p, *pp, *ppp;
637 unsigned char *to_file = NULL;
638 const unsigned char *tables = NULL;
639 unsigned long int true_size, true_study_size = 0;
640 size_t size, regex_gotten_store;
641 int do_study = 0;
642 int do_debug = debug;
643 int do_G = 0;
644 int do_g = 0;
645 int do_showinfo = showinfo;
646 int do_showrest = 0;
647 int do_flip = 0;
648 int erroroffset, len, delimiter;
649
650 use_utf8 = 0;
651
652 if (infile == stdin) printf(" re> ");
653 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
654 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
655 fflush(outfile);
656
657 p = buffer;
658 while (isspace(*p)) p++;
659 if (*p == 0) continue;
660
661 /* See if the pattern is to be loaded pre-compiled from a file. */
662
663 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
664 {
665 unsigned long int magic;
666 uschar sbuf[8];
667 FILE *f;
668
669 p++;
670 pp = p + (int)strlen((char *)p);
671 while (isspace(pp[-1])) pp--;
672 *pp = 0;
673
674 f = fopen((char *)p, "rb");
675 if (f == NULL)
676 {
677 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
678 continue;
679 }
680
681 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
682
683 true_size =
684 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
685 true_study_size =
686 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
687
688 re = (real_pcre *)new_malloc(true_size);
689 regex_gotten_store = gotten_store;
690
691 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
692
693 magic = ((real_pcre *)re)->magic_number;
694 if (magic != MAGIC_NUMBER)
695 {
696 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
697 {
698 do_flip = 1;
699 }
700 else
701 {
702 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
703 fclose(f);
704 continue;
705 }
706 }
707
708 fprintf(outfile, "Compiled regex%s loaded from %s\n",
709 do_flip? " (byte-inverted)" : "", p);
710
711 /* Need to know if UTF-8 for printing data strings */
712
713 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
714 use_utf8 = (options & PCRE_UTF8) != 0;
715
716 /* Now see if there is any following study data */
717
718 if (true_study_size != 0)
719 {
720 pcre_study_data *psd;
721
722 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
723 extra->flags = PCRE_EXTRA_STUDY_DATA;
724
725 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
726 extra->study_data = psd;
727
728 if (fread(psd, 1, true_study_size, f) != true_study_size)
729 {
730 FAIL_READ:
731 fprintf(outfile, "Failed to read data from %s\n", p);
732 if (extra != NULL) new_free(extra);
733 if (re != NULL) new_free(re);
734 fclose(f);
735 continue;
736 }
737 fprintf(outfile, "Study data loaded from %s\n", p);
738 do_study = 1; /* To get the data output if requested */
739 }
740 else fprintf(outfile, "No study data\n");
741
742 fclose(f);
743 goto SHOW_INFO;
744 }
745
746 /* In-line pattern (the usual case). Get the delimiter and seek the end of
747 the pattern; if is isn't complete, read more. */
748
749 delimiter = *p++;
750
751 if (isalnum(delimiter) || delimiter == '\\')
752 {
753 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
754 goto SKIP_DATA;
755 }
756
757 pp = p;
758
759 for(;;)
760 {
761 while (*pp != 0)
762 {
763 if (*pp == '\\' && pp[1] != 0) pp++;
764 else if (*pp == delimiter) break;
765 pp++;
766 }
767 if (*pp != 0) break;
768
769 len = BUFFER_SIZE - (pp - buffer);
770 if (len < 256)
771 {
772 fprintf(outfile, "** Expression too long - missing delimiter?\n");
773 goto SKIP_DATA;
774 }
775
776 if (infile == stdin) printf(" > ");
777 if (fgets((char *)pp, len, infile) == NULL)
778 {
779 fprintf(outfile, "** Unexpected EOF\n");
780 done = 1;
781 goto CONTINUE;
782 }
783 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
784 }
785
786 /* If the first character after the delimiter is backslash, make
787 the pattern end with backslash. This is purely to provide a way
788 of testing for the error message when a pattern ends with backslash. */
789
790 if (pp[1] == '\\') *pp++ = '\\';
791
792 /* Terminate the pattern at the delimiter, and save a copy of the pattern
793 for callouts. */
794
795 *pp++ = 0;
796 strcpy((char *)pbuffer, (char *)p);
797
798 /* Look for options after final delimiter */
799
800 options = 0;
801 study_options = 0;
802 log_store = showstore; /* default from command line */
803
804 while (*pp != 0)
805 {
806 switch (*pp++)
807 {
808 case 'g': do_g = 1; break;
809 case 'i': options |= PCRE_CASELESS; break;
810 case 'm': options |= PCRE_MULTILINE; break;
811 case 's': options |= PCRE_DOTALL; break;
812 case 'x': options |= PCRE_EXTENDED; break;
813
814 case '+': do_showrest = 1; break;
815 case 'A': options |= PCRE_ANCHORED; break;
816 case 'C': options |= PCRE_AUTO_CALLOUT; break;
817 case 'D': do_debug = do_showinfo = 1; break;
818 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
819 case 'F': do_flip = 1; break;
820 case 'G': do_G = 1; break;
821 case 'I': do_showinfo = 1; break;
822 case 'M': log_store = 1; break;
823 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
824
825#if !defined NOPOSIX
826 case 'P': do_posix = 1; break;
827#endif
828
829 case 'S': do_study = 1; break;
830 case 'U': options |= PCRE_UNGREEDY; break;
831 case 'X': options |= PCRE_EXTRA; break;
832 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
833 case '?': options |= PCRE_NO_UTF8_CHECK; break;
834
835 case 'L':
836 ppp = pp;
837 while (*ppp != '\n' && *ppp != ' ') ppp++;
838 *ppp = 0;
839 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
840 {
841 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
842 goto SKIP_DATA;
843 }
844 tables = pcre_maketables();
845 pp = ppp;
846 break;
847
848 case '>':
849 to_file = pp;
850 while (*pp != 0) pp++;
851 while (isspace(pp[-1])) pp--;
852 *pp = 0;
853 break;
854
855 case '\n': case ' ': break;
856
857 default:
858 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
859 goto SKIP_DATA;
860 }
861 }
862
863 /* Handle compiling via the POSIX interface, which doesn't support the
864 timing, showing, or debugging options, nor the ability to pass over
865 local character tables. */
866
867#if !defined NOPOSIX
868 if (posix || do_posix)
869 {
870 int rc;
871 int cflags = 0;
872
873 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
874 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
875 rc = regcomp(&preg, (char *)p, cflags);
876
877 /* Compilation failed; go back for another re, skipping to blank line
878 if non-interactive. */
879
880 if (rc != 0)
881 {
882 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
883 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
884 goto SKIP_DATA;
885 }
886 }
887
888 /* Handle compiling via the native interface */
889
890 else
891#endif /* !defined NOPOSIX */
892
893 {
894 if (timeit)
895 {
896 register int i;
897 clock_t time_taken;
898 clock_t start_time = clock();
899 for (i = 0; i < LOOPREPEAT; i++)
900 {
901 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
902 if (re != NULL) free(re);
903 }
904 time_taken = clock() - start_time;
905 fprintf(outfile, "Compile time %.3f milliseconds\n",
906 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
907 (double)CLOCKS_PER_SEC);
908 }
909
910 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
911
912 /* Compilation failed; go back for another re, skipping to blank line
913 if non-interactive. */
914
915 if (re == NULL)
916 {
917 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
918 SKIP_DATA:
919 if (infile != stdin)
920 {
921 for (;;)
922 {
923 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
924 {
925 done = 1;
926 goto CONTINUE;
927 }
928 len = (int)strlen((char *)buffer);
929 while (len > 0 && isspace(buffer[len-1])) len--;
930 if (len == 0) break;
931 }
932 fprintf(outfile, "\n");
933 }
934 goto CONTINUE;
935 }
936
937 /* Compilation succeeded; print data if required. There are now two
938 info-returning functions. The old one has a limited interface and
939 returns only limited data. Check that it agrees with the newer one. */
940
941 if (log_store)
942 fprintf(outfile, "Memory allocation (code space): %d\n",
943 (int)(gotten_store -
944 sizeof(real_pcre) -
945 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
946
947 /* Extract the size for possible writing before possibly flipping it,
948 and remember the store that was got. */
949
950 true_size = ((real_pcre *)re)->size;
951 regex_gotten_store = gotten_store;
952
953 /* If /S was present, study the regexp to generate additional info to
954 help with the matching. */
955
956 if (do_study)
957 {
958 if (timeit)
959 {
960 register int i;
961 clock_t time_taken;
962 clock_t start_time = clock();
963 for (i = 0; i < LOOPREPEAT; i++)
964 extra = pcre_study(re, study_options, &error);
965 time_taken = clock() - start_time;
966 if (extra != NULL) free(extra);
967 fprintf(outfile, " Study time %.3f milliseconds\n",
968 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
969 (double)CLOCKS_PER_SEC);
970 }
971 extra = pcre_study(re, study_options, &error);
972 if (error != NULL)
973 fprintf(outfile, "Failed to study: %s\n", error);
974 else if (extra != NULL)
975 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
976 }
977
978 /* If the 'F' option was present, we flip the bytes of all the integer
979 fields in the regex data block and the study block. This is to make it
980 possible to test PCRE's handling of byte-flipped patterns, e.g. those
981 compiled on a different architecture. */
982
983 if (do_flip)
984 {
985 real_pcre *rre = (real_pcre *)re;
986 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
987 rre->size = byteflip(rre->size, sizeof(rre->size));
988 rre->options = byteflip(rre->options, sizeof(rre->options));
989 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
990 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
991 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
992 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
993 rre->name_table_offset = byteflip(rre->name_table_offset,
994 sizeof(rre->name_table_offset));
995 rre->name_entry_size = byteflip(rre->name_entry_size,
996 sizeof(rre->name_entry_size));
997 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
998
999 if (extra != NULL)
1000 {
1001 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1002 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1003 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1004 }
1005 }
1006
1007 /* Extract information from the compiled data if required */
1008
1009 SHOW_INFO:
1010
1011 if (do_showinfo)
1012 {
1013 unsigned long int get_options, all_options;
1014 int old_first_char, old_options, old_count;
1015 int count, backrefmax, first_char, need_char;
1016 int nameentrysize, namecount;
1017 const uschar *nametable;
1018
1019 if (do_debug)
1020 {
1021 fprintf(outfile, "------------------------------------------------------------------\n");
1022 print_internals(re, outfile);
1023 }
1024
1025 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1026 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1027 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1028 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1029 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1030 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1031 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1032 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1033 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1034
1035 old_count = pcre_info(re, &old_options, &old_first_char);
1036 if (count < 0) fprintf(outfile,
1037 "Error %d from pcre_info()\n", count);
1038 else
1039 {
1040 if (old_count != count) fprintf(outfile,
1041 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1042 old_count);
1043
1044 if (old_first_char != first_char) fprintf(outfile,
1045 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1046 first_char, old_first_char);
1047
1048 if (old_options != (int)get_options) fprintf(outfile,
1049 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1050 get_options, old_options);
1051 }
1052
1053 if (size != regex_gotten_store) fprintf(outfile,
1054 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1055 (int)size, (int)regex_gotten_store);
1056
1057 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1058 if (backrefmax > 0)
1059 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1060
1061 if (namecount > 0)
1062 {
1063 fprintf(outfile, "Named capturing subpatterns:\n");
1064 while (namecount-- > 0)
1065 {
1066 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1067 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1068 GET2(nametable, 0));
1069 nametable += nameentrysize;
1070 }
1071 }
1072
1073 /* The NOPARTIAL bit is a private bit in the options, so we have
1074 to fish it out via out back door */
1075
1076 all_options = ((real_pcre *)re)->options;
1077 if (do_flip)
1078 {
1079 all_options = byteflip(all_options, sizeof(all_options));
1080 }
1081
1082 if ((all_options & PCRE_NOPARTIAL) != 0)
1083 fprintf(outfile, "Partial matching not supported\n");
1084
1085 if (get_options == 0) fprintf(outfile, "No options\n");
1086 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
1087 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1088 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1089 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1090 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1091 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1092 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1093 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1094 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1095 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1096 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1097
1098 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1099 fprintf(outfile, "Case state changes\n");
1100
1101 if (first_char == -1)
1102 {
1103 fprintf(outfile, "First char at start or follows \\n\n");
1104 }
1105 else if (first_char < 0)
1106 {
1107 fprintf(outfile, "No first char\n");
1108 }
1109 else
1110 {
1111 int ch = first_char & 255;
1112 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1113 "" : " (caseless)";
1114 if (isprint(ch))
1115 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1116 else
1117 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1118 }
1119
1120 if (need_char < 0)
1121 {
1122 fprintf(outfile, "No need char\n");
1123 }
1124 else
1125 {
1126 int ch = need_char & 255;
1127 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1128 "" : " (caseless)";
1129 if (isprint(ch))
1130 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1131 else
1132 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1133 }
1134
1135 /* Don't output study size; at present it is in any case a fixed
1136 value, but it varies, depending on the computer architecture, and
1137 so messes up the test suite. (And with the /F option, it might be
1138 flipped.) */
1139
1140 if (do_study)
1141 {
1142 if (extra == NULL)
1143 fprintf(outfile, "Study returned NULL\n");
1144 else
1145 {
1146 uschar *start_bits = NULL;
1147 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1148
1149 if (start_bits == NULL)
1150 fprintf(outfile, "No starting byte set\n");
1151 else
1152 {
1153 int i;
1154 int c = 24;
1155 fprintf(outfile, "Starting byte set: ");
1156 for (i = 0; i < 256; i++)
1157 {
1158 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1159 {
1160 if (c > 75)
1161 {
1162 fprintf(outfile, "\n ");
1163 c = 2;
1164 }
1165 if (isprint(i) && i != ' ')
1166 {
1167 fprintf(outfile, "%c ", i);
1168 c += 2;
1169 }
1170 else
1171 {
1172 fprintf(outfile, "\\x%02x ", i);
1173 c += 5;
1174 }
1175 }
1176 }
1177 fprintf(outfile, "\n");
1178 }
1179 }
1180 }
1181 }
1182
1183 /* If the '>' option was present, we write out the regex to a file, and
1184 that is all. The first 8 bytes of the file are the regex length and then
1185 the study length, in big-endian order. */
1186
1187 if (to_file != NULL)
1188 {
1189 FILE *f = fopen((char *)to_file, "wb");
1190 if (f == NULL)
1191 {
1192 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1193 }
1194 else
1195 {
1196 uschar sbuf[8];
1197 sbuf[0] = (true_size >> 24) & 255;
1198 sbuf[1] = (true_size >> 16) & 255;
1199 sbuf[2] = (true_size >> 8) & 255;
1200 sbuf[3] = (true_size) & 255;
1201
1202 sbuf[4] = (true_study_size >> 24) & 255;
1203 sbuf[5] = (true_study_size >> 16) & 255;
1204 sbuf[6] = (true_study_size >> 8) & 255;
1205 sbuf[7] = (true_study_size) & 255;
1206
1207 if (fwrite(sbuf, 1, 8, f) < 8 ||
1208 fwrite(re, 1, true_size, f) < true_size)
1209 {
1210 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1211 }
1212 else
1213 {
1214 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1215 if (extra != NULL)
1216 {
1217 if (fwrite(extra->study_data, 1, true_study_size, f) <
1218 true_study_size)
1219 {
1220 fprintf(outfile, "Write error on %s: %s\n", to_file,
1221 strerror(errno));
1222 }
1223 else fprintf(outfile, "Study data written to %s\n", to_file);
1224 }
1225 }
1226 fclose(f);
1227 }
1228 continue; /* With next regex */
1229 }
1230 } /* End of non-POSIX compile */
1231
1232 /* Read data lines and test them */
1233
1234 for (;;)
1235 {
1236 unsigned char *q;
1237 unsigned char *bptr = dbuffer;
1238 int *use_offsets = offsets;
1239 int use_size_offsets = size_offsets;
1240 int callout_data = 0;
1241 int callout_data_set = 0;
1242 int count, c;
1243 int copystrings = 0;
1244 int find_match_limit = 0;
1245 int getstrings = 0;
1246 int getlist = 0;
1247 int gmatched = 0;
1248 int start_offset = 0;
1249 int g_notempty = 0;
1250
1251 options = 0;
1252
1253 pcre_callout = callout;
1254 first_callout = 1;
1255 callout_extra = 0;
1256 callout_count = 0;
1257 callout_fail_count = 999999;
1258 callout_fail_id = -1;
1259 show_malloc = 0;
1260
1261 if (infile == stdin) printf("data> ");
1262 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1263 {
1264 done = 1;
1265 goto CONTINUE;
1266 }
1267 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1268
1269 len = (int)strlen((char *)buffer);
1270 while (len > 0 && isspace(buffer[len-1])) len--;
1271 buffer[len] = 0;
1272 if (len == 0) break;
1273
1274 p = buffer;
1275 while (isspace(*p)) p++;
1276
1277 q = dbuffer;
1278 while ((c = *p++) != 0)
1279 {
1280 int i = 0;
1281 int n = 0;
1282
1283 if (c == '\\') switch ((c = *p++))
1284 {
1285 case 'a': c = 7; break;
1286 case 'b': c = '\b'; break;
1287 case 'e': c = 27; break;
1288 case 'f': c = '\f'; break;
1289 case 'n': c = '\n'; break;
1290 case 'r': c = '\r'; break;
1291 case 't': c = '\t'; break;
1292 case 'v': c = '\v'; break;
1293
1294 case '0': case '1': case '2': case '3':
1295 case '4': case '5': case '6': case '7':
1296 c -= '0';
1297 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1298 c = c * 8 + *p++ - '0';
1299 break;
1300
1301 case 'x':
1302
1303 /* Handle \x{..} specially - new Perl thing for utf8 */
1304
1305 if (*p == '{')
1306 {
1307 unsigned char *pt = p;
1308 c = 0;
1309 while (isxdigit(*(++pt)))
1310 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1311 if (*pt == '}')
1312 {
1313 unsigned char buff8[8];
1314 int ii, utn;
1315 utn = ord2utf8(c, buff8);
1316 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1317 c = buff8[ii]; /* Last byte */
1318 p = pt + 1;
1319 break;
1320 }
1321 /* Not correct form; fall through */
1322 }
1323
1324 /* Ordinary \x */
1325
1326 c = 0;
1327 while (i++ < 2 && isxdigit(*p))
1328 {
1329 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1330 p++;
1331 }
1332 break;
1333
1334 case 0: /* \ followed by EOF allows for an empty line */
1335 p--;
1336 continue;
1337
1338 case '>':
1339 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1340 continue;
1341
1342 case 'A': /* Option setting */
1343 options |= PCRE_ANCHORED;
1344 continue;
1345
1346 case 'B':
1347 options |= PCRE_NOTBOL;
1348 continue;
1349
1350 case 'C':
1351 if (isdigit(*p)) /* Set copy string */
1352 {
1353 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1354 copystrings |= 1 << n;
1355 }
1356 else if (isalnum(*p))
1357 {
1358 uschar name[256];
1359 uschar *npp = name;
1360 while (isalnum(*p)) *npp++ = *p++;
1361 *npp = 0;
1362 n = pcre_get_stringnumber(re, (char *)name);
1363 if (n < 0)
1364 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1365 else copystrings |= 1 << n;
1366 }
1367 else if (*p == '+')
1368 {
1369 callout_extra = 1;
1370 p++;
1371 }
1372 else if (*p == '-')
1373 {
1374 pcre_callout = NULL;
1375 p++;
1376 }
1377 else if (*p == '!')
1378 {
1379 callout_fail_id = 0;
1380 p++;
1381 while(isdigit(*p))
1382 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1383 callout_fail_count = 0;
1384 if (*p == '!')
1385 {
1386 p++;
1387 while(isdigit(*p))
1388 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1389 }
1390 }
1391 else if (*p == '*')
1392 {
1393 int sign = 1;
1394 callout_data = 0;
1395 if (*(++p) == '-') { sign = -1; p++; }
1396 while(isdigit(*p))
1397 callout_data = callout_data * 10 + *p++ - '0';
1398 callout_data *= sign;
1399 callout_data_set = 1;
1400 }
1401 continue;
1402
1403 case 'G':
1404 if (isdigit(*p))
1405 {
1406 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1407 getstrings |= 1 << n;
1408 }
1409 else if (isalnum(*p))
1410 {
1411 uschar name[256];
1412 uschar *npp = name;
1413 while (isalnum(*p)) *npp++ = *p++;
1414 *npp = 0;
1415 n = pcre_get_stringnumber(re, (char *)name);
1416 if (n < 0)
1417 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1418 else getstrings |= 1 << n;
1419 }
1420 continue;
1421
1422 case 'L':
1423 getlist = 1;
1424 continue;
1425
1426 case 'M':
1427 find_match_limit = 1;
1428 continue;
1429
1430 case 'N':
1431 options |= PCRE_NOTEMPTY;
1432 continue;
1433
1434 case 'O':
1435 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1436 if (n > size_offsets_max)
1437 {
1438 size_offsets_max = n;
1439 free(offsets);
1440 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1441 if (offsets == NULL)
1442 {
1443 printf("** Failed to get %d bytes of memory for offsets vector\n",
1444 size_offsets_max * sizeof(int));
1445 return 1;
1446 }
1447 }
1448 use_size_offsets = n;
1449 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1450 continue;
1451
1452 case 'P':
1453 options |= PCRE_PARTIAL;
1454 continue;
1455
1456 case 'S':
1457 show_malloc = 1;
1458 continue;
1459
1460 case 'Z':
1461 options |= PCRE_NOTEOL;
1462 continue;
1463
1464 case '?':
1465 options |= PCRE_NO_UTF8_CHECK;
1466 continue;
1467 }
1468 *q++ = c;
1469 }
1470 *q = 0;
1471 len = q - dbuffer;
1472
1473 /* Handle matching via the POSIX interface, which does not
1474 support timing or playing with the match limit or callout data. */
1475
1476#if !defined NOPOSIX
1477 if (posix || do_posix)
1478 {
1479 int rc;
1480 int eflags = 0;
1481 regmatch_t *pmatch = NULL;
1482 if (use_size_offsets > 0)
1483 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1484 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1485 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1486
1487 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1488
1489 if (rc != 0)
1490 {
1491 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1492 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1493 }
1494 else
1495 {
1496 size_t i;
1497 for (i = 0; i < (size_t)use_size_offsets; i++)
1498 {
1499 if (pmatch[i].rm_so >= 0)
1500 {
1501 fprintf(outfile, "%2d: ", (int)i);
1502 (void)pchars(dbuffer + pmatch[i].rm_so,
1503 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1504 fprintf(outfile, "\n");
1505 if (i == 0 && do_showrest)
1506 {
1507 fprintf(outfile, " 0+ ");
1508 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1509 outfile);
1510 fprintf(outfile, "\n");
1511 }
1512 }
1513 }
1514 }
1515 free(pmatch);
1516 }
1517
1518 /* Handle matching via the native interface - repeats for /g and /G */
1519
1520 else
1521#endif /* !defined NOPOSIX */
1522
1523 for (;; gmatched++) /* Loop for /g or /G */
1524 {
1525 if (timeit)
1526 {
1527 register int i;
1528 clock_t time_taken;
1529 clock_t start_time = clock();
1530 for (i = 0; i < LOOPREPEAT; i++)
1531 count = pcre_exec(re, extra, (char *)bptr, len,
1532 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1533 time_taken = clock() - start_time;
1534 fprintf(outfile, "Execute time %.3f milliseconds\n",
1535 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1536 (double)CLOCKS_PER_SEC);
1537 }
1538
1539 /* If find_match_limit is set, we want to do repeated matches with
1540 varying limits in order to find the minimum value. */
1541
1542 if (find_match_limit)
1543 {
1544 int min = 0;
1545 int mid = 64;
1546 int max = -1;
1547
1548 if (extra == NULL)
1549 {
1550 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1551 extra->flags = 0;
1552 }
1553 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1554
1555 for (;;)
1556 {
1557 extra->match_limit = mid;
1558 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1559 options | g_notempty, use_offsets, use_size_offsets);
1560 if (count == PCRE_ERROR_MATCHLIMIT)
1561 {
1562 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1563 min = mid;
1564 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1565 }
1566 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1567 count == PCRE_ERROR_PARTIAL)
1568 {
1569 if (mid == min + 1)
1570 {
1571 fprintf(outfile, "Minimum match limit = %d\n", mid);
1572 break;
1573 }
1574 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1575 max = mid;
1576 mid = (min + mid)/2;
1577 }
1578 else break; /* Some other error */
1579 }
1580
1581 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1582 }
1583
1584 /* If callout_data is set, use the interface with additional data */
1585
1586 else if (callout_data_set)
1587 {
1588 if (extra == NULL)
1589 {
1590 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1591 extra->flags = 0;
1592 }
1593 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1594 extra->callout_data = &callout_data;
1595 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1596 options | g_notempty, use_offsets, use_size_offsets);
1597 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1598 }
1599
1600 /* The normal case is just to do the match once, with the default
1601 value of match_limit. */
1602
1603 else
1604 {
1605 count = pcre_exec(re, extra, (char *)bptr, len,
1606 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1607 }
1608
1609 if (count == 0)
1610 {
1611 fprintf(outfile, "Matched, but too many substrings\n");
1612 count = use_size_offsets/3;
1613 }
1614
1615 /* Matched */
1616
1617 if (count >= 0)
1618 {
1619 int i;
1620 for (i = 0; i < count * 2; i += 2)
1621 {
1622 if (use_offsets[i] < 0)
1623 fprintf(outfile, "%2d: <unset>\n", i/2);
1624 else
1625 {
1626 fprintf(outfile, "%2d: ", i/2);
1627 (void)pchars(bptr + use_offsets[i],
1628 use_offsets[i+1] - use_offsets[i], outfile);
1629 fprintf(outfile, "\n");
1630 if (i == 0)
1631 {
1632 if (do_showrest)
1633 {
1634 fprintf(outfile, " 0+ ");
1635 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1636 outfile);
1637 fprintf(outfile, "\n");
1638 }
1639 }
1640 }
1641 }
1642
1643 for (i = 0; i < 32; i++)
1644 {
1645 if ((copystrings & (1 << i)) != 0)
1646 {
1647 char copybuffer[16];
1648 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1649 i, copybuffer, sizeof(copybuffer));
1650 if (rc < 0)
1651 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1652 else
1653 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1654 }
1655 }
1656
1657 for (i = 0; i < 32; i++)
1658 {
1659 if ((getstrings & (1 << i)) != 0)
1660 {
1661 const char *substring;
1662 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1663 i, &substring);
1664 if (rc < 0)
1665 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1666 else
1667 {
1668 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1669 /* free((void *)substring); */
1670 pcre_free_substring(substring);
1671 }
1672 }
1673 }
1674
1675 if (getlist)
1676 {
1677 const char **stringlist;
1678 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1679 &stringlist);
1680 if (rc < 0)
1681 fprintf(outfile, "get substring list failed %d\n", rc);
1682 else
1683 {
1684 for (i = 0; i < count; i++)
1685 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1686 if (stringlist[i] != NULL)
1687 fprintf(outfile, "string list not terminated by NULL\n");
1688 /* free((void *)stringlist); */
1689 pcre_free_substring_list(stringlist);
1690 }
1691 }
1692 }
1693
1694 /* There was a partial match */
1695
1696 else if (count == PCRE_ERROR_PARTIAL)
1697 {
1698 fprintf(outfile, "Partial match\n");
1699 break; /* Out of the /g loop */
1700 }
1701
1702 /* Failed to match. If this is a /g or /G loop and we previously set
1703 g_notempty after a null match, this is not necessarily the end.
1704 We want to advance the start offset, and continue. In the case of UTF-8
1705 matching, the advance must be one character, not one byte. Fudge the
1706 offset values to achieve this. We won't be at the end of the string -
1707 that was checked before setting g_notempty. */
1708
1709 else
1710 {
1711 if (g_notempty != 0)
1712 {
1713 int onechar = 1;
1714 use_offsets[0] = start_offset;
1715 if (use_utf8)
1716 {
1717 while (start_offset + onechar < len)
1718 {
1719 int tb = bptr[start_offset+onechar];
1720 if (tb <= 127) break;
1721 tb &= 0xc0;
1722 if (tb != 0 && tb != 0xc0) onechar++;
1723 }
1724 }
1725 use_offsets[1] = start_offset + onechar;
1726 }
1727 else
1728 {
1729 if (count == PCRE_ERROR_NOMATCH)
1730 {
1731 if (gmatched == 0) fprintf(outfile, "No match\n");
1732 }
1733 else fprintf(outfile, "Error %d\n", count);
1734 break; /* Out of the /g loop */
1735 }
1736 }
1737
1738 /* If not /g or /G we are done */
1739
1740 if (!do_g && !do_G) break;
1741
1742 /* If we have matched an empty string, first check to see if we are at
1743 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1744 what Perl's /g option does. This turns out to be rather cunning. First
1745 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1746 same point. If this fails (picked up above) we advance to the next
1747 character. */
1748
1749 g_notempty = 0;
1750 if (use_offsets[0] == use_offsets[1])
1751 {
1752 if (use_offsets[0] == len) break;
1753 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1754 }
1755
1756 /* For /g, update the start offset, leaving the rest alone */
1757
1758 if (do_g) start_offset = use_offsets[1];
1759
1760 /* For /G, update the pointer and length */
1761
1762 else
1763 {
1764 bptr += use_offsets[1];
1765 len -= use_offsets[1];
1766 }
1767 } /* End of loop for /g and /G */
1768 } /* End of loop for data lines */
1769
1770 CONTINUE:
1771
1772#if !defined NOPOSIX
1773 if (posix || do_posix) regfree(&preg);
1774#endif
1775
1776 if (re != NULL) free(re);
1777 if (extra != NULL) free(extra);
1778 if (tables != NULL)
1779 {
1780 free((void *)tables);
1781 setlocale(LC_CTYPE, "C");
1782 }
1783 }
1784
1785if (infile == stdin) fprintf(outfile, "\n");
1786return 0;
1787}
1788
1789/* End */