Installed PCRE release 7.0.
[exim.git] / src / src / pcre / pcretest.c
CommitLineData
6bf342e1 1/* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.6 2007/01/23 15:08:45 ph10 Exp $ */
8ac170f3 2
c86f6258
PH
3/*************************************************
4* PCRE testing program *
5*************************************************/
6
7/* This program was hacked up as a tester for PCRE. I really should have
8written it more tidily in the first place. Will I ever learn? It has grown and
8ac170f3 9been extended and consequently is now rather, er, *very* untidy in places.
c86f6258
PH
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40
41#include <ctype.h>
42#include <stdio.h>
43#include <string.h>
44#include <stdlib.h>
45#include <time.h>
46#include <locale.h>
47#include <errno.h>
48
6bf342e1
PH
49
50/* A number of things vary for Windows builds. Originally, pcretest opened its
51input and output without "b"; then I was told that "b" was needed in some
52environments, so it was added for release 5.0 to both the input and output. (It
53makes no difference on Unix-like systems.) Later I was told that it is wrong
54for the input on Windows. I've now abstracted the modes into two macros that
55are set here, to make it easier to fiddle with them, and removed "b" from the
56input mode under Windows. */
57
58#if defined(_WIN32) || defined(WIN32)
59#include <io.h> /* For _setmode() */
60#include <fcntl.h> /* For _O_BINARY */
61#define INPUT_MODE "r"
62#define OUTPUT_MODE "wb"
63
64#else
65#include <sys/time.h> /* These two includes are needed */
66#include <sys/resource.h> /* for setrlimit(). */
67#define INPUT_MODE "rb"
68#define OUTPUT_MODE "wb"
aa41d2de
PH
69#endif
70
6bf342e1 71
c86f6258 72#define PCRE_SPY /* For Win32 build, import data, not export */
8ac170f3 73
aa41d2de
PH
74/* We include pcre_internal.h because we need the internal info for displaying
75the results of pcre_study() and we also need to know about the internal
76macros, structures, and other internal data values; pcretest has "inside
77information" compared to a program that strictly follows the PCRE API. */
8ac170f3
PH
78
79#include "pcre_internal.h"
80
aa41d2de
PH
81/* We need access to the data tables that PCRE uses. So as not to have to keep
82two copies, we include the source file here, changing the names of the external
83symbols to prevent clashes. */
84
85#define _pcre_utf8_table1 utf8_table1
86#define _pcre_utf8_table1_size utf8_table1_size
87#define _pcre_utf8_table2 utf8_table2
88#define _pcre_utf8_table3 utf8_table3
89#define _pcre_utf8_table4 utf8_table4
90#define _pcre_utt utt
91#define _pcre_utt_size utt_size
92#define _pcre_OP_lengths OP_lengths
93
94#include "pcre_tables.c"
95
96/* We also need the pcre_printint() function for printing out compiled
97patterns. This function is in a separate file so that it can be included in
6bf342e1
PH
98pcre_compile.c when that module is compiled with debugging enabled.
99
100The definition of the macro PRINTABLE, which determines whether to print an
101output character as-is or as a hex value when showing compiled patterns, is
contained in this file. We use it here also, in cases when the locale has not
103been explicitly changed, so as to get consistent output from systems that
104differ in their output from isprint() even in the "C" locale. */
aa41d2de
PH
105
106#include "pcre_printint.src"
107
6bf342e1
PH
108#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
109
c86f6258
PH
110
111/* It is possible to compile this test program without including support for
112testing the POSIX interface, though this is not available via the standard
113Makefile. */
114
115#if !defined NOPOSIX
116#include "pcreposix.h"
117#endif
118
92e772ff
PH
119/* It is also possible, for the benefit of the version imported into Exim, to
120build pcretest without support for UTF8 (define NOUTF8), without the interface
8ac170f3
PH
121to the DFA matcher (NODFA), and without the doublecheck of the old "info"
122function (define NOINFOCHECK). */
123
124
aa41d2de
PH
125/* Other parameters */
126
c86f6258
PH
127#ifndef CLOCKS_PER_SEC
128#ifdef CLK_TCK
129#define CLOCKS_PER_SEC CLK_TCK
130#else
131#define CLOCKS_PER_SEC 100
132#endif
133#endif
134
6bf342e1
PH
135/* This is the default loop count for timing. */
136
c86f6258
PH
137#define LOOPREPEAT 500000
138
aa41d2de 139/* Static variables */
c86f6258
PH
140
/* Stream that receives all test output. It has file scope so that the
allocation wrappers and the callout function can write to it. */
static FILE *outfile;
/* Per-pattern copy of the command-line "show store" setting. */
static int log_store = 0;
/* Number of callouts seen so far whose number equals callout_fail_id. */
static int callout_count;
/* When non-zero, callout() also lists the captured substrings. */
static int callout_extra;
/* callout() returns 1 once callout_count reaches this value. */
static int callout_fail_count;
/* The callout number to which the fail count applies. */
static int callout_fail_id;
/* Non-zero until callout() has run once; it clears the flag itself. */
static int first_callout;
/* Selects isprint() rather than PRINTABLE() inside PRINTHEX(). */
static int locale_set = 0;
/* When non-zero, the malloc/free wrappers log each call to outfile. */
static int show_malloc;
/* When non-zero, pchars() decodes its input as UTF-8. */
static int use_utf8;
/* Size passed to the most recent new_malloc() call. */
static size_t gotten_store;

/* The buffers grow automatically if very long input lines are encountered. */

/* Current size in bytes of each of the three buffers below. */
static int buffer_size = 50000;
/* Raw input lines are read into this buffer. */
static uschar *buffer = NULL;
/* Resized in step with buffer; NOTE(review): its use is not visible in this
part of the file - presumably holds processed data lines; confirm. */
static uschar *dbuffer = NULL;
/* Copy of the current pattern, read by callout() to show the next item. */
static uschar *pbuffer = NULL;
159
160
c86f6258 161
aa41d2de
PH
162/*************************************************
163* Read or extend an input line *
164*************************************************/
165
166/* Input lines are read into buffer, but both patterns and data lines can be
167continued over multiple input lines. In addition, if the buffer fills up, we
168want to automatically expand it so as to be able to handle extremely large
169lines that are needed for certain stress tests. When the input buffer is
170expanded, the other two buffers must also be expanded likewise, and the
171contents of pbuffer, which are a copy of the input for callouts, must be
172preserved (for when expansion happens for a data line). This is not the most
173optimal way of handling this, but hey, this is just a test program!
174
175Arguments:
176 f the file to read
177 start where in buffer to start (this *must* be within buffer)
178
179Returns: pointer to the start of new data
180 could be a copy of start, or could be moved
181 NULL if no data read and EOF reached
182*/
183
184static uschar *
185extend_inputline(FILE *f, uschar *start)
186{
187uschar *here = start;
188
189for (;;)
190 {
191 int rlen = buffer_size - (here - buffer);
6bf342e1 192
aa41d2de
PH
193 if (rlen > 1000)
194 {
195 int dlen;
196 if (fgets((char *)here, rlen, f) == NULL)
197 return (here == start)? NULL : start;
198 dlen = (int)strlen((char *)here);
199 if (dlen > 0 && here[dlen - 1] == '\n') return start;
200 here += dlen;
201 }
202
203 else
204 {
205 int new_buffer_size = 2*buffer_size;
206 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
207 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
208 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
209
210 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
211 {
212 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
213 exit(1);
214 }
215
216 memcpy(new_buffer, buffer, buffer_size);
217 memcpy(new_pbuffer, pbuffer, buffer_size);
218
219 buffer_size = new_buffer_size;
220
221 start = new_buffer + (start - buffer);
222 here = new_buffer + (here - buffer);
223
224 free(buffer);
225 free(dbuffer);
226 free(pbuffer);
227
228 buffer = new_buffer;
229 dbuffer = new_dbuffer;
230 pbuffer = new_pbuffer;
231 }
232 }
233
234return NULL; /* Control never gets here */
235}
236
237
238
239
240
241
242
c86f6258
PH
243/*************************************************
244* Read number from string *
245*************************************************/
246
247/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
248around with conditional compilation, just do the job by hand. It is only used
6bf342e1 249for unpicking arguments, so just keep it simple.
c86f6258
PH
250
251Arguments:
252 str string to be converted
253 endptr where to put the end pointer
254
Returns:      the value, as an int
256*/
257
static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Skips leading white space, then converts a run of decimal digits to an int.
*endptr is set to the first character after the digits (or after the white
space if there are no digits, in which case the result is 0).

The value saturates at the largest int instead of overflowing: the digits come
straight from the command line, and signed overflow is undefined behaviour. */

/* Largest int value, obtained without <limits.h>; this assumes unsigned int
has exactly one more value bit than int, as on all usual platforms. */

const int ceiling = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (ceiling - digit)/10)
    result = ceiling;                 /* Would overflow: clamp, keep scanning */
  else
    result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
267
268
269
c86f6258
PH
270
271/*************************************************
272* Convert UTF-8 string to value *
273*************************************************/
274
275/* This function takes one or more bytes that represents a UTF-8 character,
276and returns the value of the character.
277
278Argument:
aa41d2de
PH
279 utf8bytes a pointer to the byte vector
280 vptr a pointer to an int to receive the value
c86f6258 281
aa41d2de
PH
282Returns: > 0 => the number of bytes consumed
283 -6 to 0 => malformed UTF-8 character at offset = (-return)
c86f6258
PH
284*/
285
8ac170f3
PH
286#if !defined NOUTF8
287
c86f6258 288static int
aa41d2de 289utf82ord(unsigned char *utf8bytes, int *vptr)
c86f6258 290{
aa41d2de 291int c = *utf8bytes++;
c86f6258
PH
292int d = c;
293int i, j, s;
294
295for (i = -1; i < 6; i++) /* i is number of additional bytes */
296 {
297 if ((d & 0x80) == 0) break;
298 d <<= 1;
299 }
300
301if (i == -1) { *vptr = c; return 1; } /* ascii character */
302if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
303
304/* i now has a value in the range 1-5 */
305
306s = 6*i;
aa41d2de 307d = (c & utf8_table3[i]) << s;
c86f6258
PH
308
309for (j = 0; j < i; j++)
310 {
aa41d2de 311 c = *utf8bytes++;
c86f6258
PH
312 if ((c & 0xc0) != 0x80) return -(j+1);
313 s -= 6;
314 d |= (c & 0x3f) << s;
315 }
316
317/* Check that encoding was the correct unique one */
318
aa41d2de
PH
319for (j = 0; j < utf8_table1_size; j++)
320 if (d <= utf8_table1[j]) break;
c86f6258
PH
321if (j != i) return -(i+1);
322
323/* Valid value */
324
325*vptr = d;
326return i+1;
327}
328
8ac170f3
PH
329#endif
330
c86f6258
PH
331
332
aa41d2de
PH
333/*************************************************
334* Convert character value to UTF-8 *
335*************************************************/
336
337/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes.
339
340Arguments:
341 cvalue the character value
342 utf8bytes pointer to buffer for result - at least 6 bytes long
343
344Returns: number of characters placed in the buffer
345*/
346
347#if !defined NOUTF8
348
349static int
350ord2utf8(int cvalue, uschar *utf8bytes)
351{
352register int i, j;
353for (i = 0; i < utf8_table1_size; i++)
354 if (cvalue <= utf8_table1[i]) break;
355utf8bytes += i;
356for (j = i; j > 0; j--)
357 {
358 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
359 cvalue >>= 6;
360 }
361*utf8bytes = utf8_table2[i] | cvalue;
362return i + 1;
363}
364
365#endif
366
367
368
c86f6258
PH
369/*************************************************
370* Print character string *
371*************************************************/
372
373/* Character string printing function. Must handle UTF-8 strings in utf8
374mode. Yields number of characters printed. If handed a NULL file, just counts
375chars without printing. */
376
377static int pchars(unsigned char *p, int length, FILE *f)
378{
aa41d2de 379int c = 0;
c86f6258
PH
380int yield = 0;
381
382while (length-- > 0)
383 {
92e772ff 384#if !defined NOUTF8
c86f6258
PH
385 if (use_utf8)
386 {
387 int rc = utf82ord(p, &c);
388
389 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
390 {
391 length -= rc - 1;
392 p += rc;
6bf342e1 393 if (PRINTHEX(c))
c86f6258
PH
394 {
395 if (f != NULL) fprintf(f, "%c", c);
396 yield++;
397 }
398 else
399 {
6bf342e1
PH
400 int n = 4;
401 if (f != NULL) fprintf(f, "\\x{%02x}", c);
402 yield += (n <= 0x000000ff)? 2 :
403 (n <= 0x00000fff)? 3 :
404 (n <= 0x0000ffff)? 4 :
405 (n <= 0x000fffff)? 5 : 6;
c86f6258
PH
406 }
407 continue;
408 }
409 }
8ac170f3 410#endif
c86f6258
PH
411
412 /* Not UTF-8, or malformed UTF-8 */
413
6bf342e1
PH
414 c = *p++;
415 if (PRINTHEX(c))
c86f6258
PH
416 {
417 if (f != NULL) fprintf(f, "%c", c);
418 yield++;
419 }
420 else
421 {
422 if (f != NULL) fprintf(f, "\\x%02x", c);
423 yield += 4;
424 }
425 }
426
427return yield;
428}
429
430
431
432/*************************************************
433* Callout function *
434*************************************************/
435
436/* Called from PCRE as a result of the (?C) item. We print out where we are in
437the match. Yield zero unless more callouts than the fail count, or the callout
438data is not zero. */
439
440static int callout(pcre_callout_block *cb)
441{
442FILE *f = (first_callout | callout_extra)? outfile : NULL;
443int i, pre_start, post_start, subject_length;
444
445if (callout_extra)
446 {
447 fprintf(f, "Callout %d: last capture = %d\n",
448 cb->callout_number, cb->capture_last);
449
450 for (i = 0; i < cb->capture_top * 2; i += 2)
451 {
452 if (cb->offset_vector[i] < 0)
453 fprintf(f, "%2d: <unset>\n", i/2);
454 else
455 {
456 fprintf(f, "%2d: ", i/2);
457 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
458 cb->offset_vector[i+1] - cb->offset_vector[i], f);
459 fprintf(f, "\n");
460 }
461 }
462 }
463
464/* Re-print the subject in canonical form, the first time or if giving full
465datails. On subsequent calls in the same match, we use pchars just to find the
466printed lengths of the substrings. */
467
468if (f != NULL) fprintf(f, "--->");
469
470pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
471post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
472 cb->current_position - cb->start_match, f);
473
474subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
475
476(void)pchars((unsigned char *)(cb->subject + cb->current_position),
477 cb->subject_length - cb->current_position, f);
478
479if (f != NULL) fprintf(f, "\n");
480
481/* Always print appropriate indicators, with callout number if not already
482shown. For automatic callouts, show the pattern offset. */
483
484if (cb->callout_number == 255)
485 {
486 fprintf(outfile, "%+3d ", cb->pattern_position);
487 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
488 }
489else
490 {
491 if (callout_extra) fprintf(outfile, " ");
492 else fprintf(outfile, "%3d ", cb->callout_number);
493 }
494
495for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
496fprintf(outfile, "^");
497
498if (post_start > 0)
499 {
500 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
501 fprintf(outfile, "^");
502 }
503
504for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
505 fprintf(outfile, " ");
506
507fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
508 pbuffer + cb->pattern_position);
509
510fprintf(outfile, "\n");
511first_callout = 0;
512
513if (cb->callout_data != NULL)
514 {
515 int callout_data = *((int *)(cb->callout_data));
516 if (callout_data != 0)
517 {
518 fprintf(outfile, "Callout data = %d\n", callout_data);
519 return callout_data;
520 }
521 }
522
523return (cb->callout_number != callout_fail_id)? 0 :
524 (++callout_count >= callout_fail_count)? 1 : 0;
525}
526
527
528/*************************************************
529* Local malloc functions *
530*************************************************/
531
532/* Alternative malloc function, to test functionality and show the size of the
533compiled re. */
534
535static void *new_malloc(size_t size)
536{
537void *block = malloc(size);
538gotten_store = size;
539if (show_malloc)
540 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
541return block;
542}
543
544static void new_free(void *block)
545{
546if (show_malloc)
547 fprintf(outfile, "free %p\n", block);
548free(block);
549}
550
551
552/* For recursion malloc/free, to test stacking calls */
553
554static void *stack_malloc(size_t size)
555{
556void *block = malloc(size);
557if (show_malloc)
558 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
559return block;
560}
561
562static void stack_free(void *block)
563{
564if (show_malloc)
565 fprintf(outfile, "stack_free %p\n", block);
566free(block);
567}
568
569
570/*************************************************
571* Call pcre_fullinfo() *
572*************************************************/
573
574/* Get one piece of information from the pcre_fullinfo() function */
575
576static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
577{
578int rc;
579if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
580 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
581}
582
583
584
585/*************************************************
586* Byte flipping function *
587*************************************************/
588
aa41d2de
PH
static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Reverses the byte order of the low two bytes of value when n is 2, and of
the low four bytes for any other n. Higher bits are discarded. */

unsigned long int b0 = value & 0xff;            /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
598
599
600
601
aa41d2de
PH
602/*************************************************
603* Check match or recursion limit *
604*************************************************/
605
606static int
607check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
608 int start_offset, int options, int *use_offsets, int use_size_offsets,
609 int flag, unsigned long int *limit, int errnumber, const char *msg)
610{
611int count;
612int min = 0;
613int mid = 64;
614int max = -1;
615
616extra->flags |= flag;
617
618for (;;)
619 {
620 *limit = mid;
621
622 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
623 use_offsets, use_size_offsets);
624
625 if (count == errnumber)
626 {
627 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
628 min = mid;
629 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
630 }
631
632 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
633 count == PCRE_ERROR_PARTIAL)
634 {
635 if (mid == min + 1)
636 {
637 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
638 break;
639 }
640 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641 max = mid;
642 mid = (min + mid)/2;
643 }
644 else break; /* Some other error */
645 }
646
647extra->flags &= ~flag;
648return count;
649}
650
651
652
653/*************************************************
654* Check newline indicator *
655*************************************************/
656
657/* This is used both at compile and run-time to check for <xxx> escapes, where
6bf342e1 658xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
aa41d2de
PH
659
660Arguments:
661 p points after the leading '<'
662 f file for error message
663
664Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
665*/
666
667static int
668check_newline(uschar *p, FILE *f)
669{
670if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
671if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
672if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
6bf342e1 673if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
aa41d2de
PH
674fprintf(f, "Unknown newline type at: <%s\n", p);
675return 0;
676}
677
678
679
6bf342e1
PH
680/*************************************************
681* Usage function *
682*************************************************/
683
static void
usage(void)
{
/* Writes the command-line summary to stdout. The -dfa and -p lines appear
only when the corresponding feature is compiled in. */

fputs("Usage: pcretest [options] [<input> [<output>]]\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s output store (memory) used information\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
709
710
711
c86f6258
PH
712/*************************************************
713* Main Program *
714*************************************************/
715
716/* Read lines from named file or stdin and write to named file or stdout; lines
717consist of a regular expression, in delimiters and optionally followed by
718options, followed by a set of test data, terminated by an empty line. */
719
720int main(int argc, char **argv)
721{
722FILE *infile = stdin;
723int options = 0;
724int study_options = 0;
725int op = 1;
726int timeit = 0;
6bf342e1 727int timeitm = 0;
c86f6258
PH
728int showinfo = 0;
729int showstore = 0;
aa41d2de 730int quiet = 0;
c86f6258
PH
731int size_offsets = 45;
732int size_offsets_max;
8ac170f3 733int *offsets = NULL;
c86f6258
PH
734#if !defined NOPOSIX
735int posix = 0;
736#endif
737int debug = 0;
738int done = 0;
8ac170f3
PH
739int all_use_dfa = 0;
740int yield = 0;
aa41d2de
PH
741int stack_size;
742
743/* These vectors store, end-to-end, a list of captured substring names. Assume
744that 1024 is plenty long enough for the few names we'll be testing. */
745
746uschar copynames[1024];
747uschar getnames[1024];
c86f6258 748
aa41d2de
PH
749uschar *copynamesptr;
750uschar *getnamesptr;
c86f6258
PH
751
752/* Get buffers from malloc() so that Electric Fence will check their misuse
aa41d2de 753when I am debugging. They grow automatically when very long lines are read. */
c86f6258 754
aa41d2de
PH
755buffer = (unsigned char *)malloc(buffer_size);
756dbuffer = (unsigned char *)malloc(buffer_size);
757pbuffer = (unsigned char *)malloc(buffer_size);
c86f6258 758
6bf342e1 759/* The outfile variable is static so that new_malloc can use it. */
c86f6258
PH
760
761outfile = stdout;
762
6bf342e1
PH
763/* The following _setmode() stuff is some Windows magic that tells its runtime
764library to translate CRLF into a single LF character. At least, that's what
765I've been told: never having used Windows I take this all on trust. Originally
766it set 0x8000, but then I was advised that _O_BINARY was better. */
767
768#if defined(_WIN32) || defined(WIN32)
769_setmode( _fileno( stdout ), _O_BINARY );
770#endif
771
c86f6258
PH
772/* Scan options */
773
774while (argc > 1 && argv[op][0] == '-')
775 {
776 unsigned char *endptr;
777
778 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
779 showstore = 1;
aa41d2de 780 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
6bf342e1 781 else if (strcmp(argv[op], "-b") == 0) debug = 1;
c86f6258
PH
782 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
783 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
92e772ff 784#if !defined NODFA
8ac170f3
PH
785 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
786#endif
c86f6258
PH
787 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
788 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
789 *endptr == 0))
790 {
791 op++;
792 argc--;
793 }
6bf342e1
PH
794 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
795 {
796 int both = argv[op][2] == 0;
797 int temp;
798 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
799 *endptr == 0))
800 {
801 timeitm = temp;
802 op++;
803 argc--;
804 }
805 else timeitm = LOOPREPEAT;
806 if (both) timeit = timeitm;
807 }
aa41d2de
PH
808 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
809 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
810 *endptr == 0))
811 {
6bf342e1 812#if defined(_WIN32) || defined(WIN32)
aa41d2de
PH
813 printf("PCRE: -S not supported on this OS\n");
814 exit(1);
815#else
816 int rc;
817 struct rlimit rlim;
818 getrlimit(RLIMIT_STACK, &rlim);
819 rlim.rlim_cur = stack_size * 1024 * 1024;
820 rc = setrlimit(RLIMIT_STACK, &rlim);
821 if (rc != 0)
822 {
823 printf("PCRE: setrlimit() failed with error %d\n", rc);
824 exit(1);
825 }
826 op++;
827 argc--;
828#endif
829 }
c86f6258
PH
830#if !defined NOPOSIX
831 else if (strcmp(argv[op], "-p") == 0) posix = 1;
832#endif
833 else if (strcmp(argv[op], "-C") == 0)
834 {
835 int rc;
836 printf("PCRE version %s\n", pcre_version());
837 printf("Compiled with\n");
838 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
839 printf(" %sUTF-8 support\n", rc? "" : "No ");
840 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
841 printf(" %sUnicode properties support\n", rc? "" : "No ");
842 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
aa41d2de 843 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
6bf342e1
PH
844 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
845 (rc == -1)? "ANY" : "???");
c86f6258
PH
846 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
847 printf(" Internal link size = %d\n", rc);
848 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
849 printf(" POSIX malloc threshold = %d\n", rc);
850 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
851 printf(" Default match limit = %d\n", rc);
aa41d2de
PH
852 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
853 printf(" Default recursion depth limit = %d\n", rc);
c86f6258
PH
854 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
855 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
856 exit(0);
857 }
6bf342e1
PH
858 else if (strcmp(argv[op], "-help") == 0 ||
859 strcmp(argv[op], "--help") == 0)
860 {
861 usage();
862 goto EXIT;
863 }
c86f6258
PH
864 else
865 {
866 printf("** Unknown or malformed option %s\n", argv[op]);
6bf342e1 867 usage();
8ac170f3
PH
868 yield = 1;
869 goto EXIT;
c86f6258
PH
870 }
871 op++;
872 argc--;
873 }
874
875/* Get the store for the offsets vector, and remember what it was */
876
877size_offsets_max = size_offsets;
878offsets = (int *)malloc(size_offsets_max * sizeof(int));
879if (offsets == NULL)
880 {
881 printf("** Failed to get %d bytes of memory for offsets vector\n",
882 size_offsets_max * sizeof(int));
8ac170f3
PH
883 yield = 1;
884 goto EXIT;
c86f6258
PH
885 }
886
887/* Sort out the input and output files */
888
889if (argc > 1)
890 {
6bf342e1 891 infile = fopen(argv[op], INPUT_MODE);
c86f6258
PH
892 if (infile == NULL)
893 {
894 printf("** Failed to open %s\n", argv[op]);
8ac170f3
PH
895 yield = 1;
896 goto EXIT;
c86f6258
PH
897 }
898 }
899
900if (argc > 2)
901 {
6bf342e1 902 outfile = fopen(argv[op+1], OUTPUT_MODE);
c86f6258
PH
903 if (outfile == NULL)
904 {
905 printf("** Failed to open %s\n", argv[op+1]);
8ac170f3
PH
906 yield = 1;
907 goto EXIT;
c86f6258
PH
908 }
909 }
910
911/* Set alternative malloc function */
912
913pcre_malloc = new_malloc;
914pcre_free = new_free;
915pcre_stack_malloc = stack_malloc;
916pcre_stack_free = stack_free;
917
aa41d2de 918/* Heading line unless quiet, then prompt for first regex if stdin */
c86f6258 919
aa41d2de 920if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
c86f6258
PH
921
922/* Main loop */
923
924while (!done)
925 {
926 pcre *re = NULL;
927 pcre_extra *extra = NULL;
928
929#if !defined NOPOSIX /* There are still compilers that require no indent */
930 regex_t preg;
931 int do_posix = 0;
932#endif
933
934 const char *error;
935 unsigned char *p, *pp, *ppp;
936 unsigned char *to_file = NULL;
937 const unsigned char *tables = NULL;
938 unsigned long int true_size, true_study_size = 0;
939 size_t size, regex_gotten_store;
940 int do_study = 0;
941 int do_debug = debug;
942 int do_G = 0;
943 int do_g = 0;
944 int do_showinfo = showinfo;
945 int do_showrest = 0;
946 int do_flip = 0;
6bf342e1 947 int erroroffset, len, delimiter, poffset;
c86f6258
PH
948
949 use_utf8 = 0;
950
951 if (infile == stdin) printf(" re> ");
aa41d2de 952 if (extend_inputline(infile, buffer) == NULL) break;
c86f6258
PH
953 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
954 fflush(outfile);
955
956 p = buffer;
957 while (isspace(*p)) p++;
958 if (*p == 0) continue;
959
960 /* See if the pattern is to be loaded pre-compiled from a file. */
961
962 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
963 {
aa41d2de 964 unsigned long int magic, get_options;
c86f6258
PH
965 uschar sbuf[8];
966 FILE *f;
967
968 p++;
969 pp = p + (int)strlen((char *)p);
970 while (isspace(pp[-1])) pp--;
971 *pp = 0;
972
973 f = fopen((char *)p, "rb");
974 if (f == NULL)
975 {
976 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
977 continue;
978 }
979
980 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
981
982 true_size =
983 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
984 true_study_size =
985 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
986
987 re = (real_pcre *)new_malloc(true_size);
988 regex_gotten_store = gotten_store;
989
990 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
991
992 magic = ((real_pcre *)re)->magic_number;
993 if (magic != MAGIC_NUMBER)
994 {
995 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
996 {
997 do_flip = 1;
998 }
999 else
1000 {
1001 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1002 fclose(f);
1003 continue;
1004 }
1005 }
1006
1007 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1008 do_flip? " (byte-inverted)" : "", p);
1009
1010 /* Need to know if UTF-8 for printing data strings */
1011
aa41d2de
PH
1012 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1013 use_utf8 = (get_options & PCRE_UTF8) != 0;
c86f6258
PH
1014
1015 /* Now see if there is any following study data */
1016
1017 if (true_study_size != 0)
1018 {
1019 pcre_study_data *psd;
1020
1021 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1022 extra->flags = PCRE_EXTRA_STUDY_DATA;
1023
1024 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1025 extra->study_data = psd;
1026
1027 if (fread(psd, 1, true_study_size, f) != true_study_size)
1028 {
1029 FAIL_READ:
1030 fprintf(outfile, "Failed to read data from %s\n", p);
1031 if (extra != NULL) new_free(extra);
1032 if (re != NULL) new_free(re);
1033 fclose(f);
1034 continue;
1035 }
1036 fprintf(outfile, "Study data loaded from %s\n", p);
1037 do_study = 1; /* To get the data output if requested */
1038 }
1039 else fprintf(outfile, "No study data\n");
1040
1041 fclose(f);
1042 goto SHOW_INFO;
1043 }
1044
1045 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1046 the pattern; if it isn't complete, read more. */
1047
1048 delimiter = *p++;
1049
1050 if (isalnum(delimiter) || delimiter == '\\')
1051 {
1052 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1053 goto SKIP_DATA;
1054 }
1055
1056 pp = p;
6bf342e1 1057 poffset = p - buffer;
c86f6258
PH
1058
1059 for(;;)
1060 {
1061 while (*pp != 0)
1062 {
1063 if (*pp == '\\' && pp[1] != 0) pp++;
1064 else if (*pp == delimiter) break;
1065 pp++;
1066 }
1067 if (*pp != 0) break;
c86f6258 1068 if (infile == stdin) printf(" > ");
aa41d2de 1069 if ((pp = extend_inputline(infile, pp)) == NULL)
c86f6258
PH
1070 {
1071 fprintf(outfile, "** Unexpected EOF\n");
1072 done = 1;
1073 goto CONTINUE;
1074 }
1075 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1076 }
1077
6bf342e1
PH
1078 /* The buffer may have moved while being extended; reset the start of data
1079 pointer to the correct relative point in the buffer. */
1080
1081 p = buffer + poffset;
1082
c86f6258
PH
1083 /* If the first character after the delimiter is backslash, make
1084 the pattern end with backslash. This is purely to provide a way
1085 of testing for the error message when a pattern ends with backslash. */
1086
1087 if (pp[1] == '\\') *pp++ = '\\';
1088
1089 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1090 for callouts. */
1091
1092 *pp++ = 0;
1093 strcpy((char *)pbuffer, (char *)p);
1094
1095 /* Look for options after final delimiter */
1096
1097 options = 0;
1098 study_options = 0;
1099 log_store = showstore; /* default from command line */
1100
1101 while (*pp != 0)
1102 {
1103 switch (*pp++)
1104 {
8ac170f3 1105 case 'f': options |= PCRE_FIRSTLINE; break;
c86f6258
PH
1106 case 'g': do_g = 1; break;
1107 case 'i': options |= PCRE_CASELESS; break;
1108 case 'm': options |= PCRE_MULTILINE; break;
1109 case 's': options |= PCRE_DOTALL; break;
1110 case 'x': options |= PCRE_EXTENDED; break;
1111
1112 case '+': do_showrest = 1; break;
1113 case 'A': options |= PCRE_ANCHORED; break;
6bf342e1 1114 case 'B': do_debug = 1; break;
c86f6258
PH
1115 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1116 case 'D': do_debug = do_showinfo = 1; break;
1117 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1118 case 'F': do_flip = 1; break;
1119 case 'G': do_G = 1; break;
1120 case 'I': do_showinfo = 1; break;
aa41d2de 1121 case 'J': options |= PCRE_DUPNAMES; break;
c86f6258
PH
1122 case 'M': log_store = 1; break;
1123 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1124
1125#if !defined NOPOSIX
1126 case 'P': do_posix = 1; break;
1127#endif
1128
1129 case 'S': do_study = 1; break;
1130 case 'U': options |= PCRE_UNGREEDY; break;
1131 case 'X': options |= PCRE_EXTRA; break;
1132 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1133 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1134
1135 case 'L':
1136 ppp = pp;
6bf342e1
PH
1137 /* The '\r' test here is so that it works on Windows. */
1138 /* The '0' test is just in case this is an unterminated line. */
1139 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
c86f6258
PH
1140 *ppp = 0;
1141 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1142 {
1143 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1144 goto SKIP_DATA;
1145 }
6bf342e1 1146 locale_set = 1;
c86f6258
PH
1147 tables = pcre_maketables();
1148 pp = ppp;
1149 break;
1150
1151 case '>':
1152 to_file = pp;
1153 while (*pp != 0) pp++;
1154 while (isspace(pp[-1])) pp--;
1155 *pp = 0;
1156 break;
1157
aa41d2de
PH
1158 case '<':
1159 {
1160 int x = check_newline(pp, outfile);
1161 if (x == 0) goto SKIP_DATA;
1162 options |= x;
1163 while (*pp++ != '>');
1164 }
1165 break;
1166
8ac170f3
PH
1167 case '\r': /* So that it works in Windows */
1168 case '\n':
1169 case ' ':
1170 break;
c86f6258
PH
1171
1172 default:
1173 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1174 goto SKIP_DATA;
1175 }
1176 }
1177
1178 /* Handle compiling via the POSIX interface, which doesn't support the
1179 timing, showing, or debugging options, nor the ability to pass over
1180 local character tables. */
1181
1182#if !defined NOPOSIX
1183 if (posix || do_posix)
1184 {
1185 int rc;
1186 int cflags = 0;
1187
1188 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1189 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
8ac170f3 1190 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
aa41d2de
PH
1191 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1192 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1193
c86f6258
PH
1194 rc = regcomp(&preg, (char *)p, cflags);
1195
1196 /* Compilation failed; go back for another re, skipping to blank line
1197 if non-interactive. */
1198
1199 if (rc != 0)
1200 {
aa41d2de 1201 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
c86f6258
PH
1202 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1203 goto SKIP_DATA;
1204 }
1205 }
1206
1207 /* Handle compiling via the native interface */
1208
1209 else
1210#endif /* !defined NOPOSIX */
1211
1212 {
6bf342e1 1213 if (timeit > 0)
c86f6258
PH
1214 {
1215 register int i;
1216 clock_t time_taken;
1217 clock_t start_time = clock();
6bf342e1 1218 for (i = 0; i < timeit; i++)
c86f6258
PH
1219 {
1220 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1221 if (re != NULL) free(re);
1222 }
1223 time_taken = clock() - start_time;
6bf342e1
PH
1224 fprintf(outfile, "Compile time %.4f milliseconds\n",
1225 (((double)time_taken * 1000.0) / (double)timeit) /
c86f6258
PH
1226 (double)CLOCKS_PER_SEC);
1227 }
1228
1229 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1230
1231 /* Compilation failed; go back for another re, skipping to blank line
1232 if non-interactive. */
1233
1234 if (re == NULL)
1235 {
1236 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1237 SKIP_DATA:
1238 if (infile != stdin)
1239 {
1240 for (;;)
1241 {
aa41d2de 1242 if (extend_inputline(infile, buffer) == NULL)
c86f6258
PH
1243 {
1244 done = 1;
1245 goto CONTINUE;
1246 }
1247 len = (int)strlen((char *)buffer);
1248 while (len > 0 && isspace(buffer[len-1])) len--;
1249 if (len == 0) break;
1250 }
1251 fprintf(outfile, "\n");
1252 }
1253 goto CONTINUE;
1254 }
1255
1256 /* Compilation succeeded; print data if required. There are now two
1257 info-returning functions. The old one has a limited interface and
1258 returns only limited data. Check that it agrees with the newer one. */
1259
1260 if (log_store)
1261 fprintf(outfile, "Memory allocation (code space): %d\n",
1262 (int)(gotten_store -
1263 sizeof(real_pcre) -
1264 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1265
1266 /* Extract the size for possible writing before possibly flipping it,
1267 and remember the store that was got. */
1268
1269 true_size = ((real_pcre *)re)->size;
1270 regex_gotten_store = gotten_store;
1271
1272 /* If /S was present, study the regexp to generate additional info to
1273 help with the matching. */
1274
1275 if (do_study)
1276 {
6bf342e1 1277 if (timeit > 0)
c86f6258
PH
1278 {
1279 register int i;
1280 clock_t time_taken;
1281 clock_t start_time = clock();
6bf342e1 1282 for (i = 0; i < timeit; i++)
c86f6258
PH
1283 extra = pcre_study(re, study_options, &error);
1284 time_taken = clock() - start_time;
1285 if (extra != NULL) free(extra);
6bf342e1
PH
1286 fprintf(outfile, " Study time %.4f milliseconds\n",
1287 (((double)time_taken * 1000.0) / (double)timeit) /
c86f6258
PH
1288 (double)CLOCKS_PER_SEC);
1289 }
1290 extra = pcre_study(re, study_options, &error);
1291 if (error != NULL)
1292 fprintf(outfile, "Failed to study: %s\n", error);
1293 else if (extra != NULL)
1294 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1295 }
1296
1297 /* If the 'F' option was present, we flip the bytes of all the integer
1298 fields in the regex data block and the study block. This is to make it
1299 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1300 compiled on a different architecture. */
1301
1302 if (do_flip)
1303 {
1304 real_pcre *rre = (real_pcre *)re;
1305 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1306 rre->size = byteflip(rre->size, sizeof(rre->size));
1307 rre->options = byteflip(rre->options, sizeof(rre->options));
1308 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1309 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1310 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1311 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1312 rre->name_table_offset = byteflip(rre->name_table_offset,
1313 sizeof(rre->name_table_offset));
1314 rre->name_entry_size = byteflip(rre->name_entry_size,
1315 sizeof(rre->name_entry_size));
1316 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1317
1318 if (extra != NULL)
1319 {
1320 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1321 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1322 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1323 }
1324 }
1325
1326 /* Extract information from the compiled data if required */
1327
1328 SHOW_INFO:
1329
6bf342e1
PH
1330 if (do_debug)
1331 {
1332 fprintf(outfile, "------------------------------------------------------------------\n");
1333 pcre_printint(re, outfile);
1334 }
1335
c86f6258
PH
1336 if (do_showinfo)
1337 {
1338 unsigned long int get_options, all_options;
8ac170f3 1339#if !defined NOINFOCHECK
c86f6258 1340 int old_first_char, old_options, old_count;
8ac170f3 1341#endif
c86f6258
PH
1342 int count, backrefmax, first_char, need_char;
1343 int nameentrysize, namecount;
1344 const uschar *nametable;
1345
c86f6258
PH
1346 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1347 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1348 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1349 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1350 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1351 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1352 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1353 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1354 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1355
8ac170f3 1356#if !defined NOINFOCHECK
c86f6258
PH
1357 old_count = pcre_info(re, &old_options, &old_first_char);
1358 if (count < 0) fprintf(outfile,
1359 "Error %d from pcre_info()\n", count);
1360 else
1361 {
1362 if (old_count != count) fprintf(outfile,
1363 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1364 old_count);
1365
1366 if (old_first_char != first_char) fprintf(outfile,
1367 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1368 first_char, old_first_char);
1369
1370 if (old_options != (int)get_options) fprintf(outfile,
1371 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1372 get_options, old_options);
1373 }
8ac170f3 1374#endif
c86f6258
PH
1375
1376 if (size != regex_gotten_store) fprintf(outfile,
1377 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1378 (int)size, (int)regex_gotten_store);
1379
1380 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1381 if (backrefmax > 0)
1382 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1383
1384 if (namecount > 0)
1385 {
1386 fprintf(outfile, "Named capturing subpatterns:\n");
1387 while (namecount-- > 0)
1388 {
1389 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1390 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1391 GET2(nametable, 0));
1392 nametable += nameentrysize;
1393 }
1394 }
1395
1396 /* The NOPARTIAL bit is a private bit in the options, so we have
1397 to fish it out via our back door */
1398
1399 all_options = ((real_pcre *)re)->options;
1400 if (do_flip)
1401 {
1402 all_options = byteflip(all_options, sizeof(all_options));
aa41d2de 1403 }
c86f6258
PH
1404
1405 if ((all_options & PCRE_NOPARTIAL) != 0)
1406 fprintf(outfile, "Partial matching not supported\n");
1407
1408 if (get_options == 0) fprintf(outfile, "No options\n");
aa41d2de 1409 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
c86f6258
PH
1410 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1411 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1412 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1413 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
8ac170f3 1414 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
c86f6258
PH
1415 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1416 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1417 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1418 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
aa41d2de 1419 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
c86f6258 1420 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
aa41d2de
PH
1421 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1422 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1423
6bf342e1 1424 switch (get_options & PCRE_NEWLINE_BITS)
aa41d2de
PH
1425 {
1426 case PCRE_NEWLINE_CR:
1427 fprintf(outfile, "Forced newline sequence: CR\n");
1428 break;
1429
1430 case PCRE_NEWLINE_LF:
1431 fprintf(outfile, "Forced newline sequence: LF\n");
1432 break;
c86f6258 1433
aa41d2de
PH
1434 case PCRE_NEWLINE_CRLF:
1435 fprintf(outfile, "Forced newline sequence: CRLF\n");
1436 break;
1437
6bf342e1
PH
1438 case PCRE_NEWLINE_ANY:
1439 fprintf(outfile, "Forced newline sequence: ANY\n");
1440 break;
1441
aa41d2de
PH
1442 default:
1443 break;
1444 }
c86f6258
PH
1445
1446 if (first_char == -1)
1447 {
aa41d2de 1448 fprintf(outfile, "First char at start or follows newline\n");
c86f6258
PH
1449 }
1450 else if (first_char < 0)
1451 {
1452 fprintf(outfile, "No first char\n");
1453 }
1454 else
1455 {
1456 int ch = first_char & 255;
1457 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1458 "" : " (caseless)";
6bf342e1 1459 if (PRINTHEX(ch))
c86f6258
PH
1460 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1461 else
1462 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1463 }
1464
1465 if (need_char < 0)
1466 {
1467 fprintf(outfile, "No need char\n");
1468 }
1469 else
1470 {
1471 int ch = need_char & 255;
1472 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1473 "" : " (caseless)";
6bf342e1 1474 if (PRINTHEX(ch))
c86f6258
PH
1475 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1476 else
1477 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1478 }
1479
1480 /* Don't output study size; at present it is in any case a fixed
1481 value, but it varies, depending on the computer architecture, and
1482 so messes up the test suite. (And with the /F option, it might be
1483 flipped.) */
1484
1485 if (do_study)
1486 {
1487 if (extra == NULL)
1488 fprintf(outfile, "Study returned NULL\n");
1489 else
1490 {
1491 uschar *start_bits = NULL;
1492 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1493
1494 if (start_bits == NULL)
1495 fprintf(outfile, "No starting byte set\n");
1496 else
1497 {
1498 int i;
1499 int c = 24;
1500 fprintf(outfile, "Starting byte set: ");
1501 for (i = 0; i < 256; i++)
1502 {
1503 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1504 {
1505 if (c > 75)
1506 {
1507 fprintf(outfile, "\n ");
1508 c = 2;
1509 }
6bf342e1 1510 if (PRINTHEX(i) && i != ' ')
c86f6258
PH
1511 {
1512 fprintf(outfile, "%c ", i);
1513 c += 2;
1514 }
1515 else
1516 {
1517 fprintf(outfile, "\\x%02x ", i);
1518 c += 5;
1519 }
1520 }
1521 }
1522 fprintf(outfile, "\n");
1523 }
1524 }
1525 }
1526 }
1527
1528 /* If the '>' option was present, we write out the regex to a file, and
1529 that is all. The first 8 bytes of the file are the regex length and then
1530 the study length, in big-endian order. */
1531
1532 if (to_file != NULL)
1533 {
1534 FILE *f = fopen((char *)to_file, "wb");
1535 if (f == NULL)
1536 {
1537 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1538 }
1539 else
1540 {
1541 uschar sbuf[8];
1542 sbuf[0] = (true_size >> 24) & 255;
1543 sbuf[1] = (true_size >> 16) & 255;
1544 sbuf[2] = (true_size >> 8) & 255;
1545 sbuf[3] = (true_size) & 255;
1546
1547 sbuf[4] = (true_study_size >> 24) & 255;
1548 sbuf[5] = (true_study_size >> 16) & 255;
1549 sbuf[6] = (true_study_size >> 8) & 255;
1550 sbuf[7] = (true_study_size) & 255;
1551
1552 if (fwrite(sbuf, 1, 8, f) < 8 ||
1553 fwrite(re, 1, true_size, f) < true_size)
1554 {
1555 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1556 }
1557 else
1558 {
1559 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1560 if (extra != NULL)
1561 {
1562 if (fwrite(extra->study_data, 1, true_study_size, f) <
1563 true_study_size)
1564 {
1565 fprintf(outfile, "Write error on %s: %s\n", to_file,
1566 strerror(errno));
1567 }
1568 else fprintf(outfile, "Study data written to %s\n", to_file);
6bf342e1 1569
c86f6258
PH
1570 }
1571 }
1572 fclose(f);
1573 }
8ac170f3
PH
1574
1575 new_free(re);
1576 if (extra != NULL) new_free(extra);
1577 if (tables != NULL) new_free((void *)tables);
c86f6258
PH
1578 continue; /* With next regex */
1579 }
1580 } /* End of non-POSIX compile */
1581
1582 /* Read data lines and test them */
1583
1584 for (;;)
1585 {
aa41d2de
PH
1586 uschar *q;
1587 uschar *bptr = dbuffer;
c86f6258
PH
1588 int *use_offsets = offsets;
1589 int use_size_offsets = size_offsets;
1590 int callout_data = 0;
1591 int callout_data_set = 0;
1592 int count, c;
1593 int copystrings = 0;
1594 int find_match_limit = 0;
1595 int getstrings = 0;
1596 int getlist = 0;
1597 int gmatched = 0;
1598 int start_offset = 0;
1599 int g_notempty = 0;
8ac170f3 1600 int use_dfa = 0;
c86f6258
PH
1601
1602 options = 0;
1603
aa41d2de
PH
1604 *copynames = 0;
1605 *getnames = 0;
1606
1607 copynamesptr = copynames;
1608 getnamesptr = getnames;
1609
c86f6258
PH
1610 pcre_callout = callout;
1611 first_callout = 1;
1612 callout_extra = 0;
1613 callout_count = 0;
1614 callout_fail_count = 999999;
1615 callout_fail_id = -1;
1616 show_malloc = 0;
1617
aa41d2de
PH
1618 if (extra != NULL) extra->flags &=
1619 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1620
1621 len = 0;
1622 for (;;)
c86f6258 1623 {
aa41d2de
PH
1624 if (infile == stdin) printf("data> ");
1625 if (extend_inputline(infile, buffer + len) == NULL)
1626 {
1627 if (len > 0) break;
1628 done = 1;
1629 goto CONTINUE;
1630 }
1631 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1632 len = (int)strlen((char *)buffer);
1633 if (buffer[len-1] == '\n') break;
c86f6258 1634 }
c86f6258 1635
c86f6258
PH
1636 while (len > 0 && isspace(buffer[len-1])) len--;
1637 buffer[len] = 0;
1638 if (len == 0) break;
1639
1640 p = buffer;
1641 while (isspace(*p)) p++;
1642
1643 q = dbuffer;
1644 while ((c = *p++) != 0)
1645 {
1646 int i = 0;
1647 int n = 0;
1648
1649 if (c == '\\') switch ((c = *p++))
1650 {
1651 case 'a': c = 7; break;
1652 case 'b': c = '\b'; break;
1653 case 'e': c = 27; break;
1654 case 'f': c = '\f'; break;
1655 case 'n': c = '\n'; break;
1656 case 'r': c = '\r'; break;
1657 case 't': c = '\t'; break;
1658 case 'v': c = '\v'; break;
1659
1660 case '0': case '1': case '2': case '3':
1661 case '4': case '5': case '6': case '7':
1662 c -= '0';
1663 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1664 c = c * 8 + *p++ - '0';
aa41d2de
PH
1665
1666#if !defined NOUTF8
1667 if (use_utf8 && c > 255)
1668 {
1669 unsigned char buff8[8];
1670 int ii, utn;
1671 utn = ord2utf8(c, buff8);
1672 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1673 c = buff8[ii]; /* Last byte */
1674 }
1675#endif
c86f6258
PH
1676 break;
1677
1678 case 'x':
1679
1680 /* Handle \x{..} specially - new Perl thing for utf8 */
1681
8ac170f3 1682#if !defined NOUTF8
c86f6258
PH
1683 if (*p == '{')
1684 {
1685 unsigned char *pt = p;
1686 c = 0;
1687 while (isxdigit(*(++pt)))
1688 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1689 if (*pt == '}')
1690 {
1691 unsigned char buff8[8];
1692 int ii, utn;
aa41d2de 1693 utn = ord2utf8(c, buff8);
c86f6258
PH
1694 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1695 c = buff8[ii]; /* Last byte */
1696 p = pt + 1;
1697 break;
1698 }
1699 /* Not correct form; fall through */
1700 }
8ac170f3 1701#endif
c86f6258
PH
1702
1703 /* Ordinary \x */
1704
1705 c = 0;
1706 while (i++ < 2 && isxdigit(*p))
1707 {
1708 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1709 p++;
1710 }
1711 break;
1712
1713 case 0: /* \ followed by EOF allows for an empty line */
1714 p--;
1715 continue;
1716
1717 case '>':
1718 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1719 continue;
1720
1721 case 'A': /* Option setting */
1722 options |= PCRE_ANCHORED;
1723 continue;
1724
1725 case 'B':
1726 options |= PCRE_NOTBOL;
1727 continue;
1728
1729 case 'C':
1730 if (isdigit(*p)) /* Set copy string */
1731 {
1732 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1733 copystrings |= 1 << n;
1734 }
1735 else if (isalnum(*p))
1736 {
aa41d2de 1737 uschar *npp = copynamesptr;
c86f6258 1738 while (isalnum(*p)) *npp++ = *p++;
aa41d2de 1739 *npp++ = 0;
c86f6258 1740 *npp = 0;
aa41d2de 1741 n = pcre_get_stringnumber(re, (char *)copynamesptr);
c86f6258 1742 if (n < 0)
aa41d2de
PH
1743 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1744 copynamesptr = npp;
c86f6258
PH
1745 }
1746 else if (*p == '+')
1747 {
1748 callout_extra = 1;
1749 p++;
1750 }
1751 else if (*p == '-')
1752 {
1753 pcre_callout = NULL;
1754 p++;
1755 }
1756 else if (*p == '!')
1757 {
1758 callout_fail_id = 0;
1759 p++;
1760 while(isdigit(*p))
1761 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1762 callout_fail_count = 0;
1763 if (*p == '!')
1764 {
1765 p++;
1766 while(isdigit(*p))
1767 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1768 }
1769 }
1770 else if (*p == '*')
1771 {
1772 int sign = 1;
1773 callout_data = 0;
1774 if (*(++p) == '-') { sign = -1; p++; }
1775 while(isdigit(*p))
1776 callout_data = callout_data * 10 + *p++ - '0';
1777 callout_data *= sign;
1778 callout_data_set = 1;
1779 }
1780 continue;
1781
8ac170f3
PH
1782#if !defined NODFA
1783 case 'D':
1784#if !defined NOPOSIX
1785 if (posix || do_posix)
1786 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1787 else
1788#endif
1789 use_dfa = 1;
1790 continue;
1791
1792 case 'F':
1793 options |= PCRE_DFA_SHORTEST;
1794 continue;
1795#endif
1796
c86f6258
PH
1797 case 'G':
1798 if (isdigit(*p))
1799 {
1800 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1801 getstrings |= 1 << n;
1802 }
1803 else if (isalnum(*p))
1804 {
aa41d2de 1805 uschar *npp = getnamesptr;
c86f6258 1806 while (isalnum(*p)) *npp++ = *p++;
aa41d2de 1807 *npp++ = 0;
c86f6258 1808 *npp = 0;
aa41d2de 1809 n = pcre_get_stringnumber(re, (char *)getnamesptr);
c86f6258 1810 if (n < 0)
aa41d2de
PH
1811 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1812 getnamesptr = npp;
c86f6258
PH
1813 }
1814 continue;
1815
1816 case 'L':
1817 getlist = 1;
1818 continue;
1819
1820 case 'M':
1821 find_match_limit = 1;
1822 continue;
1823
1824 case 'N':
1825 options |= PCRE_NOTEMPTY;
1826 continue;
1827
1828 case 'O':
1829 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1830 if (n > size_offsets_max)
1831 {
1832 size_offsets_max = n;
1833 free(offsets);
1834 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1835 if (offsets == NULL)
1836 {
1837 printf("** Failed to get %d bytes of memory for offsets vector\n",
1838 size_offsets_max * sizeof(int));
8ac170f3
PH
1839 yield = 1;
1840 goto EXIT;
c86f6258
PH
1841 }
1842 }
1843 use_size_offsets = n;
1844 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1845 continue;
1846
1847 case 'P':
1848 options |= PCRE_PARTIAL;
1849 continue;
1850
aa41d2de
PH
1851 case 'Q':
1852 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1853 if (extra == NULL)
1854 {
1855 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1856 extra->flags = 0;
1857 }
1858 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1859 extra->match_limit_recursion = n;
1860 continue;
1861
1862 case 'q':
1863 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1864 if (extra == NULL)
1865 {
1866 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1867 extra->flags = 0;
1868 }
1869 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1870 extra->match_limit = n;
1871 continue;
1872
8ac170f3
PH
1873#if !defined NODFA
1874 case 'R':
1875 options |= PCRE_DFA_RESTART;
1876 continue;
1877#endif
1878
c86f6258
PH
1879 case 'S':
1880 show_malloc = 1;
1881 continue;
1882
1883 case 'Z':
1884 options |= PCRE_NOTEOL;
1885 continue;
1886
1887 case '?':
1888 options |= PCRE_NO_UTF8_CHECK;
1889 continue;
aa41d2de
PH
1890
1891 case '<':
1892 {
1893 int x = check_newline(p, outfile);
1894 if (x == 0) goto NEXT_DATA;
1895 options |= x;
1896 while (*p++ != '>');
1897 }
1898 continue;
c86f6258
PH
1899 }
1900 *q++ = c;
1901 }
1902 *q = 0;
1903 len = q - dbuffer;
1904
8ac170f3
PH
1905 if ((all_use_dfa || use_dfa) && find_match_limit)
1906 {
1907 printf("**Match limit not relevant for DFA matching: ignored\n");
1908 find_match_limit = 0;
1909 }
1910
c86f6258
PH
1911 /* Handle matching via the POSIX interface, which does not
1912 support timing or playing with the match limit or callout data. */
1913
1914#if !defined NOPOSIX
1915 if (posix || do_posix)
1916 {
1917 int rc;
1918 int eflags = 0;
1919 regmatch_t *pmatch = NULL;
1920 if (use_size_offsets > 0)
1921 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1922 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1923 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1924
1925 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1926
1927 if (rc != 0)
1928 {
aa41d2de 1929 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
c86f6258
PH
1930 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1931 }
aa41d2de
PH
1932 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1933 != 0)
1934 {
1935 fprintf(outfile, "Matched with REG_NOSUB\n");
1936 }
c86f6258
PH
1937 else
1938 {
1939 size_t i;
1940 for (i = 0; i < (size_t)use_size_offsets; i++)
1941 {
1942 if (pmatch[i].rm_so >= 0)
1943 {
1944 fprintf(outfile, "%2d: ", (int)i);
1945 (void)pchars(dbuffer + pmatch[i].rm_so,
1946 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1947 fprintf(outfile, "\n");
1948 if (i == 0 && do_showrest)
1949 {
1950 fprintf(outfile, " 0+ ");
1951 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1952 outfile);
1953 fprintf(outfile, "\n");
1954 }
1955 }
1956 }
1957 }
1958 free(pmatch);
1959 }
1960
1961 /* Handle matching via the native interface - repeats for /g and /G */
1962
1963 else
1964#endif /* !defined NOPOSIX */
1965
1966 for (;; gmatched++) /* Loop for /g or /G */
1967 {
6bf342e1 1968 if (timeitm > 0)
c86f6258
PH
1969 {
1970 register int i;
1971 clock_t time_taken;
1972 clock_t start_time = clock();
8ac170f3
PH
1973
1974#if !defined NODFA
1975 if (all_use_dfa || use_dfa)
1976 {
1977 int workspace[1000];
6bf342e1 1978 for (i = 0; i < timeitm; i++)
8ac170f3
PH
1979 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1980 options | g_notempty, use_offsets, use_size_offsets, workspace,
1981 sizeof(workspace)/sizeof(int));
1982 }
1983 else
1984#endif
1985
6bf342e1 1986 for (i = 0; i < timeitm; i++)
c86f6258
PH
1987 count = pcre_exec(re, extra, (char *)bptr, len,
1988 start_offset, options | g_notempty, use_offsets, use_size_offsets);
8ac170f3 1989
c86f6258 1990 time_taken = clock() - start_time;
6bf342e1
PH
1991 fprintf(outfile, "Execute time %.4f milliseconds\n",
1992 (((double)time_taken * 1000.0) / (double)timeitm) /
c86f6258
PH
1993 (double)CLOCKS_PER_SEC);
1994 }
1995
1996 /* If find_match_limit is set, we want to do repeated matches with
aa41d2de
PH
1997 varying limits in order to find the minimum value for the match limit and
1998 for the recursion limit. */
c86f6258
PH
1999
2000 if (find_match_limit)
2001 {
c86f6258
PH
2002 if (extra == NULL)
2003 {
2004 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2005 extra->flags = 0;
2006 }
c86f6258 2007
aa41d2de
PH
2008 (void)check_match_limit(re, extra, bptr, len, start_offset,
2009 options|g_notempty, use_offsets, use_size_offsets,
2010 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2011 PCRE_ERROR_MATCHLIMIT, "match()");
c86f6258 2012
aa41d2de
PH
2013 count = check_match_limit(re, extra, bptr, len, start_offset,
2014 options|g_notempty, use_offsets, use_size_offsets,
2015 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2016 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
c86f6258
PH
2017 }
2018
2019 /* If callout_data is set, use the interface with additional data */
2020
2021 else if (callout_data_set)
2022 {
2023 if (extra == NULL)
2024 {
2025 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2026 extra->flags = 0;
2027 }
2028 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2029 extra->callout_data = &callout_data;
2030 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2031 options | g_notempty, use_offsets, use_size_offsets);
2032 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2033 }
2034
2035 /* The normal case is just to do the match once, with the default
2036 value of match_limit. */
2037
8ac170f3
PH
2038#if !defined NODFA
2039 else if (all_use_dfa || use_dfa)
c86f6258 2040 {
8ac170f3
PH
2041 int workspace[1000];
2042 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2043 options | g_notempty, use_offsets, use_size_offsets, workspace,
2044 sizeof(workspace)/sizeof(int));
2045 if (count == 0)
2046 {
2047 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2048 count = use_size_offsets/2;
2049 }
c86f6258 2050 }
8ac170f3 2051#endif
c86f6258 2052
8ac170f3 2053 else
c86f6258 2054 {
8ac170f3
PH
2055 count = pcre_exec(re, extra, (char *)bptr, len,
2056 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2057 if (count == 0)
2058 {
2059 fprintf(outfile, "Matched, but too many substrings\n");
2060 count = use_size_offsets/3;
2061 }
c86f6258
PH
2062 }
2063
2064 /* Matched */
2065
2066 if (count >= 0)
2067 {
6bf342e1
PH
2068 int i, maxcount;
2069
2070#if !defined NODFA
2071 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2072#endif
2073 maxcount = use_size_offsets/3;
2074
2075 /* This is a check against a lunatic return value. */
2076
2077 if (count > maxcount)
2078 {
2079 fprintf(outfile,
2080 "** PCRE error: returned count %d is too big for offset size %d\n",
2081 count, use_size_offsets);
2082 count = use_size_offsets/3;
2083 if (do_g || do_G)
2084 {
2085 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2086 do_g = do_G = FALSE; /* Break g/G loop */
2087 }
2088 }
2089
c86f6258
PH
2090 for (i = 0; i < count * 2; i += 2)
2091 {
2092 if (use_offsets[i] < 0)
2093 fprintf(outfile, "%2d: <unset>\n", i/2);
2094 else
2095 {
2096 fprintf(outfile, "%2d: ", i/2);
2097 (void)pchars(bptr + use_offsets[i],
2098 use_offsets[i+1] - use_offsets[i], outfile);
2099 fprintf(outfile, "\n");
2100 if (i == 0)
2101 {
2102 if (do_showrest)
2103 {
2104 fprintf(outfile, " 0+ ");
2105 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2106 outfile);
2107 fprintf(outfile, "\n");
2108 }
2109 }
2110 }
2111 }
2112
2113 for (i = 0; i < 32; i++)
2114 {
2115 if ((copystrings & (1 << i)) != 0)
2116 {
aa41d2de 2117 char copybuffer[256];
c86f6258
PH
2118 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2119 i, copybuffer, sizeof(copybuffer));
2120 if (rc < 0)
2121 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2122 else
2123 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2124 }
2125 }
2126
aa41d2de
PH
2127 for (copynamesptr = copynames;
2128 *copynamesptr != 0;
2129 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2130 {
2131 char copybuffer[256];
2132 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2133 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2134 if (rc < 0)
2135 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2136 else
2137 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2138 }
2139
c86f6258
PH
2140 for (i = 0; i < 32; i++)
2141 {
2142 if ((getstrings & (1 << i)) != 0)
2143 {
2144 const char *substring;
2145 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2146 i, &substring);
2147 if (rc < 0)
2148 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2149 else
2150 {
2151 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
c86f6258
PH
2152 pcre_free_substring(substring);
2153 }
2154 }
2155 }
2156
aa41d2de
PH
2157 for (getnamesptr = getnames;
2158 *getnamesptr != 0;
2159 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2160 {
2161 const char *substring;
2162 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2163 count, (char *)getnamesptr, &substring);
2164 if (rc < 0)
2165 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2166 else
2167 {
2168 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2169 pcre_free_substring(substring);
2170 }
2171 }
2172
c86f6258
PH
2173 if (getlist)
2174 {
2175 const char **stringlist;
2176 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2177 &stringlist);
2178 if (rc < 0)
2179 fprintf(outfile, "get substring list failed %d\n", rc);
2180 else
2181 {
2182 for (i = 0; i < count; i++)
2183 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2184 if (stringlist[i] != NULL)
2185 fprintf(outfile, "string list not terminated by NULL\n");
2186 /* free((void *)stringlist); */
2187 pcre_free_substring_list(stringlist);
2188 }
2189 }
2190 }
2191
2192 /* There was a partial match */
2193
2194 else if (count == PCRE_ERROR_PARTIAL)
2195 {
8ac170f3
PH
2196 fprintf(outfile, "Partial match");
2197#if !defined NODFA
2198 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2199 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2200 bptr + use_offsets[0]);
2201#endif
2202 fprintf(outfile, "\n");
c86f6258
PH
2203 break; /* Out of the /g loop */
2204 }
2205
2206 /* Failed to match. If this is a /g or /G loop and we previously set
2207 g_notempty after a null match, this is not necessarily the end.
2208 We want to advance the start offset, and continue. In the case of UTF-8
2209 matching, the advance must be one character, not one byte. Fudge the
2210 offset values to achieve this. We won't be at the end of the string -
2211 that was checked before setting g_notempty. */
2212
2213 else
2214 {
2215 if (g_notempty != 0)
2216 {
2217 int onechar = 1;
2218 use_offsets[0] = start_offset;
2219 if (use_utf8)
2220 {
2221 while (start_offset + onechar < len)
2222 {
2223 int tb = bptr[start_offset+onechar];
2224 if (tb <= 127) break;
2225 tb &= 0xc0;
2226 if (tb != 0 && tb != 0xc0) onechar++;
2227 }
2228 }
2229 use_offsets[1] = start_offset + onechar;
2230 }
2231 else
2232 {
2233 if (count == PCRE_ERROR_NOMATCH)
2234 {
2235 if (gmatched == 0) fprintf(outfile, "No match\n");
2236 }
2237 else fprintf(outfile, "Error %d\n", count);
2238 break; /* Out of the /g loop */
2239 }
2240 }
2241
2242 /* If not /g or /G we are done */
2243
2244 if (!do_g && !do_G) break;
2245
2246 /* If we have matched an empty string, first check to see if we are at
2247 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2248 what Perl's /g option does. This turns out to be rather cunning. First
2249 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2250 same point. If this fails (picked up above) we advance to the next
2251 character. */
2252
2253 g_notempty = 0;
2254 if (use_offsets[0] == use_offsets[1])
2255 {
2256 if (use_offsets[0] == len) break;
2257 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2258 }
2259
2260 /* For /g, update the start offset, leaving the rest alone */
2261
2262 if (do_g) start_offset = use_offsets[1];
2263
2264 /* For /G, update the pointer and length */
2265
2266 else
2267 {
2268 bptr += use_offsets[1];
2269 len -= use_offsets[1];
2270 }
2271 } /* End of loop for /g and /G */
aa41d2de
PH
2272
2273 NEXT_DATA: continue;
c86f6258
PH
2274 } /* End of loop for data lines */
2275
2276 CONTINUE:
2277
2278#if !defined NOPOSIX
2279 if (posix || do_posix) regfree(&preg);
2280#endif
2281
8ac170f3
PH
2282 if (re != NULL) new_free(re);
2283 if (extra != NULL) new_free(extra);
c86f6258
PH
2284 if (tables != NULL)
2285 {
8ac170f3 2286 new_free((void *)tables);
c86f6258 2287 setlocale(LC_CTYPE, "C");
6bf342e1 2288 locale_set = 0;
c86f6258
PH
2289 }
2290 }
2291
2292if (infile == stdin) fprintf(outfile, "\n");
8ac170f3
PH
2293
2294EXIT:
2295
2296if (infile != NULL && infile != stdin) fclose(infile);
2297if (outfile != NULL && outfile != stdout) fclose(outfile);
2298
2299free(buffer);
2300free(dbuffer);
2301free(pbuffer);
2302free(offsets);
2303
2304return yield;
c86f6258
PH
2305}
2306
8ac170f3 2307/* End of pcretest.c */