Install PCRE 6.7 in place of 6.2.
[exim.git] / src / src / pcre / pcretest.c
CommitLineData
aa41d2de 1/* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.4 2006/11/07 16:50:36 ph10 Exp $ */
8ac170f3 2
c86f6258
PH
3/*************************************************
4* PCRE testing program *
5*************************************************/
6
7/* This program was hacked up as a tester for PCRE. I really should have
8written it more tidily in the first place. Will I ever learn? It has grown and
8ac170f3 9been extended and consequently is now rather, er, *very* untidy in places.
c86f6258
PH
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40
41#include <ctype.h>
42#include <stdio.h>
43#include <string.h>
44#include <stdlib.h>
45#include <time.h>
46#include <locale.h>
47#include <errno.h>
48
aa41d2de
PH
49#ifndef _WIN32
50#include <sys/resource.h>
51#endif
52
c86f6258 53#define PCRE_SPY /* For Win32 build, import data, not export */
8ac170f3 54
aa41d2de
PH
55/* We include pcre_internal.h because we need the internal info for displaying
56the results of pcre_study() and we also need to know about the internal
57macros, structures, and other internal data values; pcretest has "inside
58information" compared to a program that strictly follows the PCRE API. */
8ac170f3
PH
59
60#include "pcre_internal.h"
61
aa41d2de
PH
62/* We need access to the data tables that PCRE uses. So as not to have to keep
63two copies, we include the source file here, changing the names of the external
64symbols to prevent clashes. */
65
66#define _pcre_utf8_table1 utf8_table1
67#define _pcre_utf8_table1_size utf8_table1_size
68#define _pcre_utf8_table2 utf8_table2
69#define _pcre_utf8_table3 utf8_table3
70#define _pcre_utf8_table4 utf8_table4
71#define _pcre_utt utt
72#define _pcre_utt_size utt_size
73#define _pcre_OP_lengths OP_lengths
74
75#include "pcre_tables.c"
76
77/* We also need the pcre_printint() function for printing out compiled
78patterns. This function is in a separate file so that it can be included in
79pcre_compile.c when that module is compiled with debugging enabled. */
80
81#include "pcre_printint.src"
82
c86f6258
PH
83
84/* It is possible to compile this test program without including support for
85testing the POSIX interface, though this is not available via the standard
86Makefile. */
87
88#if !defined NOPOSIX
89#include "pcreposix.h"
90#endif
91
92e772ff
PH
92/* It is also possible, for the benefit of the version imported into Exim, to
93build pcretest without support for UTF8 (define NOUTF8), without the interface
8ac170f3
PH
94to the DFA matcher (NODFA), and without the doublecheck of the old "info"
95function (define NOINFOCHECK). */
96
97
aa41d2de
PH
/* Other parameters */

/* Some old systems have no CLOCKS_PER_SEC in <time.h>. Fall back to CLK_TCK
if that exists, and otherwise assume 100 ticks per second. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Number of repetitions used by the timing loops. */

#define LOOPREPEAT 500000
109
aa41d2de 110/* Static variables */
c86f6258
PH
111
112static FILE *outfile;
113static int log_store = 0;
114static int callout_count;
115static int callout_extra;
116static int callout_fail_count;
117static int callout_fail_id;
118static int first_callout;
119static int show_malloc;
120static int use_utf8;
121static size_t gotten_store;
122
aa41d2de
PH
123/* The buffers grow automatically if very long input lines are encountered. */
124
125static int buffer_size = 50000;
126static uschar *buffer = NULL;
127static uschar *dbuffer = NULL;
c86f6258
PH
128static uschar *pbuffer = NULL;
129
130
c86f6258 131
aa41d2de
PH
132/*************************************************
133* Read or extend an input line *
134*************************************************/
135
136/* Input lines are read into buffer, but both patterns and data lines can be
137continued over multiple input lines. In addition, if the buffer fills up, we
138want to automatically expand it so as to be able to handle extremely large
139lines that are needed for certain stress tests. When the input buffer is
140expanded, the other two buffers must also be expanded likewise, and the
141contents of pbuffer, which are a copy of the input for callouts, must be
142preserved (for when expansion happens for a data line). This is not the most
143optimal way of handling this, but hey, this is just a test program!
144
145Arguments:
146 f the file to read
147 start where in buffer to start (this *must* be within buffer)
148
149Returns: pointer to the start of new data
150 could be a copy of start, or could be moved
151 NULL if no data read and EOF reached
152*/
153
154static uschar *
155extend_inputline(FILE *f, uschar *start)
156{
157uschar *here = start;
158
159for (;;)
160 {
161 int rlen = buffer_size - (here - buffer);
162 if (rlen > 1000)
163 {
164 int dlen;
165 if (fgets((char *)here, rlen, f) == NULL)
166 return (here == start)? NULL : start;
167 dlen = (int)strlen((char *)here);
168 if (dlen > 0 && here[dlen - 1] == '\n') return start;
169 here += dlen;
170 }
171
172 else
173 {
174 int new_buffer_size = 2*buffer_size;
175 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
176 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
177 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
178
179 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
180 {
181 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
182 exit(1);
183 }
184
185 memcpy(new_buffer, buffer, buffer_size);
186 memcpy(new_pbuffer, pbuffer, buffer_size);
187
188 buffer_size = new_buffer_size;
189
190 start = new_buffer + (start - buffer);
191 here = new_buffer + (here - buffer);
192
193 free(buffer);
194 free(dbuffer);
195 free(pbuffer);
196
197 buffer = new_buffer;
198 dbuffer = new_dbuffer;
199 pbuffer = new_pbuffer;
200 }
201 }
202
203return NULL; /* Control never gets here */
204}
205
206
207
208
209
210
211
c86f6258
PH
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the numeric arguments of the -o and -S options, so just keep it
simple: leading white space is skipped, then decimal digits are accumulated.
There is no sign handling and no overflow check.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (the first character that was not
               consumed; equal to the first non-space character if there
               were no digits)

Returns:     the value as an int; 0 if there were no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
*endptr = str;
return(result);
}
236
237
238
c86f6258
PH
239
240/*************************************************
241* Convert UTF-8 string to value *
242*************************************************/
243
244/* This function takes one or more bytes that represents a UTF-8 character,
245and returns the value of the character.
246
247Argument:
aa41d2de
PH
248 utf8bytes a pointer to the byte vector
249 vptr a pointer to an int to receive the value
c86f6258 250
aa41d2de
PH
251Returns: > 0 => the number of bytes consumed
252 -6 to 0 => malformed UTF-8 character at offset = (-return)
c86f6258
PH
253*/
254
8ac170f3
PH
255#if !defined NOUTF8
256
c86f6258 257static int
aa41d2de 258utf82ord(unsigned char *utf8bytes, int *vptr)
c86f6258 259{
aa41d2de 260int c = *utf8bytes++;
c86f6258
PH
261int d = c;
262int i, j, s;
263
264for (i = -1; i < 6; i++) /* i is number of additional bytes */
265 {
266 if ((d & 0x80) == 0) break;
267 d <<= 1;
268 }
269
270if (i == -1) { *vptr = c; return 1; } /* ascii character */
271if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
272
273/* i now has a value in the range 1-5 */
274
275s = 6*i;
aa41d2de 276d = (c & utf8_table3[i]) << s;
c86f6258
PH
277
278for (j = 0; j < i; j++)
279 {
aa41d2de 280 c = *utf8bytes++;
c86f6258
PH
281 if ((c & 0xc0) != 0x80) return -(j+1);
282 s -= 6;
283 d |= (c & 0x3f) << s;
284 }
285
286/* Check that encoding was the correct unique one */
287
aa41d2de
PH
288for (j = 0; j < utf8_table1_size; j++)
289 if (d <= utf8_table1[j]) break;
c86f6258
PH
290if (j != i) return -(i+1);
291
292/* Valid value */
293
294*vptr = d;
295return i+1;
296}
297
8ac170f3
PH
298#endif
299
c86f6258
PH
300
301
aa41d2de
PH
302/*************************************************
303* Convert character value to UTF-8 *
304*************************************************/
305
306/* This function takes an integer value in the range 0 - 0x7fffffff
307and encodes it as a UTF-8 character in 0 to 6 bytes.
308
309Arguments:
310 cvalue the character value
311 utf8bytes pointer to buffer for result - at least 6 bytes long
312
313Returns: number of characters placed in the buffer
314*/
315
316#if !defined NOUTF8
317
318static int
319ord2utf8(int cvalue, uschar *utf8bytes)
320{
321register int i, j;
322for (i = 0; i < utf8_table1_size; i++)
323 if (cvalue <= utf8_table1[i]) break;
324utf8bytes += i;
325for (j = i; j > 0; j--)
326 {
327 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
328 cvalue >>= 6;
329 }
330*utf8bytes = utf8_table2[i] | cvalue;
331return i + 1;
332}
333
334#endif
335
336
337
c86f6258
PH
338/*************************************************
339* Print character string *
340*************************************************/
341
342/* Character string printing function. Must handle UTF-8 strings in utf8
343mode. Yields number of characters printed. If handed a NULL file, just counts
344chars without printing. */
345
346static int pchars(unsigned char *p, int length, FILE *f)
347{
aa41d2de 348int c = 0;
c86f6258
PH
349int yield = 0;
350
351while (length-- > 0)
352 {
92e772ff 353#if !defined NOUTF8
c86f6258
PH
354 if (use_utf8)
355 {
356 int rc = utf82ord(p, &c);
357
358 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
359 {
360 length -= rc - 1;
361 p += rc;
362 if (c < 256 && isprint(c))
363 {
364 if (f != NULL) fprintf(f, "%c", c);
365 yield++;
366 }
367 else
368 {
369 int n;
370 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
371 yield += n;
372 }
373 continue;
374 }
375 }
8ac170f3 376#endif
c86f6258
PH
377
378 /* Not UTF-8, or malformed UTF-8 */
379
380 if (isprint(c = *(p++)))
381 {
382 if (f != NULL) fprintf(f, "%c", c);
383 yield++;
384 }
385 else
386 {
387 if (f != NULL) fprintf(f, "\\x%02x", c);
388 yield += 4;
389 }
390 }
391
392return yield;
393}
394
395
396
397/*************************************************
398* Callout function *
399*************************************************/
400
401/* Called from PCRE as a result of the (?C) item. We print out where we are in
402the match. Yield zero unless more callouts than the fail count, or the callout
403data is not zero. */
404
405static int callout(pcre_callout_block *cb)
406{
407FILE *f = (first_callout | callout_extra)? outfile : NULL;
408int i, pre_start, post_start, subject_length;
409
410if (callout_extra)
411 {
412 fprintf(f, "Callout %d: last capture = %d\n",
413 cb->callout_number, cb->capture_last);
414
415 for (i = 0; i < cb->capture_top * 2; i += 2)
416 {
417 if (cb->offset_vector[i] < 0)
418 fprintf(f, "%2d: <unset>\n", i/2);
419 else
420 {
421 fprintf(f, "%2d: ", i/2);
422 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
423 cb->offset_vector[i+1] - cb->offset_vector[i], f);
424 fprintf(f, "\n");
425 }
426 }
427 }
428
429/* Re-print the subject in canonical form, the first time or if giving full
430datails. On subsequent calls in the same match, we use pchars just to find the
431printed lengths of the substrings. */
432
433if (f != NULL) fprintf(f, "--->");
434
435pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
436post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
437 cb->current_position - cb->start_match, f);
438
439subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
440
441(void)pchars((unsigned char *)(cb->subject + cb->current_position),
442 cb->subject_length - cb->current_position, f);
443
444if (f != NULL) fprintf(f, "\n");
445
446/* Always print appropriate indicators, with callout number if not already
447shown. For automatic callouts, show the pattern offset. */
448
449if (cb->callout_number == 255)
450 {
451 fprintf(outfile, "%+3d ", cb->pattern_position);
452 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
453 }
454else
455 {
456 if (callout_extra) fprintf(outfile, " ");
457 else fprintf(outfile, "%3d ", cb->callout_number);
458 }
459
460for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
461fprintf(outfile, "^");
462
463if (post_start > 0)
464 {
465 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
466 fprintf(outfile, "^");
467 }
468
469for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
470 fprintf(outfile, " ");
471
472fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
473 pbuffer + cb->pattern_position);
474
475fprintf(outfile, "\n");
476first_callout = 0;
477
478if (cb->callout_data != NULL)
479 {
480 int callout_data = *((int *)(cb->callout_data));
481 if (callout_data != 0)
482 {
483 fprintf(outfile, "Callout data = %d\n", callout_data);
484 return callout_data;
485 }
486 }
487
488return (cb->callout_number != callout_fail_id)? 0 :
489 (++callout_count >= callout_fail_count)? 1 : 0;
490}
491
492
493/*************************************************
494* Local malloc functions *
495*************************************************/
496
497/* Alternative malloc function, to test functionality and show the size of the
498compiled re. */
499
500static void *new_malloc(size_t size)
501{
502void *block = malloc(size);
503gotten_store = size;
504if (show_malloc)
505 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
506return block;
507}
508
509static void new_free(void *block)
510{
511if (show_malloc)
512 fprintf(outfile, "free %p\n", block);
513free(block);
514}
515
516
517/* For recursion malloc/free, to test stacking calls */
518
519static void *stack_malloc(size_t size)
520{
521void *block = malloc(size);
522if (show_malloc)
523 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
524return block;
525}
526
527static void stack_free(void *block)
528{
529if (show_malloc)
530 fprintf(outfile, "stack_free %p\n", block);
531free(block);
532}
533
534
535/*************************************************
536* Call pcre_fullinfo() *
537*************************************************/
538
539/* Get one piece of information from the pcre_fullinfo() function */
540
541static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
542{
543int rc;
544if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
545 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
546}
547
548
549
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value      the value to flip
  n          2 to swap the two low-order bytes; any other value swaps the
               four low-order bytes

Returns:     the flipped value; bits above those being swapped are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
       ((value & 0x0000ff00) <<  8) |
       ((value & 0x00ff0000) >>  8) |
       ((value & 0xff000000) >> 24);
}
563
564
565
566
aa41d2de
PH
567/*************************************************
568* Check match or recursion limit *
569*************************************************/
570
571static int
572check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
573 int start_offset, int options, int *use_offsets, int use_size_offsets,
574 int flag, unsigned long int *limit, int errnumber, const char *msg)
575{
576int count;
577int min = 0;
578int mid = 64;
579int max = -1;
580
581extra->flags |= flag;
582
583for (;;)
584 {
585 *limit = mid;
586
587 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
588 use_offsets, use_size_offsets);
589
590 if (count == errnumber)
591 {
592 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
593 min = mid;
594 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
595 }
596
597 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
598 count == PCRE_ERROR_PARTIAL)
599 {
600 if (mid == min + 1)
601 {
602 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
603 break;
604 }
605 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
606 max = mid;
607 mid = (min + mid)/2;
608 }
609 else break; /* Some other error */
610 }
611
612extra->flags &= ~flag;
613return count;
614}
615
616
617
618/*************************************************
619* Check newline indicator *
620*************************************************/
621
622/* This is used both at compile and run-time to check for <xxx> escapes, where
623xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
624
625Arguments:
626 p points after the leading '<'
627 f file for error message
628
629Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
630*/
631
632static int
633check_newline(uschar *p, FILE *f)
634{
635if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
636if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
637if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
638fprintf(f, "Unknown newline type at: <%s\n", p);
639return 0;
640}
641
642
643
c86f6258
PH
644/*************************************************
645* Main Program *
646*************************************************/
647
648/* Read lines from named file or stdin and write to named file or stdout; lines
649consist of a regular expression, in delimiters and optionally followed by
650options, followed by a set of test data, terminated by an empty line. */
651
652int main(int argc, char **argv)
653{
654FILE *infile = stdin;
655int options = 0;
656int study_options = 0;
657int op = 1;
658int timeit = 0;
659int showinfo = 0;
660int showstore = 0;
aa41d2de 661int quiet = 0;
c86f6258
PH
662int size_offsets = 45;
663int size_offsets_max;
8ac170f3 664int *offsets = NULL;
c86f6258
PH
665#if !defined NOPOSIX
666int posix = 0;
667#endif
668int debug = 0;
669int done = 0;
8ac170f3
PH
670int all_use_dfa = 0;
671int yield = 0;
aa41d2de
PH
672int stack_size;
673
674/* These vectors store, end-to-end, a list of captured substring names. Assume
675that 1024 is plenty long enough for the few names we'll be testing. */
676
677uschar copynames[1024];
678uschar getnames[1024];
c86f6258 679
aa41d2de
PH
680uschar *copynamesptr;
681uschar *getnamesptr;
c86f6258
PH
682
683/* Get buffers from malloc() so that Electric Fence will check their misuse
aa41d2de 684when I am debugging. They grow automatically when very long lines are read. */
c86f6258 685
aa41d2de
PH
686buffer = (unsigned char *)malloc(buffer_size);
687dbuffer = (unsigned char *)malloc(buffer_size);
688pbuffer = (unsigned char *)malloc(buffer_size);
c86f6258
PH
689
690/* The outfile variable is static so that new_malloc can use it. The _setmode()
691stuff is some magic that I don't understand, but which apparently does good
692things in Windows. It's related to line terminations. */
693
694#if defined(_WIN32) || defined(WIN32)
695_setmode( _fileno( stdout ), 0x8000 );
696#endif /* defined(_WIN32) || defined(WIN32) */
697
698outfile = stdout;
699
700/* Scan options */
701
702while (argc > 1 && argv[op][0] == '-')
703 {
704 unsigned char *endptr;
705
706 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
707 showstore = 1;
708 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
aa41d2de 709 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
c86f6258
PH
710 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
711 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
92e772ff 712#if !defined NODFA
8ac170f3
PH
713 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
714#endif
c86f6258
PH
715 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
716 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
717 *endptr == 0))
718 {
719 op++;
720 argc--;
721 }
aa41d2de
PH
722 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
723 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
724 *endptr == 0))
725 {
726#ifdef _WIN32
727 printf("PCRE: -S not supported on this OS\n");
728 exit(1);
729#else
730 int rc;
731 struct rlimit rlim;
732 getrlimit(RLIMIT_STACK, &rlim);
733 rlim.rlim_cur = stack_size * 1024 * 1024;
734 rc = setrlimit(RLIMIT_STACK, &rlim);
735 if (rc != 0)
736 {
737 printf("PCRE: setrlimit() failed with error %d\n", rc);
738 exit(1);
739 }
740 op++;
741 argc--;
742#endif
743 }
c86f6258
PH
744#if !defined NOPOSIX
745 else if (strcmp(argv[op], "-p") == 0) posix = 1;
746#endif
747 else if (strcmp(argv[op], "-C") == 0)
748 {
749 int rc;
750 printf("PCRE version %s\n", pcre_version());
751 printf("Compiled with\n");
752 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
753 printf(" %sUTF-8 support\n", rc? "" : "No ");
754 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
755 printf(" %sUnicode properties support\n", rc? "" : "No ");
756 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
aa41d2de
PH
757 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
758 (rc == '\n')? "LF" : "CRLF");
c86f6258
PH
759 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
760 printf(" Internal link size = %d\n", rc);
761 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
762 printf(" POSIX malloc threshold = %d\n", rc);
763 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
764 printf(" Default match limit = %d\n", rc);
aa41d2de
PH
765 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
766 printf(" Default recursion depth limit = %d\n", rc);
c86f6258
PH
767 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
768 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
769 exit(0);
770 }
771 else
772 {
773 printf("** Unknown or malformed option %s\n", argv[op]);
aa41d2de 774 printf("Usage: pcretest [options] [<input> [<output>]]\n");
c86f6258 775 printf(" -C show PCRE compile-time options and exit\n");
8ac170f3
PH
776 printf(" -d debug: show compiled code; implies -i\n");
777#if !defined NODFA
778 printf(" -dfa force DFA matching for all subjects\n");
779#endif
780 printf(" -i show information about compiled pattern\n"
c86f6258
PH
781 " -m output memory used information\n"
782 " -o <n> set size of offsets vector to <n>\n");
783#if !defined NOPOSIX
784 printf(" -p use POSIX interface\n");
785#endif
aa41d2de 786 printf(" -S <n> set stack size to <n> megabytes\n");
c86f6258
PH
787 printf(" -s output store (memory) used information\n"
788 " -t time compilation and execution\n");
8ac170f3
PH
789 yield = 1;
790 goto EXIT;
c86f6258
PH
791 }
792 op++;
793 argc--;
794 }
795
796/* Get the store for the offsets vector, and remember what it was */
797
798size_offsets_max = size_offsets;
799offsets = (int *)malloc(size_offsets_max * sizeof(int));
800if (offsets == NULL)
801 {
802 printf("** Failed to get %d bytes of memory for offsets vector\n",
803 size_offsets_max * sizeof(int));
8ac170f3
PH
804 yield = 1;
805 goto EXIT;
c86f6258
PH
806 }
807
808/* Sort out the input and output files */
809
810if (argc > 1)
811 {
812 infile = fopen(argv[op], "rb");
813 if (infile == NULL)
814 {
815 printf("** Failed to open %s\n", argv[op]);
8ac170f3
PH
816 yield = 1;
817 goto EXIT;
c86f6258
PH
818 }
819 }
820
821if (argc > 2)
822 {
823 outfile = fopen(argv[op+1], "wb");
824 if (outfile == NULL)
825 {
826 printf("** Failed to open %s\n", argv[op+1]);
8ac170f3
PH
827 yield = 1;
828 goto EXIT;
c86f6258
PH
829 }
830 }
831
832/* Set alternative malloc function */
833
834pcre_malloc = new_malloc;
835pcre_free = new_free;
836pcre_stack_malloc = stack_malloc;
837pcre_stack_free = stack_free;
838
aa41d2de 839/* Heading line unless quiet, then prompt for first regex if stdin */
c86f6258 840
aa41d2de 841if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
c86f6258
PH
842
843/* Main loop */
844
845while (!done)
846 {
847 pcre *re = NULL;
848 pcre_extra *extra = NULL;
849
850#if !defined NOPOSIX /* There are still compilers that require no indent */
851 regex_t preg;
852 int do_posix = 0;
853#endif
854
855 const char *error;
856 unsigned char *p, *pp, *ppp;
857 unsigned char *to_file = NULL;
858 const unsigned char *tables = NULL;
859 unsigned long int true_size, true_study_size = 0;
860 size_t size, regex_gotten_store;
861 int do_study = 0;
862 int do_debug = debug;
863 int do_G = 0;
864 int do_g = 0;
865 int do_showinfo = showinfo;
866 int do_showrest = 0;
867 int do_flip = 0;
868 int erroroffset, len, delimiter;
869
870 use_utf8 = 0;
871
872 if (infile == stdin) printf(" re> ");
aa41d2de 873 if (extend_inputline(infile, buffer) == NULL) break;
c86f6258
PH
874 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
875 fflush(outfile);
876
877 p = buffer;
878 while (isspace(*p)) p++;
879 if (*p == 0) continue;
880
881 /* See if the pattern is to be loaded pre-compiled from a file. */
882
883 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
884 {
aa41d2de 885 unsigned long int magic, get_options;
c86f6258
PH
886 uschar sbuf[8];
887 FILE *f;
888
889 p++;
890 pp = p + (int)strlen((char *)p);
891 while (isspace(pp[-1])) pp--;
892 *pp = 0;
893
894 f = fopen((char *)p, "rb");
895 if (f == NULL)
896 {
897 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
898 continue;
899 }
900
901 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
902
903 true_size =
904 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
905 true_study_size =
906 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
907
908 re = (real_pcre *)new_malloc(true_size);
909 regex_gotten_store = gotten_store;
910
911 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
912
913 magic = ((real_pcre *)re)->magic_number;
914 if (magic != MAGIC_NUMBER)
915 {
916 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
917 {
918 do_flip = 1;
919 }
920 else
921 {
922 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
923 fclose(f);
924 continue;
925 }
926 }
927
928 fprintf(outfile, "Compiled regex%s loaded from %s\n",
929 do_flip? " (byte-inverted)" : "", p);
930
931 /* Need to know if UTF-8 for printing data strings */
932
aa41d2de
PH
933 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
934 use_utf8 = (get_options & PCRE_UTF8) != 0;
c86f6258
PH
935
936 /* Now see if there is any following study data */
937
938 if (true_study_size != 0)
939 {
940 pcre_study_data *psd;
941
942 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
943 extra->flags = PCRE_EXTRA_STUDY_DATA;
944
945 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
946 extra->study_data = psd;
947
948 if (fread(psd, 1, true_study_size, f) != true_study_size)
949 {
950 FAIL_READ:
951 fprintf(outfile, "Failed to read data from %s\n", p);
952 if (extra != NULL) new_free(extra);
953 if (re != NULL) new_free(re);
954 fclose(f);
955 continue;
956 }
957 fprintf(outfile, "Study data loaded from %s\n", p);
958 do_study = 1; /* To get the data output if requested */
959 }
960 else fprintf(outfile, "No study data\n");
961
962 fclose(f);
963 goto SHOW_INFO;
964 }
965
966 /* In-line pattern (the usual case). Get the delimiter and seek the end of
967 the pattern; if is isn't complete, read more. */
968
969 delimiter = *p++;
970
971 if (isalnum(delimiter) || delimiter == '\\')
972 {
973 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
974 goto SKIP_DATA;
975 }
976
977 pp = p;
978
979 for(;;)
980 {
981 while (*pp != 0)
982 {
983 if (*pp == '\\' && pp[1] != 0) pp++;
984 else if (*pp == delimiter) break;
985 pp++;
986 }
987 if (*pp != 0) break;
c86f6258 988 if (infile == stdin) printf(" > ");
aa41d2de 989 if ((pp = extend_inputline(infile, pp)) == NULL)
c86f6258
PH
990 {
991 fprintf(outfile, "** Unexpected EOF\n");
992 done = 1;
993 goto CONTINUE;
994 }
995 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
996 }
997
998 /* If the first character after the delimiter is backslash, make
999 the pattern end with backslash. This is purely to provide a way
1000 of testing for the error message when a pattern ends with backslash. */
1001
1002 if (pp[1] == '\\') *pp++ = '\\';
1003
1004 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1005 for callouts. */
1006
1007 *pp++ = 0;
1008 strcpy((char *)pbuffer, (char *)p);
1009
1010 /* Look for options after final delimiter */
1011
1012 options = 0;
1013 study_options = 0;
1014 log_store = showstore; /* default from command line */
1015
1016 while (*pp != 0)
1017 {
1018 switch (*pp++)
1019 {
8ac170f3 1020 case 'f': options |= PCRE_FIRSTLINE; break;
c86f6258
PH
1021 case 'g': do_g = 1; break;
1022 case 'i': options |= PCRE_CASELESS; break;
1023 case 'm': options |= PCRE_MULTILINE; break;
1024 case 's': options |= PCRE_DOTALL; break;
1025 case 'x': options |= PCRE_EXTENDED; break;
1026
1027 case '+': do_showrest = 1; break;
1028 case 'A': options |= PCRE_ANCHORED; break;
1029 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1030 case 'D': do_debug = do_showinfo = 1; break;
1031 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1032 case 'F': do_flip = 1; break;
1033 case 'G': do_G = 1; break;
1034 case 'I': do_showinfo = 1; break;
aa41d2de 1035 case 'J': options |= PCRE_DUPNAMES; break;
c86f6258
PH
1036 case 'M': log_store = 1; break;
1037 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1038
1039#if !defined NOPOSIX
1040 case 'P': do_posix = 1; break;
1041#endif
1042
1043 case 'S': do_study = 1; break;
1044 case 'U': options |= PCRE_UNGREEDY; break;
1045 case 'X': options |= PCRE_EXTRA; break;
1046 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1047 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1048
1049 case 'L':
1050 ppp = pp;
8ac170f3
PH
1051 /* The '\r' test here is so that it works on Windows */
1052 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
c86f6258
PH
1053 *ppp = 0;
1054 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1055 {
1056 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1057 goto SKIP_DATA;
1058 }
1059 tables = pcre_maketables();
1060 pp = ppp;
1061 break;
1062
1063 case '>':
1064 to_file = pp;
1065 while (*pp != 0) pp++;
1066 while (isspace(pp[-1])) pp--;
1067 *pp = 0;
1068 break;
1069
aa41d2de
PH
1070 case '<':
1071 {
1072 int x = check_newline(pp, outfile);
1073 if (x == 0) goto SKIP_DATA;
1074 options |= x;
1075 while (*pp++ != '>');
1076 }
1077 break;
1078
8ac170f3
PH
1079 case '\r': /* So that it works in Windows */
1080 case '\n':
1081 case ' ':
1082 break;
c86f6258
PH
1083
1084 default:
1085 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1086 goto SKIP_DATA;
1087 }
1088 }
1089
1090 /* Handle compiling via the POSIX interface, which doesn't support the
1091 timing, showing, or debugging options, nor the ability to pass over
1092 local character tables. */
1093
1094#if !defined NOPOSIX
1095 if (posix || do_posix)
1096 {
1097 int rc;
1098 int cflags = 0;
1099
1100 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1101 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
8ac170f3 1102 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
aa41d2de
PH
1103 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1104 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1105
c86f6258
PH
1106 rc = regcomp(&preg, (char *)p, cflags);
1107
1108 /* Compilation failed; go back for another re, skipping to blank line
1109 if non-interactive. */
1110
1111 if (rc != 0)
1112 {
aa41d2de 1113 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
c86f6258
PH
1114 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1115 goto SKIP_DATA;
1116 }
1117 }
1118
1119 /* Handle compiling via the native interface */
1120
1121 else
1122#endif /* !defined NOPOSIX */
1123
1124 {
1125 if (timeit)
1126 {
1127 register int i;
1128 clock_t time_taken;
1129 clock_t start_time = clock();
1130 for (i = 0; i < LOOPREPEAT; i++)
1131 {
1132 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1133 if (re != NULL) free(re);
1134 }
1135 time_taken = clock() - start_time;
1136 fprintf(outfile, "Compile time %.3f milliseconds\n",
1137 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1138 (double)CLOCKS_PER_SEC);
1139 }
1140
1141 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1142
1143 /* Compilation failed; go back for another re, skipping to blank line
1144 if non-interactive. */
1145
1146 if (re == NULL)
1147 {
1148 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1149 SKIP_DATA:
1150 if (infile != stdin)
1151 {
1152 for (;;)
1153 {
aa41d2de 1154 if (extend_inputline(infile, buffer) == NULL)
c86f6258
PH
1155 {
1156 done = 1;
1157 goto CONTINUE;
1158 }
1159 len = (int)strlen((char *)buffer);
1160 while (len > 0 && isspace(buffer[len-1])) len--;
1161 if (len == 0) break;
1162 }
1163 fprintf(outfile, "\n");
1164 }
1165 goto CONTINUE;
1166 }
1167
1168 /* Compilation succeeded; print data if required. There are now two
1169 info-returning functions. The old one has a limited interface and
1170 returns only limited data. Check that it agrees with the newer one. */
1171
1172 if (log_store)
1173 fprintf(outfile, "Memory allocation (code space): %d\n",
1174 (int)(gotten_store -
1175 sizeof(real_pcre) -
1176 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1177
1178 /* Extract the size for possible writing before possibly flipping it,
1179 and remember the store that was got. */
1180
1181 true_size = ((real_pcre *)re)->size;
1182 regex_gotten_store = gotten_store;
1183
1184 /* If /S was present, study the regexp to generate additional info to
1185 help with the matching. */
1186
1187 if (do_study)
1188 {
1189 if (timeit)
1190 {
1191 register int i;
1192 clock_t time_taken;
1193 clock_t start_time = clock();
1194 for (i = 0; i < LOOPREPEAT; i++)
1195 extra = pcre_study(re, study_options, &error);
1196 time_taken = clock() - start_time;
1197 if (extra != NULL) free(extra);
1198 fprintf(outfile, " Study time %.3f milliseconds\n",
1199 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1200 (double)CLOCKS_PER_SEC);
1201 }
1202 extra = pcre_study(re, study_options, &error);
1203 if (error != NULL)
1204 fprintf(outfile, "Failed to study: %s\n", error);
1205 else if (extra != NULL)
1206 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1207 }
1208
1209 /* If the 'F' option was present, we flip the bytes of all the integer
1210 fields in the regex data block and the study block. This is to make it
1211 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1212 compiled on a different architecture. */
1213
1214 if (do_flip)
1215 {
1216 real_pcre *rre = (real_pcre *)re;
1217 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1218 rre->size = byteflip(rre->size, sizeof(rre->size));
1219 rre->options = byteflip(rre->options, sizeof(rre->options));
1220 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1221 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1222 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1223 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1224 rre->name_table_offset = byteflip(rre->name_table_offset,
1225 sizeof(rre->name_table_offset));
1226 rre->name_entry_size = byteflip(rre->name_entry_size,
1227 sizeof(rre->name_entry_size));
1228 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1229
1230 if (extra != NULL)
1231 {
1232 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1233 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1234 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1235 }
1236 }
1237
1238 /* Extract information from the compiled data if required */
1239
1240 SHOW_INFO:
1241
1242 if (do_showinfo)
1243 {
1244 unsigned long int get_options, all_options;
8ac170f3 1245#if !defined NOINFOCHECK
c86f6258 1246 int old_first_char, old_options, old_count;
8ac170f3 1247#endif
c86f6258
PH
1248 int count, backrefmax, first_char, need_char;
1249 int nameentrysize, namecount;
1250 const uschar *nametable;
1251
1252 if (do_debug)
1253 {
1254 fprintf(outfile, "------------------------------------------------------------------\n");
aa41d2de 1255 pcre_printint(re, outfile);
c86f6258
PH
1256 }
1257
1258 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1259 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1260 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1261 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1262 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1263 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1264 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1265 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1266 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1267
8ac170f3 1268#if !defined NOINFOCHECK
c86f6258
PH
1269 old_count = pcre_info(re, &old_options, &old_first_char);
1270 if (count < 0) fprintf(outfile,
1271 "Error %d from pcre_info()\n", count);
1272 else
1273 {
1274 if (old_count != count) fprintf(outfile,
1275 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1276 old_count);
1277
1278 if (old_first_char != first_char) fprintf(outfile,
1279 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1280 first_char, old_first_char);
1281
1282 if (old_options != (int)get_options) fprintf(outfile,
1283 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1284 get_options, old_options);
1285 }
8ac170f3 1286#endif
c86f6258
PH
1287
1288 if (size != regex_gotten_store) fprintf(outfile,
1289 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1290 (int)size, (int)regex_gotten_store);
1291
1292 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1293 if (backrefmax > 0)
1294 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1295
1296 if (namecount > 0)
1297 {
1298 fprintf(outfile, "Named capturing subpatterns:\n");
1299 while (namecount-- > 0)
1300 {
1301 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1302 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1303 GET2(nametable, 0));
1304 nametable += nameentrysize;
1305 }
1306 }
1307
1308 /* The NOPARTIAL bit is a private bit in the options, so we have
1309 to fish it out via our back door */
1310
1311 all_options = ((real_pcre *)re)->options;
1312 if (do_flip)
1313 {
1314 all_options = byteflip(all_options, sizeof(all_options));
aa41d2de 1315 }
c86f6258
PH
1316
1317 if ((all_options & PCRE_NOPARTIAL) != 0)
1318 fprintf(outfile, "Partial matching not supported\n");
1319
1320 if (get_options == 0) fprintf(outfile, "No options\n");
aa41d2de 1321 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
c86f6258
PH
1322 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1323 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1324 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1325 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
8ac170f3 1326 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
c86f6258
PH
1327 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1328 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1329 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1330 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
aa41d2de 1331 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
c86f6258 1332 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
aa41d2de
PH
1333 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1334 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1335
1336 switch (get_options & PCRE_NEWLINE_CRLF)
1337 {
1338 case PCRE_NEWLINE_CR:
1339 fprintf(outfile, "Forced newline sequence: CR\n");
1340 break;
1341
1342 case PCRE_NEWLINE_LF:
1343 fprintf(outfile, "Forced newline sequence: LF\n");
1344 break;
c86f6258 1345
aa41d2de
PH
1346 case PCRE_NEWLINE_CRLF:
1347 fprintf(outfile, "Forced newline sequence: CRLF\n");
1348 break;
1349
1350 default:
1351 break;
1352 }
c86f6258
PH
1353
1354 if (first_char == -1)
1355 {
aa41d2de 1356 fprintf(outfile, "First char at start or follows newline\n");
c86f6258
PH
1357 }
1358 else if (first_char < 0)
1359 {
1360 fprintf(outfile, "No first char\n");
1361 }
1362 else
1363 {
1364 int ch = first_char & 255;
1365 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1366 "" : " (caseless)";
1367 if (isprint(ch))
1368 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1369 else
1370 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1371 }
1372
1373 if (need_char < 0)
1374 {
1375 fprintf(outfile, "No need char\n");
1376 }
1377 else
1378 {
1379 int ch = need_char & 255;
1380 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1381 "" : " (caseless)";
1382 if (isprint(ch))
1383 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1384 else
1385 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1386 }
1387
1388 /* Don't output study size; at present it is in any case a fixed
1389 value, but it varies, depending on the computer architecture, and
1390 so messes up the test suite. (And with the /F option, it might be
1391 flipped.) */
1392
1393 if (do_study)
1394 {
1395 if (extra == NULL)
1396 fprintf(outfile, "Study returned NULL\n");
1397 else
1398 {
1399 uschar *start_bits = NULL;
1400 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1401
1402 if (start_bits == NULL)
1403 fprintf(outfile, "No starting byte set\n");
1404 else
1405 {
1406 int i;
1407 int c = 24;
1408 fprintf(outfile, "Starting byte set: ");
1409 for (i = 0; i < 256; i++)
1410 {
1411 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1412 {
1413 if (c > 75)
1414 {
1415 fprintf(outfile, "\n ");
1416 c = 2;
1417 }
1418 if (isprint(i) && i != ' ')
1419 {
1420 fprintf(outfile, "%c ", i);
1421 c += 2;
1422 }
1423 else
1424 {
1425 fprintf(outfile, "\\x%02x ", i);
1426 c += 5;
1427 }
1428 }
1429 }
1430 fprintf(outfile, "\n");
1431 }
1432 }
1433 }
1434 }
1435
1436 /* If the '>' option was present, we write out the regex to a file, and
1437 that is all. The first 8 bytes of the file are the regex length and then
1438 the study length, in big-endian order. */
1439
1440 if (to_file != NULL)
1441 {
1442 FILE *f = fopen((char *)to_file, "wb");
1443 if (f == NULL)
1444 {
1445 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1446 }
1447 else
1448 {
1449 uschar sbuf[8];
1450 sbuf[0] = (true_size >> 24) & 255;
1451 sbuf[1] = (true_size >> 16) & 255;
1452 sbuf[2] = (true_size >> 8) & 255;
1453 sbuf[3] = (true_size) & 255;
1454
1455 sbuf[4] = (true_study_size >> 24) & 255;
1456 sbuf[5] = (true_study_size >> 16) & 255;
1457 sbuf[6] = (true_study_size >> 8) & 255;
1458 sbuf[7] = (true_study_size) & 255;
1459
1460 if (fwrite(sbuf, 1, 8, f) < 8 ||
1461 fwrite(re, 1, true_size, f) < true_size)
1462 {
1463 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1464 }
1465 else
1466 {
1467 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1468 if (extra != NULL)
1469 {
1470 if (fwrite(extra->study_data, 1, true_study_size, f) <
1471 true_study_size)
1472 {
1473 fprintf(outfile, "Write error on %s: %s\n", to_file,
1474 strerror(errno));
1475 }
1476 else fprintf(outfile, "Study data written to %s\n", to_file);
1477 }
1478 }
1479 fclose(f);
1480 }
8ac170f3
PH
1481
1482 new_free(re);
1483 if (extra != NULL) new_free(extra);
1484 if (tables != NULL) new_free((void *)tables);
c86f6258
PH
1485 continue; /* With next regex */
1486 }
1487 } /* End of non-POSIX compile */
1488
1489 /* Read data lines and test them */
1490
1491 for (;;)
1492 {
aa41d2de
PH
1493 uschar *q;
1494 uschar *bptr = dbuffer;
c86f6258
PH
1495 int *use_offsets = offsets;
1496 int use_size_offsets = size_offsets;
1497 int callout_data = 0;
1498 int callout_data_set = 0;
1499 int count, c;
1500 int copystrings = 0;
1501 int find_match_limit = 0;
1502 int getstrings = 0;
1503 int getlist = 0;
1504 int gmatched = 0;
1505 int start_offset = 0;
1506 int g_notempty = 0;
8ac170f3 1507 int use_dfa = 0;
c86f6258
PH
1508
1509 options = 0;
1510
aa41d2de
PH
1511 *copynames = 0;
1512 *getnames = 0;
1513
1514 copynamesptr = copynames;
1515 getnamesptr = getnames;
1516
c86f6258
PH
1517 pcre_callout = callout;
1518 first_callout = 1;
1519 callout_extra = 0;
1520 callout_count = 0;
1521 callout_fail_count = 999999;
1522 callout_fail_id = -1;
1523 show_malloc = 0;
1524
aa41d2de
PH
1525 if (extra != NULL) extra->flags &=
1526 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1527
1528 len = 0;
1529 for (;;)
c86f6258 1530 {
aa41d2de
PH
1531 if (infile == stdin) printf("data> ");
1532 if (extend_inputline(infile, buffer + len) == NULL)
1533 {
1534 if (len > 0) break;
1535 done = 1;
1536 goto CONTINUE;
1537 }
1538 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1539 len = (int)strlen((char *)buffer);
1540 if (buffer[len-1] == '\n') break;
c86f6258 1541 }
c86f6258 1542
c86f6258
PH
1543 while (len > 0 && isspace(buffer[len-1])) len--;
1544 buffer[len] = 0;
1545 if (len == 0) break;
1546
1547 p = buffer;
1548 while (isspace(*p)) p++;
1549
1550 q = dbuffer;
1551 while ((c = *p++) != 0)
1552 {
1553 int i = 0;
1554 int n = 0;
1555
1556 if (c == '\\') switch ((c = *p++))
1557 {
1558 case 'a': c = 7; break;
1559 case 'b': c = '\b'; break;
1560 case 'e': c = 27; break;
1561 case 'f': c = '\f'; break;
1562 case 'n': c = '\n'; break;
1563 case 'r': c = '\r'; break;
1564 case 't': c = '\t'; break;
1565 case 'v': c = '\v'; break;
1566
1567 case '0': case '1': case '2': case '3':
1568 case '4': case '5': case '6': case '7':
1569 c -= '0';
1570 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1571 c = c * 8 + *p++ - '0';
aa41d2de
PH
1572
1573#if !defined NOUTF8
1574 if (use_utf8 && c > 255)
1575 {
1576 unsigned char buff8[8];
1577 int ii, utn;
1578 utn = ord2utf8(c, buff8);
1579 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1580 c = buff8[ii]; /* Last byte */
1581 }
1582#endif
c86f6258
PH
1583 break;
1584
1585 case 'x':
1586
1587 /* Handle \x{..} specially - new Perl thing for utf8 */
1588
8ac170f3 1589#if !defined NOUTF8
c86f6258
PH
1590 if (*p == '{')
1591 {
1592 unsigned char *pt = p;
1593 c = 0;
1594 while (isxdigit(*(++pt)))
1595 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1596 if (*pt == '}')
1597 {
1598 unsigned char buff8[8];
1599 int ii, utn;
aa41d2de 1600 utn = ord2utf8(c, buff8);
c86f6258
PH
1601 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1602 c = buff8[ii]; /* Last byte */
1603 p = pt + 1;
1604 break;
1605 }
1606 /* Not correct form; fall through */
1607 }
8ac170f3 1608#endif
c86f6258
PH
1609
1610 /* Ordinary \x */
1611
1612 c = 0;
1613 while (i++ < 2 && isxdigit(*p))
1614 {
1615 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1616 p++;
1617 }
1618 break;
1619
1620 case 0: /* \ followed by EOF allows for an empty line */
1621 p--;
1622 continue;
1623
1624 case '>':
1625 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1626 continue;
1627
1628 case 'A': /* Option setting */
1629 options |= PCRE_ANCHORED;
1630 continue;
1631
1632 case 'B':
1633 options |= PCRE_NOTBOL;
1634 continue;
1635
1636 case 'C':
1637 if (isdigit(*p)) /* Set copy string */
1638 {
1639 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1640 copystrings |= 1 << n;
1641 }
1642 else if (isalnum(*p))
1643 {
aa41d2de 1644 uschar *npp = copynamesptr;
c86f6258 1645 while (isalnum(*p)) *npp++ = *p++;
aa41d2de 1646 *npp++ = 0;
c86f6258 1647 *npp = 0;
aa41d2de 1648 n = pcre_get_stringnumber(re, (char *)copynamesptr);
c86f6258 1649 if (n < 0)
aa41d2de
PH
1650 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1651 copynamesptr = npp;
c86f6258
PH
1652 }
1653 else if (*p == '+')
1654 {
1655 callout_extra = 1;
1656 p++;
1657 }
1658 else if (*p == '-')
1659 {
1660 pcre_callout = NULL;
1661 p++;
1662 }
1663 else if (*p == '!')
1664 {
1665 callout_fail_id = 0;
1666 p++;
1667 while(isdigit(*p))
1668 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1669 callout_fail_count = 0;
1670 if (*p == '!')
1671 {
1672 p++;
1673 while(isdigit(*p))
1674 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1675 }
1676 }
1677 else if (*p == '*')
1678 {
1679 int sign = 1;
1680 callout_data = 0;
1681 if (*(++p) == '-') { sign = -1; p++; }
1682 while(isdigit(*p))
1683 callout_data = callout_data * 10 + *p++ - '0';
1684 callout_data *= sign;
1685 callout_data_set = 1;
1686 }
1687 continue;
1688
8ac170f3
PH
1689#if !defined NODFA
1690 case 'D':
1691#if !defined NOPOSIX
1692 if (posix || do_posix)
1693 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1694 else
1695#endif
1696 use_dfa = 1;
1697 continue;
1698
1699 case 'F':
1700 options |= PCRE_DFA_SHORTEST;
1701 continue;
1702#endif
1703
c86f6258
PH
1704 case 'G':
1705 if (isdigit(*p))
1706 {
1707 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1708 getstrings |= 1 << n;
1709 }
1710 else if (isalnum(*p))
1711 {
aa41d2de 1712 uschar *npp = getnamesptr;
c86f6258 1713 while (isalnum(*p)) *npp++ = *p++;
aa41d2de 1714 *npp++ = 0;
c86f6258 1715 *npp = 0;
aa41d2de 1716 n = pcre_get_stringnumber(re, (char *)getnamesptr);
c86f6258 1717 if (n < 0)
aa41d2de
PH
1718 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1719 getnamesptr = npp;
c86f6258
PH
1720 }
1721 continue;
1722
1723 case 'L':
1724 getlist = 1;
1725 continue;
1726
1727 case 'M':
1728 find_match_limit = 1;
1729 continue;
1730
1731 case 'N':
1732 options |= PCRE_NOTEMPTY;
1733 continue;
1734
1735 case 'O':
1736 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1737 if (n > size_offsets_max)
1738 {
1739 size_offsets_max = n;
1740 free(offsets);
1741 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1742 if (offsets == NULL)
1743 {
1744 printf("** Failed to get %d bytes of memory for offsets vector\n",
1745 size_offsets_max * sizeof(int));
8ac170f3
PH
1746 yield = 1;
1747 goto EXIT;
c86f6258
PH
1748 }
1749 }
1750 use_size_offsets = n;
1751 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1752 continue;
1753
1754 case 'P':
1755 options |= PCRE_PARTIAL;
1756 continue;
1757
aa41d2de
PH
1758 case 'Q':
1759 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1760 if (extra == NULL)
1761 {
1762 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1763 extra->flags = 0;
1764 }
1765 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1766 extra->match_limit_recursion = n;
1767 continue;
1768
1769 case 'q':
1770 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1771 if (extra == NULL)
1772 {
1773 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1774 extra->flags = 0;
1775 }
1776 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1777 extra->match_limit = n;
1778 continue;
1779
8ac170f3
PH
1780#if !defined NODFA
1781 case 'R':
1782 options |= PCRE_DFA_RESTART;
1783 continue;
1784#endif
1785
c86f6258
PH
1786 case 'S':
1787 show_malloc = 1;
1788 continue;
1789
1790 case 'Z':
1791 options |= PCRE_NOTEOL;
1792 continue;
1793
1794 case '?':
1795 options |= PCRE_NO_UTF8_CHECK;
1796 continue;
aa41d2de
PH
1797
1798 case '<':
1799 {
1800 int x = check_newline(p, outfile);
1801 if (x == 0) goto NEXT_DATA;
1802 options |= x;
1803 while (*p++ != '>');
1804 }
1805 continue;
c86f6258
PH
1806 }
1807 *q++ = c;
1808 }
1809 *q = 0;
1810 len = q - dbuffer;
1811
8ac170f3
PH
1812 if ((all_use_dfa || use_dfa) && find_match_limit)
1813 {
1814 printf("**Match limit not relevant for DFA matching: ignored\n");
1815 find_match_limit = 0;
1816 }
1817
c86f6258
PH
1818 /* Handle matching via the POSIX interface, which does not
1819 support timing or playing with the match limit or callout data. */
1820
1821#if !defined NOPOSIX
1822 if (posix || do_posix)
1823 {
1824 int rc;
1825 int eflags = 0;
1826 regmatch_t *pmatch = NULL;
1827 if (use_size_offsets > 0)
1828 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1829 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1830 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1831
1832 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1833
1834 if (rc != 0)
1835 {
aa41d2de 1836 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
c86f6258
PH
1837 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1838 }
aa41d2de
PH
1839 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1840 != 0)
1841 {
1842 fprintf(outfile, "Matched with REG_NOSUB\n");
1843 }
c86f6258
PH
1844 else
1845 {
1846 size_t i;
1847 for (i = 0; i < (size_t)use_size_offsets; i++)
1848 {
1849 if (pmatch[i].rm_so >= 0)
1850 {
1851 fprintf(outfile, "%2d: ", (int)i);
1852 (void)pchars(dbuffer + pmatch[i].rm_so,
1853 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1854 fprintf(outfile, "\n");
1855 if (i == 0 && do_showrest)
1856 {
1857 fprintf(outfile, " 0+ ");
1858 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1859 outfile);
1860 fprintf(outfile, "\n");
1861 }
1862 }
1863 }
1864 }
1865 free(pmatch);
1866 }
1867
1868 /* Handle matching via the native interface - repeats for /g and /G */
1869
1870 else
1871#endif /* !defined NOPOSIX */
1872
1873 for (;; gmatched++) /* Loop for /g or /G */
1874 {
1875 if (timeit)
1876 {
1877 register int i;
1878 clock_t time_taken;
1879 clock_t start_time = clock();
8ac170f3
PH
1880
1881#if !defined NODFA
1882 if (all_use_dfa || use_dfa)
1883 {
1884 int workspace[1000];
1885 for (i = 0; i < LOOPREPEAT; i++)
1886 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1887 options | g_notempty, use_offsets, use_size_offsets, workspace,
1888 sizeof(workspace)/sizeof(int));
1889 }
1890 else
1891#endif
1892
c86f6258
PH
1893 for (i = 0; i < LOOPREPEAT; i++)
1894 count = pcre_exec(re, extra, (char *)bptr, len,
1895 start_offset, options | g_notempty, use_offsets, use_size_offsets);
8ac170f3 1896
c86f6258
PH
1897 time_taken = clock() - start_time;
1898 fprintf(outfile, "Execute time %.3f milliseconds\n",
1899 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1900 (double)CLOCKS_PER_SEC);
1901 }
1902
1903 /* If find_match_limit is set, we want to do repeated matches with
aa41d2de
PH
1904 varying limits in order to find the minimum value for the match limit and
1905 for the recursion limit. */
c86f6258
PH
1906
1907 if (find_match_limit)
1908 {
c86f6258
PH
1909 if (extra == NULL)
1910 {
1911 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1912 extra->flags = 0;
1913 }
c86f6258 1914
aa41d2de
PH
1915 (void)check_match_limit(re, extra, bptr, len, start_offset,
1916 options|g_notempty, use_offsets, use_size_offsets,
1917 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1918 PCRE_ERROR_MATCHLIMIT, "match()");
c86f6258 1919
aa41d2de
PH
1920 count = check_match_limit(re, extra, bptr, len, start_offset,
1921 options|g_notempty, use_offsets, use_size_offsets,
1922 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1923 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
c86f6258
PH
1924 }
1925
1926 /* If callout_data is set, use the interface with additional data */
1927
1928 else if (callout_data_set)
1929 {
1930 if (extra == NULL)
1931 {
1932 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1933 extra->flags = 0;
1934 }
1935 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1936 extra->callout_data = &callout_data;
1937 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1938 options | g_notempty, use_offsets, use_size_offsets);
1939 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1940 }
1941
1942 /* The normal case is just to do the match once, with the default
1943 value of match_limit. */
1944
8ac170f3
PH
1945#if !defined NODFA
1946 else if (all_use_dfa || use_dfa)
c86f6258 1947 {
8ac170f3
PH
1948 int workspace[1000];
1949 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1950 options | g_notempty, use_offsets, use_size_offsets, workspace,
1951 sizeof(workspace)/sizeof(int));
1952 if (count == 0)
1953 {
1954 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1955 count = use_size_offsets/2;
1956 }
c86f6258 1957 }
8ac170f3 1958#endif
c86f6258 1959
8ac170f3 1960 else
c86f6258 1961 {
8ac170f3
PH
1962 count = pcre_exec(re, extra, (char *)bptr, len,
1963 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1964 if (count == 0)
1965 {
1966 fprintf(outfile, "Matched, but too many substrings\n");
1967 count = use_size_offsets/3;
1968 }
c86f6258
PH
1969 }
1970
1971 /* Matched */
1972
1973 if (count >= 0)
1974 {
1975 int i;
1976 for (i = 0; i < count * 2; i += 2)
1977 {
1978 if (use_offsets[i] < 0)
1979 fprintf(outfile, "%2d: <unset>\n", i/2);
1980 else
1981 {
1982 fprintf(outfile, "%2d: ", i/2);
1983 (void)pchars(bptr + use_offsets[i],
1984 use_offsets[i+1] - use_offsets[i], outfile);
1985 fprintf(outfile, "\n");
1986 if (i == 0)
1987 {
1988 if (do_showrest)
1989 {
1990 fprintf(outfile, " 0+ ");
1991 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1992 outfile);
1993 fprintf(outfile, "\n");
1994 }
1995 }
1996 }
1997 }
1998
1999 for (i = 0; i < 32; i++)
2000 {
2001 if ((copystrings & (1 << i)) != 0)
2002 {
aa41d2de 2003 char copybuffer[256];
c86f6258
PH
2004 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2005 i, copybuffer, sizeof(copybuffer));
2006 if (rc < 0)
2007 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2008 else
2009 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2010 }
2011 }
2012
aa41d2de
PH
2013 for (copynamesptr = copynames;
2014 *copynamesptr != 0;
2015 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2016 {
2017 char copybuffer[256];
2018 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2019 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2020 if (rc < 0)
2021 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2022 else
2023 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2024 }
2025
c86f6258
PH
2026 for (i = 0; i < 32; i++)
2027 {
2028 if ((getstrings & (1 << i)) != 0)
2029 {
2030 const char *substring;
2031 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2032 i, &substring);
2033 if (rc < 0)
2034 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2035 else
2036 {
2037 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
c86f6258
PH
2038 pcre_free_substring(substring);
2039 }
2040 }
2041 }
2042
aa41d2de
PH
2043 for (getnamesptr = getnames;
2044 *getnamesptr != 0;
2045 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2046 {
2047 const char *substring;
2048 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2049 count, (char *)getnamesptr, &substring);
2050 if (rc < 0)
2051 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2052 else
2053 {
2054 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2055 pcre_free_substring(substring);
2056 }
2057 }
2058
c86f6258
PH
2059 if (getlist)
2060 {
2061 const char **stringlist;
2062 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2063 &stringlist);
2064 if (rc < 0)
2065 fprintf(outfile, "get substring list failed %d\n", rc);
2066 else
2067 {
2068 for (i = 0; i < count; i++)
2069 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2070 if (stringlist[i] != NULL)
2071 fprintf(outfile, "string list not terminated by NULL\n");
2072 /* free((void *)stringlist); */
2073 pcre_free_substring_list(stringlist);
2074 }
2075 }
2076 }
2077
2078 /* There was a partial match */
2079
2080 else if (count == PCRE_ERROR_PARTIAL)
2081 {
8ac170f3
PH
2082 fprintf(outfile, "Partial match");
2083#if !defined NODFA
2084 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2085 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2086 bptr + use_offsets[0]);
2087#endif
2088 fprintf(outfile, "\n");
c86f6258
PH
2089 break; /* Out of the /g loop */
2090 }
2091
2092 /* Failed to match. If this is a /g or /G loop and we previously set
2093 g_notempty after a null match, this is not necessarily the end.
2094 We want to advance the start offset, and continue. In the case of UTF-8
2095 matching, the advance must be one character, not one byte. Fudge the
2096 offset values to achieve this. We won't be at the end of the string -
2097 that was checked before setting g_notempty. */
2098
2099 else
2100 {
2101 if (g_notempty != 0)
2102 {
2103 int onechar = 1;
2104 use_offsets[0] = start_offset;
2105 if (use_utf8)
2106 {
2107 while (start_offset + onechar < len)
2108 {
2109 int tb = bptr[start_offset+onechar];
2110 if (tb <= 127) break;
2111 tb &= 0xc0;
2112 if (tb != 0 && tb != 0xc0) onechar++;
2113 }
2114 }
2115 use_offsets[1] = start_offset + onechar;
2116 }
2117 else
2118 {
2119 if (count == PCRE_ERROR_NOMATCH)
2120 {
2121 if (gmatched == 0) fprintf(outfile, "No match\n");
2122 }
2123 else fprintf(outfile, "Error %d\n", count);
2124 break; /* Out of the /g loop */
2125 }
2126 }
2127
2128 /* If not /g or /G we are done */
2129
2130 if (!do_g && !do_G) break;
2131
2132 /* If we have matched an empty string, first check to see if we are at
2133 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2134 what Perl's /g option does. This turns out to be rather cunning. First
2135 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2136 same point. If this fails (picked up above) we advance to the next
2137 character. */
2138
2139 g_notempty = 0;
2140 if (use_offsets[0] == use_offsets[1])
2141 {
2142 if (use_offsets[0] == len) break;
2143 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2144 }
2145
2146 /* For /g, update the start offset, leaving the rest alone */
2147
2148 if (do_g) start_offset = use_offsets[1];
2149
2150 /* For /G, update the pointer and length */
2151
2152 else
2153 {
2154 bptr += use_offsets[1];
2155 len -= use_offsets[1];
2156 }
2157 } /* End of loop for /g and /G */
aa41d2de
PH
2158
2159 NEXT_DATA: continue;
c86f6258
PH
2160 } /* End of loop for data lines */
2161
2162 CONTINUE:
2163
2164#if !defined NOPOSIX
2165 if (posix || do_posix) regfree(&preg);
2166#endif
2167
8ac170f3
PH
2168 if (re != NULL) new_free(re);
2169 if (extra != NULL) new_free(extra);
c86f6258
PH
2170 if (tables != NULL)
2171 {
8ac170f3 2172 new_free((void *)tables);
c86f6258
PH
2173 setlocale(LC_CTYPE, "C");
2174 }
2175 }
2176
2177if (infile == stdin) fprintf(outfile, "\n");
8ac170f3
PH
2178
2179EXIT:
2180
2181if (infile != NULL && infile != stdin) fclose(infile);
2182if (outfile != NULL && outfile != stdout) fclose(outfile);
2183
2184free(buffer);
2185free(dbuffer);
2186free(pbuffer);
2187free(offsets);
2188
2189return yield;
c86f6258
PH
2190}
2191
8ac170f3 2192/* End of pcretest.c */