Make -R and -S do something sensible when combined with -q<time>.
[exim.git] / src / src / pcre / pcre_printint.src
1 /* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.1 2006/11/07 16:50:36 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
9
10 Written by Philip Hazel
11 Copyright (c) 1997-2005 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
45 local functions. This source file is used in two places:
46
47 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
48 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
49
50 (2) It is always #included by pcretest.c, which can be asked to print out a
51 compiled regex for debugging purposes. */
52
53
54 static const char *OP_names[] = { OP_NAME_LIST };
55
56
57 /*************************************************
58 * Print single- or multi-byte character *
59 *************************************************/
60
61 static int
62 print_char(FILE *f, uschar *ptr, BOOL utf8)
63 {
64 int c = *ptr;
65
66 if (!utf8 || (c & 0xc0) != 0xc0)
67 {
68 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
69 return 0;
70 }
71 else
72 {
73 int i;
74 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
75 int s = 6*a;
76 c = (c & _pcre_utf8_table3[a]) << s;
77 for (i = 1; i <= a; i++)
78 {
79 /* This is a check for malformed UTF-8; it should only occur if the sanity
80 check has been turned off. Rather than swallow random bytes, just stop if
81 we hit a bad one. Print it with \X instead of \x as an indication. */
82
83 if ((ptr[i] & 0xc0) != 0x80)
84 {
85 fprintf(f, "\\X{%x}", c);
86 return i - 1;
87 }
88
89 /* The byte is OK */
90
91 s -= 6;
92 c |= (ptr[i] & 0x3f) << s;
93 }
94 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
95 return a;
96 }
97 }
98
99
100
101 /*************************************************
102 * Find Unicode property name *
103 *************************************************/
104
/* Look up the printable name of a Unicode property.

Arguments:
  ptype    property type, matched against the type field of _pcre_utt entries
  pvalue   property value, matched against the value field

Returns:   the name field of the matching table entry, or "??" when no entry
           matches or when SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i;

  /* Scan from the last entry down to the first. This assumes _pcre_utt_size
  is the number of entries in _pcre_utt, so the highest valid index is
  _pcre_utt_size - 1; starting at _pcre_utt_size itself would read one element
  beyond the end of the table. */

  for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
      return _pcre_utt[i].name;
  }
  return "??";
#else
  /* Without property support there is nothing to look up. The casts mark the
  arguments as deliberately unused so that compilers do not warn. */

  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
121
122
123
124 /*************************************************
125 * Print compiled regex *
126 *************************************************/
127
128 /* Make this function work for a regex with integers in either byte order.
129 However, we assume that what we are passed is a compiled regex. */
130
131 static void
132 pcre_printint(pcre *external_re, FILE *f)
133 {
134 real_pcre *re = (real_pcre *)external_re;
135 uschar *codestart, *code;
136 BOOL utf8;
137
138 unsigned int options = re->options;
139 int offset = re->name_table_offset;
140 int count = re->name_count;
141 int size = re->name_entry_size;
142
143 if (re->magic_number != MAGIC_NUMBER)
144 {
145 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
146 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
147 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
148 options = ((options << 24) & 0xff000000) |
149 ((options << 8) & 0x00ff0000) |
150 ((options >> 8) & 0x0000ff00) |
151 ((options >> 24) & 0x000000ff);
152 }
153
154 code = codestart = (uschar *)re + offset + count * size;
155 utf8 = (options & PCRE_UTF8) != 0;
156
157 for(;;)
158 {
159 uschar *ccode;
160 int c;
161 int extra = 0;
162
163 fprintf(f, "%3d ", (int)(code - codestart));
164
165 if (*code >= OP_BRA)
166 {
167 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
168 fprintf(f, "%3d Bra extra\n", GET(code, 1));
169 else
170 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
171 code += _pcre_OP_lengths[OP_BRA];
172 continue;
173 }
174
175 switch(*code)
176 {
177 case OP_END:
178 fprintf(f, " %s\n", OP_names[*code]);
179 fprintf(f, "------------------------------------------------------------------\n");
180 return;
181
182 case OP_OPT:
183 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
184 break;
185
186 case OP_CHAR:
187 fprintf(f, " ");
188 do
189 {
190 code++;
191 code += 1 + print_char(f, code, utf8);
192 }
193 while (*code == OP_CHAR);
194 fprintf(f, "\n");
195 continue;
196
197 case OP_CHARNC:
198 fprintf(f, " NC ");
199 do
200 {
201 code++;
202 code += 1 + print_char(f, code, utf8);
203 }
204 while (*code == OP_CHARNC);
205 fprintf(f, "\n");
206 continue;
207
208 case OP_KETRMAX:
209 case OP_KETRMIN:
210 case OP_ALT:
211 case OP_KET:
212 case OP_ASSERT:
213 case OP_ASSERT_NOT:
214 case OP_ASSERTBACK:
215 case OP_ASSERTBACK_NOT:
216 case OP_ONCE:
217 case OP_COND:
218 case OP_REVERSE:
219 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
220 break;
221
222 case OP_BRANUMBER:
223 printf("%3d %s", GET2(code, 1), OP_names[*code]);
224 break;
225
226 case OP_CREF:
227 if (GET2(code, 1) == CREF_RECURSE)
228 fprintf(f, " Cond recurse");
229 else
230 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
231 break;
232
233 case OP_STAR:
234 case OP_MINSTAR:
235 case OP_PLUS:
236 case OP_MINPLUS:
237 case OP_QUERY:
238 case OP_MINQUERY:
239 case OP_TYPESTAR:
240 case OP_TYPEMINSTAR:
241 case OP_TYPEPLUS:
242 case OP_TYPEMINPLUS:
243 case OP_TYPEQUERY:
244 case OP_TYPEMINQUERY:
245 fprintf(f, " ");
246 if (*code >= OP_TYPESTAR)
247 {
248 fprintf(f, "%s", OP_names[code[1]]);
249 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
250 {
251 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
252 extra = 2;
253 }
254 }
255 else extra = print_char(f, code+1, utf8);
256 fprintf(f, "%s", OP_names[*code]);
257 break;
258
259 case OP_EXACT:
260 case OP_UPTO:
261 case OP_MINUPTO:
262 fprintf(f, " ");
263 extra = print_char(f, code+3, utf8);
264 fprintf(f, "{");
265 if (*code != OP_EXACT) fprintf(f, ",");
266 fprintf(f, "%d}", GET2(code,1));
267 if (*code == OP_MINUPTO) fprintf(f, "?");
268 break;
269
270 case OP_TYPEEXACT:
271 case OP_TYPEUPTO:
272 case OP_TYPEMINUPTO:
273 fprintf(f, " %s", OP_names[code[3]]);
274 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
275 {
276 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
277 extra = 2;
278 }
279 fprintf(f, "{");
280 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
281 fprintf(f, "%d}", GET2(code,1));
282 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
283 break;
284
285 case OP_NOT:
286 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
287 else fprintf(f, " [^\\x%02x]", c);
288 break;
289
290 case OP_NOTSTAR:
291 case OP_NOTMINSTAR:
292 case OP_NOTPLUS:
293 case OP_NOTMINPLUS:
294 case OP_NOTQUERY:
295 case OP_NOTMINQUERY:
296 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
297 else fprintf(f, " [^\\x%02x]", c);
298 fprintf(f, "%s", OP_names[*code]);
299 break;
300
301 case OP_NOTEXACT:
302 case OP_NOTUPTO:
303 case OP_NOTMINUPTO:
304 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
305 else fprintf(f, " [^\\x%02x]{", c);
306 if (*code != OP_NOTEXACT) fprintf(f, "0,");
307 fprintf(f, "%d}", GET2(code,1));
308 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
309 break;
310
311 case OP_RECURSE:
312 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
313 break;
314
315 case OP_REF:
316 fprintf(f, " \\%d", GET2(code,1));
317 ccode = code + _pcre_OP_lengths[*code];
318 goto CLASS_REF_REPEAT;
319
320 case OP_CALLOUT:
321 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
322 GET(code, 2 + LINK_SIZE));
323 break;
324
325 case OP_PROP:
326 case OP_NOTPROP:
327 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
328 break;
329
330 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
331 having this code always here, and it makes it less messy without all those
332 #ifdefs. */
333
334 case OP_CLASS:
335 case OP_NCLASS:
336 case OP_XCLASS:
337 {
338 int i, min, max;
339 BOOL printmap;
340
341 fprintf(f, " [");
342
343 if (*code == OP_XCLASS)
344 {
345 extra = GET(code, 1);
346 ccode = code + LINK_SIZE + 1;
347 printmap = (*ccode & XCL_MAP) != 0;
348 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
349 }
350 else
351 {
352 printmap = TRUE;
353 ccode = code + 1;
354 }
355
356 /* Print a bit map */
357
358 if (printmap)
359 {
360 for (i = 0; i < 256; i++)
361 {
362 if ((ccode[i/8] & (1 << (i&7))) != 0)
363 {
364 int j;
365 for (j = i+1; j < 256; j++)
366 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
367 if (i == '-' || i == ']') fprintf(f, "\\");
368 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
369 if (--j > i)
370 {
371 if (j != i + 1) fprintf(f, "-");
372 if (j == '-' || j == ']') fprintf(f, "\\");
373 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
374 }
375 i = j;
376 }
377 }
378 ccode += 32;
379 }
380
381 /* For an XCLASS there is always some additional data */
382
383 if (*code == OP_XCLASS)
384 {
385 int ch;
386 while ((ch = *ccode++) != XCL_END)
387 {
388 if (ch == XCL_PROP)
389 {
390 int ptype = *ccode++;
391 int pvalue = *ccode++;
392 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
393 }
394 else if (ch == XCL_NOTPROP)
395 {
396 int ptype = *ccode++;
397 int pvalue = *ccode++;
398 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
399 }
400 else
401 {
402 ccode += 1 + print_char(f, ccode, TRUE);
403 if (ch == XCL_RANGE)
404 {
405 fprintf(f, "-");
406 ccode += 1 + print_char(f, ccode, TRUE);
407 }
408 }
409 }
410 }
411
412 /* Indicate a non-UTF8 class which was created by negation */
413
414 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
415
416 /* Handle repeats after a class or a back reference */
417
418 CLASS_REF_REPEAT:
419 switch(*ccode)
420 {
421 case OP_CRSTAR:
422 case OP_CRMINSTAR:
423 case OP_CRPLUS:
424 case OP_CRMINPLUS:
425 case OP_CRQUERY:
426 case OP_CRMINQUERY:
427 fprintf(f, "%s", OP_names[*ccode]);
428 extra += _pcre_OP_lengths[*ccode];
429 break;
430
431 case OP_CRRANGE:
432 case OP_CRMINRANGE:
433 min = GET2(ccode,1);
434 max = GET2(ccode,3);
435 if (max == 0) fprintf(f, "{%d,}", min);
436 else fprintf(f, "{%d,%d}", min, max);
437 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
438 extra += _pcre_OP_lengths[*ccode];
439 break;
440
441 /* Do nothing if it's not a repeat; this code stops picky compilers
442 warning about the lack of a default code path. */
443
444 default:
445 break;
446 }
447 }
448 break;
449
450 /* Anything else is just an item with no data*/
451
452 default:
453 fprintf(f, " %s", OP_names[*code]);
454 break;
455 }
456
457 code += _pcre_OP_lengths[*code] + extra;
458 fprintf(f, "\n");
459 }
460 }
461
462 /* End of pcre_printint.src */