Commit | Line | Data |
---|---|---|
6bf342e1 | 1 | /* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.2 2007/01/23 15:08:45 ph10 Exp $ */ |
8ac170f3 | 2 | |
c86f6258 PH |
3 | /************************************************* |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
aa41d2de PH |
7 | /* PCRE is a library of functions to support regular expressions whose syntax |
8 | and semantics are as close as possible to those of the Perl 5 language. | |
c86f6258 | 9 | |
aa41d2de PH |
10 | Written by Philip Hazel |
11 | Copyright (c) 1997-2005 University of Cambridge | |
c86f6258 PH |
12 | |
13 | ----------------------------------------------------------------------------- | |
14 | Redistribution and use in source and binary forms, with or without | |
15 | modification, are permitted provided that the following conditions are met: | |
16 | ||
17 | * Redistributions of source code must retain the above copyright notice, | |
18 | this list of conditions and the following disclaimer. | |
19 | ||
20 | * Redistributions in binary form must reproduce the above copyright | |
21 | notice, this list of conditions and the following disclaimer in the | |
22 | documentation and/or other materials provided with the distribution. | |
23 | ||
24 | * Neither the name of the University of Cambridge nor the names of its | |
25 | contributors may be used to endorse or promote products derived from | |
26 | this software without specific prior written permission. | |
27 | ||
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | POSSIBILITY OF SUCH DAMAGE. | |
39 | ----------------------------------------------------------------------------- | |
40 | */ | |
41 | ||
42 | ||
aa41d2de PH |
43 | /* This module contains a PCRE private debugging function for printing out the |
44 | internal form of a compiled regular expression, along with some supporting | |
45 | local functions. This source file is used in two places: | |
46 | ||
47 | (1) It is #included by pcre_compile.c when it is compiled in debugging mode | |
48 | (DEBUG defined in pcre_internal.h). It is not included in production compiles. | |
49 | ||
50 | (2) It is always #included by pcretest.c, which can be asked to print out a | |
51 | compiled regex for debugging purposes. */ | |
c86f6258 PH |
52 | |
53 | ||
6bf342e1 PH |
/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. This macro is used in pcretest as well as in this file.

The result is true for values from 32 to 126 inclusive. The argument is
expanded twice, so it must not be an expression with side effects. */

#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
60 | ||
/* The table of operator names. It is initialised from OP_NAME_LIST and is
indexed by opcode value throughout this file (for example OP_names[*code]). */

static const char *OP_names[] = { OP_NAME_LIST };
64 | ||
65 | ||
6bf342e1 | 66 | |
c86f6258 PH |
67 | /************************************************* |
68 | * Print single- or multi-byte character * | |
69 | *************************************************/ | |
70 | ||
c86f6258 PH |
71 | static int |
72 | print_char(FILE *f, uschar *ptr, BOOL utf8) | |
73 | { | |
74 | int c = *ptr; | |
75 | ||
76 | if (!utf8 || (c & 0xc0) != 0xc0) | |
77 | { | |
6bf342e1 | 78 | if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
c86f6258 PH |
79 | return 0; |
80 | } | |
81 | else | |
82 | { | |
83 | int i; | |
aa41d2de | 84 | int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
c86f6258 | 85 | int s = 6*a; |
aa41d2de | 86 | c = (c & _pcre_utf8_table3[a]) << s; |
c86f6258 PH |
87 | for (i = 1; i <= a; i++) |
88 | { | |
89 | /* This is a check for malformed UTF-8; it should only occur if the sanity | |
90 | check has been turned off. Rather than swallow random bytes, just stop if | |
91 | we hit a bad one. Print it with \X instead of \x as an indication. */ | |
92 | ||
93 | if ((ptr[i] & 0xc0) != 0x80) | |
94 | { | |
95 | fprintf(f, "\\X{%x}", c); | |
96 | return i - 1; | |
97 | } | |
98 | ||
99 | /* The byte is OK */ | |
100 | ||
101 | s -= 6; | |
102 | c |= (ptr[i] & 0x3f) << s; | |
103 | } | |
104 | if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); | |
105 | return a; | |
106 | } | |
107 | } | |
108 | ||
109 | ||
110 | ||
c86f6258 PH |
111 | /************************************************* |
112 | * Find Unicode property name * | |
113 | *************************************************/ | |
114 | ||
/* Look up the name of a Unicode property, given its type and value.

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:    the name from the first matching entry found when scanning
            _pcre_utt from its last entry backwards, or "??" if no entry
            matches or if PCRE was built without SUPPORT_UCP
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* _pcre_utt_size is taken to be the number of entries in _pcre_utt, so the
last valid index is _pcre_utt_size - 1. Starting the scan at _pcre_utt_size
itself would read one element past the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
    return _pcre_utt[i].name;
  }
return "??";
#else
/* Without property support there is nothing to look up. The casts mark the
arguments as deliberately unused without doing any arithmetic on them, which
avoids both compiler warnings and the risk of signed overflow. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
131 | ||
132 | ||
133 | ||
134 | /************************************************* | |
135 | * Print compiled regex * | |
136 | *************************************************/ | |
137 | ||
/* Make this function work for a regex with integers in either byte order.
However, we assume that what we are passed is a compiled regex.

The compiled code is taken to start name_table_offset + name_count *
name_entry_size bytes from the beginning of the block. If the magic number in
the block is not MAGIC_NUMBER, those three fields and the options word are
byte-swapped before they are used. One line is then written for each item of
the code, prefixed by the item's byte offset from the start of the code. The
function returns after printing the OP_END item and a line of dashes.

Arguments:
  external_re   points to the compiled pattern
  f             the stream to write to

Returns:        nothing
*/

static void
pcre_printint(pcre *external_re, FILE *f)
{
real_pcre *re = (real_pcre *)external_re;
uschar *codestart, *code;
BOOL utf8;

unsigned int options = re->options;
int offset = re->name_table_offset;
int count = re->name_count;
int size = re->name_entry_size;

/* A mismatched magic number is treated as meaning that the pattern was
compiled with the opposite byte order: swap the two low-order bytes of the
name-table fields and reverse all four bytes of the options. */

if (re->magic_number != MAGIC_NUMBER)
  {
  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
  options = ((options << 24) & 0xff000000) |
            ((options << 8) & 0x00ff0000) |
            ((options >> 8) & 0x0000ff00) |
            ((options >> 24) & 0x000000ff);
  }

/* The code follows the name table */

code = codestart = (uschar *)re + offset + count * size;
utf8 = (options & PCRE_UTF8) != 0;

/* Each pass round this loop prints one item. The only exit is the return in
the OP_END case. */

for(;;)
  {
  uschar *ccode;
  int c;
  int extra = 0;   /* Bytes to skip in addition to the opcode's table length */

  fprintf(f, "%3d ", (int)(code - codestart));

  switch(*code)
    {
    case OP_END:
    fprintf(f, " %s\n", OP_names[*code]);
    fprintf(f, "------------------------------------------------------------------\n");
    return;

    case OP_OPT:
    fprintf(f, " %.2x %s", code[1], OP_names[*code]);
    break;

    /* A run of consecutive OP_CHAR items is printed on one line. The code
    pointer is advanced here, so "continue" is used to bypass the common
    advance at the bottom of the loop. */

    case OP_CHAR:
    fprintf(f, " ");
    do
      {
      code++;
      code += 1 + print_char(f, code, utf8);
      }
    while (*code == OP_CHAR);
    fprintf(f, "\n");
    continue;

    /* The same, for a run of OP_CHARNC items */

    case OP_CHARNC:
    fprintf(f, " NC ");
    do
      {
      code++;
      code += 1 + print_char(f, code, utf8);
      }
    while (*code == OP_CHARNC);
    fprintf(f, "\n");
    continue;

    /* These are printed with the value read by GET(code, 1), followed by the
    2-byte value that comes after it. */

    case OP_CBRA:
    case OP_SCBRA:
    fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
      GET2(code, 1+LINK_SIZE));
    break;

    /* These are printed with just the value read by GET(code, 1) */

    case OP_BRA:
    case OP_SBRA:
    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_ALT:
    case OP_KET:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_COND:
    case OP_SCOND:
    case OP_REVERSE:
    fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
    break;

    case OP_CREF:
    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
    break;

    /* The value RREF_ANY is printed as "any" instead of as a number */

    case OP_RREF:
    c = GET2(code, 1);
    if (c == RREF_ANY)
      fprintf(f, " Cond recurse any");
    else
      fprintf(f, " Cond recurse %d", c);
    break;

    case OP_DEF:
    fprintf(f, " Cond def");
    break;

    /* Single-item repeats. For opcodes numerically at or above OP_TYPESTAR,
    code[1] is itself used as an index into OP_names, and for OP_PROP or
    OP_NOTPROP the two bytes after it are passed to get_ucpname() and skipped.
    For the other opcodes, what follows is printed as a character, and any
    additional bytes it occupies are skipped. */

    case OP_STAR:
    case OP_MINSTAR:
    case OP_POSSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEPOSPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEPOSQUERY:
    fprintf(f, " ");
    if (*code >= OP_TYPESTAR)
      {
      fprintf(f, "%s", OP_names[code[1]]);
      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
        {
        fprintf(f, " %s ", get_ucpname(code[2], code[3]));
        extra = 2;
        }
      }
    else extra = print_char(f, code+1, utf8);
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* Counted repeats of a character: a 2-byte count at code+1 and the
    character at code+3. All but OP_EXACT are shown with a lower bound of
    zero. */

    case OP_EXACT:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_POSUPTO:
    fprintf(f, " ");
    extra = print_char(f, code+3, utf8);
    fprintf(f, "{");
    if (*code != OP_EXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_MINUPTO) fprintf(f, "?");
      else if (*code == OP_POSUPTO) fprintf(f, "+");
    break;

    /* Counted repeats where code[3] is used as an index into OP_names */

    case OP_TYPEEXACT:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEPOSUPTO:
    fprintf(f, " %s", OP_names[code[3]]);
    if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
      {
      fprintf(f, " %s ", get_ucpname(code[4], code[5]));
      extra = 2;
      }
    fprintf(f, "{");
    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
    break;

    /* The byte at code[1] is shown in the form of a negated class */

    case OP_NOT:
    c = code[1];
    if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
      else fprintf(f, " [^\\x%02x]", c);
    break;

    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPOSSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTPOSPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTPOSQUERY:
    c = code[1];
    if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
      else fprintf(f, " [^\\x%02x]", c);
    fprintf(f, "%s", OP_names[*code]);
    break;

    /* Here the 2-byte count is at code+1 and the byte is at code[3] */

    case OP_NOTEXACT:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTPOSUPTO:
    c = code[3];
    if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
      else fprintf(f, " [^\\x%02x]{", c);
    if (*code != OP_NOTEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_NOTMINUPTO) fprintf(f, "?");
      else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
    break;

    case OP_RECURSE:
    fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
    break;

    /* A back reference may be followed by a repeat item. Point ccode at what
    follows and share the repeat-printing code with the class cases below. The
    goto jumps into the braced block of those cases; the variables declared at
    the top of that block have no initializers, and only min and max, which
    are assigned before use, are touched after the label. */

    case OP_REF:
    fprintf(f, " \\%d", GET2(code,1));
    ccode = code + _pcre_OP_lengths[*code];
    goto CLASS_REF_REPEAT;

    case OP_CALLOUT:
    fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
      GET(code, 2 + LINK_SIZE));
    break;

    case OP_PROP:
    case OP_NOTPROP:
    fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
    break;

    /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
    having this code always here, and it makes it less messy without all those
    #ifdefs. */

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:
      {
      int i, min, max;
      BOOL printmap;

      fprintf(f, " [");

      /* For OP_XCLASS, the value read by GET(code, 1) becomes the extra
      length to skip, and a flags byte follows it: XCL_MAP says whether a bit
      map is present and XCL_NOT causes "^" to be printed. The other two
      opcodes are always followed directly by a bit map. */

      if (*code == OP_XCLASS)
        {
        extra = GET(code, 1);
        ccode = code + LINK_SIZE + 1;
        printmap = (*ccode & XCL_MAP) != 0;
        if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
        }
      else
        {
        printmap = TRUE;
        ccode = code + 1;
        }

      /* Print a bit map */

      if (printmap)
        {
        for (i = 0; i < 256; i++)
          {
          if ((ccode[i/8] & (1 << (i&7))) != 0)
            {
            int j;

            /* Find the end of this run of consecutive set bits. A run of
            more than one character is printed as its first and last members,
            with a hyphen between them unless they are adjacent. */

            for (j = i+1; j < 256; j++)
              if ((ccode[j/8] & (1 << (j&7))) == 0) break;
            if (i == '-' || i == ']') fprintf(f, "\\");
            if (PRINTABLE(i)) fprintf(f, "%c", i);
              else fprintf(f, "\\x%02x", i);
            if (--j > i)
              {
              if (j != i + 1) fprintf(f, "-");
              if (j == '-' || j == ']') fprintf(f, "\\");
              if (PRINTABLE(j)) fprintf(f, "%c", j);
                else fprintf(f, "\\x%02x", j);
              }
            i = j;   /* Resume the scan after the run */
            }
          }
        ccode += 32;   /* Step over the 256-bit map */
        }

      /* For an XCLASS there is always some additional data */

      if (*code == OP_XCLASS)
        {
        int ch;
        while ((ch = *ccode++) != XCL_END)
          {
          if (ch == XCL_PROP)
            {
            int ptype = *ccode++;
            int pvalue = *ccode++;
            fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
            }
          else if (ch == XCL_NOTPROP)
            {
            int ptype = *ccode++;
            int pvalue = *ccode++;
            fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
            }

          /* Anything else is followed by one character, or by two for
          XCL_RANGE; they are always decoded as UTF-8. */

          else
            {
            ccode += 1 + print_char(f, ccode, TRUE);
            if (ch == XCL_RANGE)
              {
              fprintf(f, "-");
              ccode += 1 + print_char(f, ccode, TRUE);
              }
            }
          }
        }

      /* Indicate a non-UTF8 class which was created by negation */

      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");

      /* Handle repeats after a class or a back reference. If the next item is
      a repeat, it is printed on the same line and its length is added to
      extra so that it is skipped along with the item it follows. */

      CLASS_REF_REPEAT:
      switch(*ccode)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        fprintf(f, "%s", OP_names[*ccode]);
        extra += _pcre_OP_lengths[*ccode];
        break;

        /* A maximum of zero is printed as an open-ended range */

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        min = GET2(ccode,1);
        max = GET2(ccode,3);
        if (max == 0) fprintf(f, "{%d,}", min);
        else fprintf(f, "{%d,%d}", min, max);
        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
        extra += _pcre_OP_lengths[*ccode];
        break;

        /* Do nothing if it's not a repeat; this code stops picky compilers
        warning about the lack of a default code path. */

        default:
        break;
        }
      }
    break;

    /* Anything else is just an item with no data */

    default:
    fprintf(f, " %s", OP_names[*code]);
    break;
    }

  /* Move on to the next item: the opcode's length from the table plus any
  extra bytes noted above. */

  code += _pcre_OP_lengths[*code] + extra;
  fprintf(f, "\n");
  }
}
495 | ||
aa41d2de | 496 | /* End of pcre_printint.src */ |