Commit | Line | Data |
---|---|---|
92e772ff | 1 | /* $Cambridge: exim/src/src/pcre/pcre_printint.c,v 1.2 2005/08/08 10:22:14 ph10 Exp $ */ |
8ac170f3 PH |
2 | |
3 | /************************************************* | |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
7 | /* PCRE is a library of functions to support regular expressions whose syntax | |
8 | and semantics are as close as possible to those of the Perl 5 language. | |
9 | ||
10 | Written by Philip Hazel | |
11 | Copyright (c) 1997-2005 University of Cambridge | |
12 | ||
13 | ----------------------------------------------------------------------------- | |
14 | Redistribution and use in source and binary forms, with or without | |
15 | modification, are permitted provided that the following conditions are met: | |
16 | ||
17 | * Redistributions of source code must retain the above copyright notice, | |
18 | this list of conditions and the following disclaimer. | |
19 | ||
20 | * Redistributions in binary form must reproduce the above copyright | |
21 | notice, this list of conditions and the following disclaimer in the | |
22 | documentation and/or other materials provided with the distribution. | |
23 | ||
24 | * Neither the name of the University of Cambridge nor the names of its | |
25 | contributors may be used to endorse or promote products derived from | |
26 | this software without specific prior written permission. | |
27 | ||
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | POSSIBILITY OF SUCH DAMAGE. | |
39 | ----------------------------------------------------------------------------- | |
40 | */ | |
41 | ||
42 | ||
43 | /* This module contains a PCRE private debugging function for printing out the | |
44 | internal form of a compiled regular expression, along with some supporting | |
45 | local functions. */ | |
46 | ||
47 | ||
48 | #include "pcre_internal.h" | |
49 | ||
50 | ||
51 | static const char *OP_names[] = { OP_NAME_LIST }; | |
52 | ||
53 | ||
54 | /************************************************* | |
55 | * Print single- or multi-byte character * | |
56 | *************************************************/ | |
57 | ||
58 | static int | |
59 | print_char(FILE *f, uschar *ptr, BOOL utf8) | |
60 | { | |
61 | int c = *ptr; | |
62 | ||
63 | if (!utf8 || (c & 0xc0) != 0xc0) | |
64 | { | |
65 | if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); | |
66 | return 0; | |
67 | } | |
68 | else | |
69 | { | |
70 | int i; | |
71 | int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ | |
72 | int s = 6*a; | |
73 | c = (c & _pcre_utf8_table3[a]) << s; | |
74 | for (i = 1; i <= a; i++) | |
75 | { | |
76 | /* This is a check for malformed UTF-8; it should only occur if the sanity | |
77 | check has been turned off. Rather than swallow random bytes, just stop if | |
78 | we hit a bad one. Print it with \X instead of \x as an indication. */ | |
79 | ||
80 | if ((ptr[i] & 0xc0) != 0x80) | |
81 | { | |
82 | fprintf(f, "\\X{%x}", c); | |
83 | return i - 1; | |
84 | } | |
85 | ||
86 | /* The byte is OK */ | |
87 | ||
88 | s -= 6; | |
89 | c |= (ptr[i] & 0x3f) << s; | |
90 | } | |
91 | if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); | |
92 | return a; | |
93 | } | |
94 | } | |
95 | ||
96 | ||
97 | ||
98 | /************************************************* | |
99 | * Find Unicode property name * | |
100 | *************************************************/ | |
101 | ||
/* Map a Unicode property value to its printable name.

Argument:
  property   the property value to look up

Returns: the name from the _pcre_utt table whose value matches, or "??" if
         there is no match or the library was built without SUPPORT_UCP.
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
  int i;

  /* Valid indexes are 0 .. _pcre_utt_size-1 (assuming _pcre_utt_size is the
  entry count of _pcre_utt -- confirm in the tables module). Starting the scan
  at _pcre_utt_size itself would read one element past the end of the table. */

  for (i = _pcre_utt_size - 1; i >= 0; i--) {
    if (property == _pcre_utt[i].value)
      return _pcre_utt[i].name;
  }
  return "??";
#else
  (void)property;   /* Unused without Unicode property support */
  return "??";
#endif
}
116 | ||
117 | ||
118 | ||
119 | /************************************************* | |
120 | * Print compiled regex * | |
121 | *************************************************/ | |
122 | ||
123 | /* Make this function work for a regex with integers either byte order. | |
124 | However, we assume that what we are passed is a compiled regex. */ | |
125 | ||
126 | EXPORT void | |
127 | _pcre_printint(pcre *external_re, FILE *f) | |
128 | { | |
129 | real_pcre *re = (real_pcre *)external_re; | |
130 | uschar *codestart, *code; | |
131 | BOOL utf8; | |
132 | ||
133 | unsigned int options = re->options; | |
134 | int offset = re->name_table_offset; | |
135 | int count = re->name_count; | |
136 | int size = re->name_entry_size; | |
137 | ||
138 | if (re->magic_number != MAGIC_NUMBER) | |
139 | { | |
140 | offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff); | |
141 | count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff); | |
142 | size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff); | |
143 | options = ((options << 24) & 0xff000000) | | |
144 | ((options << 8) & 0x00ff0000) | | |
145 | ((options >> 8) & 0x0000ff00) | | |
146 | ((options >> 24) & 0x000000ff); | |
147 | } | |
148 | ||
149 | code = codestart = (uschar *)re + offset + count * size; | |
150 | utf8 = (options & PCRE_UTF8) != 0; | |
151 | ||
152 | for(;;) | |
153 | { | |
154 | uschar *ccode; | |
155 | int c; | |
156 | int extra = 0; | |
157 | ||
158 | fprintf(f, "%3d ", (int)(code - codestart)); | |
159 | ||
160 | if (*code >= OP_BRA) | |
161 | { | |
162 | if (*code - OP_BRA > EXTRACT_BASIC_MAX) | |
163 | fprintf(f, "%3d Bra extra\n", GET(code, 1)); | |
164 | else | |
165 | fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA); | |
166 | code += _pcre_OP_lengths[OP_BRA]; | |
167 | continue; | |
168 | } | |
169 | ||
170 | switch(*code) | |
171 | { | |
172 | case OP_END: | |
173 | fprintf(f, " %s\n", OP_names[*code]); | |
174 | fprintf(f, "------------------------------------------------------------------\n"); | |
175 | return; | |
176 | ||
177 | case OP_OPT: | |
178 | fprintf(f, " %.2x %s", code[1], OP_names[*code]); | |
179 | break; | |
180 | ||
181 | case OP_CHAR: | |
182 | { | |
183 | fprintf(f, " "); | |
184 | do | |
185 | { | |
186 | code++; | |
187 | code += 1 + print_char(f, code, utf8); | |
188 | } | |
189 | while (*code == OP_CHAR); | |
190 | fprintf(f, "\n"); | |
191 | continue; | |
192 | } | |
193 | break; | |
194 | ||
195 | case OP_CHARNC: | |
196 | { | |
197 | fprintf(f, " NC "); | |
198 | do | |
199 | { | |
200 | code++; | |
201 | code += 1 + print_char(f, code, utf8); | |
202 | } | |
203 | while (*code == OP_CHARNC); | |
204 | fprintf(f, "\n"); | |
205 | continue; | |
206 | } | |
207 | break; | |
208 | ||
209 | case OP_KETRMAX: | |
210 | case OP_KETRMIN: | |
211 | case OP_ALT: | |
212 | case OP_KET: | |
213 | case OP_ASSERT: | |
214 | case OP_ASSERT_NOT: | |
215 | case OP_ASSERTBACK: | |
216 | case OP_ASSERTBACK_NOT: | |
217 | case OP_ONCE: | |
218 | case OP_COND: | |
219 | case OP_REVERSE: | |
220 | fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]); | |
221 | break; | |
222 | ||
223 | case OP_BRANUMBER: | |
224 | printf("%3d %s", GET2(code, 1), OP_names[*code]); | |
225 | break; | |
226 | ||
227 | case OP_CREF: | |
228 | if (GET2(code, 1) == CREF_RECURSE) | |
229 | fprintf(f, " Cond recurse"); | |
230 | else | |
231 | fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); | |
232 | break; | |
233 | ||
234 | case OP_STAR: | |
235 | case OP_MINSTAR: | |
236 | case OP_PLUS: | |
237 | case OP_MINPLUS: | |
238 | case OP_QUERY: | |
239 | case OP_MINQUERY: | |
240 | case OP_TYPESTAR: | |
241 | case OP_TYPEMINSTAR: | |
242 | case OP_TYPEPLUS: | |
243 | case OP_TYPEMINPLUS: | |
244 | case OP_TYPEQUERY: | |
245 | case OP_TYPEMINQUERY: | |
246 | fprintf(f, " "); | |
247 | if (*code >= OP_TYPESTAR) | |
248 | { | |
249 | fprintf(f, "%s", OP_names[code[1]]); | |
250 | if (code[1] == OP_PROP || code[1] == OP_NOTPROP) | |
251 | { | |
252 | fprintf(f, " %s ", get_ucpname(code[2])); | |
253 | extra = 1; | |
254 | } | |
255 | } | |
256 | else extra = print_char(f, code+1, utf8); | |
257 | fprintf(f, "%s", OP_names[*code]); | |
258 | break; | |
259 | ||
260 | case OP_EXACT: | |
261 | case OP_UPTO: | |
262 | case OP_MINUPTO: | |
263 | fprintf(f, " "); | |
264 | extra = print_char(f, code+3, utf8); | |
265 | fprintf(f, "{"); | |
266 | if (*code != OP_EXACT) fprintf(f, ","); | |
267 | fprintf(f, "%d}", GET2(code,1)); | |
268 | if (*code == OP_MINUPTO) fprintf(f, "?"); | |
269 | break; | |
270 | ||
271 | case OP_TYPEEXACT: | |
272 | case OP_TYPEUPTO: | |
273 | case OP_TYPEMINUPTO: | |
274 | fprintf(f, " %s", OP_names[code[3]]); | |
275 | if (code[3] == OP_PROP || code[3] == OP_NOTPROP) | |
276 | { | |
277 | fprintf(f, " %s ", get_ucpname(code[4])); | |
278 | extra = 1; | |
279 | } | |
280 | fprintf(f, "{"); | |
281 | if (*code != OP_TYPEEXACT) fprintf(f, "0,"); | |
282 | fprintf(f, "%d}", GET2(code,1)); | |
283 | if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); | |
284 | break; | |
285 | ||
286 | case OP_NOT: | |
287 | if (isprint(c = code[1])) fprintf(f, " [^%c]", c); | |
288 | else fprintf(f, " [^\\x%02x]", c); | |
289 | break; | |
290 | ||
291 | case OP_NOTSTAR: | |
292 | case OP_NOTMINSTAR: | |
293 | case OP_NOTPLUS: | |
294 | case OP_NOTMINPLUS: | |
295 | case OP_NOTQUERY: | |
296 | case OP_NOTMINQUERY: | |
297 | if (isprint(c = code[1])) fprintf(f, " [^%c]", c); | |
298 | else fprintf(f, " [^\\x%02x]", c); | |
299 | fprintf(f, "%s", OP_names[*code]); | |
300 | break; | |
301 | ||
302 | case OP_NOTEXACT: | |
303 | case OP_NOTUPTO: | |
304 | case OP_NOTMINUPTO: | |
305 | if (isprint(c = code[3])) fprintf(f, " [^%c]{", c); | |
306 | else fprintf(f, " [^\\x%02x]{", c); | |
307 | if (*code != OP_NOTEXACT) fprintf(f, "0,"); | |
308 | fprintf(f, "%d}", GET2(code,1)); | |
309 | if (*code == OP_NOTMINUPTO) fprintf(f, "?"); | |
310 | break; | |
311 | ||
312 | case OP_RECURSE: | |
313 | fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]); | |
314 | break; | |
315 | ||
316 | case OP_REF: | |
317 | fprintf(f, " \\%d", GET2(code,1)); | |
318 | ccode = code + _pcre_OP_lengths[*code]; | |
319 | goto CLASS_REF_REPEAT; | |
320 | ||
321 | case OP_CALLOUT: | |
322 | fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2), | |
323 | GET(code, 2 + LINK_SIZE)); | |
324 | break; | |
325 | ||
326 | case OP_PROP: | |
327 | case OP_NOTPROP: | |
328 | fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1])); | |
329 | break; | |
330 | ||
331 | /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in | |
332 | having this code always here, and it makes it less messy without all those | |
333 | #ifdefs. */ | |
334 | ||
335 | case OP_CLASS: | |
336 | case OP_NCLASS: | |
337 | case OP_XCLASS: | |
338 | { | |
339 | int i, min, max; | |
340 | BOOL printmap; | |
341 | ||
342 | fprintf(f, " ["); | |
343 | ||
344 | if (*code == OP_XCLASS) | |
345 | { | |
346 | extra = GET(code, 1); | |
347 | ccode = code + LINK_SIZE + 1; | |
348 | printmap = (*ccode & XCL_MAP) != 0; | |
349 | if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^"); | |
350 | } | |
351 | else | |
352 | { | |
353 | printmap = TRUE; | |
354 | ccode = code + 1; | |
355 | } | |
356 | ||
357 | /* Print a bit map */ | |
358 | ||
359 | if (printmap) | |
360 | { | |
361 | for (i = 0; i < 256; i++) | |
362 | { | |
363 | if ((ccode[i/8] & (1 << (i&7))) != 0) | |
364 | { | |
365 | int j; | |
366 | for (j = i+1; j < 256; j++) | |
367 | if ((ccode[j/8] & (1 << (j&7))) == 0) break; | |
368 | if (i == '-' || i == ']') fprintf(f, "\\"); | |
369 | if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i); | |
370 | if (--j > i) | |
371 | { | |
372 | if (j != i + 1) fprintf(f, "-"); | |
373 | if (j == '-' || j == ']') fprintf(f, "\\"); | |
374 | if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j); | |
375 | } | |
376 | i = j; | |
377 | } | |
378 | } | |
379 | ccode += 32; | |
380 | } | |
381 | ||
382 | /* For an XCLASS there is always some additional data */ | |
383 | ||
384 | if (*code == OP_XCLASS) | |
385 | { | |
386 | int ch; | |
387 | while ((ch = *ccode++) != XCL_END) | |
388 | { | |
389 | if (ch == XCL_PROP) | |
390 | { | |
391 | fprintf(f, "\\p{%s}", get_ucpname(*ccode++)); | |
392 | } | |
393 | else if (ch == XCL_NOTPROP) | |
394 | { | |
395 | fprintf(f, "\\P{%s}", get_ucpname(*ccode++)); | |
396 | } | |
397 | else | |
398 | { | |
399 | ccode += 1 + print_char(f, ccode, TRUE); | |
400 | if (ch == XCL_RANGE) | |
401 | { | |
402 | fprintf(f, "-"); | |
403 | ccode += 1 + print_char(f, ccode, TRUE); | |
404 | } | |
405 | } | |
406 | } | |
407 | } | |
408 | ||
409 | /* Indicate a non-UTF8 class which was created by negation */ | |
410 | ||
411 | fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); | |
412 | ||
413 | /* Handle repeats after a class or a back reference */ | |
414 | ||
415 | CLASS_REF_REPEAT: | |
416 | switch(*ccode) | |
417 | { | |
418 | case OP_CRSTAR: | |
419 | case OP_CRMINSTAR: | |
420 | case OP_CRPLUS: | |
421 | case OP_CRMINPLUS: | |
422 | case OP_CRQUERY: | |
423 | case OP_CRMINQUERY: | |
424 | fprintf(f, "%s", OP_names[*ccode]); | |
425 | extra += _pcre_OP_lengths[*ccode]; | |
426 | break; | |
427 | ||
428 | case OP_CRRANGE: | |
429 | case OP_CRMINRANGE: | |
430 | min = GET2(ccode,1); | |
431 | max = GET2(ccode,3); | |
432 | if (max == 0) fprintf(f, "{%d,}", min); | |
433 | else fprintf(f, "{%d,%d}", min, max); | |
434 | if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); | |
435 | extra += _pcre_OP_lengths[*ccode]; | |
436 | break; | |
437 | } | |
438 | } | |
439 | break; | |
440 | ||
441 | /* Anything else is just an item with no data*/ | |
442 | ||
443 | default: | |
444 | fprintf(f, " %s", OP_names[*code]); | |
445 | break; | |
446 | } | |
447 | ||
448 | code += _pcre_OP_lengths[*code] + extra; | |
449 | fprintf(f, "\n"); | |
450 | } | |
451 | } | |
452 | ||
453 | /* End of pcre_printint.c */ |