Installed PCRE 7.2 into Exim.
[exim.git] / src / src / pcre / pcre_printint.src
1 /* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.3 2007/06/26 11:16:54 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
9
10 Written by Philip Hazel
11 Copyright (c) 1997-2007 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
45 local functions. This source file is used in two places:
46
47 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
48 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
49
50 (2) It is always #included by pcretest.c, which can be asked to print out a
51 compiled regex for debugging purposes. */
52
53
/* Decide whether a character value is written out literally or as a
hexadecimal escape. isprint() is deliberately not used: its answer can differ
between systems (even when locales are not involved), and the output has to be
identical everywhere so that tests can compare it. pcretest uses this macro as
well as this file. Note that the argument is evaluated twice. */

#define PRINTABLE(c) ((c) > 31 && (c) <= 126)
60
61 /* The table of operator names. */
62
63 static const char *OP_names[] = { OP_NAME_LIST };
64
65
66
67 /*************************************************
68 * Print single- or multi-byte character *
69 *************************************************/
70
71 static int
72 print_char(FILE *f, uschar *ptr, BOOL utf8)
73 {
74 int c = *ptr;
75
76 #ifndef SUPPORT_UTF8
77 utf8 = utf8; /* Avoid compiler warning */
78 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
79 return 0;
80
81 #else
82 if (!utf8 || (c & 0xc0) != 0xc0)
83 {
84 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
85 return 0;
86 }
87 else
88 {
89 int i;
90 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
91 int s = 6*a;
92 c = (c & _pcre_utf8_table3[a]) << s;
93 for (i = 1; i <= a; i++)
94 {
95 /* This is a check for malformed UTF-8; it should only occur if the sanity
96 check has been turned off. Rather than swallow random bytes, just stop if
97 we hit a bad one. Print it with \X instead of \x as an indication. */
98
99 if ((ptr[i] & 0xc0) != 0x80)
100 {
101 fprintf(f, "\\X{%x}", c);
102 return i - 1;
103 }
104
105 /* The byte is OK */
106
107 s -= 6;
108 c |= (ptr[i] & 0x3f) << s;
109 }
110 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
111 return a;
112 }
113 #endif
114 }
115
116
117
118 /*************************************************
119 * Find Unicode property name *
120 *************************************************/
121
/* Find the name of a Unicode property, given its type and value.

Arguments:
  ptype     the property type
  pvalue    the property value

When SUPPORT_UCP is defined, the _pcre_utt table is searched from its last
entry down to its first for one whose type and value both match. When it is
not defined there is no table to search.

Returns:  the matching entry's name, or "??" if there is no match (always
          "??" when SUPPORT_UCP is not defined) */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i;

  /* NOTE(review): this assumes _pcre_utt_size is the number of entries in
  _pcre_utt, so that the last valid index is _pcre_utt_size - 1 - confirm
  against its definition. Starting the scan at _pcre_utt_size itself would
  read one element beyond the end of the table. */

  for (i = _pcre_utt_size - 1; i >= 0; i--) {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) {
      return _pcre_utt[i].name;
    }
  }
  return "??";
#else
  /* No property table is available; just keep the compiler quiet about the
  unused arguments. */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
138
139
140
141 /*************************************************
142 * Print compiled regex *
143 *************************************************/
144
/* Make this function work for a regex with integers in either byte order.
However, we assume that what we are passed is a compiled regex. The
print_lengths flag controls whether offsets and lengths of items are printed.
They can be turned off from pcretest so that automatic tests on bytecode can be
written that do not depend on the value of LINK_SIZE. */
150
151 static void
152 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
153 {
154 real_pcre *re = (real_pcre *)external_re;
155 uschar *codestart, *code;
156 BOOL utf8;
157
158 unsigned int options = re->options;
159 int offset = re->name_table_offset;
160 int count = re->name_count;
161 int size = re->name_entry_size;
162
163 if (re->magic_number != MAGIC_NUMBER)
164 {
165 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
166 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
167 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
168 options = ((options << 24) & 0xff000000) |
169 ((options << 8) & 0x00ff0000) |
170 ((options >> 8) & 0x0000ff00) |
171 ((options >> 24) & 0x000000ff);
172 }
173
174 code = codestart = (uschar *)re + offset + count * size;
175 utf8 = (options & PCRE_UTF8) != 0;
176
177 for(;;)
178 {
179 uschar *ccode;
180 int c;
181 int extra = 0;
182
183 if (print_lengths)
184 fprintf(f, "%3d ", (int)(code - codestart));
185 else
186 fprintf(f, " ");
187
188 switch(*code)
189 {
190 case OP_END:
191 fprintf(f, " %s\n", OP_names[*code]);
192 fprintf(f, "------------------------------------------------------------------\n");
193 return;
194
195 case OP_OPT:
196 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
197 break;
198
199 case OP_CHAR:
200 fprintf(f, " ");
201 do
202 {
203 code++;
204 code += 1 + print_char(f, code, utf8);
205 }
206 while (*code == OP_CHAR);
207 fprintf(f, "\n");
208 continue;
209
210 case OP_CHARNC:
211 fprintf(f, " NC ");
212 do
213 {
214 code++;
215 code += 1 + print_char(f, code, utf8);
216 }
217 while (*code == OP_CHARNC);
218 fprintf(f, "\n");
219 continue;
220
221 case OP_CBRA:
222 case OP_SCBRA:
223 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
224 else fprintf(f, " ");
225 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
226 break;
227
228 case OP_BRA:
229 case OP_SBRA:
230 case OP_KETRMAX:
231 case OP_KETRMIN:
232 case OP_ALT:
233 case OP_KET:
234 case OP_ASSERT:
235 case OP_ASSERT_NOT:
236 case OP_ASSERTBACK:
237 case OP_ASSERTBACK_NOT:
238 case OP_ONCE:
239 case OP_COND:
240 case OP_SCOND:
241 case OP_REVERSE:
242 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
243 else fprintf(f, " ");
244 fprintf(f, "%s", OP_names[*code]);
245 break;
246
247 case OP_CREF:
248 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
249 break;
250
251 case OP_RREF:
252 c = GET2(code, 1);
253 if (c == RREF_ANY)
254 fprintf(f, " Cond recurse any");
255 else
256 fprintf(f, " Cond recurse %d", c);
257 break;
258
259 case OP_DEF:
260 fprintf(f, " Cond def");
261 break;
262
263 case OP_STAR:
264 case OP_MINSTAR:
265 case OP_POSSTAR:
266 case OP_PLUS:
267 case OP_MINPLUS:
268 case OP_POSPLUS:
269 case OP_QUERY:
270 case OP_MINQUERY:
271 case OP_POSQUERY:
272 case OP_TYPESTAR:
273 case OP_TYPEMINSTAR:
274 case OP_TYPEPOSSTAR:
275 case OP_TYPEPLUS:
276 case OP_TYPEMINPLUS:
277 case OP_TYPEPOSPLUS:
278 case OP_TYPEQUERY:
279 case OP_TYPEMINQUERY:
280 case OP_TYPEPOSQUERY:
281 fprintf(f, " ");
282 if (*code >= OP_TYPESTAR)
283 {
284 fprintf(f, "%s", OP_names[code[1]]);
285 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
286 {
287 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
288 extra = 2;
289 }
290 }
291 else extra = print_char(f, code+1, utf8);
292 fprintf(f, "%s", OP_names[*code]);
293 break;
294
295 case OP_EXACT:
296 case OP_UPTO:
297 case OP_MINUPTO:
298 case OP_POSUPTO:
299 fprintf(f, " ");
300 extra = print_char(f, code+3, utf8);
301 fprintf(f, "{");
302 if (*code != OP_EXACT) fprintf(f, "0,");
303 fprintf(f, "%d}", GET2(code,1));
304 if (*code == OP_MINUPTO) fprintf(f, "?");
305 else if (*code == OP_POSUPTO) fprintf(f, "+");
306 break;
307
308 case OP_TYPEEXACT:
309 case OP_TYPEUPTO:
310 case OP_TYPEMINUPTO:
311 case OP_TYPEPOSUPTO:
312 fprintf(f, " %s", OP_names[code[3]]);
313 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
314 {
315 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
316 extra = 2;
317 }
318 fprintf(f, "{");
319 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
320 fprintf(f, "%d}", GET2(code,1));
321 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
322 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
323 break;
324
325 case OP_NOT:
326 c = code[1];
327 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
328 else fprintf(f, " [^\\x%02x]", c);
329 break;
330
331 case OP_NOTSTAR:
332 case OP_NOTMINSTAR:
333 case OP_NOTPOSSTAR:
334 case OP_NOTPLUS:
335 case OP_NOTMINPLUS:
336 case OP_NOTPOSPLUS:
337 case OP_NOTQUERY:
338 case OP_NOTMINQUERY:
339 case OP_NOTPOSQUERY:
340 c = code[1];
341 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
342 else fprintf(f, " [^\\x%02x]", c);
343 fprintf(f, "%s", OP_names[*code]);
344 break;
345
346 case OP_NOTEXACT:
347 case OP_NOTUPTO:
348 case OP_NOTMINUPTO:
349 case OP_NOTPOSUPTO:
350 c = code[3];
351 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
352 else fprintf(f, " [^\\x%02x]{", c);
353 if (*code != OP_NOTEXACT) fprintf(f, "0,");
354 fprintf(f, "%d}", GET2(code,1));
355 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
356 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
357 break;
358
359 case OP_RECURSE:
360 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
361 else fprintf(f, " ");
362 fprintf(f, "%s", OP_names[*code]);
363 break;
364
365 case OP_REF:
366 fprintf(f, " \\%d", GET2(code,1));
367 ccode = code + _pcre_OP_lengths[*code];
368 goto CLASS_REF_REPEAT;
369
370 case OP_CALLOUT:
371 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
372 GET(code, 2 + LINK_SIZE));
373 break;
374
375 case OP_PROP:
376 case OP_NOTPROP:
377 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
378 break;
379
380 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
381 having this code always here, and it makes it less messy without all those
382 #ifdefs. */
383
384 case OP_CLASS:
385 case OP_NCLASS:
386 case OP_XCLASS:
387 {
388 int i, min, max;
389 BOOL printmap;
390
391 fprintf(f, " [");
392
393 if (*code == OP_XCLASS)
394 {
395 extra = GET(code, 1);
396 ccode = code + LINK_SIZE + 1;
397 printmap = (*ccode & XCL_MAP) != 0;
398 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
399 }
400 else
401 {
402 printmap = TRUE;
403 ccode = code + 1;
404 }
405
406 /* Print a bit map */
407
408 if (printmap)
409 {
410 for (i = 0; i < 256; i++)
411 {
412 if ((ccode[i/8] & (1 << (i&7))) != 0)
413 {
414 int j;
415 for (j = i+1; j < 256; j++)
416 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
417 if (i == '-' || i == ']') fprintf(f, "\\");
418 if (PRINTABLE(i)) fprintf(f, "%c", i);
419 else fprintf(f, "\\x%02x", i);
420 if (--j > i)
421 {
422 if (j != i + 1) fprintf(f, "-");
423 if (j == '-' || j == ']') fprintf(f, "\\");
424 if (PRINTABLE(j)) fprintf(f, "%c", j);
425 else fprintf(f, "\\x%02x", j);
426 }
427 i = j;
428 }
429 }
430 ccode += 32;
431 }
432
433 /* For an XCLASS there is always some additional data */
434
435 if (*code == OP_XCLASS)
436 {
437 int ch;
438 while ((ch = *ccode++) != XCL_END)
439 {
440 if (ch == XCL_PROP)
441 {
442 int ptype = *ccode++;
443 int pvalue = *ccode++;
444 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
445 }
446 else if (ch == XCL_NOTPROP)
447 {
448 int ptype = *ccode++;
449 int pvalue = *ccode++;
450 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
451 }
452 else
453 {
454 ccode += 1 + print_char(f, ccode, TRUE);
455 if (ch == XCL_RANGE)
456 {
457 fprintf(f, "-");
458 ccode += 1 + print_char(f, ccode, TRUE);
459 }
460 }
461 }
462 }
463
464 /* Indicate a non-UTF8 class which was created by negation */
465
466 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
467
468 /* Handle repeats after a class or a back reference */
469
470 CLASS_REF_REPEAT:
471 switch(*ccode)
472 {
473 case OP_CRSTAR:
474 case OP_CRMINSTAR:
475 case OP_CRPLUS:
476 case OP_CRMINPLUS:
477 case OP_CRQUERY:
478 case OP_CRMINQUERY:
479 fprintf(f, "%s", OP_names[*ccode]);
480 extra += _pcre_OP_lengths[*ccode];
481 break;
482
483 case OP_CRRANGE:
484 case OP_CRMINRANGE:
485 min = GET2(ccode,1);
486 max = GET2(ccode,3);
487 if (max == 0) fprintf(f, "{%d,}", min);
488 else fprintf(f, "{%d,%d}", min, max);
489 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
490 extra += _pcre_OP_lengths[*ccode];
491 break;
492
493 /* Do nothing if it's not a repeat; this code stops picky compilers
494 warning about the lack of a default code path. */
495
496 default:
497 break;
498 }
499 }
500 break;
501
502 /* Anything else is just an item with no data*/
503
504 default:
505 fprintf(f, " %s", OP_names[*code]);
506 break;
507 }
508
509 code += _pcre_OP_lengths[*code] + extra;
510 fprintf(f, "\n");
511 }
512 }
513
514 /* End of pcre_printint.src */