Installed PCRE release 7.0.
[exim.git] / src / src / pcre / pcre_printint.src
1 /* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.2 2007/01/23 15:08:45 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
9
10 Written by Philip Hazel
11 Copyright (c) 1997-2005 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
45 local functions. This source file is used in two places:
46
47 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
48 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
49
50 (2) It is always #included by pcretest.c, which can be asked to print out a
51 compiled regex for debugging purposes. */
52
53
/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. This macro is used in pcretest as well as in this file.

The result is non-zero exactly when 32 <= c <= 126. The test is written as a
single unsigned comparison so that the argument is evaluated only once, which
makes the macro safe for arguments that have side effects. Values below 32,
including negative ones, wrap round to very large unsigned numbers when 32 is
subtracted, and so they fail the comparison. */

#define PRINTABLE(c) ((unsigned int)(c) - 32u < 95u)
60
61 /* The table of operator names. */
62
63 static const char *OP_names[] = { OP_NAME_LIST };
64
65
66
67 /*************************************************
68 * Print single- or multi-byte character *
69 *************************************************/
70
71 static int
72 print_char(FILE *f, uschar *ptr, BOOL utf8)
73 {
74 int c = *ptr;
75
76 if (!utf8 || (c & 0xc0) != 0xc0)
77 {
78 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
79 return 0;
80 }
81 else
82 {
83 int i;
84 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
85 int s = 6*a;
86 c = (c & _pcre_utf8_table3[a]) << s;
87 for (i = 1; i <= a; i++)
88 {
89 /* This is a check for malformed UTF-8; it should only occur if the sanity
90 check has been turned off. Rather than swallow random bytes, just stop if
91 we hit a bad one. Print it with \X instead of \x as an indication. */
92
93 if ((ptr[i] & 0xc0) != 0x80)
94 {
95 fprintf(f, "\\X{%x}", c);
96 return i - 1;
97 }
98
99 /* The byte is OK */
100
101 s -= 6;
102 c |= (ptr[i] & 0x3f) << s;
103 }
104 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
105 return a;
106 }
107 }
108
109
110
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Searches the _pcre_utt table for an entry whose type and value both match
the arguments, and returns the name held in that entry. When SUPPORT_UCP is
not defined there is no table to search, and the placeholder is always
returned.

Arguments:
  ptype     the property type to look for
  pvalue    the property value to look for

Returns:    the name from the matching table entry, or "??" if no entry
            matches or SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i;

  /* The search starts at the last entry of the table. This treats
  _pcre_utt_size as the number of entries, so the highest valid index is
  _pcre_utt_size - 1; starting at _pcre_utt_size itself would read one element
  beyond the end of the table. */

  for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
  return (i >= 0)? _pcre_utt[i].name : "??";
#else
  /* The arguments are not needed in this configuration. The casts mark them
  as deliberately unused without doing any arithmetic on them. */

  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
131
132
133
134 /*************************************************
135 * Print compiled regex *
136 *************************************************/
137
138 /* Make this function work for a regex with integers either byte order.
139 However, we assume that what we are passed is a compiled regex. */
140
141 static void
142 pcre_printint(pcre *external_re, FILE *f)
143 {
144 real_pcre *re = (real_pcre *)external_re;
145 uschar *codestart, *code;
146 BOOL utf8;
147
148 unsigned int options = re->options;
149 int offset = re->name_table_offset;
150 int count = re->name_count;
151 int size = re->name_entry_size;
152
153 if (re->magic_number != MAGIC_NUMBER)
154 {
155 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
156 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
157 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
158 options = ((options << 24) & 0xff000000) |
159 ((options << 8) & 0x00ff0000) |
160 ((options >> 8) & 0x0000ff00) |
161 ((options >> 24) & 0x000000ff);
162 }
163
164 code = codestart = (uschar *)re + offset + count * size;
165 utf8 = (options & PCRE_UTF8) != 0;
166
167 for(;;)
168 {
169 uschar *ccode;
170 int c;
171 int extra = 0;
172
173 fprintf(f, "%3d ", (int)(code - codestart));
174
175 switch(*code)
176 {
177 case OP_END:
178 fprintf(f, " %s\n", OP_names[*code]);
179 fprintf(f, "------------------------------------------------------------------\n");
180 return;
181
182 case OP_OPT:
183 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
184 break;
185
186 case OP_CHAR:
187 fprintf(f, " ");
188 do
189 {
190 code++;
191 code += 1 + print_char(f, code, utf8);
192 }
193 while (*code == OP_CHAR);
194 fprintf(f, "\n");
195 continue;
196
197 case OP_CHARNC:
198 fprintf(f, " NC ");
199 do
200 {
201 code++;
202 code += 1 + print_char(f, code, utf8);
203 }
204 while (*code == OP_CHARNC);
205 fprintf(f, "\n");
206 continue;
207
208 case OP_CBRA:
209 case OP_SCBRA:
210 fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
211 GET2(code, 1+LINK_SIZE));
212 break;
213
214 case OP_BRA:
215 case OP_SBRA:
216 case OP_KETRMAX:
217 case OP_KETRMIN:
218 case OP_ALT:
219 case OP_KET:
220 case OP_ASSERT:
221 case OP_ASSERT_NOT:
222 case OP_ASSERTBACK:
223 case OP_ASSERTBACK_NOT:
224 case OP_ONCE:
225 case OP_COND:
226 case OP_SCOND:
227 case OP_REVERSE:
228 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
229 break;
230
231 case OP_CREF:
232 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
233 break;
234
235 case OP_RREF:
236 c = GET2(code, 1);
237 if (c == RREF_ANY)
238 fprintf(f, " Cond recurse any");
239 else
240 fprintf(f, " Cond recurse %d", c);
241 break;
242
243 case OP_DEF:
244 fprintf(f, " Cond def");
245 break;
246
247 case OP_STAR:
248 case OP_MINSTAR:
249 case OP_POSSTAR:
250 case OP_PLUS:
251 case OP_MINPLUS:
252 case OP_POSPLUS:
253 case OP_QUERY:
254 case OP_MINQUERY:
255 case OP_POSQUERY:
256 case OP_TYPESTAR:
257 case OP_TYPEMINSTAR:
258 case OP_TYPEPOSSTAR:
259 case OP_TYPEPLUS:
260 case OP_TYPEMINPLUS:
261 case OP_TYPEPOSPLUS:
262 case OP_TYPEQUERY:
263 case OP_TYPEMINQUERY:
264 case OP_TYPEPOSQUERY:
265 fprintf(f, " ");
266 if (*code >= OP_TYPESTAR)
267 {
268 fprintf(f, "%s", OP_names[code[1]]);
269 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
270 {
271 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
272 extra = 2;
273 }
274 }
275 else extra = print_char(f, code+1, utf8);
276 fprintf(f, "%s", OP_names[*code]);
277 break;
278
279 case OP_EXACT:
280 case OP_UPTO:
281 case OP_MINUPTO:
282 case OP_POSUPTO:
283 fprintf(f, " ");
284 extra = print_char(f, code+3, utf8);
285 fprintf(f, "{");
286 if (*code != OP_EXACT) fprintf(f, "0,");
287 fprintf(f, "%d}", GET2(code,1));
288 if (*code == OP_MINUPTO) fprintf(f, "?");
289 else if (*code == OP_POSUPTO) fprintf(f, "+");
290 break;
291
292 case OP_TYPEEXACT:
293 case OP_TYPEUPTO:
294 case OP_TYPEMINUPTO:
295 case OP_TYPEPOSUPTO:
296 fprintf(f, " %s", OP_names[code[3]]);
297 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
298 {
299 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
300 extra = 2;
301 }
302 fprintf(f, "{");
303 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
304 fprintf(f, "%d}", GET2(code,1));
305 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
306 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
307 break;
308
309 case OP_NOT:
310 c = code[1];
311 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
312 else fprintf(f, " [^\\x%02x]", c);
313 break;
314
315 case OP_NOTSTAR:
316 case OP_NOTMINSTAR:
317 case OP_NOTPOSSTAR:
318 case OP_NOTPLUS:
319 case OP_NOTMINPLUS:
320 case OP_NOTPOSPLUS:
321 case OP_NOTQUERY:
322 case OP_NOTMINQUERY:
323 case OP_NOTPOSQUERY:
324 c = code[1];
325 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
326 else fprintf(f, " [^\\x%02x]", c);
327 fprintf(f, "%s", OP_names[*code]);
328 break;
329
330 case OP_NOTEXACT:
331 case OP_NOTUPTO:
332 case OP_NOTMINUPTO:
333 case OP_NOTPOSUPTO:
334 c = code[3];
335 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
336 else fprintf(f, " [^\\x%02x]{", c);
337 if (*code != OP_NOTEXACT) fprintf(f, "0,");
338 fprintf(f, "%d}", GET2(code,1));
339 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
340 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
341 break;
342
343 case OP_RECURSE:
344 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
345 break;
346
347 case OP_REF:
348 fprintf(f, " \\%d", GET2(code,1));
349 ccode = code + _pcre_OP_lengths[*code];
350 goto CLASS_REF_REPEAT;
351
352 case OP_CALLOUT:
353 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
354 GET(code, 2 + LINK_SIZE));
355 break;
356
357 case OP_PROP:
358 case OP_NOTPROP:
359 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
360 break;
361
362 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
363 having this code always here, and it makes it less messy without all those
364 #ifdefs. */
365
366 case OP_CLASS:
367 case OP_NCLASS:
368 case OP_XCLASS:
369 {
370 int i, min, max;
371 BOOL printmap;
372
373 fprintf(f, " [");
374
375 if (*code == OP_XCLASS)
376 {
377 extra = GET(code, 1);
378 ccode = code + LINK_SIZE + 1;
379 printmap = (*ccode & XCL_MAP) != 0;
380 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
381 }
382 else
383 {
384 printmap = TRUE;
385 ccode = code + 1;
386 }
387
388 /* Print a bit map */
389
390 if (printmap)
391 {
392 for (i = 0; i < 256; i++)
393 {
394 if ((ccode[i/8] & (1 << (i&7))) != 0)
395 {
396 int j;
397 for (j = i+1; j < 256; j++)
398 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
399 if (i == '-' || i == ']') fprintf(f, "\\");
400 if (PRINTABLE(i)) fprintf(f, "%c", i);
401 else fprintf(f, "\\x%02x", i);
402 if (--j > i)
403 {
404 if (j != i + 1) fprintf(f, "-");
405 if (j == '-' || j == ']') fprintf(f, "\\");
406 if (PRINTABLE(j)) fprintf(f, "%c", j);
407 else fprintf(f, "\\x%02x", j);
408 }
409 i = j;
410 }
411 }
412 ccode += 32;
413 }
414
415 /* For an XCLASS there is always some additional data */
416
417 if (*code == OP_XCLASS)
418 {
419 int ch;
420 while ((ch = *ccode++) != XCL_END)
421 {
422 if (ch == XCL_PROP)
423 {
424 int ptype = *ccode++;
425 int pvalue = *ccode++;
426 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
427 }
428 else if (ch == XCL_NOTPROP)
429 {
430 int ptype = *ccode++;
431 int pvalue = *ccode++;
432 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
433 }
434 else
435 {
436 ccode += 1 + print_char(f, ccode, TRUE);
437 if (ch == XCL_RANGE)
438 {
439 fprintf(f, "-");
440 ccode += 1 + print_char(f, ccode, TRUE);
441 }
442 }
443 }
444 }
445
446 /* Indicate a non-UTF8 class which was created by negation */
447
448 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
449
450 /* Handle repeats after a class or a back reference */
451
452 CLASS_REF_REPEAT:
453 switch(*ccode)
454 {
455 case OP_CRSTAR:
456 case OP_CRMINSTAR:
457 case OP_CRPLUS:
458 case OP_CRMINPLUS:
459 case OP_CRQUERY:
460 case OP_CRMINQUERY:
461 fprintf(f, "%s", OP_names[*ccode]);
462 extra += _pcre_OP_lengths[*ccode];
463 break;
464
465 case OP_CRRANGE:
466 case OP_CRMINRANGE:
467 min = GET2(ccode,1);
468 max = GET2(ccode,3);
469 if (max == 0) fprintf(f, "{%d,}", min);
470 else fprintf(f, "{%d,%d}", min, max);
471 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
472 extra += _pcre_OP_lengths[*ccode];
473 break;
474
475 /* Do nothing if it's not a repeat; this code stops picky compilers
476 warning about the lack of a default code path. */
477
478 default:
479 break;
480 }
481 }
482 break;
483
484 /* Anything else is just an item with no data*/
485
486 default:
487 fprintf(f, " %s", OP_names[*code]);
488 break;
489 }
490
491 code += _pcre_OP_lengths[*code] + extra;
492 fprintf(f, "\n");
493 }
494 }
495
496 /* End of pcre_printint.src */