Installed PCRE release 7.0.
[exim.git] / src / src / pcre / pcre_printint.src
CommitLineData
/* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.2 2007/01/23 15:08:45 ph10 Exp $ */

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2005 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:

(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.

(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */


/* Decide whether a character value is written out literally or as a
hexadecimal escape. isprint() is deliberately not used: its answer can differ
from system to system (even when locales are not involved), and the output has
to be the same everywhere so that it can be compared in tests. This macro is
used by pcretest as well as by this file. Note that the argument is evaluated
twice. */

#define PRINTABLE(c) ((c) > 31 && (c) < 127)

61/* The table of operator names. */
62
c86f6258
PH
63static const char *OP_names[] = { OP_NAME_LIST };
64
65
6bf342e1 66
c86f6258
PH
67/*************************************************
68* Print single- or multi-byte character *
69*************************************************/
70
c86f6258
PH
71static int
72print_char(FILE *f, uschar *ptr, BOOL utf8)
73{
74int c = *ptr;
75
76if (!utf8 || (c & 0xc0) != 0xc0)
77 {
6bf342e1 78 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
c86f6258
PH
79 return 0;
80 }
81else
82 {
83 int i;
aa41d2de 84 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
c86f6258 85 int s = 6*a;
aa41d2de 86 c = (c & _pcre_utf8_table3[a]) << s;
c86f6258
PH
87 for (i = 1; i <= a; i++)
88 {
89 /* This is a check for malformed UTF-8; it should only occur if the sanity
90 check has been turned off. Rather than swallow random bytes, just stop if
91 we hit a bad one. Print it with \X instead of \x as an indication. */
92
93 if ((ptr[i] & 0xc0) != 0x80)
94 {
95 fprintf(f, "\\X{%x}", c);
96 return i - 1;
97 }
98
99 /* The byte is OK */
100
101 s -= 6;
102 c |= (ptr[i] & 0x3f) << s;
103 }
104 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
105 return a;
106 }
107}
108
109
110
c86f6258
PH
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Look up the name of a Unicode property by searching the _pcre_utt table for
an entry whose type and value both match.

The search runs downwards from the last entry. Assuming _pcre_utt_size is the
number of entries in _pcre_utt, the last valid index is _pcre_utt_size - 1;
starting at _pcre_utt_size itself would read one element beyond the end of the
table.

Arguments:
  ptype       the property type
  pvalue      the property value

Returns:      the name from the matching table entry, or "??" if there is no
              match or if the code is compiled without SUPPORT_UCP
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* It gets harder and harder to shut off unwanted compiler warnings. Both
arguments are used here only so that they do not count as unused; the result
is "??" either way. */
ptype = ptype * pvalue;
return (ptype == pvalue)? "??" : "??";
#endif
}



134/*************************************************
135* Print compiled regex *
136*************************************************/
137
138/* Make this function work for a regex with integers either byte order.
139However, we assume that what we are passed is a compiled regex. */
140
141static void
aa41d2de 142pcre_printint(pcre *external_re, FILE *f)
c86f6258
PH
143{
144real_pcre *re = (real_pcre *)external_re;
145uschar *codestart, *code;
146BOOL utf8;
147
148unsigned int options = re->options;
149int offset = re->name_table_offset;
150int count = re->name_count;
151int size = re->name_entry_size;
152
153if (re->magic_number != MAGIC_NUMBER)
154 {
155 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
156 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
157 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
158 options = ((options << 24) & 0xff000000) |
159 ((options << 8) & 0x00ff0000) |
160 ((options >> 8) & 0x0000ff00) |
161 ((options >> 24) & 0x000000ff);
162 }
163
164code = codestart = (uschar *)re + offset + count * size;
165utf8 = (options & PCRE_UTF8) != 0;
166
167for(;;)
168 {
169 uschar *ccode;
170 int c;
171 int extra = 0;
172
173 fprintf(f, "%3d ", (int)(code - codestart));
174
c86f6258
PH
175 switch(*code)
176 {
177 case OP_END:
178 fprintf(f, " %s\n", OP_names[*code]);
179 fprintf(f, "------------------------------------------------------------------\n");
180 return;
181
182 case OP_OPT:
183 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
184 break;
185
186 case OP_CHAR:
aa41d2de
PH
187 fprintf(f, " ");
188 do
c86f6258 189 {
aa41d2de
PH
190 code++;
191 code += 1 + print_char(f, code, utf8);
c86f6258 192 }
aa41d2de
PH
193 while (*code == OP_CHAR);
194 fprintf(f, "\n");
195 continue;
c86f6258
PH
196
197 case OP_CHARNC:
aa41d2de
PH
198 fprintf(f, " NC ");
199 do
c86f6258 200 {
aa41d2de
PH
201 code++;
202 code += 1 + print_char(f, code, utf8);
c86f6258 203 }
aa41d2de
PH
204 while (*code == OP_CHARNC);
205 fprintf(f, "\n");
206 continue;
c86f6258 207
6bf342e1
PH
208 case OP_CBRA:
209 case OP_SCBRA:
210 fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
211 GET2(code, 1+LINK_SIZE));
212 break;
213
214 case OP_BRA:
215 case OP_SBRA:
c86f6258
PH
216 case OP_KETRMAX:
217 case OP_KETRMIN:
218 case OP_ALT:
219 case OP_KET:
220 case OP_ASSERT:
221 case OP_ASSERT_NOT:
222 case OP_ASSERTBACK:
223 case OP_ASSERTBACK_NOT:
224 case OP_ONCE:
225 case OP_COND:
6bf342e1 226 case OP_SCOND:
c86f6258
PH
227 case OP_REVERSE:
228 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
229 break;
230
6bf342e1
PH
231 case OP_CREF:
232 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
c86f6258
PH
233 break;
234
6bf342e1
PH
235 case OP_RREF:
236 c = GET2(code, 1);
237 if (c == RREF_ANY)
238 fprintf(f, " Cond recurse any");
c86f6258 239 else
6bf342e1
PH
240 fprintf(f, " Cond recurse %d", c);
241 break;
242
243 case OP_DEF:
244 fprintf(f, " Cond def");
c86f6258
PH
245 break;
246
247 case OP_STAR:
248 case OP_MINSTAR:
6bf342e1 249 case OP_POSSTAR:
c86f6258
PH
250 case OP_PLUS:
251 case OP_MINPLUS:
6bf342e1 252 case OP_POSPLUS:
c86f6258
PH
253 case OP_QUERY:
254 case OP_MINQUERY:
6bf342e1 255 case OP_POSQUERY:
c86f6258
PH
256 case OP_TYPESTAR:
257 case OP_TYPEMINSTAR:
6bf342e1 258 case OP_TYPEPOSSTAR:
c86f6258
PH
259 case OP_TYPEPLUS:
260 case OP_TYPEMINPLUS:
6bf342e1 261 case OP_TYPEPOSPLUS:
c86f6258
PH
262 case OP_TYPEQUERY:
263 case OP_TYPEMINQUERY:
6bf342e1 264 case OP_TYPEPOSQUERY:
c86f6258
PH
265 fprintf(f, " ");
266 if (*code >= OP_TYPESTAR)
267 {
268 fprintf(f, "%s", OP_names[code[1]]);
269 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
270 {
aa41d2de
PH
271 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
272 extra = 2;
c86f6258
PH
273 }
274 }
275 else extra = print_char(f, code+1, utf8);
276 fprintf(f, "%s", OP_names[*code]);
277 break;
278
279 case OP_EXACT:
280 case OP_UPTO:
281 case OP_MINUPTO:
6bf342e1 282 case OP_POSUPTO:
c86f6258
PH
283 fprintf(f, " ");
284 extra = print_char(f, code+3, utf8);
285 fprintf(f, "{");
6bf342e1 286 if (*code != OP_EXACT) fprintf(f, "0,");
c86f6258
PH
287 fprintf(f, "%d}", GET2(code,1));
288 if (*code == OP_MINUPTO) fprintf(f, "?");
6bf342e1 289 else if (*code == OP_POSUPTO) fprintf(f, "+");
c86f6258
PH
290 break;
291
292 case OP_TYPEEXACT:
293 case OP_TYPEUPTO:
294 case OP_TYPEMINUPTO:
6bf342e1 295 case OP_TYPEPOSUPTO:
c86f6258
PH
296 fprintf(f, " %s", OP_names[code[3]]);
297 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
298 {
aa41d2de
PH
299 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
300 extra = 2;
c86f6258
PH
301 }
302 fprintf(f, "{");
303 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
304 fprintf(f, "%d}", GET2(code,1));
305 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
6bf342e1 306 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
c86f6258
PH
307 break;
308
309 case OP_NOT:
6bf342e1
PH
310 c = code[1];
311 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
c86f6258
PH
312 else fprintf(f, " [^\\x%02x]", c);
313 break;
314
315 case OP_NOTSTAR:
316 case OP_NOTMINSTAR:
6bf342e1 317 case OP_NOTPOSSTAR:
c86f6258
PH
318 case OP_NOTPLUS:
319 case OP_NOTMINPLUS:
6bf342e1 320 case OP_NOTPOSPLUS:
c86f6258
PH
321 case OP_NOTQUERY:
322 case OP_NOTMINQUERY:
6bf342e1
PH
323 case OP_NOTPOSQUERY:
324 c = code[1];
325 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
c86f6258
PH
326 else fprintf(f, " [^\\x%02x]", c);
327 fprintf(f, "%s", OP_names[*code]);
328 break;
329
330 case OP_NOTEXACT:
331 case OP_NOTUPTO:
332 case OP_NOTMINUPTO:
6bf342e1
PH
333 case OP_NOTPOSUPTO:
334 c = code[3];
335 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
c86f6258 336 else fprintf(f, " [^\\x%02x]{", c);
aa41d2de 337 if (*code != OP_NOTEXACT) fprintf(f, "0,");
c86f6258
PH
338 fprintf(f, "%d}", GET2(code,1));
339 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
6bf342e1 340 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
c86f6258
PH
341 break;
342
343 case OP_RECURSE:
344 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
345 break;
346
347 case OP_REF:
348 fprintf(f, " \\%d", GET2(code,1));
aa41d2de 349 ccode = code + _pcre_OP_lengths[*code];
c86f6258
PH
350 goto CLASS_REF_REPEAT;
351
352 case OP_CALLOUT:
353 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
354 GET(code, 2 + LINK_SIZE));
355 break;
356
357 case OP_PROP:
358 case OP_NOTPROP:
aa41d2de 359 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
c86f6258
PH
360 break;
361
362 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
363 having this code always here, and it makes it less messy without all those
364 #ifdefs. */
365
366 case OP_CLASS:
367 case OP_NCLASS:
368 case OP_XCLASS:
369 {
370 int i, min, max;
371 BOOL printmap;
372
373 fprintf(f, " [");
374
375 if (*code == OP_XCLASS)
376 {
377 extra = GET(code, 1);
378 ccode = code + LINK_SIZE + 1;
379 printmap = (*ccode & XCL_MAP) != 0;
380 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
381 }
382 else
383 {
384 printmap = TRUE;
385 ccode = code + 1;
386 }
387
388 /* Print a bit map */
389
390 if (printmap)
391 {
392 for (i = 0; i < 256; i++)
393 {
394 if ((ccode[i/8] & (1 << (i&7))) != 0)
395 {
396 int j;
397 for (j = i+1; j < 256; j++)
398 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
399 if (i == '-' || i == ']') fprintf(f, "\\");
6bf342e1
PH
400 if (PRINTABLE(i)) fprintf(f, "%c", i);
401 else fprintf(f, "\\x%02x", i);
c86f6258
PH
402 if (--j > i)
403 {
404 if (j != i + 1) fprintf(f, "-");
405 if (j == '-' || j == ']') fprintf(f, "\\");
6bf342e1
PH
406 if (PRINTABLE(j)) fprintf(f, "%c", j);
407 else fprintf(f, "\\x%02x", j);
c86f6258
PH
408 }
409 i = j;
410 }
411 }
412 ccode += 32;
413 }
414
415 /* For an XCLASS there is always some additional data */
416
417 if (*code == OP_XCLASS)
418 {
419 int ch;
420 while ((ch = *ccode++) != XCL_END)
421 {
422 if (ch == XCL_PROP)
423 {
aa41d2de
PH
424 int ptype = *ccode++;
425 int pvalue = *ccode++;
426 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
c86f6258
PH
427 }
428 else if (ch == XCL_NOTPROP)
429 {
aa41d2de
PH
430 int ptype = *ccode++;
431 int pvalue = *ccode++;
432 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
c86f6258
PH
433 }
434 else
435 {
436 ccode += 1 + print_char(f, ccode, TRUE);
437 if (ch == XCL_RANGE)
438 {
439 fprintf(f, "-");
440 ccode += 1 + print_char(f, ccode, TRUE);
441 }
442 }
443 }
444 }
445
446 /* Indicate a non-UTF8 class which was created by negation */
447
448 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
449
450 /* Handle repeats after a class or a back reference */
451
452 CLASS_REF_REPEAT:
453 switch(*ccode)
454 {
455 case OP_CRSTAR:
456 case OP_CRMINSTAR:
457 case OP_CRPLUS:
458 case OP_CRMINPLUS:
459 case OP_CRQUERY:
460 case OP_CRMINQUERY:
461 fprintf(f, "%s", OP_names[*ccode]);
aa41d2de 462 extra += _pcre_OP_lengths[*ccode];
c86f6258
PH
463 break;
464
465 case OP_CRRANGE:
466 case OP_CRMINRANGE:
467 min = GET2(ccode,1);
468 max = GET2(ccode,3);
469 if (max == 0) fprintf(f, "{%d,}", min);
470 else fprintf(f, "{%d,%d}", min, max);
471 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
aa41d2de
PH
472 extra += _pcre_OP_lengths[*ccode];
473 break;
474
475 /* Do nothing if it's not a repeat; this code stops picky compilers
476 warning about the lack of a default code path. */
477
478 default:
c86f6258
PH
479 break;
480 }
481 }
482 break;
483
484 /* Anything else is just an item with no data*/
485
486 default:
487 fprintf(f, " %s", OP_names[*code]);
488 break;
489 }
490
aa41d2de 491 code += _pcre_OP_lengths[*code] + extra;
c86f6258
PH
492 fprintf(f, "\n");
493 }
494}
495
/* End of pcre_printint.src */