Oops, I introduced a bug in ${prvstest changes, shown up by building (new)
[exim.git] / src / src / pcre / pcre_printint.c
1 /* $Cambridge: exim/src/src/pcre/pcre_printint.c,v 1.2 2005/08/08 10:22:14 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
9
10 Written by Philip Hazel
11 Copyright (c) 1997-2005 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
45 local functions. */
46
47
48 #include "pcre_internal.h"
49
50
51 static const char *OP_names[] = { OP_NAME_LIST };
52
53
54 /*************************************************
55 * Print single- or multi-byte character *
56 *************************************************/
57
58 static int
59 print_char(FILE *f, uschar *ptr, BOOL utf8)
60 {
61 int c = *ptr;
62
63 if (!utf8 || (c & 0xc0) != 0xc0)
64 {
65 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
66 return 0;
67 }
68 else
69 {
70 int i;
71 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
72 int s = 6*a;
73 c = (c & _pcre_utf8_table3[a]) << s;
74 for (i = 1; i <= a; i++)
75 {
76 /* This is a check for malformed UTF-8; it should only occur if the sanity
77 check has been turned off. Rather than swallow random bytes, just stop if
78 we hit a bad one. Print it with \X instead of \x as an indication. */
79
80 if ((ptr[i] & 0xc0) != 0x80)
81 {
82 fprintf(f, "\\X{%x}", c);
83 return i - 1;
84 }
85
86 /* The byte is OK */
87
88 s -= 6;
89 c |= (ptr[i] & 0x3f) << s;
90 }
91 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
92 return a;
93 }
94 }
95
96
97
98 /*************************************************
99 * Find Unicode property name *
100 *************************************************/
101
/* Return the name of a Unicode property, given its numeric value.

When SUPPORT_UCP is defined, the _pcre_utt table is searched from its last
entry down to its first for an entry whose value field equals the argument.
The search starts at index _pcre_utt_size - 1; starting at _pcre_utt_size
would read one element beyond the end of the table.

Argument:  property   the property value to look up
Returns:   the name from the matching table entry, or "??" if there is no
           match or if SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (property == _pcre_utt[i].value) return _pcre_utt[i].name;
  }
return "??";
#else
(void)property;   /* Not used without UCP support; avoid a compiler warning */
return "??";
#endif
}
116
117
118
119 /*************************************************
120 * Print compiled regex *
121 *************************************************/
122
123 /* Make this function work for a regex with integers either byte order.
124 However, we assume that what we are passed is a compiled regex. */
125
126 EXPORT void
127 _pcre_printint(pcre *external_re, FILE *f)
128 {
129 real_pcre *re = (real_pcre *)external_re;
130 uschar *codestart, *code;
131 BOOL utf8;
132
133 unsigned int options = re->options;
134 int offset = re->name_table_offset;
135 int count = re->name_count;
136 int size = re->name_entry_size;
137
138 if (re->magic_number != MAGIC_NUMBER)
139 {
140 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
141 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
142 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
143 options = ((options << 24) & 0xff000000) |
144 ((options << 8) & 0x00ff0000) |
145 ((options >> 8) & 0x0000ff00) |
146 ((options >> 24) & 0x000000ff);
147 }
148
149 code = codestart = (uschar *)re + offset + count * size;
150 utf8 = (options & PCRE_UTF8) != 0;
151
152 for(;;)
153 {
154 uschar *ccode;
155 int c;
156 int extra = 0;
157
158 fprintf(f, "%3d ", (int)(code - codestart));
159
160 if (*code >= OP_BRA)
161 {
162 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
163 fprintf(f, "%3d Bra extra\n", GET(code, 1));
164 else
165 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
166 code += _pcre_OP_lengths[OP_BRA];
167 continue;
168 }
169
170 switch(*code)
171 {
172 case OP_END:
173 fprintf(f, " %s\n", OP_names[*code]);
174 fprintf(f, "------------------------------------------------------------------\n");
175 return;
176
177 case OP_OPT:
178 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
179 break;
180
181 case OP_CHAR:
182 {
183 fprintf(f, " ");
184 do
185 {
186 code++;
187 code += 1 + print_char(f, code, utf8);
188 }
189 while (*code == OP_CHAR);
190 fprintf(f, "\n");
191 continue;
192 }
193 break;
194
195 case OP_CHARNC:
196 {
197 fprintf(f, " NC ");
198 do
199 {
200 code++;
201 code += 1 + print_char(f, code, utf8);
202 }
203 while (*code == OP_CHARNC);
204 fprintf(f, "\n");
205 continue;
206 }
207 break;
208
209 case OP_KETRMAX:
210 case OP_KETRMIN:
211 case OP_ALT:
212 case OP_KET:
213 case OP_ASSERT:
214 case OP_ASSERT_NOT:
215 case OP_ASSERTBACK:
216 case OP_ASSERTBACK_NOT:
217 case OP_ONCE:
218 case OP_COND:
219 case OP_REVERSE:
220 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
221 break;
222
223 case OP_BRANUMBER:
224 printf("%3d %s", GET2(code, 1), OP_names[*code]);
225 break;
226
227 case OP_CREF:
228 if (GET2(code, 1) == CREF_RECURSE)
229 fprintf(f, " Cond recurse");
230 else
231 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
232 break;
233
234 case OP_STAR:
235 case OP_MINSTAR:
236 case OP_PLUS:
237 case OP_MINPLUS:
238 case OP_QUERY:
239 case OP_MINQUERY:
240 case OP_TYPESTAR:
241 case OP_TYPEMINSTAR:
242 case OP_TYPEPLUS:
243 case OP_TYPEMINPLUS:
244 case OP_TYPEQUERY:
245 case OP_TYPEMINQUERY:
246 fprintf(f, " ");
247 if (*code >= OP_TYPESTAR)
248 {
249 fprintf(f, "%s", OP_names[code[1]]);
250 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
251 {
252 fprintf(f, " %s ", get_ucpname(code[2]));
253 extra = 1;
254 }
255 }
256 else extra = print_char(f, code+1, utf8);
257 fprintf(f, "%s", OP_names[*code]);
258 break;
259
260 case OP_EXACT:
261 case OP_UPTO:
262 case OP_MINUPTO:
263 fprintf(f, " ");
264 extra = print_char(f, code+3, utf8);
265 fprintf(f, "{");
266 if (*code != OP_EXACT) fprintf(f, ",");
267 fprintf(f, "%d}", GET2(code,1));
268 if (*code == OP_MINUPTO) fprintf(f, "?");
269 break;
270
271 case OP_TYPEEXACT:
272 case OP_TYPEUPTO:
273 case OP_TYPEMINUPTO:
274 fprintf(f, " %s", OP_names[code[3]]);
275 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
276 {
277 fprintf(f, " %s ", get_ucpname(code[4]));
278 extra = 1;
279 }
280 fprintf(f, "{");
281 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
282 fprintf(f, "%d}", GET2(code,1));
283 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
284 break;
285
286 case OP_NOT:
287 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
288 else fprintf(f, " [^\\x%02x]", c);
289 break;
290
291 case OP_NOTSTAR:
292 case OP_NOTMINSTAR:
293 case OP_NOTPLUS:
294 case OP_NOTMINPLUS:
295 case OP_NOTQUERY:
296 case OP_NOTMINQUERY:
297 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
298 else fprintf(f, " [^\\x%02x]", c);
299 fprintf(f, "%s", OP_names[*code]);
300 break;
301
302 case OP_NOTEXACT:
303 case OP_NOTUPTO:
304 case OP_NOTMINUPTO:
305 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
306 else fprintf(f, " [^\\x%02x]{", c);
307 if (*code != OP_NOTEXACT) fprintf(f, "0,");
308 fprintf(f, "%d}", GET2(code,1));
309 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
310 break;
311
312 case OP_RECURSE:
313 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
314 break;
315
316 case OP_REF:
317 fprintf(f, " \\%d", GET2(code,1));
318 ccode = code + _pcre_OP_lengths[*code];
319 goto CLASS_REF_REPEAT;
320
321 case OP_CALLOUT:
322 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
323 GET(code, 2 + LINK_SIZE));
324 break;
325
326 case OP_PROP:
327 case OP_NOTPROP:
328 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
329 break;
330
331 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
332 having this code always here, and it makes it less messy without all those
333 #ifdefs. */
334
335 case OP_CLASS:
336 case OP_NCLASS:
337 case OP_XCLASS:
338 {
339 int i, min, max;
340 BOOL printmap;
341
342 fprintf(f, " [");
343
344 if (*code == OP_XCLASS)
345 {
346 extra = GET(code, 1);
347 ccode = code + LINK_SIZE + 1;
348 printmap = (*ccode & XCL_MAP) != 0;
349 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
350 }
351 else
352 {
353 printmap = TRUE;
354 ccode = code + 1;
355 }
356
357 /* Print a bit map */
358
359 if (printmap)
360 {
361 for (i = 0; i < 256; i++)
362 {
363 if ((ccode[i/8] & (1 << (i&7))) != 0)
364 {
365 int j;
366 for (j = i+1; j < 256; j++)
367 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
368 if (i == '-' || i == ']') fprintf(f, "\\");
369 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
370 if (--j > i)
371 {
372 if (j != i + 1) fprintf(f, "-");
373 if (j == '-' || j == ']') fprintf(f, "\\");
374 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
375 }
376 i = j;
377 }
378 }
379 ccode += 32;
380 }
381
382 /* For an XCLASS there is always some additional data */
383
384 if (*code == OP_XCLASS)
385 {
386 int ch;
387 while ((ch = *ccode++) != XCL_END)
388 {
389 if (ch == XCL_PROP)
390 {
391 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
392 }
393 else if (ch == XCL_NOTPROP)
394 {
395 fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
396 }
397 else
398 {
399 ccode += 1 + print_char(f, ccode, TRUE);
400 if (ch == XCL_RANGE)
401 {
402 fprintf(f, "-");
403 ccode += 1 + print_char(f, ccode, TRUE);
404 }
405 }
406 }
407 }
408
409 /* Indicate a non-UTF8 class which was created by negation */
410
411 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
412
413 /* Handle repeats after a class or a back reference */
414
415 CLASS_REF_REPEAT:
416 switch(*ccode)
417 {
418 case OP_CRSTAR:
419 case OP_CRMINSTAR:
420 case OP_CRPLUS:
421 case OP_CRMINPLUS:
422 case OP_CRQUERY:
423 case OP_CRMINQUERY:
424 fprintf(f, "%s", OP_names[*ccode]);
425 extra += _pcre_OP_lengths[*ccode];
426 break;
427
428 case OP_CRRANGE:
429 case OP_CRMINRANGE:
430 min = GET2(ccode,1);
431 max = GET2(ccode,3);
432 if (max == 0) fprintf(f, "{%d,}", min);
433 else fprintf(f, "{%d,%d}", min, max);
434 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
435 extra += _pcre_OP_lengths[*ccode];
436 break;
437 }
438 }
439 break;
440
441 /* Anything else is just an item with no data*/
442
443 default:
444 fprintf(f, " %s", OP_names[*code]);
445 break;
446 }
447
448 code += _pcre_OP_lengths[*code] + extra;
449 fprintf(f, "\n");
450 }
451 }
452
453 /* End of pcre_printint.c */