Make -R and -S do something sensible when combined with -q<time>.
[exim.git] / src / src / pcre / printint.c
1 /* $Cambridge: exim/src/src/pcre/printint.c,v 1.2 2005/06/15 08:57:10 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /*
8 This is a library of functions to support regular expressions whose syntax
9 and semantics are as close as possible to those of the Perl 5 language. See
10 the file Tech.Notes for some information on the internals.
11
12 Written by: Philip Hazel <ph10@cam.ac.uk>
13
14 Copyright (c) 1997-2004 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45
46 /* This module contains a debugging function for printing out the internal form
47 of a compiled regular expression. It is kept in a separate file so that it can
48 be #included both in the pcretest program, and in the library itself when
49 compiled with the debugging switch. */
50
51
52 static const char *OP_names[] = { OP_NAME_LIST };
53
54
55 /*************************************************
56 * Print single- or multi-byte character *
57 *************************************************/
58
59 /* These tables are actually copies of ones in pcre.c. If we compile the
60 library with debugging, they are included twice, but that isn't really a
61 problem - compiling with debugging is pretty rare and these are very small. */
62
/* Masks applied to the first byte of a character, indexed by the number of
additional bytes that follow it. */

static const int utf8_t3[] = {
  0xff,   /* 0 additional bytes */
  0x1f,   /* 1 additional byte  */
  0x0f,   /* 2 additional bytes */
  0x07,   /* 3 additional bytes */
  0x03,   /* 4 additional bytes */
  0x01    /* 5 additional bytes */
};
64
65 static const uschar utf8_t4[] = {
66 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
67 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
68 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
69 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
70
71 static int
72 print_char(FILE *f, uschar *ptr, BOOL utf8)
73 {
74 int c = *ptr;
75
76 if (!utf8 || (c & 0xc0) != 0xc0)
77 {
78 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
79 return 0;
80 }
81 else
82 {
83 int i;
84 int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
85 int s = 6*a;
86 c = (c & utf8_t3[a]) << s;
87 for (i = 1; i <= a; i++)
88 {
89 /* This is a check for malformed UTF-8; it should only occur if the sanity
90 check has been turned off. Rather than swallow random bytes, just stop if
91 we hit a bad one. Print it with \X instead of \x as an indication. */
92
93 if ((ptr[i] & 0xc0) != 0x80)
94 {
95 fprintf(f, "\\X{%x}", c);
96 return i - 1;
97 }
98
99 /* The byte is OK */
100
101 s -= 6;
102 c |= (ptr[i] & 0x3f) << s;
103 }
104 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
105 return a;
106 }
107 }
108
109
110
111
112 /*************************************************
113 * Find Unicode property name *
114 *************************************************/
115
/* Look up the printable name of a Unicode property value.

Argument:   property   the property value to look for in the utt table
Returns:    the name from the matching utt entry, or "??" if no entry matches
            or if the code was compiled without SUPPORT_UCP
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* The element count is one greater than the index of the last entry, so the
search must start at count - 1; starting at the count itself would read one
element beyond the end of the table. */

for (i = (int)(sizeof(utt)/sizeof(ucp_type_table)) - 1; i >= 0; i--)
  {
  if (property == utt[i].value) break;
  }
return (i >= 0)? utt[i].name : "??";
#else
(void)property;   /* Unused when there is no property support */
return "??";
#endif
}
130
131
132
133 /*************************************************
134 * Print compiled regex *
135 *************************************************/
136
137 /* Make this function work for a regex with integers either byte order.
138 However, we assume that what we are passed is a compiled regex. */
139
140 static void
141 print_internals(pcre *external_re, FILE *f)
142 {
143 real_pcre *re = (real_pcre *)external_re;
144 uschar *codestart, *code;
145 BOOL utf8;
146
147 unsigned int options = re->options;
148 int offset = re->name_table_offset;
149 int count = re->name_count;
150 int size = re->name_entry_size;
151
152 if (re->magic_number != MAGIC_NUMBER)
153 {
154 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
155 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
156 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
157 options = ((options << 24) & 0xff000000) |
158 ((options << 8) & 0x00ff0000) |
159 ((options >> 8) & 0x0000ff00) |
160 ((options >> 24) & 0x000000ff);
161 }
162
163 code = codestart = (uschar *)re + offset + count * size;
164 utf8 = (options & PCRE_UTF8) != 0;
165
166 for(;;)
167 {
168 uschar *ccode;
169 int c;
170 int extra = 0;
171
172 fprintf(f, "%3d ", (int)(code - codestart));
173
174 if (*code >= OP_BRA)
175 {
176 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
177 fprintf(f, "%3d Bra extra\n", GET(code, 1));
178 else
179 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
180 code += OP_lengths[OP_BRA];
181 continue;
182 }
183
184 switch(*code)
185 {
186 case OP_END:
187 fprintf(f, " %s\n", OP_names[*code]);
188 fprintf(f, "------------------------------------------------------------------\n");
189 return;
190
191 case OP_OPT:
192 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
193 break;
194
195 case OP_CHAR:
196 {
197 fprintf(f, " ");
198 do
199 {
200 code++;
201 code += 1 + print_char(f, code, utf8);
202 }
203 while (*code == OP_CHAR);
204 fprintf(f, "\n");
205 continue;
206 }
207 break;
208
209 case OP_CHARNC:
210 {
211 fprintf(f, " NC ");
212 do
213 {
214 code++;
215 code += 1 + print_char(f, code, utf8);
216 }
217 while (*code == OP_CHARNC);
218 fprintf(f, "\n");
219 continue;
220 }
221 break;
222
223 case OP_KETRMAX:
224 case OP_KETRMIN:
225 case OP_ALT:
226 case OP_KET:
227 case OP_ASSERT:
228 case OP_ASSERT_NOT:
229 case OP_ASSERTBACK:
230 case OP_ASSERTBACK_NOT:
231 case OP_ONCE:
232 case OP_COND:
233 case OP_REVERSE:
234 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
235 break;
236
237 case OP_BRANUMBER:
238 printf("%3d %s", GET2(code, 1), OP_names[*code]);
239 break;
240
241 case OP_CREF:
242 if (GET2(code, 1) == CREF_RECURSE)
243 fprintf(f, " Cond recurse");
244 else
245 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
246 break;
247
248 case OP_STAR:
249 case OP_MINSTAR:
250 case OP_PLUS:
251 case OP_MINPLUS:
252 case OP_QUERY:
253 case OP_MINQUERY:
254 case OP_TYPESTAR:
255 case OP_TYPEMINSTAR:
256 case OP_TYPEPLUS:
257 case OP_TYPEMINPLUS:
258 case OP_TYPEQUERY:
259 case OP_TYPEMINQUERY:
260 fprintf(f, " ");
261 if (*code >= OP_TYPESTAR)
262 {
263 fprintf(f, "%s", OP_names[code[1]]);
264 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
265 {
266 fprintf(f, " %s ", get_ucpname(code[2]));
267 extra = 1;
268 }
269 }
270 else extra = print_char(f, code+1, utf8);
271 fprintf(f, "%s", OP_names[*code]);
272 break;
273
274 case OP_EXACT:
275 case OP_UPTO:
276 case OP_MINUPTO:
277 fprintf(f, " ");
278 extra = print_char(f, code+3, utf8);
279 fprintf(f, "{");
280 if (*code != OP_EXACT) fprintf(f, ",");
281 fprintf(f, "%d}", GET2(code,1));
282 if (*code == OP_MINUPTO) fprintf(f, "?");
283 break;
284
285 case OP_TYPEEXACT:
286 case OP_TYPEUPTO:
287 case OP_TYPEMINUPTO:
288 fprintf(f, " %s", OP_names[code[3]]);
289 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
290 {
291 fprintf(f, " %s ", get_ucpname(code[4]));
292 extra = 1;
293 }
294 fprintf(f, "{");
295 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
296 fprintf(f, "%d}", GET2(code,1));
297 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
298 break;
299
300 case OP_NOT:
301 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
302 else fprintf(f, " [^\\x%02x]", c);
303 break;
304
305 case OP_NOTSTAR:
306 case OP_NOTMINSTAR:
307 case OP_NOTPLUS:
308 case OP_NOTMINPLUS:
309 case OP_NOTQUERY:
310 case OP_NOTMINQUERY:
311 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
312 else fprintf(f, " [^\\x%02x]", c);
313 fprintf(f, "%s", OP_names[*code]);
314 break;
315
316 case OP_NOTEXACT:
317 case OP_NOTUPTO:
318 case OP_NOTMINUPTO:
319 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
320 else fprintf(f, " [^\\x%02x]{", c);
321 if (*code != OP_NOTEXACT) fprintf(f, ",");
322 fprintf(f, "%d}", GET2(code,1));
323 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
324 break;
325
326 case OP_RECURSE:
327 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
328 break;
329
330 case OP_REF:
331 fprintf(f, " \\%d", GET2(code,1));
332 ccode = code + OP_lengths[*code];
333 goto CLASS_REF_REPEAT;
334
335 case OP_CALLOUT:
336 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
337 GET(code, 2 + LINK_SIZE));
338 break;
339
340 case OP_PROP:
341 case OP_NOTPROP:
342 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
343 break;
344
345 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
346 having this code always here, and it makes it less messy without all those
347 #ifdefs. */
348
349 case OP_CLASS:
350 case OP_NCLASS:
351 case OP_XCLASS:
352 {
353 int i, min, max;
354 BOOL printmap;
355
356 fprintf(f, " [");
357
358 if (*code == OP_XCLASS)
359 {
360 extra = GET(code, 1);
361 ccode = code + LINK_SIZE + 1;
362 printmap = (*ccode & XCL_MAP) != 0;
363 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
364 }
365 else
366 {
367 printmap = TRUE;
368 ccode = code + 1;
369 }
370
371 /* Print a bit map */
372
373 if (printmap)
374 {
375 for (i = 0; i < 256; i++)
376 {
377 if ((ccode[i/8] & (1 << (i&7))) != 0)
378 {
379 int j;
380 for (j = i+1; j < 256; j++)
381 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
382 if (i == '-' || i == ']') fprintf(f, "\\");
383 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
384 if (--j > i)
385 {
386 if (j != i + 1) fprintf(f, "-");
387 if (j == '-' || j == ']') fprintf(f, "\\");
388 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
389 }
390 i = j;
391 }
392 }
393 ccode += 32;
394 }
395
396 /* For an XCLASS there is always some additional data */
397
398 if (*code == OP_XCLASS)
399 {
400 int ch;
401 while ((ch = *ccode++) != XCL_END)
402 {
403 if (ch == XCL_PROP)
404 {
405 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
406 }
407 else if (ch == XCL_NOTPROP)
408 {
409 fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
410 }
411 else
412 {
413 ccode += 1 + print_char(f, ccode, TRUE);
414 if (ch == XCL_RANGE)
415 {
416 fprintf(f, "-");
417 ccode += 1 + print_char(f, ccode, TRUE);
418 }
419 }
420 }
421 }
422
423 /* Indicate a non-UTF8 class which was created by negation */
424
425 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
426
427 /* Handle repeats after a class or a back reference */
428
429 CLASS_REF_REPEAT:
430 switch(*ccode)
431 {
432 case OP_CRSTAR:
433 case OP_CRMINSTAR:
434 case OP_CRPLUS:
435 case OP_CRMINPLUS:
436 case OP_CRQUERY:
437 case OP_CRMINQUERY:
438 fprintf(f, "%s", OP_names[*ccode]);
439 extra += OP_lengths[*ccode];
440 break;
441
442 case OP_CRRANGE:
443 case OP_CRMINRANGE:
444 min = GET2(ccode,1);
445 max = GET2(ccode,3);
446 if (max == 0) fprintf(f, "{%d,}", min);
447 else fprintf(f, "{%d,%d}", min, max);
448 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
449 extra += OP_lengths[*ccode];
450 break;
451 }
452 }
453 break;
454
455 /* Anything else is just an item with no data*/
456
457 default:
458 fprintf(f, " %s", OP_names[*code]);
459 break;
460 }
461
462 code += OP_lengths[*code] + extra;
463 fprintf(f, "\n");
464 }
465 }
466
467 /* End of printint.c */