Oops, I introduced a bug in ${prvstest changes, shown up by building (new)
[exim.git] / src / src / pcre / printint.c
CommitLineData
8ac170f3
PH
1/* $Cambridge: exim/src/src/pcre/printint.c,v 1.2 2005/06/15 08:57:10 ph10 Exp $ */
2
c86f6258
PH
3/*************************************************
4* Perl-Compatible Regular Expressions *
5*************************************************/
6
7/*
8This is a library of functions to support regular expressions whose syntax
9and semantics are as close as possible to those of the Perl 5 language. See
10the file Tech.Notes for some information on the internals.
11
12Written by: Philip Hazel <ph10@cam.ac.uk>
13
14 Copyright (c) 1997-2004 University of Cambridge
15
16-----------------------------------------------------------------------------
17Redistribution and use in source and binary forms, with or without
18modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41POSSIBILITY OF SUCH DAMAGE.
42-----------------------------------------------------------------------------
43*/
44
45
46/* This module contains a debugging function for printing out the internal form
47of a compiled regular expression. It is kept in a separate file so that it can
48be #included both in the pcretest program, and in the library itself when
49compiled with the debugging switch. */
50
51
52static const char *OP_names[] = { OP_NAME_LIST };
53
54
55/*************************************************
56* Print single- or multi-byte character *
57*************************************************/
58
59/* These tables are actually copies of ones in pcre.c. If we compile the
60library with debugging, they are included twice, but that isn't really a
61problem - compiling with debugging is pretty rare and these are very small. */
62
63static const int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
64
65static const uschar utf8_t4[] = {
66 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
67 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
68 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
69 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
70
71static int
72print_char(FILE *f, uschar *ptr, BOOL utf8)
73{
74int c = *ptr;
75
76if (!utf8 || (c & 0xc0) != 0xc0)
77 {
78 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
79 return 0;
80 }
81else
82 {
83 int i;
84 int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
85 int s = 6*a;
86 c = (c & utf8_t3[a]) << s;
87 for (i = 1; i <= a; i++)
88 {
89 /* This is a check for malformed UTF-8; it should only occur if the sanity
90 check has been turned off. Rather than swallow random bytes, just stop if
91 we hit a bad one. Print it with \X instead of \x as an indication. */
92
93 if ((ptr[i] & 0xc0) != 0x80)
94 {
95 fprintf(f, "\\X{%x}", c);
96 return i - 1;
97 }
98
99 /* The byte is OK */
100
101 s -= 6;
102 c |= (ptr[i] & 0x3f) << s;
103 }
104 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
105 return a;
106 }
107}
108
109
110
111
112/*************************************************
113* Find Unicode property name *
114*************************************************/
115
/* Look up the name of a Unicode property.

Argument:  property   the property value to look for
Returns:   the name from the matching utt entry, or "??" if no entry matches
           or the code was compiled without SUPPORT_UCP
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* Scan from the last valid entry downwards. The element count itself is one
past the end of the table, so the scan must start at count - 1. The count is
converted to int before subtracting so that an empty table yields -1. */

for (i = (int)(sizeof(utt)/sizeof(ucp_type_table)) - 1; i >= 0; i--)
  {
  if (property == utt[i].value) break;
  }
return (i >= 0)? utt[i].name : "??";
#else
(void)property;     /* Not used in this configuration */
return "??";
#endif
}
130
131
132
133/*************************************************
134* Print compiled regex *
135*************************************************/
136
137/* Make this function work for a regex with integers either byte order.
138However, we assume that what we are passed is a compiled regex. */
139
140static void
141print_internals(pcre *external_re, FILE *f)
142{
143real_pcre *re = (real_pcre *)external_re;
144uschar *codestart, *code;
145BOOL utf8;
146
147unsigned int options = re->options;
148int offset = re->name_table_offset;
149int count = re->name_count;
150int size = re->name_entry_size;
151
152if (re->magic_number != MAGIC_NUMBER)
153 {
154 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
155 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
156 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
157 options = ((options << 24) & 0xff000000) |
158 ((options << 8) & 0x00ff0000) |
159 ((options >> 8) & 0x0000ff00) |
160 ((options >> 24) & 0x000000ff);
161 }
162
163code = codestart = (uschar *)re + offset + count * size;
164utf8 = (options & PCRE_UTF8) != 0;
165
166for(;;)
167 {
168 uschar *ccode;
169 int c;
170 int extra = 0;
171
172 fprintf(f, "%3d ", (int)(code - codestart));
173
174 if (*code >= OP_BRA)
175 {
176 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
177 fprintf(f, "%3d Bra extra\n", GET(code, 1));
178 else
179 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
180 code += OP_lengths[OP_BRA];
181 continue;
182 }
183
184 switch(*code)
185 {
186 case OP_END:
187 fprintf(f, " %s\n", OP_names[*code]);
188 fprintf(f, "------------------------------------------------------------------\n");
189 return;
190
191 case OP_OPT:
192 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
193 break;
194
195 case OP_CHAR:
196 {
197 fprintf(f, " ");
198 do
199 {
200 code++;
201 code += 1 + print_char(f, code, utf8);
202 }
203 while (*code == OP_CHAR);
204 fprintf(f, "\n");
205 continue;
206 }
207 break;
208
209 case OP_CHARNC:
210 {
211 fprintf(f, " NC ");
212 do
213 {
214 code++;
215 code += 1 + print_char(f, code, utf8);
216 }
217 while (*code == OP_CHARNC);
218 fprintf(f, "\n");
219 continue;
220 }
221 break;
222
223 case OP_KETRMAX:
224 case OP_KETRMIN:
225 case OP_ALT:
226 case OP_KET:
227 case OP_ASSERT:
228 case OP_ASSERT_NOT:
229 case OP_ASSERTBACK:
230 case OP_ASSERTBACK_NOT:
231 case OP_ONCE:
232 case OP_COND:
233 case OP_REVERSE:
234 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
235 break;
236
237 case OP_BRANUMBER:
238 printf("%3d %s", GET2(code, 1), OP_names[*code]);
239 break;
240
241 case OP_CREF:
242 if (GET2(code, 1) == CREF_RECURSE)
243 fprintf(f, " Cond recurse");
244 else
245 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
246 break;
247
248 case OP_STAR:
249 case OP_MINSTAR:
250 case OP_PLUS:
251 case OP_MINPLUS:
252 case OP_QUERY:
253 case OP_MINQUERY:
254 case OP_TYPESTAR:
255 case OP_TYPEMINSTAR:
256 case OP_TYPEPLUS:
257 case OP_TYPEMINPLUS:
258 case OP_TYPEQUERY:
259 case OP_TYPEMINQUERY:
260 fprintf(f, " ");
261 if (*code >= OP_TYPESTAR)
262 {
263 fprintf(f, "%s", OP_names[code[1]]);
264 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
265 {
266 fprintf(f, " %s ", get_ucpname(code[2]));
267 extra = 1;
268 }
269 }
270 else extra = print_char(f, code+1, utf8);
271 fprintf(f, "%s", OP_names[*code]);
272 break;
273
274 case OP_EXACT:
275 case OP_UPTO:
276 case OP_MINUPTO:
277 fprintf(f, " ");
278 extra = print_char(f, code+3, utf8);
279 fprintf(f, "{");
280 if (*code != OP_EXACT) fprintf(f, ",");
281 fprintf(f, "%d}", GET2(code,1));
282 if (*code == OP_MINUPTO) fprintf(f, "?");
283 break;
284
285 case OP_TYPEEXACT:
286 case OP_TYPEUPTO:
287 case OP_TYPEMINUPTO:
288 fprintf(f, " %s", OP_names[code[3]]);
289 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
290 {
291 fprintf(f, " %s ", get_ucpname(code[4]));
292 extra = 1;
293 }
294 fprintf(f, "{");
295 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
296 fprintf(f, "%d}", GET2(code,1));
297 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
298 break;
299
300 case OP_NOT:
301 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
302 else fprintf(f, " [^\\x%02x]", c);
303 break;
304
305 case OP_NOTSTAR:
306 case OP_NOTMINSTAR:
307 case OP_NOTPLUS:
308 case OP_NOTMINPLUS:
309 case OP_NOTQUERY:
310 case OP_NOTMINQUERY:
311 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
312 else fprintf(f, " [^\\x%02x]", c);
313 fprintf(f, "%s", OP_names[*code]);
314 break;
315
316 case OP_NOTEXACT:
317 case OP_NOTUPTO:
318 case OP_NOTMINUPTO:
319 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
320 else fprintf(f, " [^\\x%02x]{", c);
321 if (*code != OP_NOTEXACT) fprintf(f, ",");
322 fprintf(f, "%d}", GET2(code,1));
323 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
324 break;
325
326 case OP_RECURSE:
327 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
328 break;
329
330 case OP_REF:
331 fprintf(f, " \\%d", GET2(code,1));
332 ccode = code + OP_lengths[*code];
333 goto CLASS_REF_REPEAT;
334
335 case OP_CALLOUT:
336 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
337 GET(code, 2 + LINK_SIZE));
338 break;
339
340 case OP_PROP:
341 case OP_NOTPROP:
342 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
343 break;
344
345 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
346 having this code always here, and it makes it less messy without all those
347 #ifdefs. */
348
349 case OP_CLASS:
350 case OP_NCLASS:
351 case OP_XCLASS:
352 {
353 int i, min, max;
354 BOOL printmap;
355
356 fprintf(f, " [");
357
358 if (*code == OP_XCLASS)
359 {
360 extra = GET(code, 1);
361 ccode = code + LINK_SIZE + 1;
362 printmap = (*ccode & XCL_MAP) != 0;
363 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
364 }
365 else
366 {
367 printmap = TRUE;
368 ccode = code + 1;
369 }
370
371 /* Print a bit map */
372
373 if (printmap)
374 {
375 for (i = 0; i < 256; i++)
376 {
377 if ((ccode[i/8] & (1 << (i&7))) != 0)
378 {
379 int j;
380 for (j = i+1; j < 256; j++)
381 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
382 if (i == '-' || i == ']') fprintf(f, "\\");
383 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
384 if (--j > i)
385 {
386 if (j != i + 1) fprintf(f, "-");
387 if (j == '-' || j == ']') fprintf(f, "\\");
388 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
389 }
390 i = j;
391 }
392 }
393 ccode += 32;
394 }
395
396 /* For an XCLASS there is always some additional data */
397
398 if (*code == OP_XCLASS)
399 {
400 int ch;
401 while ((ch = *ccode++) != XCL_END)
402 {
403 if (ch == XCL_PROP)
404 {
405 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
406 }
407 else if (ch == XCL_NOTPROP)
408 {
409 fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
410 }
411 else
412 {
413 ccode += 1 + print_char(f, ccode, TRUE);
414 if (ch == XCL_RANGE)
415 {
416 fprintf(f, "-");
417 ccode += 1 + print_char(f, ccode, TRUE);
418 }
419 }
420 }
421 }
422
423 /* Indicate a non-UTF8 class which was created by negation */
424
425 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
426
427 /* Handle repeats after a class or a back reference */
428
429 CLASS_REF_REPEAT:
430 switch(*ccode)
431 {
432 case OP_CRSTAR:
433 case OP_CRMINSTAR:
434 case OP_CRPLUS:
435 case OP_CRMINPLUS:
436 case OP_CRQUERY:
437 case OP_CRMINQUERY:
438 fprintf(f, "%s", OP_names[*ccode]);
439 extra += OP_lengths[*ccode];
440 break;
441
442 case OP_CRRANGE:
443 case OP_CRMINRANGE:
444 min = GET2(ccode,1);
445 max = GET2(ccode,3);
446 if (max == 0) fprintf(f, "{%d,}", min);
447 else fprintf(f, "{%d,%d}", min, max);
448 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
449 extra += OP_lengths[*ccode];
450 break;
451 }
452 }
453 break;
454
455 /* Anything else is just an item with no data*/
456
457 default:
458 fprintf(f, " %s", OP_names[*code]);
459 break;
460 }
461
462 code += OP_lengths[*code] + extra;
463 fprintf(f, "\n");
464 }
465}
466
467/* End of printint.c */