Commit | Line | Data |
---|---|---|
47db1125 | 1 | /* $Cambridge: exim/src/src/pcre/pcre_study.c,v 1.6 2007/11/12 13:02:20 nm4 Exp $ */ |
8ac170f3 PH |
2 | |
3 | /************************************************* | |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
7 | /* PCRE is a library of functions to support regular expressions whose syntax | |
8 | and semantics are as close as possible to those of the Perl 5 language. | |
9 | ||
10 | Written by Philip Hazel | |
64f2600a | 11 | Copyright (c) 1997-2007 University of Cambridge |
8ac170f3 PH |
12 | |
13 | ----------------------------------------------------------------------------- | |
14 | Redistribution and use in source and binary forms, with or without | |
15 | modification, are permitted provided that the following conditions are met: | |
16 | ||
17 | * Redistributions of source code must retain the above copyright notice, | |
18 | this list of conditions and the following disclaimer. | |
19 | ||
20 | * Redistributions in binary form must reproduce the above copyright | |
21 | notice, this list of conditions and the following disclaimer in the | |
22 | documentation and/or other materials provided with the distribution. | |
23 | ||
24 | * Neither the name of the University of Cambridge nor the names of its | |
25 | contributors may be used to endorse or promote products derived from | |
26 | this software without specific prior written permission. | |
27 | ||
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | POSSIBILITY OF SUCH DAMAGE. | |
39 | ----------------------------------------------------------------------------- | |
40 | */ | |
41 | ||
42 | ||
43 | /* This module contains the external function pcre_study(), along with local | |
44 | supporting functions. */ | |
45 | ||
46 | ||
47db1125 NM |
47 | #ifdef HAVE_CONFIG_H |
48 | #include "config.h" | |
49 | #endif | |
50 | ||
8ac170f3 PH |
51 | #include "pcre_internal.h" |
52 | ||
53 | ||
6bf342e1 PH |
54 | /* Returns from set_start_bits() */ |
55 | ||
56 | enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE }; | |
57 | ||
58 | ||
8ac170f3 PH |
59 | /************************************************* |
60 | * Set a bit and maybe its alternate case * | |
61 | *************************************************/ | |
62 | ||
63 | /* Given a character, set its bit in the table, and also the bit for the other | |
64 | version of a letter if we are caseless. | |
65 | ||
66 | Arguments: | |
67 | start_bits points to the bit map | |
68 | c is the character | |
69 | caseless the caseless flag | |
70 | cd the block with char table pointers | |
71 | ||
72 | Returns: nothing | |
73 | */ | |
74 | ||
75 | static void | |
76 | set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd) | |
77 | { | |
78 | start_bits[c/8] |= (1 << (c&7)); | |
79 | if (caseless && (cd->ctypes[c] & ctype_letter) != 0) | |
80 | start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7)); | |
81 | } | |
82 | ||
83 | ||
84 | ||
85 | /************************************************* | |
6bf342e1 | 86 | * Create bitmap of starting bytes * |
8ac170f3 PH |
87 | *************************************************/ |
88 | ||
6bf342e1 PH |
89 | /* This function scans a compiled unanchored expression recursively and |
90 | attempts to build a bitmap of the set of possible starting bytes. As time goes | |
91 | by, we may be able to get more clever at doing this. The SSB_CONTINUE return is | |
92 | useful for parenthesized groups in patterns such as (a*)b where the group | |
93 | provides some optional starting bytes but scanning must continue at the outer | |
94 | level to find at least one mandatory byte. At the outermost level, this | |
95 | function fails unless the result is SSB_DONE. | |
8ac170f3 PH |
96 | |
97 | Arguments: | |
98 | code points to an expression | |
99 | start_bits points to a 32-byte table, initialized to 0 | |
100 | caseless the current state of the caseless flag | |
101 | utf8 TRUE if in UTF-8 mode | |
102 | cd the block with char table pointers | |
103 | ||
6bf342e1 PH |
104 | Returns: SSB_FAIL => Failed to find any starting bytes |
105 | SSB_DONE => Found mandatory starting bytes | |
106 | SSB_CONTINUE => Found optional starting bytes | |
8ac170f3 PH |
107 | */ |
108 | ||
6bf342e1 | 109 | static int |
8ac170f3 PH |
110 | set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, |
111 | BOOL utf8, compile_data *cd) | |
112 | { | |
113 | register int c; | |
6bf342e1 | 114 | int yield = SSB_DONE; |
8ac170f3 | 115 | |
aa41d2de PH |
116 | #if 0 |
117 | /* ========================================================================= */ | |
118 | /* The following comment and code was inserted in January 1999. In May 2006, | |
119 | when it was observed to cause compiler warnings about unused values, I took it | |
120 | out again. If anybody is still using OS/2, they will have to put it back | |
121 | manually. */ | |
122 | ||
8ac170f3 PH |
123 | /* This next statement and the later reference to dummy are here in order to |
124 | trick the optimizer of the IBM C compiler for OS/2 into generating correct | |
125 | code. Apparently IBM isn't going to fix the problem, and we would rather not | |
126 | disable optimization (in this module it actually makes a big difference, and | |
127 | the pcre module can use all the optimization it can get). */ | |
128 | ||
129 | volatile int dummy; | |
aa41d2de PH |
130 | /* ========================================================================= */ |
131 | #endif | |
8ac170f3 PH |
132 | |
133 | do | |
134 | { | |
6bf342e1 | 135 | const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE; |
8ac170f3 PH |
136 | BOOL try_next = TRUE; |
137 | ||
6bf342e1 | 138 | while (try_next) /* Loop for items in this branch */ |
8ac170f3 | 139 | { |
6bf342e1 PH |
140 | int rc; |
141 | switch(*tcode) | |
8ac170f3 | 142 | { |
6bf342e1 | 143 | /* Fail if we reach something we don't understand */ |
8ac170f3 | 144 | |
8ac170f3 | 145 | default: |
6bf342e1 PH |
146 | return SSB_FAIL; |
147 | ||
148 | /* If we hit a bracket or a positive lookahead assertion, recurse to set | |
149 | bits from within the subpattern. If it can't find anything, we have to | |
150 | give up. If it finds some mandatory character(s), we are done for this | |
151 | branch. Otherwise, carry on scanning after the subpattern. */ | |
152 | ||
153 | case OP_BRA: | |
154 | case OP_SBRA: | |
155 | case OP_CBRA: | |
156 | case OP_SCBRA: | |
157 | case OP_ONCE: | |
158 | case OP_ASSERT: | |
159 | rc = set_start_bits(tcode, start_bits, caseless, utf8, cd); | |
160 | if (rc == SSB_FAIL) return SSB_FAIL; | |
161 | if (rc == SSB_DONE) try_next = FALSE; else | |
162 | { | |
163 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); | |
164 | tcode += 1 + LINK_SIZE; | |
165 | } | |
166 | break; | |
8ac170f3 | 167 | |
6bf342e1 PH |
168 | /* If we hit ALT or KET, it means we haven't found anything mandatory in |
169 | this branch, though we might have found something optional. For ALT, we | |
170 | continue with the next alternative, but we have to arrange that the final | |
171 | result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET, | |
172 | return SSB_CONTINUE: if this is the top level, that indicates failure, | |
173 | but after a nested subpattern, it causes scanning to continue. */ | |
8ac170f3 | 174 | |
6bf342e1 PH |
175 | case OP_ALT: |
176 | yield = SSB_CONTINUE; | |
177 | try_next = FALSE; | |
8ac170f3 PH |
178 | break; |
179 | ||
6bf342e1 PH |
180 | case OP_KET: |
181 | case OP_KETRMAX: | |
182 | case OP_KETRMIN: | |
183 | return SSB_CONTINUE; | |
8ac170f3 | 184 | |
6bf342e1 PH |
185 | /* Skip over callout */ |
186 | ||
187 | case OP_CALLOUT: | |
188 | tcode += 2 + 2*LINK_SIZE; | |
8ac170f3 PH |
189 | break; |
190 | ||
191 | /* Skip over lookbehind and negative lookahead assertions */ | |
192 | ||
193 | case OP_ASSERT_NOT: | |
194 | case OP_ASSERTBACK: | |
195 | case OP_ASSERTBACK_NOT: | |
196 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); | |
6bf342e1 | 197 | tcode += 1 + LINK_SIZE; |
8ac170f3 PH |
198 | break; |
199 | ||
200 | /* Skip over an option setting, changing the caseless flag */ | |
201 | ||
202 | case OP_OPT: | |
203 | caseless = (tcode[1] & PCRE_CASELESS) != 0; | |
204 | tcode += 2; | |
205 | break; | |
206 | ||
207 | /* BRAZERO does the bracket, but carries on. */ | |
208 | ||
209 | case OP_BRAZERO: | |
210 | case OP_BRAMINZERO: | |
6bf342e1 PH |
211 | if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL) |
212 | return SSB_FAIL; | |
aa41d2de PH |
213 | /* ========================================================================= |
214 | See the comment at the head of this function concerning the next line, | |
215 | which was an old fudge for the benefit of OS/2. | |
8ac170f3 | 216 | dummy = 1; |
aa41d2de | 217 | ========================================================================= */ |
8ac170f3 | 218 | do tcode += GET(tcode,1); while (*tcode == OP_ALT); |
6bf342e1 | 219 | tcode += 1 + LINK_SIZE; |
8ac170f3 PH |
220 | break; |
221 | ||
222 | /* Single-char * or ? sets the bit and tries the next item */ | |
223 | ||
224 | case OP_STAR: | |
225 | case OP_MINSTAR: | |
6bf342e1 | 226 | case OP_POSSTAR: |
8ac170f3 PH |
227 | case OP_QUERY: |
228 | case OP_MINQUERY: | |
6bf342e1 | 229 | case OP_POSQUERY: |
8ac170f3 PH |
230 | set_bit(start_bits, tcode[1], caseless, cd); |
231 | tcode += 2; | |
232 | #ifdef SUPPORT_UTF8 | |
6bf342e1 PH |
233 | if (utf8 && tcode[-1] >= 0xc0) |
234 | tcode += _pcre_utf8_table4[tcode[-1] & 0x3f]; | |
8ac170f3 PH |
235 | #endif |
236 | break; | |
237 | ||
238 | /* Single-char upto sets the bit and tries the next */ | |
239 | ||
240 | case OP_UPTO: | |
241 | case OP_MINUPTO: | |
6bf342e1 | 242 | case OP_POSUPTO: |
8ac170f3 PH |
243 | set_bit(start_bits, tcode[3], caseless, cd); |
244 | tcode += 4; | |
245 | #ifdef SUPPORT_UTF8 | |
6bf342e1 PH |
246 | if (utf8 && tcode[-1] >= 0xc0) |
247 | tcode += _pcre_utf8_table4[tcode[-1] & 0x3f]; | |
8ac170f3 PH |
248 | #endif |
249 | break; | |
250 | ||
251 | /* At least one single char sets the bit and stops */ | |
252 | ||
253 | case OP_EXACT: /* Fall through */ | |
254 | tcode += 2; | |
255 | ||
256 | case OP_CHAR: | |
257 | case OP_CHARNC: | |
258 | case OP_PLUS: | |
259 | case OP_MINPLUS: | |
6bf342e1 | 260 | case OP_POSPLUS: |
8ac170f3 PH |
261 | set_bit(start_bits, tcode[1], caseless, cd); |
262 | try_next = FALSE; | |
263 | break; | |
264 | ||
265 | /* Single character type sets the bits and stops */ | |
266 | ||
267 | case OP_NOT_DIGIT: | |
268 | for (c = 0; c < 32; c++) | |
269 | start_bits[c] |= ~cd->cbits[c+cbit_digit]; | |
270 | try_next = FALSE; | |
271 | break; | |
272 | ||
273 | case OP_DIGIT: | |
274 | for (c = 0; c < 32; c++) | |
275 | start_bits[c] |= cd->cbits[c+cbit_digit]; | |
276 | try_next = FALSE; | |
277 | break; | |
278 | ||
aa41d2de PH |
279 | /* The cbit_space table has vertical tab as whitespace; we have to |
280 | discard it. */ | |
281 | ||
8ac170f3 PH |
282 | case OP_NOT_WHITESPACE: |
283 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
284 | { |
285 | int d = cd->cbits[c+cbit_space]; | |
286 | if (c == 1) d &= ~0x08; | |
287 | start_bits[c] |= ~d; | |
288 | } | |
8ac170f3 PH |
289 | try_next = FALSE; |
290 | break; | |
291 | ||
aa41d2de PH |
292 | /* The cbit_space table has vertical tab as whitespace; we have to |
293 | discard it. */ | |
294 | ||
8ac170f3 PH |
295 | case OP_WHITESPACE: |
296 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
297 | { |
298 | int d = cd->cbits[c+cbit_space]; | |
299 | if (c == 1) d &= ~0x08; | |
300 | start_bits[c] |= d; | |
301 | } | |
8ac170f3 PH |
302 | try_next = FALSE; |
303 | break; | |
304 | ||
305 | case OP_NOT_WORDCHAR: | |
306 | for (c = 0; c < 32; c++) | |
307 | start_bits[c] |= ~cd->cbits[c+cbit_word]; | |
308 | try_next = FALSE; | |
309 | break; | |
310 | ||
311 | case OP_WORDCHAR: | |
312 | for (c = 0; c < 32; c++) | |
313 | start_bits[c] |= cd->cbits[c+cbit_word]; | |
314 | try_next = FALSE; | |
315 | break; | |
316 | ||
317 | /* One or more character type fudges the pointer and restarts, knowing | |
318 | it will hit a single character type and stop there. */ | |
319 | ||
320 | case OP_TYPEPLUS: | |
321 | case OP_TYPEMINPLUS: | |
322 | tcode++; | |
323 | break; | |
324 | ||
325 | case OP_TYPEEXACT: | |
326 | tcode += 3; | |
327 | break; | |
328 | ||
329 | /* Zero or more repeats of character types set the bits and then | |
330 | try again. */ | |
331 | ||
332 | case OP_TYPEUPTO: | |
333 | case OP_TYPEMINUPTO: | |
6bf342e1 | 334 | case OP_TYPEPOSUPTO: |
8ac170f3 PH |
335 | tcode += 2; /* Fall through */ |
336 | ||
337 | case OP_TYPESTAR: | |
338 | case OP_TYPEMINSTAR: | |
6bf342e1 | 339 | case OP_TYPEPOSSTAR: |
8ac170f3 PH |
340 | case OP_TYPEQUERY: |
341 | case OP_TYPEMINQUERY: | |
6bf342e1 | 342 | case OP_TYPEPOSQUERY: |
8ac170f3 PH |
343 | switch(tcode[1]) |
344 | { | |
345 | case OP_ANY: | |
6bf342e1 | 346 | return SSB_FAIL; |
8ac170f3 PH |
347 | |
348 | case OP_NOT_DIGIT: | |
349 | for (c = 0; c < 32; c++) | |
350 | start_bits[c] |= ~cd->cbits[c+cbit_digit]; | |
351 | break; | |
352 | ||
353 | case OP_DIGIT: | |
354 | for (c = 0; c < 32; c++) | |
355 | start_bits[c] |= cd->cbits[c+cbit_digit]; | |
356 | break; | |
357 | ||
aa41d2de PH |
358 | /* The cbit_space table has vertical tab as whitespace; we have to |
359 | discard it. */ | |
360 | ||
8ac170f3 PH |
361 | case OP_NOT_WHITESPACE: |
362 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
363 | { |
364 | int d = cd->cbits[c+cbit_space]; | |
365 | if (c == 1) d &= ~0x08; | |
366 | start_bits[c] |= ~d; | |
367 | } | |
8ac170f3 PH |
368 | break; |
369 | ||
aa41d2de PH |
370 | /* The cbit_space table has vertical tab as whitespace; we have to |
371 | discard it. */ | |
372 | ||
8ac170f3 PH |
373 | case OP_WHITESPACE: |
374 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
375 | { |
376 | int d = cd->cbits[c+cbit_space]; | |
377 | if (c == 1) d &= ~0x08; | |
378 | start_bits[c] |= d; | |
379 | } | |
8ac170f3 PH |
380 | break; |
381 | ||
382 | case OP_NOT_WORDCHAR: | |
383 | for (c = 0; c < 32; c++) | |
384 | start_bits[c] |= ~cd->cbits[c+cbit_word]; | |
385 | break; | |
386 | ||
387 | case OP_WORDCHAR: | |
388 | for (c = 0; c < 32; c++) | |
389 | start_bits[c] |= cd->cbits[c+cbit_word]; | |
390 | break; | |
391 | } | |
392 | ||
393 | tcode += 2; | |
394 | break; | |
395 | ||
396 | /* Character class where all the information is in a bit map: set the | |
397 | bits and either carry on or not, according to the repeat count. If it was | |
398 | a negative class, and we are operating with UTF-8 characters, any byte | |
399 | with a value >= 0xc4 is a potentially valid starter because it starts a | |
400 | character with a value > 255. */ | |
401 | ||
402 | case OP_NCLASS: | |
64f2600a | 403 | #ifdef SUPPORT_UTF8 |
8ac170f3 PH |
404 | if (utf8) |
405 | { | |
406 | start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ | |
407 | memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ | |
408 | } | |
64f2600a | 409 | #endif |
8ac170f3 PH |
410 | /* Fall through */ |
411 | ||
412 | case OP_CLASS: | |
413 | { | |
414 | tcode++; | |
415 | ||
416 | /* In UTF-8 mode, the bits in a bit map correspond to character | |
417 | values, not to byte values. However, the bit map we are constructing is | |
418 | for byte values. So we have to do a conversion for characters whose | |
419 | value is > 127. In fact, there are only two possible starting bytes for | |
420 | characters in the range 128 - 255. */ | |
421 | ||
64f2600a | 422 | #ifdef SUPPORT_UTF8 |
8ac170f3 PH |
423 | if (utf8) |
424 | { | |
425 | for (c = 0; c < 16; c++) start_bits[c] |= tcode[c]; | |
426 | for (c = 128; c < 256; c++) | |
427 | { | |
428 | if ((tcode[c/8] && (1 << (c&7))) != 0) | |
429 | { | |
430 | int d = (c >> 6) | 0xc0; /* Set bit for this starter */ | |
431 | start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ | |
432 | c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ | |
433 | } | |
434 | } | |
435 | } | |
436 | ||
437 | /* In non-UTF-8 mode, the two bit maps are completely compatible. */ | |
438 | ||
439 | else | |
64f2600a | 440 | #endif |
8ac170f3 PH |
441 | { |
442 | for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; | |
443 | } | |
444 | ||
445 | /* Advance past the bit map, and act on what follows */ | |
446 | ||
447 | tcode += 32; | |
448 | switch (*tcode) | |
449 | { | |
450 | case OP_CRSTAR: | |
451 | case OP_CRMINSTAR: | |
452 | case OP_CRQUERY: | |
453 | case OP_CRMINQUERY: | |
454 | tcode++; | |
455 | break; | |
456 | ||
457 | case OP_CRRANGE: | |
458 | case OP_CRMINRANGE: | |
459 | if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; | |
460 | else try_next = FALSE; | |
461 | break; | |
462 | ||
463 | default: | |
464 | try_next = FALSE; | |
465 | break; | |
466 | } | |
467 | } | |
468 | break; /* End of bitmap class handling */ | |
469 | ||
470 | } /* End of switch */ | |
471 | } /* End of try_next loop */ | |
472 | ||
473 | code += GET(code, 1); /* Advance to next branch */ | |
474 | } | |
475 | while (*code == OP_ALT); | |
6bf342e1 | 476 | return yield; |
8ac170f3 PH |
477 | } |
478 | ||
479 | ||
480 | ||
481 | /************************************************* | |
482 | * Study a compiled expression * | |
483 | *************************************************/ | |
484 | ||
485 | /* This function is handed a compiled expression that it must study to produce | |
486 | information that will speed up the matching. It returns a pcre_extra block | |
487 | which then gets handed back to pcre_exec(). | |
488 | ||
489 | Arguments: | |
490 | re points to the compiled expression | |
491 | options contains option bits | |
492 | errorptr points to where to place error messages; | |
493 | set NULL unless error | |
494 | ||
495 | Returns: pointer to a pcre_extra block, with study_data filled in and the | |
496 | appropriate flag set; | |
497 | NULL on error or if no optimization possible | |
498 | */ | |
499 | ||
64f2600a | 500 | PCRE_EXP_DEFN pcre_extra * |
8ac170f3 PH |
501 | pcre_study(const pcre *external_re, int options, const char **errorptr) |
502 | { | |
503 | uschar start_bits[32]; | |
504 | pcre_extra *extra; | |
505 | pcre_study_data *study; | |
506 | const uschar *tables; | |
aa41d2de | 507 | uschar *code; |
8ac170f3 | 508 | compile_data compile_block; |
aa41d2de | 509 | const real_pcre *re = (const real_pcre *)external_re; |
8ac170f3 PH |
510 | |
511 | *errorptr = NULL; | |
512 | ||
513 | if (re == NULL || re->magic_number != MAGIC_NUMBER) | |
514 | { | |
515 | *errorptr = "argument is not a compiled regular expression"; | |
516 | return NULL; | |
517 | } | |
518 | ||
519 | if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) | |
520 | { | |
521 | *errorptr = "unknown or incorrect option bit(s) set"; | |
522 | return NULL; | |
523 | } | |
524 | ||
aa41d2de PH |
525 | code = (uschar *)re + re->name_table_offset + |
526 | (re->name_count * re->name_entry_size); | |
527 | ||
8ac170f3 PH |
528 | /* For an anchored pattern, or an unanchored pattern that has a first char, or |
529 | a multiline pattern that matches only at "line starts", no further processing | |
530 | at present. */ | |
531 | ||
47db1125 NM |
532 | if ((re->options & PCRE_ANCHORED) != 0 || |
533 | (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) | |
8ac170f3 PH |
534 | return NULL; |
535 | ||
536 | /* Set the character tables in the block that is passed around */ | |
537 | ||
538 | tables = re->tables; | |
539 | if (tables == NULL) | |
540 | (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, | |
541 | (void *)(&tables)); | |
542 | ||
543 | compile_block.lcc = tables + lcc_offset; | |
544 | compile_block.fcc = tables + fcc_offset; | |
545 | compile_block.cbits = tables + cbits_offset; | |
546 | compile_block.ctypes = tables + ctypes_offset; | |
547 | ||
548 | /* See if we can find a fixed set of initial characters for the pattern. */ | |
549 | ||
550 | memset(start_bits, 0, 32 * sizeof(uschar)); | |
6bf342e1 PH |
551 | if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0, |
552 | (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL; | |
8ac170f3 PH |
553 | |
554 | /* Get a pcre_extra block and a pcre_study_data block. The study data is put in | |
555 | the latter, which is pointed to by the former, which may also get additional | |
556 | data set later by the calling program. At the moment, the size of | |
557 | pcre_study_data is fixed. We nevertheless save it in a field for returning via | |
558 | the pcre_fullinfo() function so that if it becomes variable in the future, we | |
559 | don't have to change that code. */ | |
560 | ||
561 | extra = (pcre_extra *)(pcre_malloc) | |
562 | (sizeof(pcre_extra) + sizeof(pcre_study_data)); | |
563 | ||
564 | if (extra == NULL) | |
565 | { | |
566 | *errorptr = "failed to get memory"; | |
567 | return NULL; | |
568 | } | |
569 | ||
570 | study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); | |
571 | extra->flags = PCRE_EXTRA_STUDY_DATA; | |
572 | extra->study_data = study; | |
573 | ||
574 | study->size = sizeof(pcre_study_data); | |
575 | study->options = PCRE_STUDY_MAPPED; | |
576 | memcpy(study->start_bits, start_bits, sizeof(start_bits)); | |
577 | ||
578 | return extra; | |
579 | } | |
580 | ||
581 | /* End of pcre_study.c */ |