Commit | Line | Data |
---|---|---|
6bf342e1 | 1 | /* $Cambridge: exim/src/src/pcre/pcre_study.c,v 1.4 2007/01/23 15:08:45 ph10 Exp $ */ |
8ac170f3 PH |
2 | |
3 | /************************************************* | |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
7 | /* PCRE is a library of functions to support regular expressions whose syntax | |
8 | and semantics are as close as possible to those of the Perl 5 language. | |
9 | ||
10 | Written by Philip Hazel | |
aa41d2de | 11 | Copyright (c) 1997-2006 University of Cambridge |
8ac170f3 PH |
12 | |
13 | ----------------------------------------------------------------------------- | |
14 | Redistribution and use in source and binary forms, with or without | |
15 | modification, are permitted provided that the following conditions are met: | |
16 | ||
17 | * Redistributions of source code must retain the above copyright notice, | |
18 | this list of conditions and the following disclaimer. | |
19 | ||
20 | * Redistributions in binary form must reproduce the above copyright | |
21 | notice, this list of conditions and the following disclaimer in the | |
22 | documentation and/or other materials provided with the distribution. | |
23 | ||
24 | * Neither the name of the University of Cambridge nor the names of its | |
25 | contributors may be used to endorse or promote products derived from | |
26 | this software without specific prior written permission. | |
27 | ||
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | POSSIBILITY OF SUCH DAMAGE. | |
39 | ----------------------------------------------------------------------------- | |
40 | */ | |
41 | ||
42 | ||
43 | /* This module contains the external function pcre_study(), along with local | |
44 | supporting functions. */ | |
45 | ||
46 | ||
47 | #include "pcre_internal.h" | |
48 | ||
49 | ||
6bf342e1 PH |
50 | /* Returns from set_start_bits() */ |
51 | ||
52 | enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE }; | |
53 | ||
54 | ||
8ac170f3 PH |
55 | /************************************************* |
56 | * Set a bit and maybe its alternate case * | |
57 | *************************************************/ | |
58 | ||
59 | /* Given a character, set its bit in the table, and also the bit for the other | |
60 | version of a letter if we are caseless. | |
61 | ||
62 | Arguments: | |
63 | start_bits points to the bit map | |
64 | c is the character | |
65 | caseless the caseless flag | |
66 | cd the block with char table pointers | |
67 | ||
68 | Returns: nothing | |
69 | */ | |
70 | ||
71 | static void | |
72 | set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd) | |
73 | { | |
74 | start_bits[c/8] |= (1 << (c&7)); | |
75 | if (caseless && (cd->ctypes[c] & ctype_letter) != 0) | |
76 | start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7)); | |
77 | } | |
78 | ||
79 | ||
80 | ||
81 | /************************************************* | |
6bf342e1 | 82 | * Create bitmap of starting bytes * |
8ac170f3 PH |
83 | *************************************************/ |
84 | ||
6bf342e1 PH |
85 | /* This function scans a compiled unanchored expression recursively and |
86 | attempts to build a bitmap of the set of possible starting bytes. As time goes | |
87 | by, we may be able to get more clever at doing this. The SSB_CONTINUE return is | |
88 | useful for parenthesized groups in patterns such as (a*)b where the group | |
89 | provides some optional starting bytes but scanning must continue at the outer | |
90 | level to find at least one mandatory byte. At the outermost level, this | |
91 | function fails unless the result is SSB_DONE. | |
8ac170f3 PH |
92 | |
93 | Arguments: | |
94 | code points to an expression | |
95 | start_bits points to a 32-byte table, initialized to 0 | |
96 | caseless the current state of the caseless flag | |
97 | utf8 TRUE if in UTF-8 mode | |
98 | cd the block with char table pointers | |
99 | ||
6bf342e1 PH |
100 | Returns: SSB_FAIL => Failed to find any starting bytes |
101 | SSB_DONE => Found mandatory starting bytes | |
102 | SSB_CONTINUE => Found optional starting bytes | |
8ac170f3 PH |
103 | */ |
104 | ||
6bf342e1 | 105 | static int |
8ac170f3 PH |
106 | set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, |
107 | BOOL utf8, compile_data *cd) | |
108 | { | |
109 | register int c; | |
6bf342e1 | 110 | int yield = SSB_DONE; |
8ac170f3 | 111 | |
aa41d2de PH |
112 | #if 0 |
113 | /* ========================================================================= */ | |
114 | /* The following comment and code was inserted in January 1999. In May 2006, | |
115 | when it was observed to cause compiler warnings about unused values, I took it | |
116 | out again. If anybody is still using OS/2, they will have to put it back | |
117 | manually. */ | |
118 | ||
8ac170f3 PH |
119 | /* This next statement and the later reference to dummy are here in order to |
120 | trick the optimizer of the IBM C compiler for OS/2 into generating correct | |
121 | code. Apparently IBM isn't going to fix the problem, and we would rather not | |
122 | disable optimization (in this module it actually makes a big difference, and | |
123 | the pcre module can use all the optimization it can get). */ | |
124 | ||
125 | volatile int dummy; | |
aa41d2de PH |
126 | /* ========================================================================= */ |
127 | #endif | |
8ac170f3 PH |
128 | |
129 | do | |
130 | { | |
6bf342e1 | 131 | const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE; |
8ac170f3 PH |
132 | BOOL try_next = TRUE; |
133 | ||
6bf342e1 | 134 | while (try_next) /* Loop for items in this branch */ |
8ac170f3 | 135 | { |
6bf342e1 PH |
136 | int rc; |
137 | switch(*tcode) | |
8ac170f3 | 138 | { |
6bf342e1 | 139 | /* Fail if we reach something we don't understand */ |
8ac170f3 | 140 | |
8ac170f3 | 141 | default: |
6bf342e1 PH |
142 | return SSB_FAIL; |
143 | ||
144 | /* If we hit a bracket or a positive lookahead assertion, recurse to set | |
145 | bits from within the subpattern. If it can't find anything, we have to | |
146 | give up. If it finds some mandatory character(s), we are done for this | |
147 | branch. Otherwise, carry on scanning after the subpattern. */ | |
148 | ||
149 | case OP_BRA: | |
150 | case OP_SBRA: | |
151 | case OP_CBRA: | |
152 | case OP_SCBRA: | |
153 | case OP_ONCE: | |
154 | case OP_ASSERT: | |
155 | rc = set_start_bits(tcode, start_bits, caseless, utf8, cd); | |
156 | if (rc == SSB_FAIL) return SSB_FAIL; | |
157 | if (rc == SSB_DONE) try_next = FALSE; else | |
158 | { | |
159 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); | |
160 | tcode += 1 + LINK_SIZE; | |
161 | } | |
162 | break; | |
8ac170f3 | 163 | |
6bf342e1 PH |
164 | /* If we hit ALT or KET, it means we haven't found anything mandatory in |
165 | this branch, though we might have found something optional. For ALT, we | |
166 | continue with the next alternative, but we have to arrange that the final | |
167 | result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET, | |
168 | return SSB_CONTINUE: if this is the top level, that indicates failure, | |
169 | but after a nested subpattern, it causes scanning to continue. */ | |
8ac170f3 | 170 | |
6bf342e1 PH |
171 | case OP_ALT: |
172 | yield = SSB_CONTINUE; | |
173 | try_next = FALSE; | |
8ac170f3 PH |
174 | break; |
175 | ||
6bf342e1 PH |
176 | case OP_KET: |
177 | case OP_KETRMAX: | |
178 | case OP_KETRMIN: | |
179 | return SSB_CONTINUE; | |
8ac170f3 | 180 | |
6bf342e1 PH |
181 | /* Skip over callout */ |
182 | ||
183 | case OP_CALLOUT: | |
184 | tcode += 2 + 2*LINK_SIZE; | |
8ac170f3 PH |
185 | break; |
186 | ||
187 | /* Skip over lookbehind and negative lookahead assertions */ | |
188 | ||
189 | case OP_ASSERT_NOT: | |
190 | case OP_ASSERTBACK: | |
191 | case OP_ASSERTBACK_NOT: | |
192 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); | |
6bf342e1 | 193 | tcode += 1 + LINK_SIZE; |
8ac170f3 PH |
194 | break; |
195 | ||
196 | /* Skip over an option setting, changing the caseless flag */ | |
197 | ||
198 | case OP_OPT: | |
199 | caseless = (tcode[1] & PCRE_CASELESS) != 0; | |
200 | tcode += 2; | |
201 | break; | |
202 | ||
203 | /* BRAZERO does the bracket, but carries on. */ | |
204 | ||
205 | case OP_BRAZERO: | |
206 | case OP_BRAMINZERO: | |
6bf342e1 PH |
207 | if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL) |
208 | return SSB_FAIL; | |
aa41d2de PH |
209 | /* ========================================================================= |
210 | See the comment at the head of this function concerning the next line, | |
211 | which was an old fudge for the benefit of OS/2. | |
8ac170f3 | 212 | dummy = 1; |
aa41d2de | 213 | ========================================================================= */ |
8ac170f3 | 214 | do tcode += GET(tcode,1); while (*tcode == OP_ALT); |
6bf342e1 | 215 | tcode += 1 + LINK_SIZE; |
8ac170f3 PH |
216 | break; |
217 | ||
218 | /* Single-char * or ? sets the bit and tries the next item */ | |
219 | ||
220 | case OP_STAR: | |
221 | case OP_MINSTAR: | |
6bf342e1 | 222 | case OP_POSSTAR: |
8ac170f3 PH |
223 | case OP_QUERY: |
224 | case OP_MINQUERY: | |
6bf342e1 | 225 | case OP_POSQUERY: |
8ac170f3 PH |
226 | set_bit(start_bits, tcode[1], caseless, cd); |
227 | tcode += 2; | |
228 | #ifdef SUPPORT_UTF8 | |
6bf342e1 PH |
229 | if (utf8 && tcode[-1] >= 0xc0) |
230 | tcode += _pcre_utf8_table4[tcode[-1] & 0x3f]; | |
8ac170f3 PH |
231 | #endif |
232 | break; | |
233 | ||
234 | /* Single-char upto sets the bit and tries the next */ | |
235 | ||
236 | case OP_UPTO: | |
237 | case OP_MINUPTO: | |
6bf342e1 | 238 | case OP_POSUPTO: |
8ac170f3 PH |
239 | set_bit(start_bits, tcode[3], caseless, cd); |
240 | tcode += 4; | |
241 | #ifdef SUPPORT_UTF8 | |
6bf342e1 PH |
242 | if (utf8 && tcode[-1] >= 0xc0) |
243 | tcode += _pcre_utf8_table4[tcode[-1] & 0x3f]; | |
8ac170f3 PH |
244 | #endif |
245 | break; | |
246 | ||
247 | /* At least one single char sets the bit and stops */ | |
248 | ||
249 | case OP_EXACT: /* Fall through */ | |
250 | tcode += 2; | |
251 | ||
252 | case OP_CHAR: | |
253 | case OP_CHARNC: | |
254 | case OP_PLUS: | |
255 | case OP_MINPLUS: | |
6bf342e1 | 256 | case OP_POSPLUS: |
8ac170f3 PH |
257 | set_bit(start_bits, tcode[1], caseless, cd); |
258 | try_next = FALSE; | |
259 | break; | |
260 | ||
261 | /* Single character type sets the bits and stops */ | |
262 | ||
263 | case OP_NOT_DIGIT: | |
264 | for (c = 0; c < 32; c++) | |
265 | start_bits[c] |= ~cd->cbits[c+cbit_digit]; | |
266 | try_next = FALSE; | |
267 | break; | |
268 | ||
269 | case OP_DIGIT: | |
270 | for (c = 0; c < 32; c++) | |
271 | start_bits[c] |= cd->cbits[c+cbit_digit]; | |
272 | try_next = FALSE; | |
273 | break; | |
274 | ||
aa41d2de PH |
275 | /* The cbit_space table has vertical tab as whitespace; we have to |
276 | discard it. */ | |
277 | ||
8ac170f3 PH |
278 | case OP_NOT_WHITESPACE: |
279 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
280 | { |
281 | int d = cd->cbits[c+cbit_space]; | |
282 | if (c == 1) d &= ~0x08; | |
283 | start_bits[c] |= ~d; | |
284 | } | |
8ac170f3 PH |
285 | try_next = FALSE; |
286 | break; | |
287 | ||
aa41d2de PH |
288 | /* The cbit_space table has vertical tab as whitespace; we have to |
289 | discard it. */ | |
290 | ||
8ac170f3 PH |
291 | case OP_WHITESPACE: |
292 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
293 | { |
294 | int d = cd->cbits[c+cbit_space]; | |
295 | if (c == 1) d &= ~0x08; | |
296 | start_bits[c] |= d; | |
297 | } | |
8ac170f3 PH |
298 | try_next = FALSE; |
299 | break; | |
300 | ||
301 | case OP_NOT_WORDCHAR: | |
302 | for (c = 0; c < 32; c++) | |
303 | start_bits[c] |= ~cd->cbits[c+cbit_word]; | |
304 | try_next = FALSE; | |
305 | break; | |
306 | ||
307 | case OP_WORDCHAR: | |
308 | for (c = 0; c < 32; c++) | |
309 | start_bits[c] |= cd->cbits[c+cbit_word]; | |
310 | try_next = FALSE; | |
311 | break; | |
312 | ||
313 | /* One or more character type fudges the pointer and restarts, knowing | |
314 | it will hit a single character type and stop there. */ | |
315 | ||
316 | case OP_TYPEPLUS: | |
317 | case OP_TYPEMINPLUS: | |
318 | tcode++; | |
319 | break; | |
320 | ||
321 | case OP_TYPEEXACT: | |
322 | tcode += 3; | |
323 | break; | |
324 | ||
325 | /* Zero or more repeats of character types set the bits and then | |
326 | try again. */ | |
327 | ||
328 | case OP_TYPEUPTO: | |
329 | case OP_TYPEMINUPTO: | |
6bf342e1 | 330 | case OP_TYPEPOSUPTO: |
8ac170f3 PH |
331 | tcode += 2; /* Fall through */ |
332 | ||
333 | case OP_TYPESTAR: | |
334 | case OP_TYPEMINSTAR: | |
6bf342e1 | 335 | case OP_TYPEPOSSTAR: |
8ac170f3 PH |
336 | case OP_TYPEQUERY: |
337 | case OP_TYPEMINQUERY: | |
6bf342e1 | 338 | case OP_TYPEPOSQUERY: |
8ac170f3 PH |
339 | switch(tcode[1]) |
340 | { | |
341 | case OP_ANY: | |
6bf342e1 | 342 | return SSB_FAIL; |
8ac170f3 PH |
343 | |
344 | case OP_NOT_DIGIT: | |
345 | for (c = 0; c < 32; c++) | |
346 | start_bits[c] |= ~cd->cbits[c+cbit_digit]; | |
347 | break; | |
348 | ||
349 | case OP_DIGIT: | |
350 | for (c = 0; c < 32; c++) | |
351 | start_bits[c] |= cd->cbits[c+cbit_digit]; | |
352 | break; | |
353 | ||
aa41d2de PH |
354 | /* The cbit_space table has vertical tab as whitespace; we have to |
355 | discard it. */ | |
356 | ||
8ac170f3 PH |
357 | case OP_NOT_WHITESPACE: |
358 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
359 | { |
360 | int d = cd->cbits[c+cbit_space]; | |
361 | if (c == 1) d &= ~0x08; | |
362 | start_bits[c] |= ~d; | |
363 | } | |
8ac170f3 PH |
364 | break; |
365 | ||
aa41d2de PH |
366 | /* The cbit_space table has vertical tab as whitespace; we have to |
367 | discard it. */ | |
368 | ||
8ac170f3 PH |
369 | case OP_WHITESPACE: |
370 | for (c = 0; c < 32; c++) | |
aa41d2de PH |
371 | { |
372 | int d = cd->cbits[c+cbit_space]; | |
373 | if (c == 1) d &= ~0x08; | |
374 | start_bits[c] |= d; | |
375 | } | |
8ac170f3 PH |
376 | break; |
377 | ||
378 | case OP_NOT_WORDCHAR: | |
379 | for (c = 0; c < 32; c++) | |
380 | start_bits[c] |= ~cd->cbits[c+cbit_word]; | |
381 | break; | |
382 | ||
383 | case OP_WORDCHAR: | |
384 | for (c = 0; c < 32; c++) | |
385 | start_bits[c] |= cd->cbits[c+cbit_word]; | |
386 | break; | |
387 | } | |
388 | ||
389 | tcode += 2; | |
390 | break; | |
391 | ||
392 | /* Character class where all the information is in a bit map: set the | |
393 | bits and either carry on or not, according to the repeat count. If it was | |
394 | a negative class, and we are operating with UTF-8 characters, any byte | |
395 | with a value >= 0xc4 is a potentially valid starter because it starts a | |
396 | character with a value > 255. */ | |
397 | ||
398 | case OP_NCLASS: | |
399 | if (utf8) | |
400 | { | |
401 | start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ | |
402 | memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ | |
403 | } | |
404 | /* Fall through */ | |
405 | ||
406 | case OP_CLASS: | |
407 | { | |
408 | tcode++; | |
409 | ||
410 | /* In UTF-8 mode, the bits in a bit map correspond to character | |
411 | values, not to byte values. However, the bit map we are constructing is | |
412 | for byte values. So we have to do a conversion for characters whose | |
413 | value is > 127. In fact, there are only two possible starting bytes for | |
414 | characters in the range 128 - 255. */ | |
415 | ||
416 | if (utf8) | |
417 | { | |
418 | for (c = 0; c < 16; c++) start_bits[c] |= tcode[c]; | |
419 | for (c = 128; c < 256; c++) | |
420 | { | |
421 | if ((tcode[c/8] && (1 << (c&7))) != 0) | |
422 | { | |
423 | int d = (c >> 6) | 0xc0; /* Set bit for this starter */ | |
424 | start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ | |
425 | c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ | |
426 | } | |
427 | } | |
428 | } | |
429 | ||
430 | /* In non-UTF-8 mode, the two bit maps are completely compatible. */ | |
431 | ||
432 | else | |
433 | { | |
434 | for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; | |
435 | } | |
436 | ||
437 | /* Advance past the bit map, and act on what follows */ | |
438 | ||
439 | tcode += 32; | |
440 | switch (*tcode) | |
441 | { | |
442 | case OP_CRSTAR: | |
443 | case OP_CRMINSTAR: | |
444 | case OP_CRQUERY: | |
445 | case OP_CRMINQUERY: | |
446 | tcode++; | |
447 | break; | |
448 | ||
449 | case OP_CRRANGE: | |
450 | case OP_CRMINRANGE: | |
451 | if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; | |
452 | else try_next = FALSE; | |
453 | break; | |
454 | ||
455 | default: | |
456 | try_next = FALSE; | |
457 | break; | |
458 | } | |
459 | } | |
460 | break; /* End of bitmap class handling */ | |
461 | ||
462 | } /* End of switch */ | |
463 | } /* End of try_next loop */ | |
464 | ||
465 | code += GET(code, 1); /* Advance to next branch */ | |
466 | } | |
467 | while (*code == OP_ALT); | |
6bf342e1 | 468 | return yield; |
8ac170f3 PH |
469 | } |
470 | ||
471 | ||
472 | ||
473 | /************************************************* | |
474 | * Study a compiled expression * | |
475 | *************************************************/ | |
476 | ||
477 | /* This function is handed a compiled expression that it must study to produce | |
478 | information that will speed up the matching. It returns a pcre_extra block | |
479 | which then gets handed back to pcre_exec(). | |
480 | ||
481 | Arguments: | |
482 | re points to the compiled expression | |
483 | options contains option bits | |
484 | errorptr points to where to place error messages; | |
485 | set NULL unless error | |
486 | ||
487 | Returns: pointer to a pcre_extra block, with study_data filled in and the | |
488 | appropriate flag set; | |
489 | NULL on error or if no optimization possible | |
490 | */ | |
491 | ||
aa41d2de | 492 | PCRE_DATA_SCOPE pcre_extra * |
8ac170f3 PH |
493 | pcre_study(const pcre *external_re, int options, const char **errorptr) |
494 | { | |
495 | uschar start_bits[32]; | |
496 | pcre_extra *extra; | |
497 | pcre_study_data *study; | |
498 | const uschar *tables; | |
aa41d2de | 499 | uschar *code; |
8ac170f3 | 500 | compile_data compile_block; |
aa41d2de | 501 | const real_pcre *re = (const real_pcre *)external_re; |
8ac170f3 PH |
502 | |
503 | *errorptr = NULL; | |
504 | ||
505 | if (re == NULL || re->magic_number != MAGIC_NUMBER) | |
506 | { | |
507 | *errorptr = "argument is not a compiled regular expression"; | |
508 | return NULL; | |
509 | } | |
510 | ||
511 | if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) | |
512 | { | |
513 | *errorptr = "unknown or incorrect option bit(s) set"; | |
514 | return NULL; | |
515 | } | |
516 | ||
aa41d2de PH |
517 | code = (uschar *)re + re->name_table_offset + |
518 | (re->name_count * re->name_entry_size); | |
519 | ||
8ac170f3 PH |
520 | /* For an anchored pattern, or an unanchored pattern that has a first char, or |
521 | a multiline pattern that matches only at "line starts", no further processing | |
522 | at present. */ | |
523 | ||
524 | if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) | |
525 | return NULL; | |
526 | ||
527 | /* Set the character tables in the block that is passed around */ | |
528 | ||
529 | tables = re->tables; | |
530 | if (tables == NULL) | |
531 | (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, | |
532 | (void *)(&tables)); | |
533 | ||
534 | compile_block.lcc = tables + lcc_offset; | |
535 | compile_block.fcc = tables + fcc_offset; | |
536 | compile_block.cbits = tables + cbits_offset; | |
537 | compile_block.ctypes = tables + ctypes_offset; | |
538 | ||
539 | /* See if we can find a fixed set of initial characters for the pattern. */ | |
540 | ||
541 | memset(start_bits, 0, 32 * sizeof(uschar)); | |
6bf342e1 PH |
542 | if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0, |
543 | (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL; | |
8ac170f3 PH |
544 | |
545 | /* Get a pcre_extra block and a pcre_study_data block. The study data is put in | |
546 | the latter, which is pointed to by the former, which may also get additional | |
547 | data set later by the calling program. At the moment, the size of | |
548 | pcre_study_data is fixed. We nevertheless save it in a field for returning via | |
549 | the pcre_fullinfo() function so that if it becomes variable in the future, we | |
550 | don't have to change that code. */ | |
551 | ||
552 | extra = (pcre_extra *)(pcre_malloc) | |
553 | (sizeof(pcre_extra) + sizeof(pcre_study_data)); | |
554 | ||
555 | if (extra == NULL) | |
556 | { | |
557 | *errorptr = "failed to get memory"; | |
558 | return NULL; | |
559 | } | |
560 | ||
561 | study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); | |
562 | extra->flags = PCRE_EXTRA_STUDY_DATA; | |
563 | extra->study_data = study; | |
564 | ||
565 | study->size = sizeof(pcre_study_data); | |
566 | study->options = PCRE_STUDY_MAPPED; | |
567 | memcpy(study->start_bits, start_bits, sizeof(start_bits)); | |
568 | ||
569 | return extra; | |
570 | } | |
571 | ||
572 | /* End of pcre_study.c */ |