| 1 | /* $Cambridge: exim/src/src/pcre/pcre_study.c,v 1.6 2007/11/12 13:02:20 nm4 Exp $ */ |
| 2 | |
| 3 | /************************************************* |
| 4 | * Perl-Compatible Regular Expressions * |
| 5 | *************************************************/ |
| 6 | |
| 7 | /* PCRE is a library of functions to support regular expressions whose syntax |
| 8 | and semantics are as close as possible to those of the Perl 5 language. |
| 9 | |
| 10 | Written by Philip Hazel |
| 11 | Copyright (c) 1997-2007 University of Cambridge |
| 12 | |
| 13 | ----------------------------------------------------------------------------- |
| 14 | Redistribution and use in source and binary forms, with or without |
| 15 | modification, are permitted provided that the following conditions are met: |
| 16 | |
| 17 | * Redistributions of source code must retain the above copyright notice, |
| 18 | this list of conditions and the following disclaimer. |
| 19 | |
| 20 | * Redistributions in binary form must reproduce the above copyright |
| 21 | notice, this list of conditions and the following disclaimer in the |
| 22 | documentation and/or other materials provided with the distribution. |
| 23 | |
| 24 | * Neither the name of the University of Cambridge nor the names of its |
| 25 | contributors may be used to endorse or promote products derived from |
| 26 | this software without specific prior written permission. |
| 27 | |
| 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 38 | POSSIBILITY OF SUCH DAMAGE. |
| 39 | ----------------------------------------------------------------------------- |
| 40 | */ |
| 41 | |
| 42 | |
| 43 | /* This module contains the external function pcre_study(), along with local |
| 44 | supporting functions. */ |
| 45 | |
| 46 | |
| 47 | #ifdef HAVE_CONFIG_H |
| 48 | #include "config.h" |
| 49 | #endif |
| 50 | |
| 51 | #include "pcre_internal.h" |
| 52 | |
| 53 | |
| 54 | /* Returns from set_start_bits() */ |
| 55 | |
| 56 | enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE }; |
| 57 | |
| 58 | |
| 59 | /************************************************* |
| 60 | * Set a bit and maybe its alternate case * |
| 61 | *************************************************/ |
| 62 | |
| 63 | /* Given a character, set its bit in the table, and also the bit for the other |
| 64 | version of a letter if we are caseless. |
| 65 | |
| 66 | Arguments: |
| 67 | start_bits points to the bit map |
| 68 | c is the character |
| 69 | caseless the caseless flag |
| 70 | cd the block with char table pointers |
| 71 | |
| 72 | Returns: nothing |
| 73 | */ |
| 74 | |
| 75 | static void |
| 76 | set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd) |
| 77 | { |
| 78 | start_bits[c/8] |= (1 << (c&7)); |
| 79 | if (caseless && (cd->ctypes[c] & ctype_letter) != 0) |
| 80 | start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7)); |
| 81 | } |
| 82 | |
| 83 | |
| 84 | |
| 85 | /************************************************* |
| 86 | * Create bitmap of starting bytes * |
| 87 | *************************************************/ |
| 88 | |
| 89 | /* This function scans a compiled unanchored expression recursively and |
| 90 | attempts to build a bitmap of the set of possible starting bytes. As time goes |
| 91 | by, we may be able to get more clever at doing this. The SSB_CONTINUE return is |
| 92 | useful for parenthesized groups in patterns such as (a*)b where the group |
| 93 | provides some optional starting bytes but scanning must continue at the outer |
| 94 | level to find at least one mandatory byte. At the outermost level, this |
| 95 | function fails unless the result is SSB_DONE. |
| 96 | |
| 97 | Arguments: |
| 98 | code points to an expression |
| 99 | start_bits points to a 32-byte table, initialized to 0 |
| 100 | caseless the current state of the caseless flag |
| 101 | utf8 TRUE if in UTF-8 mode |
| 102 | cd the block with char table pointers |
| 103 | |
| 104 | Returns: SSB_FAIL => Failed to find any starting bytes |
| 105 | SSB_DONE => Found mandatory starting bytes |
| 106 | SSB_CONTINUE => Found optional starting bytes |
| 107 | */ |
| 108 | |
| 109 | static int |
| 110 | set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, |
| 111 | BOOL utf8, compile_data *cd) |
| 112 | { |
| 113 | register int c; |
| 114 | int yield = SSB_DONE; |
| 115 | |
| 116 | #if 0 |
| 117 | /* ========================================================================= */ |
| 118 | /* The following comment and code was inserted in January 1999. In May 2006, |
| 119 | when it was observed to cause compiler warnings about unused values, I took it |
| 120 | out again. If anybody is still using OS/2, they will have to put it back |
| 121 | manually. */ |
| 122 | |
| 123 | /* This next statement and the later reference to dummy are here in order to |
| 124 | trick the optimizer of the IBM C compiler for OS/2 into generating correct |
| 125 | code. Apparently IBM isn't going to fix the problem, and we would rather not |
| 126 | disable optimization (in this module it actually makes a big difference, and |
| 127 | the pcre module can use all the optimization it can get). */ |
| 128 | |
| 129 | volatile int dummy; |
| 130 | /* ========================================================================= */ |
| 131 | #endif |
| 132 | |
| 133 | do |
| 134 | { |
| 135 | const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE; |
| 136 | BOOL try_next = TRUE; |
| 137 | |
| 138 | while (try_next) /* Loop for items in this branch */ |
| 139 | { |
| 140 | int rc; |
| 141 | switch(*tcode) |
| 142 | { |
| 143 | /* Fail if we reach something we don't understand */ |
| 144 | |
| 145 | default: |
| 146 | return SSB_FAIL; |
| 147 | |
| 148 | /* If we hit a bracket or a positive lookahead assertion, recurse to set |
| 149 | bits from within the subpattern. If it can't find anything, we have to |
| 150 | give up. If it finds some mandatory character(s), we are done for this |
| 151 | branch. Otherwise, carry on scanning after the subpattern. */ |
| 152 | |
| 153 | case OP_BRA: |
| 154 | case OP_SBRA: |
| 155 | case OP_CBRA: |
| 156 | case OP_SCBRA: |
| 157 | case OP_ONCE: |
| 158 | case OP_ASSERT: |
| 159 | rc = set_start_bits(tcode, start_bits, caseless, utf8, cd); |
| 160 | if (rc == SSB_FAIL) return SSB_FAIL; |
| 161 | if (rc == SSB_DONE) try_next = FALSE; else |
| 162 | { |
| 163 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); |
| 164 | tcode += 1 + LINK_SIZE; |
| 165 | } |
| 166 | break; |
| 167 | |
| 168 | /* If we hit ALT or KET, it means we haven't found anything mandatory in |
| 169 | this branch, though we might have found something optional. For ALT, we |
| 170 | continue with the next alternative, but we have to arrange that the final |
| 171 | result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET, |
| 172 | return SSB_CONTINUE: if this is the top level, that indicates failure, |
| 173 | but after a nested subpattern, it causes scanning to continue. */ |
| 174 | |
| 175 | case OP_ALT: |
| 176 | yield = SSB_CONTINUE; |
| 177 | try_next = FALSE; |
| 178 | break; |
| 179 | |
| 180 | case OP_KET: |
| 181 | case OP_KETRMAX: |
| 182 | case OP_KETRMIN: |
| 183 | return SSB_CONTINUE; |
| 184 | |
| 185 | /* Skip over callout */ |
| 186 | |
| 187 | case OP_CALLOUT: |
| 188 | tcode += 2 + 2*LINK_SIZE; |
| 189 | break; |
| 190 | |
| 191 | /* Skip over lookbehind and negative lookahead assertions */ |
| 192 | |
| 193 | case OP_ASSERT_NOT: |
| 194 | case OP_ASSERTBACK: |
| 195 | case OP_ASSERTBACK_NOT: |
| 196 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); |
| 197 | tcode += 1 + LINK_SIZE; |
| 198 | break; |
| 199 | |
| 200 | /* Skip over an option setting, changing the caseless flag */ |
| 201 | |
| 202 | case OP_OPT: |
| 203 | caseless = (tcode[1] & PCRE_CASELESS) != 0; |
| 204 | tcode += 2; |
| 205 | break; |
| 206 | |
| 207 | /* BRAZERO does the bracket, but carries on. */ |
| 208 | |
| 209 | case OP_BRAZERO: |
| 210 | case OP_BRAMINZERO: |
| 211 | if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL) |
| 212 | return SSB_FAIL; |
| 213 | /* ========================================================================= |
| 214 | See the comment at the head of this function concerning the next line, |
| 215 | which was an old fudge for the benefit of OS/2. |
| 216 | dummy = 1; |
| 217 | ========================================================================= */ |
| 218 | do tcode += GET(tcode,1); while (*tcode == OP_ALT); |
| 219 | tcode += 1 + LINK_SIZE; |
| 220 | break; |
| 221 | |
| 222 | /* Single-char * or ? sets the bit and tries the next item */ |
| 223 | |
| 224 | case OP_STAR: |
| 225 | case OP_MINSTAR: |
| 226 | case OP_POSSTAR: |
| 227 | case OP_QUERY: |
| 228 | case OP_MINQUERY: |
| 229 | case OP_POSQUERY: |
| 230 | set_bit(start_bits, tcode[1], caseless, cd); |
| 231 | tcode += 2; |
| 232 | #ifdef SUPPORT_UTF8 |
| 233 | if (utf8 && tcode[-1] >= 0xc0) |
| 234 | tcode += _pcre_utf8_table4[tcode[-1] & 0x3f]; |
| 235 | #endif |
| 236 | break; |
| 237 | |
| 238 | /* Single-char upto sets the bit and tries the next */ |
| 239 | |
| 240 | case OP_UPTO: |
| 241 | case OP_MINUPTO: |
| 242 | case OP_POSUPTO: |
| 243 | set_bit(start_bits, tcode[3], caseless, cd); |
| 244 | tcode += 4; |
| 245 | #ifdef SUPPORT_UTF8 |
| 246 | if (utf8 && tcode[-1] >= 0xc0) |
| 247 | tcode += _pcre_utf8_table4[tcode[-1] & 0x3f]; |
| 248 | #endif |
| 249 | break; |
| 250 | |
| 251 | /* At least one single char sets the bit and stops */ |
| 252 | |
| 253 | case OP_EXACT: /* Fall through */ |
| 254 | tcode += 2; |
| 255 | |
| 256 | case OP_CHAR: |
| 257 | case OP_CHARNC: |
| 258 | case OP_PLUS: |
| 259 | case OP_MINPLUS: |
| 260 | case OP_POSPLUS: |
| 261 | set_bit(start_bits, tcode[1], caseless, cd); |
| 262 | try_next = FALSE; |
| 263 | break; |
| 264 | |
| 265 | /* Single character type sets the bits and stops */ |
| 266 | |
| 267 | case OP_NOT_DIGIT: |
| 268 | for (c = 0; c < 32; c++) |
| 269 | start_bits[c] |= ~cd->cbits[c+cbit_digit]; |
| 270 | try_next = FALSE; |
| 271 | break; |
| 272 | |
| 273 | case OP_DIGIT: |
| 274 | for (c = 0; c < 32; c++) |
| 275 | start_bits[c] |= cd->cbits[c+cbit_digit]; |
| 276 | try_next = FALSE; |
| 277 | break; |
| 278 | |
| 279 | /* The cbit_space table has vertical tab as whitespace; we have to |
| 280 | discard it. */ |
| 281 | |
| 282 | case OP_NOT_WHITESPACE: |
| 283 | for (c = 0; c < 32; c++) |
| 284 | { |
| 285 | int d = cd->cbits[c+cbit_space]; |
| 286 | if (c == 1) d &= ~0x08; |
| 287 | start_bits[c] |= ~d; |
| 288 | } |
| 289 | try_next = FALSE; |
| 290 | break; |
| 291 | |
| 292 | /* The cbit_space table has vertical tab as whitespace; we have to |
| 293 | discard it. */ |
| 294 | |
| 295 | case OP_WHITESPACE: |
| 296 | for (c = 0; c < 32; c++) |
| 297 | { |
| 298 | int d = cd->cbits[c+cbit_space]; |
| 299 | if (c == 1) d &= ~0x08; |
| 300 | start_bits[c] |= d; |
| 301 | } |
| 302 | try_next = FALSE; |
| 303 | break; |
| 304 | |
| 305 | case OP_NOT_WORDCHAR: |
| 306 | for (c = 0; c < 32; c++) |
| 307 | start_bits[c] |= ~cd->cbits[c+cbit_word]; |
| 308 | try_next = FALSE; |
| 309 | break; |
| 310 | |
| 311 | case OP_WORDCHAR: |
| 312 | for (c = 0; c < 32; c++) |
| 313 | start_bits[c] |= cd->cbits[c+cbit_word]; |
| 314 | try_next = FALSE; |
| 315 | break; |
| 316 | |
| 317 | /* One or more character type fudges the pointer and restarts, knowing |
| 318 | it will hit a single character type and stop there. */ |
| 319 | |
| 320 | case OP_TYPEPLUS: |
| 321 | case OP_TYPEMINPLUS: |
| 322 | tcode++; |
| 323 | break; |
| 324 | |
| 325 | case OP_TYPEEXACT: |
| 326 | tcode += 3; |
| 327 | break; |
| 328 | |
| 329 | /* Zero or more repeats of character types set the bits and then |
| 330 | try again. */ |
| 331 | |
| 332 | case OP_TYPEUPTO: |
| 333 | case OP_TYPEMINUPTO: |
| 334 | case OP_TYPEPOSUPTO: |
| 335 | tcode += 2; /* Fall through */ |
| 336 | |
| 337 | case OP_TYPESTAR: |
| 338 | case OP_TYPEMINSTAR: |
| 339 | case OP_TYPEPOSSTAR: |
| 340 | case OP_TYPEQUERY: |
| 341 | case OP_TYPEMINQUERY: |
| 342 | case OP_TYPEPOSQUERY: |
| 343 | switch(tcode[1]) |
| 344 | { |
| 345 | case OP_ANY: |
| 346 | return SSB_FAIL; |
| 347 | |
| 348 | case OP_NOT_DIGIT: |
| 349 | for (c = 0; c < 32; c++) |
| 350 | start_bits[c] |= ~cd->cbits[c+cbit_digit]; |
| 351 | break; |
| 352 | |
| 353 | case OP_DIGIT: |
| 354 | for (c = 0; c < 32; c++) |
| 355 | start_bits[c] |= cd->cbits[c+cbit_digit]; |
| 356 | break; |
| 357 | |
| 358 | /* The cbit_space table has vertical tab as whitespace; we have to |
| 359 | discard it. */ |
| 360 | |
| 361 | case OP_NOT_WHITESPACE: |
| 362 | for (c = 0; c < 32; c++) |
| 363 | { |
| 364 | int d = cd->cbits[c+cbit_space]; |
| 365 | if (c == 1) d &= ~0x08; |
| 366 | start_bits[c] |= ~d; |
| 367 | } |
| 368 | break; |
| 369 | |
| 370 | /* The cbit_space table has vertical tab as whitespace; we have to |
| 371 | discard it. */ |
| 372 | |
| 373 | case OP_WHITESPACE: |
| 374 | for (c = 0; c < 32; c++) |
| 375 | { |
| 376 | int d = cd->cbits[c+cbit_space]; |
| 377 | if (c == 1) d &= ~0x08; |
| 378 | start_bits[c] |= d; |
| 379 | } |
| 380 | break; |
| 381 | |
| 382 | case OP_NOT_WORDCHAR: |
| 383 | for (c = 0; c < 32; c++) |
| 384 | start_bits[c] |= ~cd->cbits[c+cbit_word]; |
| 385 | break; |
| 386 | |
| 387 | case OP_WORDCHAR: |
| 388 | for (c = 0; c < 32; c++) |
| 389 | start_bits[c] |= cd->cbits[c+cbit_word]; |
| 390 | break; |
| 391 | } |
| 392 | |
| 393 | tcode += 2; |
| 394 | break; |
| 395 | |
| 396 | /* Character class where all the information is in a bit map: set the |
| 397 | bits and either carry on or not, according to the repeat count. If it was |
| 398 | a negative class, and we are operating with UTF-8 characters, any byte |
| 399 | with a value >= 0xc4 is a potentially valid starter because it starts a |
| 400 | character with a value > 255. */ |
| 401 | |
| 402 | case OP_NCLASS: |
| 403 | #ifdef SUPPORT_UTF8 |
| 404 | if (utf8) |
| 405 | { |
| 406 | start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ |
| 407 | memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ |
| 408 | } |
| 409 | #endif |
| 410 | /* Fall through */ |
| 411 | |
| 412 | case OP_CLASS: |
| 413 | { |
| 414 | tcode++; |
| 415 | |
| 416 | /* In UTF-8 mode, the bits in a bit map correspond to character |
| 417 | values, not to byte values. However, the bit map we are constructing is |
| 418 | for byte values. So we have to do a conversion for characters whose |
| 419 | value is > 127. In fact, there are only two possible starting bytes for |
| 420 | characters in the range 128 - 255. */ |
| 421 | |
| 422 | #ifdef SUPPORT_UTF8 |
| 423 | if (utf8) |
| 424 | { |
| 425 | for (c = 0; c < 16; c++) start_bits[c] |= tcode[c]; |
| 426 | for (c = 128; c < 256; c++) |
| 427 | { |
| 428 | if ((tcode[c/8] && (1 << (c&7))) != 0) |
| 429 | { |
| 430 | int d = (c >> 6) | 0xc0; /* Set bit for this starter */ |
| 431 | start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ |
| 432 | c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ |
| 433 | } |
| 434 | } |
| 435 | } |
| 436 | |
| 437 | /* In non-UTF-8 mode, the two bit maps are completely compatible. */ |
| 438 | |
| 439 | else |
| 440 | #endif |
| 441 | { |
| 442 | for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; |
| 443 | } |
| 444 | |
| 445 | /* Advance past the bit map, and act on what follows */ |
| 446 | |
| 447 | tcode += 32; |
| 448 | switch (*tcode) |
| 449 | { |
| 450 | case OP_CRSTAR: |
| 451 | case OP_CRMINSTAR: |
| 452 | case OP_CRQUERY: |
| 453 | case OP_CRMINQUERY: |
| 454 | tcode++; |
| 455 | break; |
| 456 | |
| 457 | case OP_CRRANGE: |
| 458 | case OP_CRMINRANGE: |
| 459 | if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; |
| 460 | else try_next = FALSE; |
| 461 | break; |
| 462 | |
| 463 | default: |
| 464 | try_next = FALSE; |
| 465 | break; |
| 466 | } |
| 467 | } |
| 468 | break; /* End of bitmap class handling */ |
| 469 | |
| 470 | } /* End of switch */ |
| 471 | } /* End of try_next loop */ |
| 472 | |
| 473 | code += GET(code, 1); /* Advance to next branch */ |
| 474 | } |
| 475 | while (*code == OP_ALT); |
| 476 | return yield; |
| 477 | } |
| 478 | |
| 479 | |
| 480 | |
| 481 | /************************************************* |
| 482 | * Study a compiled expression * |
| 483 | *************************************************/ |
| 484 | |
| 485 | /* This function is handed a compiled expression that it must study to produce |
| 486 | information that will speed up the matching. It returns a pcre_extra block |
| 487 | which then gets handed back to pcre_exec(). |
| 488 | |
| 489 | Arguments: |
| 490 | re points to the compiled expression |
| 491 | options contains option bits |
| 492 | errorptr points to where to place error messages; |
| 493 | set NULL unless error |
| 494 | |
| 495 | Returns: pointer to a pcre_extra block, with study_data filled in and the |
| 496 | appropriate flag set; |
| 497 | NULL on error or if no optimization possible |
| 498 | */ |
| 499 | |
| 500 | PCRE_EXP_DEFN pcre_extra * |
| 501 | pcre_study(const pcre *external_re, int options, const char **errorptr) |
| 502 | { |
| 503 | uschar start_bits[32]; |
| 504 | pcre_extra *extra; |
| 505 | pcre_study_data *study; |
| 506 | const uschar *tables; |
| 507 | uschar *code; |
| 508 | compile_data compile_block; |
| 509 | const real_pcre *re = (const real_pcre *)external_re; |
| 510 | |
| 511 | *errorptr = NULL; |
| 512 | |
| 513 | if (re == NULL || re->magic_number != MAGIC_NUMBER) |
| 514 | { |
| 515 | *errorptr = "argument is not a compiled regular expression"; |
| 516 | return NULL; |
| 517 | } |
| 518 | |
| 519 | if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) |
| 520 | { |
| 521 | *errorptr = "unknown or incorrect option bit(s) set"; |
| 522 | return NULL; |
| 523 | } |
| 524 | |
| 525 | code = (uschar *)re + re->name_table_offset + |
| 526 | (re->name_count * re->name_entry_size); |
| 527 | |
| 528 | /* For an anchored pattern, or an unanchored pattern that has a first char, or |
| 529 | a multiline pattern that matches only at "line starts", no further processing |
| 530 | at present. */ |
| 531 | |
| 532 | if ((re->options & PCRE_ANCHORED) != 0 || |
| 533 | (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) |
| 534 | return NULL; |
| 535 | |
| 536 | /* Set the character tables in the block that is passed around */ |
| 537 | |
| 538 | tables = re->tables; |
| 539 | if (tables == NULL) |
| 540 | (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
| 541 | (void *)(&tables)); |
| 542 | |
| 543 | compile_block.lcc = tables + lcc_offset; |
| 544 | compile_block.fcc = tables + fcc_offset; |
| 545 | compile_block.cbits = tables + cbits_offset; |
| 546 | compile_block.ctypes = tables + ctypes_offset; |
| 547 | |
| 548 | /* See if we can find a fixed set of initial characters for the pattern. */ |
| 549 | |
| 550 | memset(start_bits, 0, 32 * sizeof(uschar)); |
| 551 | if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0, |
| 552 | (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL; |
| 553 | |
| 554 | /* Get a pcre_extra block and a pcre_study_data block. The study data is put in |
| 555 | the latter, which is pointed to by the former, which may also get additional |
| 556 | data set later by the calling program. At the moment, the size of |
| 557 | pcre_study_data is fixed. We nevertheless save it in a field for returning via |
| 558 | the pcre_fullinfo() function so that if it becomes variable in the future, we |
| 559 | don't have to change that code. */ |
| 560 | |
| 561 | extra = (pcre_extra *)(pcre_malloc) |
| 562 | (sizeof(pcre_extra) + sizeof(pcre_study_data)); |
| 563 | |
| 564 | if (extra == NULL) |
| 565 | { |
| 566 | *errorptr = "failed to get memory"; |
| 567 | return NULL; |
| 568 | } |
| 569 | |
| 570 | study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); |
| 571 | extra->flags = PCRE_EXTRA_STUDY_DATA; |
| 572 | extra->study_data = study; |
| 573 | |
| 574 | study->size = sizeof(pcre_study_data); |
| 575 | study->options = PCRE_STUDY_MAPPED; |
| 576 | memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
| 577 | |
| 578 | return extra; |
| 579 | } |
| 580 | |
| 581 | /* End of pcre_study.c */ |