Updated embedded PCRE to version 7.4 to avoid 2 CVE issues:-
[exim.git] / src / src / pcre / pcre_get.c
1 /* $Cambridge: exim/src/src/pcre/pcre_get.c,v 1.6 2007/11/12 13:02:19 nm4 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
9
10 Written by Philip Hazel
11 Copyright (c) 1997-2007 University of Cambridge
12
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
40 */
41
42
43 /* This module contains some convenience functions for extracting substrings
44 from the subject string after a regex match has succeeded. The original idea
45 for these functions came from Scott Wimer. */
46
47
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51
52 #include "pcre_internal.h"
53
54
55 /*************************************************
56 * Find number for named string *
57 *************************************************/
58
59 /* This function is used by the get_first_set() function below, as well
60 as being generally available. It assumes that names are unique.
61
62 Arguments:
63 code the compiled regex
64 stringname the name whose number is required
65
66 Returns: the number of the named parentheses, or a negative number
67 (PCRE_ERROR_NOSUBSTRING) if not found
68 */
69
70 int
71 pcre_get_stringnumber(const pcre *code, const char *stringname)
72 {
73 int rc;
74 int entrysize;
75 int top, bot;
76 uschar *nametable;
77
78 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
79 return rc;
80 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
81
82 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
83 return rc;
84 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
85 return rc;
86
87 bot = 0;
88 while (top > bot)
89 {
90 int mid = (top + bot) / 2;
91 uschar *entry = nametable + entrysize*mid;
92 int c = strcmp(stringname, (char *)(entry + 2));
93 if (c == 0) return (entry[0] << 8) + entry[1];
94 if (c > 0) bot = mid + 1; else top = mid;
95 }
96
97 return PCRE_ERROR_NOSUBSTRING;
98 }
99
100
101
102 /*************************************************
103 * Find (multiple) entries for named string *
104 *************************************************/
105
106 /* This is used by the get_first_set() function below, as well as being
107 generally available. It is used when duplicated names are permitted.
108
109 Arguments:
110 code the compiled regex
111 stringname the name whose entries required
112 firstptr where to put the pointer to the first entry
113 lastptr where to put the pointer to the last entry
114
115 Returns: the length of each entry, or a negative number
116 (PCRE_ERROR_NOSUBSTRING) if not found
117 */
118
119 int
120 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
121 char **firstptr, char **lastptr)
122 {
123 int rc;
124 int entrysize;
125 int top, bot;
126 uschar *nametable, *lastentry;
127
128 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
129 return rc;
130 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
131
132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
133 return rc;
134 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
135 return rc;
136
137 lastentry = nametable + entrysize * (top - 1);
138 bot = 0;
139 while (top > bot)
140 {
141 int mid = (top + bot) / 2;
142 uschar *entry = nametable + entrysize*mid;
143 int c = strcmp(stringname, (char *)(entry + 2));
144 if (c == 0)
145 {
146 uschar *first = entry;
147 uschar *last = entry;
148 while (first > nametable)
149 {
150 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
151 first -= entrysize;
152 }
153 while (last < lastentry)
154 {
155 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
156 last += entrysize;
157 }
158 *firstptr = (char *)first;
159 *lastptr = (char *)last;
160 return entrysize;
161 }
162 if (c > 0) bot = mid + 1; else top = mid;
163 }
164
165 return PCRE_ERROR_NOSUBSTRING;
166 }
167
168
169
170 /*************************************************
171 * Find first set of multiple named strings *
172 *************************************************/
173
174 /* This function allows for duplicate names in the table of named substrings.
175 It returns the number of the first one that was set in a pattern match.
176
177 Arguments:
178 code the compiled regex
179 stringname the name of the capturing substring
180 ovector the vector of matched substrings
181
182 Returns: the number of the first that is set,
183 or the number of the last one if none are set,
184 or a negative number on error
185 */
186
187 static int
188 get_first_set(const pcre *code, const char *stringname, int *ovector)
189 {
190 const real_pcre *re = (const real_pcre *)code;
191 int entrysize;
192 char *first, *last;
193 uschar *entry;
194 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
195 return pcre_get_stringnumber(code, stringname);
196 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
197 if (entrysize <= 0) return entrysize;
198 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
199 {
200 int n = (entry[0] << 8) + entry[1];
201 if (ovector[n*2] >= 0) return n;
202 }
203 return (first[0] << 8) + first[1];
204 }
205
206
207
208
/*************************************************
*      Copy captured string to given buffer      *
*************************************************/

/* This function copies a single captured substring into a given buffer.
Note that we use memcpy() rather than strncpy() in case there are binary zeros
in the string.

The length is taken as the difference between the pair of offsets for the
group. If the end offset lies before the start offset the substring is treated
as empty rather than passing a negative length to memcpy(). A group whose
offsets are equal (including an unset group, whose offsets are both -1) also
yields an empty string.

Arguments:
  subject        the subject string that was matched
  ovector        pointer to the offsets table
  stringcount    the number of substrings that were captured
                   (i.e. the yield of the pcre_exec call, unless
                   that was zero, in which case it should be 1/3
                   of the offset table size)
  stringnumber   the number of the required substring
  buffer         where to put the substring
  size           the size of the buffer

Returns:         if successful:
                   the length of the copied string, not including the zero
                   that is put on the end; can be zero
                 if not successful:
                   PCRE_ERROR_NOMEMORY (-6) buffer too small
                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/

int
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, char *buffer, int size)
{
int start;
int yield;

if (stringnumber < 0 || stringnumber >= stringcount)
  return PCRE_ERROR_NOSUBSTRING;

stringnumber *= 2;
start = ovector[stringnumber];
yield = ovector[stringnumber+1] - start;

/* An end offset before the start offset would give a negative length, which
memcpy() would interpret as a huge unsigned count. Treat it as empty. */

if (yield < 0) yield = 0;

/* Room is needed for the string plus its terminating zero. Written as
size <= yield so that no yield + 1 is computed. */

if (size <= yield) return PCRE_ERROR_NOMEMORY;

/* Skip the copy for empty strings so that no pointer is formed from a
possibly negative start offset. */

if (yield > 0) memcpy(buffer, subject + start, yield);
buffer[yield] = 0;
return yield;
}
250
251
252
253 /*************************************************
254 * Copy named captured string to given buffer *
255 *************************************************/
256
257 /* This function copies a single captured substring into a given buffer,
258 identifying it by name. If the regex permits duplicate names, the first
259 substring that is set is chosen.
260
261 Arguments:
262 code the compiled regex
263 subject the subject string that was matched
264 ovector pointer to the offsets table
265 stringcount the number of substrings that were captured
266 (i.e. the yield of the pcre_exec call, unless
267 that was zero, in which case it should be 1/3
268 of the offset table size)
269 stringname the name of the required substring
270 buffer where to put the substring
271 size the size of the buffer
272
273 Returns: if successful:
274 the length of the copied string, not including the zero
275 that is put on the end; can be zero
276 if not successful:
277 PCRE_ERROR_NOMEMORY (-6) buffer too small
278 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
279 */
280
281 int
282 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
283 int stringcount, const char *stringname, char *buffer, int size)
284 {
285 int n = get_first_set(code, stringname, ovector);
286 if (n <= 0) return n;
287 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
288 }
289
290
291
/*************************************************
*      Copy all captured strings to new store    *
*************************************************/

/* This function gets one chunk of store and builds a list of pointers and all
of the captured substrings in it. A NULL pointer is put on the end of the list.
The chunk holds stringcount + 1 pointers followed immediately by the
zero-terminated copies of the substrings.

A substring whose end offset is not after its start offset (including an unset
group, whose offsets are both -1) is stored as an empty string. Without that
clamp a negative length would shrink the computed size and then be passed to
memcpy() as a huge unsigned count.

Arguments:
  subject        the subject string that was matched
  ovector        pointer to the offsets table
  stringcount    the number of substrings that were captured
                   (i.e. the yield of the pcre_exec call, unless
                   that was zero, in which case it should be 1/3
                   of the offset table size)
  listptr        set to point to the list of pointers

Returns:         if successful: 0
                 if not successful:
                   PCRE_ERROR_NOMEMORY (-6) failed to get store
*/

int
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
  const char ***listptr)
{
int i;
size_t size = sizeof(char *);          /* the terminating NULL pointer */
int double_count = stringcount * 2;
char **stringlist;
char *p;

/* First pass: one pointer, the characters, and a terminating zero for each
substring. */

for (i = 0; i < double_count; i += 2)
  {
  size += sizeof(char *) + 1;
  if (ovector[i+1] > ovector[i]) size += (size_t)(ovector[i+1] - ovector[i]);
  }

stringlist = (char **)(pcre_malloc)(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;

*listptr = (const char **)stringlist;
p = (char *)(stringlist + stringcount + 1);   /* string area follows pointers */

/* Second pass: copy each substring and record where it was put. The length
must be computed exactly as in the first pass. */

for (i = 0; i < double_count; i += 2)
  {
  int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
  if (len > 0) memcpy(p, subject + ovector[i], len);
  *stringlist++ = p;
  p += len;
  *p++ = 0;
  }

*stringlist = NULL;
return 0;
}
344
345
346
347 /*************************************************
348 * Free store obtained by get_substring_list *
349 *************************************************/
350
351 /* This function exists for the benefit of people calling PCRE from non-C
352 programs that can call its functions, but not free() or (pcre_free)() directly.
353
354 Argument: the result of a previous pcre_get_substring_list()
355 Returns: nothing
356 */
357
358 void
359 pcre_free_substring_list(const char **pointer)
360 {
361 (pcre_free)((void *)pointer);
362 }
363
364
365
/*************************************************
*      Copy captured string to new store         *
*************************************************/

/* This function copies a single captured substring into a piece of new
store obtained with (pcre_malloc)(). memcpy() is used for the copy, so binary
zeros inside the substring are preserved.

The length is taken as the difference between the pair of offsets for the
group. If the end offset lies before the start offset the substring is treated
as empty; otherwise a negative length would under-size the allocation and be
passed to memcpy() as a huge unsigned count. A group whose offsets are equal
(including an unset group, whose offsets are both -1) also yields an empty
string.

Arguments:
  subject        the subject string that was matched
  ovector        pointer to the offsets table
  stringcount    the number of substrings that were captured
                   (i.e. the yield of the pcre_exec call, unless
                   that was zero, in which case it should be 1/3
                   of the offset table size)
  stringnumber   the number of the required substring
  stringptr      where to put a pointer to the substring; written only
                   on success

Returns:         if successful:
                   the length of the string, not including the zero that
                   is put on the end; can be zero
                 if not successful:
                   PCRE_ERROR_NOMEMORY (-6) failed to get store
                   PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/

int
pcre_get_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, const char **stringptr)
{
int start;
int yield;
char *substring;

if (stringnumber < 0 || stringnumber >= stringcount)
  return PCRE_ERROR_NOSUBSTRING;

stringnumber *= 2;
start = ovector[stringnumber];
yield = ovector[stringnumber+1] - start;

/* Never let a negative length reach the allocator or memcpy(). */

if (yield < 0) yield = 0;

substring = (char *)(pcre_malloc)(yield + 1);
if (substring == NULL) return PCRE_ERROR_NOMEMORY;

/* Skip the copy for empty strings so that no pointer is formed from a
possibly negative start offset. */

if (yield > 0) memcpy(substring, subject + start, yield);
substring[yield] = 0;
*stringptr = substring;
return yield;
}
408
409
410
411 /*************************************************
412 * Copy named captured string to new store *
413 *************************************************/
414
415 /* This function copies a single captured substring, identified by name, into
416 new store. If the regex permits duplicate names, the first substring that is
417 set is chosen.
418
419 Arguments:
420 code the compiled regex
421 subject the subject string that was matched
422 ovector pointer to the offsets table
423 stringcount the number of substrings that were captured
424 (i.e. the yield of the pcre_exec call, unless
425 that was zero, in which case it should be 1/3
426 of the offset table size)
427 stringname the name of the required substring
428 stringptr where to put the pointer
429
430 Returns: if successful:
431 the length of the copied string, not including the zero
432 that is put on the end; can be zero
433 if not successful:
434 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
435 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
436 */
437
438 int
439 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
440 int stringcount, const char *stringname, const char **stringptr)
441 {
442 int n = get_first_set(code, stringname, ovector);
443 if (n <= 0) return n;
444 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
445 }
446
447
448
449
450 /*************************************************
451 * Free store obtained by get_substring *
452 *************************************************/
453
454 /* This function exists for the benefit of people calling PCRE from non-C
455 programs that can call its functions, but not free() or (pcre_free)() directly.
456
457 Argument: the result of a previous pcre_get_substring()
458 Returns: nothing
459 */
460
461 void
462 pcre_free_substring(const char *pointer)
463 {
464 (pcre_free)((void *)pointer);
465 }
466
467 /* End of pcre_get.c */