Commit | Line | Data |
---|---|---|
6bf342e1 | 1 | /* $Cambridge: exim/src/src/pcre/pcre_get.c,v 1.4 2007/01/23 15:08:45 ph10 Exp $ */ |
8ac170f3 PH |
2 | |
3 | /************************************************* | |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
7 | /* PCRE is a library of functions to support regular expressions whose syntax | |
8 | and semantics are as close as possible to those of the Perl 5 language. | |
9 | ||
10 | Written by Philip Hazel | |
aa41d2de | 11 | Copyright (c) 1997-2006 University of Cambridge |
8ac170f3 PH |
12 | |
13 | ----------------------------------------------------------------------------- | |
14 | Redistribution and use in source and binary forms, with or without | |
15 | modification, are permitted provided that the following conditions are met: | |
16 | ||
17 | * Redistributions of source code must retain the above copyright notice, | |
18 | this list of conditions and the following disclaimer. | |
19 | ||
20 | * Redistributions in binary form must reproduce the above copyright | |
21 | notice, this list of conditions and the following disclaimer in the | |
22 | documentation and/or other materials provided with the distribution. | |
23 | ||
24 | * Neither the name of the University of Cambridge nor the names of its | |
25 | contributors may be used to endorse or promote products derived from | |
26 | this software without specific prior written permission. | |
27 | ||
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | POSSIBILITY OF SUCH DAMAGE. | |
39 | ----------------------------------------------------------------------------- | |
40 | */ | |
41 | ||
42 | ||
43 | /* This module contains some convenience functions for extracting substrings | |
44 | from the subject string after a regex match has succeeded. The original idea | |
45 | for these functions came from Scott Wimer. */ | |
46 | ||
47 | ||
48 | #include "pcre_internal.h" | |
49 | ||
50 | ||
51 | /************************************************* | |
52 | * Find number for named string * | |
53 | *************************************************/ | |
54 | ||
aa41d2de PH |
55 | /* This function is used by the get_first_set() function below, as well |
56 | as being generally available. It assumes that names are unique. | |
8ac170f3 PH |
57 | |
58 | Arguments: | |
59 | code the compiled regex | |
60 | stringname the name whose number is required | |
61 | ||
62 | Returns: the number of the named parentheses, or a negative number | |
63 | (PCRE_ERROR_NOSUBSTRING) if not found | |
64 | */ | |
65 | ||
66 | int | |
67 | pcre_get_stringnumber(const pcre *code, const char *stringname) | |
68 | { | |
69 | int rc; | |
70 | int entrysize; | |
71 | int top, bot; | |
72 | uschar *nametable; | |
73 | ||
74 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) | |
75 | return rc; | |
76 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; | |
77 | ||
78 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) | |
79 | return rc; | |
80 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) | |
81 | return rc; | |
82 | ||
83 | bot = 0; | |
84 | while (top > bot) | |
85 | { | |
86 | int mid = (top + bot) / 2; | |
87 | uschar *entry = nametable + entrysize*mid; | |
88 | int c = strcmp(stringname, (char *)(entry + 2)); | |
89 | if (c == 0) return (entry[0] << 8) + entry[1]; | |
90 | if (c > 0) bot = mid + 1; else top = mid; | |
91 | } | |
92 | ||
93 | return PCRE_ERROR_NOSUBSTRING; | |
94 | } | |
95 | ||
96 | ||
97 | ||
aa41d2de PH |
98 | /************************************************* |
99 | * Find (multiple) entries for named string * | |
100 | *************************************************/ | |
101 | ||
102 | /* This is used by the get_first_set() function below, as well as being | |
103 | generally available. It is used when duplicated names are permitted. | |
104 | ||
105 | Arguments: | |
106 | code the compiled regex | |
107 | stringname the name whose entries required | |
108 | firstptr where to put the pointer to the first entry | |
109 | lastptr where to put the pointer to the last entry | |
110 | ||
111 | Returns: the length of each entry, or a negative number | |
112 | (PCRE_ERROR_NOSUBSTRING) if not found | |
113 | */ | |
114 | ||
115 | int | |
116 | pcre_get_stringtable_entries(const pcre *code, const char *stringname, | |
117 | char **firstptr, char **lastptr) | |
118 | { | |
119 | int rc; | |
120 | int entrysize; | |
121 | int top, bot; | |
122 | uschar *nametable, *lastentry; | |
123 | ||
124 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) | |
125 | return rc; | |
126 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; | |
127 | ||
128 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) | |
129 | return rc; | |
130 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) | |
131 | return rc; | |
132 | ||
133 | lastentry = nametable + entrysize * (top - 1); | |
134 | bot = 0; | |
135 | while (top > bot) | |
136 | { | |
137 | int mid = (top + bot) / 2; | |
138 | uschar *entry = nametable + entrysize*mid; | |
139 | int c = strcmp(stringname, (char *)(entry + 2)); | |
140 | if (c == 0) | |
141 | { | |
142 | uschar *first = entry; | |
143 | uschar *last = entry; | |
144 | while (first > nametable) | |
145 | { | |
146 | if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break; | |
147 | first -= entrysize; | |
148 | } | |
149 | while (last < lastentry) | |
150 | { | |
151 | if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break; | |
152 | last += entrysize; | |
153 | } | |
154 | *firstptr = (char *)first; | |
155 | *lastptr = (char *)last; | |
156 | return entrysize; | |
157 | } | |
158 | if (c > 0) bot = mid + 1; else top = mid; | |
159 | } | |
160 | ||
161 | return PCRE_ERROR_NOSUBSTRING; | |
162 | } | |
163 | ||
164 | ||
165 | ||
166 | /************************************************* | |
167 | * Find first set of multiple named strings * | |
168 | *************************************************/ | |
169 | ||
170 | /* This function allows for duplicate names in the table of named substrings. | |
171 | It returns the number of the first one that was set in a pattern match. | |
172 | ||
173 | Arguments: | |
174 | code the compiled regex | |
175 | stringname the name of the capturing substring | |
176 | ovector the vector of matched substrings | |
177 | ||
178 | Returns: the number of the first that is set, | |
179 | or the number of the last one if none are set, | |
180 | or a negative number on error | |
181 | */ | |
182 | ||
183 | static int | |
184 | get_first_set(const pcre *code, const char *stringname, int *ovector) | |
185 | { | |
186 | const real_pcre *re = (const real_pcre *)code; | |
187 | int entrysize; | |
188 | char *first, *last; | |
189 | uschar *entry; | |
190 | if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0) | |
191 | return pcre_get_stringnumber(code, stringname); | |
192 | entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); | |
193 | if (entrysize <= 0) return entrysize; | |
194 | for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize) | |
195 | { | |
196 | int n = (entry[0] << 8) + entry[1]; | |
197 | if (ovector[n*2] >= 0) return n; | |
198 | } | |
199 | return (first[0] << 8) + first[1]; | |
200 | } | |
201 | ||
202 | ||
203 | ||
204 | ||
8ac170f3 PH |
205 | /************************************************* |
206 | * Copy captured string to given buffer * | |
207 | *************************************************/ | |
208 | ||
209 | /* This function copies a single captured substring into a given buffer. | |
210 | Note that we use memcpy() rather than strncpy() in case there are binary zeros | |
211 | in the string. | |
212 | ||
213 | Arguments: | |
214 | subject the subject string that was matched | |
215 | ovector pointer to the offsets table | |
216 | stringcount the number of substrings that were captured | |
217 | (i.e. the yield of the pcre_exec call, unless | |
218 | that was zero, in which case it should be 1/3 | |
219 | of the offset table size) | |
220 | stringnumber the number of the required substring | |
221 | buffer where to put the substring | |
222 | size the size of the buffer | |
223 | ||
224 | Returns: if successful: | |
225 | the length of the copied string, not including the zero | |
226 | that is put on the end; can be zero | |
227 | if not successful: | |
228 | PCRE_ERROR_NOMEMORY (-6) buffer too small | |
229 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring | |
230 | */ | |
231 | ||
232 | int | |
233 | pcre_copy_substring(const char *subject, int *ovector, int stringcount, | |
234 | int stringnumber, char *buffer, int size) | |
235 | { | |
236 | int yield; | |
237 | if (stringnumber < 0 || stringnumber >= stringcount) | |
238 | return PCRE_ERROR_NOSUBSTRING; | |
239 | stringnumber *= 2; | |
240 | yield = ovector[stringnumber+1] - ovector[stringnumber]; | |
241 | if (size < yield + 1) return PCRE_ERROR_NOMEMORY; | |
242 | memcpy(buffer, subject + ovector[stringnumber], yield); | |
243 | buffer[yield] = 0; | |
244 | return yield; | |
245 | } | |
246 | ||
247 | ||
248 | ||
249 | /************************************************* | |
250 | * Copy named captured string to given buffer * | |
251 | *************************************************/ | |
252 | ||
253 | /* This function copies a single captured substring into a given buffer, | |
aa41d2de PH |
254 | identifying it by name. If the regex permits duplicate names, the first |
255 | substring that is set is chosen. | |
8ac170f3 PH |
256 | |
257 | Arguments: | |
258 | code the compiled regex | |
259 | subject the subject string that was matched | |
260 | ovector pointer to the offsets table | |
261 | stringcount the number of substrings that were captured | |
262 | (i.e. the yield of the pcre_exec call, unless | |
263 | that was zero, in which case it should be 1/3 | |
264 | of the offset table size) | |
265 | stringname the name of the required substring | |
266 | buffer where to put the substring | |
267 | size the size of the buffer | |
268 | ||
269 | Returns: if successful: | |
270 | the length of the copied string, not including the zero | |
271 | that is put on the end; can be zero | |
272 | if not successful: | |
273 | PCRE_ERROR_NOMEMORY (-6) buffer too small | |
274 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring | |
275 | */ | |
276 | ||
277 | int | |
278 | pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, | |
279 | int stringcount, const char *stringname, char *buffer, int size) | |
280 | { | |
aa41d2de | 281 | int n = get_first_set(code, stringname, ovector); |
8ac170f3 PH |
282 | if (n <= 0) return n; |
283 | return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); | |
284 | } | |
285 | ||
286 | ||
287 | ||
288 | /************************************************* | |
289 | * Copy all captured strings to new store * | |
290 | *************************************************/ | |
291 | ||
292 | /* This function gets one chunk of store and builds a list of pointers and all | |
293 | of the captured substrings in it. A NULL pointer is put on the end of the list. | |
294 | ||
295 | Arguments: | |
296 | subject the subject string that was matched | |
297 | ovector pointer to the offsets table | |
298 | stringcount the number of substrings that were captured | |
299 | (i.e. the yield of the pcre_exec call, unless | |
300 | that was zero, in which case it should be 1/3 | |
301 | of the offset table size) | |
302 | listptr set to point to the list of pointers | |
303 | ||
304 | Returns: if successful: 0 | |
305 | if not successful: | |
306 | PCRE_ERROR_NOMEMORY (-6) failed to get store | |
307 | */ | |
308 | ||
309 | int | |
310 | pcre_get_substring_list(const char *subject, int *ovector, int stringcount, | |
311 | const char ***listptr) | |
312 | { | |
313 | int i; | |
314 | int size = sizeof(char *); | |
315 | int double_count = stringcount * 2; | |
316 | char **stringlist; | |
317 | char *p; | |
318 | ||
319 | for (i = 0; i < double_count; i += 2) | |
320 | size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; | |
321 | ||
322 | stringlist = (char **)(pcre_malloc)(size); | |
323 | if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; | |
324 | ||
325 | *listptr = (const char **)stringlist; | |
326 | p = (char *)(stringlist + stringcount + 1); | |
327 | ||
328 | for (i = 0; i < double_count; i += 2) | |
329 | { | |
330 | int len = ovector[i+1] - ovector[i]; | |
331 | memcpy(p, subject + ovector[i], len); | |
332 | *stringlist++ = p; | |
333 | p += len; | |
334 | *p++ = 0; | |
335 | } | |
336 | ||
337 | *stringlist = NULL; | |
338 | return 0; | |
339 | } | |
340 | ||
341 | ||
342 | ||
343 | /************************************************* | |
344 | * Free store obtained by get_substring_list * | |
345 | *************************************************/ | |
346 | ||
347 | /* This function exists for the benefit of people calling PCRE from non-C | |
348 | programs that can call its functions, but not free() or (pcre_free)() directly. | |
349 | ||
350 | Argument: the result of a previous pcre_get_substring_list() | |
351 | Returns: nothing | |
352 | */ | |
353 | ||
354 | void | |
355 | pcre_free_substring_list(const char **pointer) | |
356 | { | |
357 | (pcre_free)((void *)pointer); | |
358 | } | |
359 | ||
360 | ||
361 | ||
362 | /************************************************* | |
363 | * Copy captured string to new store * | |
364 | *************************************************/ | |
365 | ||
366 | /* This function copies a single captured substring into a piece of new | |
367 | store | |
368 | ||
369 | Arguments: | |
370 | subject the subject string that was matched | |
371 | ovector pointer to the offsets table | |
372 | stringcount the number of substrings that were captured | |
373 | (i.e. the yield of the pcre_exec call, unless | |
374 | that was zero, in which case it should be 1/3 | |
375 | of the offset table size) | |
376 | stringnumber the number of the required substring | |
377 | stringptr where to put a pointer to the substring | |
378 | ||
379 | Returns: if successful: | |
380 | the length of the string, not including the zero that | |
381 | is put on the end; can be zero | |
382 | if not successful: | |
383 | PCRE_ERROR_NOMEMORY (-6) failed to get store | |
384 | PCRE_ERROR_NOSUBSTRING (-7) substring not present | |
385 | */ | |
386 | ||
387 | int | |
388 | pcre_get_substring(const char *subject, int *ovector, int stringcount, | |
389 | int stringnumber, const char **stringptr) | |
390 | { | |
391 | int yield; | |
392 | char *substring; | |
393 | if (stringnumber < 0 || stringnumber >= stringcount) | |
394 | return PCRE_ERROR_NOSUBSTRING; | |
395 | stringnumber *= 2; | |
396 | yield = ovector[stringnumber+1] - ovector[stringnumber]; | |
397 | substring = (char *)(pcre_malloc)(yield + 1); | |
398 | if (substring == NULL) return PCRE_ERROR_NOMEMORY; | |
399 | memcpy(substring, subject + ovector[stringnumber], yield); | |
400 | substring[yield] = 0; | |
401 | *stringptr = substring; | |
402 | return yield; | |
403 | } | |
404 | ||
405 | ||
406 | ||
407 | /************************************************* | |
408 | * Copy named captured string to new store * | |
409 | *************************************************/ | |
410 | ||
411 | /* This function copies a single captured substring, identified by name, into | |
aa41d2de PH |
412 | new store. If the regex permits duplicate names, the first substring that is |
413 | set is chosen. | |
8ac170f3 PH |
414 | |
415 | Arguments: | |
416 | code the compiled regex | |
417 | subject the subject string that was matched | |
418 | ovector pointer to the offsets table | |
419 | stringcount the number of substrings that were captured | |
420 | (i.e. the yield of the pcre_exec call, unless | |
421 | that was zero, in which case it should be 1/3 | |
422 | of the offset table size) | |
423 | stringname the name of the required substring | |
424 | stringptr where to put the pointer | |
425 | ||
426 | Returns: if successful: | |
427 | the length of the copied string, not including the zero | |
428 | that is put on the end; can be zero | |
429 | if not successful: | |
430 | PCRE_ERROR_NOMEMORY (-6) couldn't get memory | |
431 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring | |
432 | */ | |
433 | ||
434 | int | |
435 | pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, | |
436 | int stringcount, const char *stringname, const char **stringptr) | |
437 | { | |
aa41d2de | 438 | int n = get_first_set(code, stringname, ovector); |
8ac170f3 PH |
439 | if (n <= 0) return n; |
440 | return pcre_get_substring(subject, ovector, stringcount, n, stringptr); | |
441 | } | |
442 | ||
443 | ||
444 | ||
445 | ||
446 | /************************************************* | |
447 | * Free store obtained by get_substring * | |
448 | *************************************************/ | |
449 | ||
450 | /* This function exists for the benefit of people calling PCRE from non-C | |
451 | programs that can call its functions, but not free() or (pcre_free)() directly. | |
452 | ||
453 | Argument: the result of a previous pcre_get_substring() | |
454 | Returns: nothing | |
455 | */ | |
456 | ||
457 | void | |
458 | pcre_free_substring(const char *pointer) | |
459 | { | |
460 | (pcre_free)((void *)pointer); | |
461 | } | |
462 | ||
463 | /* End of pcre_get.c */ |