Commit | Line | Data |
---|---|---|
8ac170f3 PH |
1 | /* $Cambridge: exim/src/src/pcre/get.c,v 1.2 2005/06/15 08:57:10 ph10 Exp $ */ |
2 | ||
c86f6258 PH |
3 | /************************************************* |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
7 | /* | |
8 | This is a library of functions to support regular expressions whose syntax | |
9 | and semantics are as close as possible to those of the Perl 5 language. See | |
10 | the file Tech.Notes for some information on the internals. | |
11 | ||
12 | Written by: Philip Hazel <ph10@cam.ac.uk> | |
13 | ||
14 | Copyright (c) 1997-2003 University of Cambridge | |
15 | ||
16 | ----------------------------------------------------------------------------- | |
17 | Redistribution and use in source and binary forms, with or without | |
18 | modification, are permitted provided that the following conditions are met: | |
19 | ||
20 | * Redistributions of source code must retain the above copyright notice, | |
21 | this list of conditions and the following disclaimer. | |
22 | ||
23 | * Redistributions in binary form must reproduce the above copyright | |
24 | notice, this list of conditions and the following disclaimer in the | |
25 | documentation and/or other materials provided with the distribution. | |
26 | ||
27 | * Neither the name of the University of Cambridge nor the names of its | |
28 | contributors may be used to endorse or promote products derived from | |
29 | this software without specific prior written permission. | |
30 | ||
31 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
32 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
33 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
34 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
35 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
36 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
37 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
38 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
39 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
40 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
41 | POSSIBILITY OF SUCH DAMAGE. | |
42 | ----------------------------------------------------------------------------- | |
43 | */ | |
44 | ||
45 | /* This module contains some convenience functions for extracting substrings | |
46 | from the subject string after a regex match has succeeded. The original idea | |
47 | for these functions came from Scott Wimer. */ | |
48 | ||
49 | ||
50 | /* Include the internals header, which itself includes Standard C headers plus | |
51 | the external pcre header. */ | |
52 | ||
53 | #include "internal.h" | |
54 | ||
55 | ||
56 | /************************************************* | |
57 | * Find number for named string * | |
58 | *************************************************/ | |
59 | ||
60 | /* This function is used by the two extraction functions below, as well | |
61 | as being generally available. | |
62 | ||
63 | Arguments: | |
64 | code the compiled regex | |
65 | stringname the name whose number is required | |
66 | ||
67 | Returns: the number of the named parentheses, or a negative number | |
68 | (PCRE_ERROR_NOSUBSTRING) if not found | |
69 | */ | |
70 | ||
71 | int | |
72 | pcre_get_stringnumber(const pcre *code, const char *stringname) | |
73 | { | |
74 | int rc; | |
75 | int entrysize; | |
76 | int top, bot; | |
77 | uschar *nametable; | |
78 | ||
79 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) | |
80 | return rc; | |
81 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; | |
82 | ||
83 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) | |
84 | return rc; | |
85 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) | |
86 | return rc; | |
87 | ||
88 | bot = 0; | |
89 | while (top > bot) | |
90 | { | |
91 | int mid = (top + bot) / 2; | |
92 | uschar *entry = nametable + entrysize*mid; | |
93 | int c = strcmp(stringname, (char *)(entry + 2)); | |
94 | if (c == 0) return (entry[0] << 8) + entry[1]; | |
95 | if (c > 0) bot = mid + 1; else top = mid; | |
96 | } | |
97 | ||
98 | return PCRE_ERROR_NOSUBSTRING; | |
99 | } | |
100 | ||
101 | ||
102 | ||
103 | /************************************************* | |
104 | * Copy captured string to given buffer * | |
105 | *************************************************/ | |
106 | ||
107 | /* This function copies a single captured substring into a given buffer. | |
108 | Note that we use memcpy() rather than strncpy() in case there are binary zeros | |
109 | in the string. | |
110 | ||
111 | Arguments: | |
112 | subject the subject string that was matched | |
113 | ovector pointer to the offsets table | |
114 | stringcount the number of substrings that were captured | |
115 | (i.e. the yield of the pcre_exec call, unless | |
116 | that was zero, in which case it should be 1/3 | |
117 | of the offset table size) | |
118 | stringnumber the number of the required substring | |
119 | buffer where to put the substring | |
120 | size the size of the buffer | |
121 | ||
122 | Returns: if successful: | |
123 | the length of the copied string, not including the zero | |
124 | that is put on the end; can be zero | |
125 | if not successful: | |
126 | PCRE_ERROR_NOMEMORY (-6) buffer too small | |
127 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring | |
128 | */ | |
129 | ||
130 | int | |
131 | pcre_copy_substring(const char *subject, int *ovector, int stringcount, | |
132 | int stringnumber, char *buffer, int size) | |
133 | { | |
134 | int yield; | |
135 | if (stringnumber < 0 || stringnumber >= stringcount) | |
136 | return PCRE_ERROR_NOSUBSTRING; | |
137 | stringnumber *= 2; | |
138 | yield = ovector[stringnumber+1] - ovector[stringnumber]; | |
139 | if (size < yield + 1) return PCRE_ERROR_NOMEMORY; | |
140 | memcpy(buffer, subject + ovector[stringnumber], yield); | |
141 | buffer[yield] = 0; | |
142 | return yield; | |
143 | } | |
144 | ||
145 | ||
146 | ||
147 | /************************************************* | |
148 | * Copy named captured string to given buffer * | |
149 | *************************************************/ | |
150 | ||
151 | /* This function copies a single captured substring into a given buffer, | |
152 | identifying it by name. | |
153 | ||
154 | Arguments: | |
155 | code the compiled regex | |
156 | subject the subject string that was matched | |
157 | ovector pointer to the offsets table | |
158 | stringcount the number of substrings that were captured | |
159 | (i.e. the yield of the pcre_exec call, unless | |
160 | that was zero, in which case it should be 1/3 | |
161 | of the offset table size) | |
162 | stringname the name of the required substring | |
163 | buffer where to put the substring | |
164 | size the size of the buffer | |
165 | ||
166 | Returns: if successful: | |
167 | the length of the copied string, not including the zero | |
168 | that is put on the end; can be zero | |
169 | if not successful: | |
170 | PCRE_ERROR_NOMEMORY (-6) buffer too small | |
171 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring | |
172 | */ | |
173 | ||
174 | int | |
175 | pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, | |
176 | int stringcount, const char *stringname, char *buffer, int size) | |
177 | { | |
178 | int n = pcre_get_stringnumber(code, stringname); | |
179 | if (n <= 0) return n; | |
180 | return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); | |
181 | } | |
182 | ||
183 | ||
184 | ||
185 | /************************************************* | |
186 | * Copy all captured strings to new store * | |
187 | *************************************************/ | |
188 | ||
189 | /* This function gets one chunk of store and builds a list of pointers and all | |
190 | of the captured substrings in it. A NULL pointer is put on the end of the list. | |
191 | ||
192 | Arguments: | |
193 | subject the subject string that was matched | |
194 | ovector pointer to the offsets table | |
195 | stringcount the number of substrings that were captured | |
196 | (i.e. the yield of the pcre_exec call, unless | |
197 | that was zero, in which case it should be 1/3 | |
198 | of the offset table size) | |
199 | listptr set to point to the list of pointers | |
200 | ||
201 | Returns: if successful: 0 | |
202 | if not successful: | |
203 | PCRE_ERROR_NOMEMORY (-6) failed to get store | |
204 | */ | |
205 | ||
206 | int | |
207 | pcre_get_substring_list(const char *subject, int *ovector, int stringcount, | |
208 | const char ***listptr) | |
209 | { | |
210 | int i; | |
211 | int size = sizeof(char *); | |
212 | int double_count = stringcount * 2; | |
213 | char **stringlist; | |
214 | char *p; | |
215 | ||
216 | for (i = 0; i < double_count; i += 2) | |
217 | size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; | |
218 | ||
219 | stringlist = (char **)(pcre_malloc)(size); | |
220 | if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; | |
221 | ||
222 | *listptr = (const char **)stringlist; | |
223 | p = (char *)(stringlist + stringcount + 1); | |
224 | ||
225 | for (i = 0; i < double_count; i += 2) | |
226 | { | |
227 | int len = ovector[i+1] - ovector[i]; | |
228 | memcpy(p, subject + ovector[i], len); | |
229 | *stringlist++ = p; | |
230 | p += len; | |
231 | *p++ = 0; | |
232 | } | |
233 | ||
234 | *stringlist = NULL; | |
235 | return 0; | |
236 | } | |
237 | ||
238 | ||
239 | ||
240 | /************************************************* | |
241 | * Free store obtained by get_substring_list * | |
242 | *************************************************/ | |
243 | ||
244 | /* This function exists for the benefit of people calling PCRE from non-C | |
245 | programs that can call its functions, but not free() or (pcre_free)() directly. | |
246 | ||
247 | Argument: the result of a previous pcre_get_substring_list() | |
248 | Returns: nothing | |
249 | */ | |
250 | ||
251 | void | |
252 | pcre_free_substring_list(const char **pointer) | |
253 | { | |
254 | (pcre_free)((void *)pointer); | |
255 | } | |
256 | ||
257 | ||
258 | ||
259 | /************************************************* | |
260 | * Copy captured string to new store * | |
261 | *************************************************/ | |
262 | ||
263 | /* This function copies a single captured substring into a piece of new | |
264 | store | |
265 | ||
266 | Arguments: | |
267 | subject the subject string that was matched | |
268 | ovector pointer to the offsets table | |
269 | stringcount the number of substrings that were captured | |
270 | (i.e. the yield of the pcre_exec call, unless | |
271 | that was zero, in which case it should be 1/3 | |
272 | of the offset table size) | |
273 | stringnumber the number of the required substring | |
274 | stringptr where to put a pointer to the substring | |
275 | ||
276 | Returns: if successful: | |
277 | the length of the string, not including the zero that | |
278 | is put on the end; can be zero | |
279 | if not successful: | |
280 | PCRE_ERROR_NOMEMORY (-6) failed to get store | |
281 | PCRE_ERROR_NOSUBSTRING (-7) substring not present | |
282 | */ | |
283 | ||
284 | int | |
285 | pcre_get_substring(const char *subject, int *ovector, int stringcount, | |
286 | int stringnumber, const char **stringptr) | |
287 | { | |
288 | int yield; | |
289 | char *substring; | |
290 | if (stringnumber < 0 || stringnumber >= stringcount) | |
291 | return PCRE_ERROR_NOSUBSTRING; | |
292 | stringnumber *= 2; | |
293 | yield = ovector[stringnumber+1] - ovector[stringnumber]; | |
294 | substring = (char *)(pcre_malloc)(yield + 1); | |
295 | if (substring == NULL) return PCRE_ERROR_NOMEMORY; | |
296 | memcpy(substring, subject + ovector[stringnumber], yield); | |
297 | substring[yield] = 0; | |
298 | *stringptr = substring; | |
299 | return yield; | |
300 | } | |
301 | ||
302 | ||
303 | ||
304 | /************************************************* | |
305 | * Copy named captured string to new store * | |
306 | *************************************************/ | |
307 | ||
308 | /* This function copies a single captured substring, identified by name, into | |
309 | new store. | |
310 | ||
311 | Arguments: | |
312 | code the compiled regex | |
313 | subject the subject string that was matched | |
314 | ovector pointer to the offsets table | |
315 | stringcount the number of substrings that were captured | |
316 | (i.e. the yield of the pcre_exec call, unless | |
317 | that was zero, in which case it should be 1/3 | |
318 | of the offset table size) | |
319 | stringname the name of the required substring | |
320 | stringptr where to put the pointer | |
321 | ||
322 | Returns: if successful: | |
323 | the length of the copied string, not including the zero | |
324 | that is put on the end; can be zero | |
325 | if not successful: | |
326 | PCRE_ERROR_NOMEMORY (-6) couldn't get memory | |
327 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring | |
328 | */ | |
329 | ||
330 | int | |
331 | pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, | |
332 | int stringcount, const char *stringname, const char **stringptr) | |
333 | { | |
334 | int n = pcre_get_stringnumber(code, stringname); | |
335 | if (n <= 0) return n; | |
336 | return pcre_get_substring(subject, ovector, stringcount, n, stringptr); | |
337 | } | |
338 | ||
339 | ||
340 | ||
341 | ||
342 | /************************************************* | |
343 | * Free store obtained by get_substring * | |
344 | *************************************************/ | |
345 | ||
346 | /* This function exists for the benefit of people calling PCRE from non-C | |
347 | programs that can call its functions, but not free() or (pcre_free)() directly. | |
348 | ||
349 | Argument: the result of a previous pcre_get_substring() | |
350 | Returns: nothing | |
351 | */ | |
352 | ||
353 | void | |
354 | pcre_free_substring(const char *pointer) | |
355 | { | |
356 | (pcre_free)((void *)pointer); | |
357 | } | |
358 | ||
359 | /* End of get.c */ |