225843e2e298cee58de533c27e949c3fa48b767a
[exim.git] / src / src / pcre / get.c
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 Written by: Philip Hazel <ph10@cam.ac.uk>
11
12 Copyright (c) 1997-2003 University of Cambridge
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 /* This module contains some convenience functions for extracting substrings
44 from the subject string after a regex match has succeeded. The original idea
45 for these functions came from Scott Wimer. */
46
47
48 /* Include the internals header, which itself includes Standard C headers plus
49 the external pcre header. */
50
51 #include "internal.h"
52
53
54 /*************************************************
55 * Find number for named string *
56 *************************************************/
57
58 /* This function is used by the two extraction functions below, as well
59 as being generally available.
60
61 Arguments:
62 code the compiled regex
63 stringname the name whose number is required
64
65 Returns: the number of the named parentheses, or a negative number
66 (PCRE_ERROR_NOSUBSTRING) if not found
67 */
68
69 int
70 pcre_get_stringnumber(const pcre *code, const char *stringname)
71 {
72 int rc;
73 int entrysize;
74 int top, bot;
75 uschar *nametable;
76
77 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
78 return rc;
79 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
80
81 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
82 return rc;
83 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
84 return rc;
85
86 bot = 0;
87 while (top > bot)
88 {
89 int mid = (top + bot) / 2;
90 uschar *entry = nametable + entrysize*mid;
91 int c = strcmp(stringname, (char *)(entry + 2));
92 if (c == 0) return (entry[0] << 8) + entry[1];
93 if (c > 0) bot = mid + 1; else top = mid;
94 }
95
96 return PCRE_ERROR_NOSUBSTRING;
97 }
98
99
100
101 /*************************************************
102 * Copy captured string to given buffer *
103 *************************************************/
104
105 /* This function copies a single captured substring into a given buffer.
106 Note that we use memcpy() rather than strncpy() in case there are binary zeros
107 in the string.
108
109 Arguments:
110 subject the subject string that was matched
111 ovector pointer to the offsets table
112 stringcount the number of substrings that were captured
113 (i.e. the yield of the pcre_exec call, unless
114 that was zero, in which case it should be 1/3
115 of the offset table size)
116 stringnumber the number of the required substring
117 buffer where to put the substring
118 size the size of the buffer
119
120 Returns: if successful:
121 the length of the copied string, not including the zero
122 that is put on the end; can be zero
123 if not successful:
124 PCRE_ERROR_NOMEMORY (-6) buffer too small
125 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
126 */
127
128 int
129 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
130 int stringnumber, char *buffer, int size)
131 {
132 int yield;
133 if (stringnumber < 0 || stringnumber >= stringcount)
134 return PCRE_ERROR_NOSUBSTRING;
135 stringnumber *= 2;
136 yield = ovector[stringnumber+1] - ovector[stringnumber];
137 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
138 memcpy(buffer, subject + ovector[stringnumber], yield);
139 buffer[yield] = 0;
140 return yield;
141 }
142
143
144
145 /*************************************************
146 * Copy named captured string to given buffer *
147 *************************************************/
148
149 /* This function copies a single captured substring into a given buffer,
150 identifying it by name.
151
152 Arguments:
153 code the compiled regex
154 subject the subject string that was matched
155 ovector pointer to the offsets table
156 stringcount the number of substrings that were captured
157 (i.e. the yield of the pcre_exec call, unless
158 that was zero, in which case it should be 1/3
159 of the offset table size)
160 stringname the name of the required substring
161 buffer where to put the substring
162 size the size of the buffer
163
164 Returns: if successful:
165 the length of the copied string, not including the zero
166 that is put on the end; can be zero
167 if not successful:
168 PCRE_ERROR_NOMEMORY (-6) buffer too small
169 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
170 */
171
172 int
173 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
174 int stringcount, const char *stringname, char *buffer, int size)
175 {
176 int n = pcre_get_stringnumber(code, stringname);
177 if (n <= 0) return n;
178 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
179 }
180
181
182
183 /*************************************************
184 * Copy all captured strings to new store *
185 *************************************************/
186
187 /* This function gets one chunk of store and builds a list of pointers and all
188 of the captured substrings in it. A NULL pointer is put on the end of the list.
189
190 Arguments:
191 subject the subject string that was matched
192 ovector pointer to the offsets table
193 stringcount the number of substrings that were captured
194 (i.e. the yield of the pcre_exec call, unless
195 that was zero, in which case it should be 1/3
196 of the offset table size)
197 listptr set to point to the list of pointers
198
199 Returns: if successful: 0
200 if not successful:
201 PCRE_ERROR_NOMEMORY (-6) failed to get store
202 */
203
204 int
205 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
206 const char ***listptr)
207 {
208 int i;
209 int size = sizeof(char *);
210 int double_count = stringcount * 2;
211 char **stringlist;
212 char *p;
213
214 for (i = 0; i < double_count; i += 2)
215 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
216
217 stringlist = (char **)(pcre_malloc)(size);
218 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
219
220 *listptr = (const char **)stringlist;
221 p = (char *)(stringlist + stringcount + 1);
222
223 for (i = 0; i < double_count; i += 2)
224 {
225 int len = ovector[i+1] - ovector[i];
226 memcpy(p, subject + ovector[i], len);
227 *stringlist++ = p;
228 p += len;
229 *p++ = 0;
230 }
231
232 *stringlist = NULL;
233 return 0;
234 }
235
236
237
238 /*************************************************
239 * Free store obtained by get_substring_list *
240 *************************************************/
241
242 /* This function exists for the benefit of people calling PCRE from non-C
243 programs that can call its functions, but not free() or (pcre_free)() directly.
244
245 Argument: the result of a previous pcre_get_substring_list()
246 Returns: nothing
247 */
248
249 void
250 pcre_free_substring_list(const char **pointer)
251 {
252 (pcre_free)((void *)pointer);
253 }
254
255
256
257 /*************************************************
258 * Copy captured string to new store *
259 *************************************************/
260
261 /* This function copies a single captured substring into a piece of new
262 store
263
264 Arguments:
265 subject the subject string that was matched
266 ovector pointer to the offsets table
267 stringcount the number of substrings that were captured
268 (i.e. the yield of the pcre_exec call, unless
269 that was zero, in which case it should be 1/3
270 of the offset table size)
271 stringnumber the number of the required substring
272 stringptr where to put a pointer to the substring
273
274 Returns: if successful:
275 the length of the string, not including the zero that
276 is put on the end; can be zero
277 if not successful:
278 PCRE_ERROR_NOMEMORY (-6) failed to get store
279 PCRE_ERROR_NOSUBSTRING (-7) substring not present
280 */
281
282 int
283 pcre_get_substring(const char *subject, int *ovector, int stringcount,
284 int stringnumber, const char **stringptr)
285 {
286 int yield;
287 char *substring;
288 if (stringnumber < 0 || stringnumber >= stringcount)
289 return PCRE_ERROR_NOSUBSTRING;
290 stringnumber *= 2;
291 yield = ovector[stringnumber+1] - ovector[stringnumber];
292 substring = (char *)(pcre_malloc)(yield + 1);
293 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
294 memcpy(substring, subject + ovector[stringnumber], yield);
295 substring[yield] = 0;
296 *stringptr = substring;
297 return yield;
298 }
299
300
301
302 /*************************************************
303 * Copy named captured string to new store *
304 *************************************************/
305
306 /* This function copies a single captured substring, identified by name, into
307 new store.
308
309 Arguments:
310 code the compiled regex
311 subject the subject string that was matched
312 ovector pointer to the offsets table
313 stringcount the number of substrings that were captured
314 (i.e. the yield of the pcre_exec call, unless
315 that was zero, in which case it should be 1/3
316 of the offset table size)
317 stringname the name of the required substring
318 stringptr where to put the pointer
319
320 Returns: if successful:
321 the length of the copied string, not including the zero
322 that is put on the end; can be zero
323 if not successful:
324 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
325 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
326 */
327
328 int
329 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
330 int stringcount, const char *stringname, const char **stringptr)
331 {
332 int n = pcre_get_stringnumber(code, stringname);
333 if (n <= 0) return n;
334 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
335 }
336
337
338
339
340 /*************************************************
341 * Free store obtained by get_substring *
342 *************************************************/
343
344 /* This function exists for the benefit of people calling PCRE from non-C
345 programs that can call its functions, but not free() or (pcre_free)() directly.
346
347 Argument: the result of a previous pcre_get_substring()
348 Returns: nothing
349 */
350
351 void
352 pcre_free_substring(const char *pointer)
353 {
354 (pcre_free)((void *)pointer);
355 }
356
357 /* End of get.c */