Installed PCRE 6.0 sources, which involved adding a number of files and
[exim.git] / src / src / pcre / get.c
1 /* $Cambridge: exim/src/src/pcre/get.c,v 1.2 2005/06/15 08:57:10 ph10 Exp $ */
2
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
6
7 /*
8 This is a library of functions to support regular expressions whose syntax
9 and semantics are as close as possible to those of the Perl 5 language. See
10 the file Tech.Notes for some information on the internals.
11
12 Written by: Philip Hazel <ph10@cam.ac.uk>
13
14 Copyright (c) 1997-2003 University of Cambridge
15
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
26
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
30
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44
45 /* This module contains some convenience functions for extracting substrings
46 from the subject string after a regex match has succeeded. The original idea
47 for these functions came from Scott Wimer. */
48
49
50 /* Include the internals header, which itself includes Standard C headers plus
51 the external pcre header. */
52
53 #include "internal.h"
54
55
56 /*************************************************
57 * Find number for named string *
58 *************************************************/
59
60 /* This function is used by the two extraction functions below, as well
61 as being generally available.
62
63 Arguments:
64 code the compiled regex
65 stringname the name whose number is required
66
67 Returns: the number of the named parentheses, or a negative number
68 (PCRE_ERROR_NOSUBSTRING) if not found
69 */
70
71 int
72 pcre_get_stringnumber(const pcre *code, const char *stringname)
73 {
74 int rc;
75 int entrysize;
76 int top, bot;
77 uschar *nametable;
78
79 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
80 return rc;
81 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
82
83 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
84 return rc;
85 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
86 return rc;
87
88 bot = 0;
89 while (top > bot)
90 {
91 int mid = (top + bot) / 2;
92 uschar *entry = nametable + entrysize*mid;
93 int c = strcmp(stringname, (char *)(entry + 2));
94 if (c == 0) return (entry[0] << 8) + entry[1];
95 if (c > 0) bot = mid + 1; else top = mid;
96 }
97
98 return PCRE_ERROR_NOSUBSTRING;
99 }
100
101
102
103 /*************************************************
104 * Copy captured string to given buffer *
105 *************************************************/
106
107 /* This function copies a single captured substring into a given buffer.
108 Note that we use memcpy() rather than strncpy() in case there are binary zeros
109 in the string.
110
111 Arguments:
112 subject the subject string that was matched
113 ovector pointer to the offsets table
114 stringcount the number of substrings that were captured
115 (i.e. the yield of the pcre_exec call, unless
116 that was zero, in which case it should be 1/3
117 of the offset table size)
118 stringnumber the number of the required substring
119 buffer where to put the substring
120 size the size of the buffer
121
122 Returns: if successful:
123 the length of the copied string, not including the zero
124 that is put on the end; can be zero
125 if not successful:
126 PCRE_ERROR_NOMEMORY (-6) buffer too small
127 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
128 */
129
130 int
131 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
132 int stringnumber, char *buffer, int size)
133 {
134 int yield;
135 if (stringnumber < 0 || stringnumber >= stringcount)
136 return PCRE_ERROR_NOSUBSTRING;
137 stringnumber *= 2;
138 yield = ovector[stringnumber+1] - ovector[stringnumber];
139 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
140 memcpy(buffer, subject + ovector[stringnumber], yield);
141 buffer[yield] = 0;
142 return yield;
143 }
144
145
146
147 /*************************************************
148 * Copy named captured string to given buffer *
149 *************************************************/
150
151 /* This function copies a single captured substring into a given buffer,
152 identifying it by name.
153
154 Arguments:
155 code the compiled regex
156 subject the subject string that was matched
157 ovector pointer to the offsets table
158 stringcount the number of substrings that were captured
159 (i.e. the yield of the pcre_exec call, unless
160 that was zero, in which case it should be 1/3
161 of the offset table size)
162 stringname the name of the required substring
163 buffer where to put the substring
164 size the size of the buffer
165
166 Returns: if successful:
167 the length of the copied string, not including the zero
168 that is put on the end; can be zero
169 if not successful:
170 PCRE_ERROR_NOMEMORY (-6) buffer too small
171 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
172 */
173
174 int
175 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
176 int stringcount, const char *stringname, char *buffer, int size)
177 {
178 int n = pcre_get_stringnumber(code, stringname);
179 if (n <= 0) return n;
180 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
181 }
182
183
184
185 /*************************************************
186 * Copy all captured strings to new store *
187 *************************************************/
188
189 /* This function gets one chunk of store and builds a list of pointers and all
190 of the captured substrings in it. A NULL pointer is put on the end of the list.
191
192 Arguments:
193 subject the subject string that was matched
194 ovector pointer to the offsets table
195 stringcount the number of substrings that were captured
196 (i.e. the yield of the pcre_exec call, unless
197 that was zero, in which case it should be 1/3
198 of the offset table size)
199 listptr set to point to the list of pointers
200
201 Returns: if successful: 0
202 if not successful:
203 PCRE_ERROR_NOMEMORY (-6) failed to get store
204 */
205
206 int
207 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
208 const char ***listptr)
209 {
210 int i;
211 int size = sizeof(char *);
212 int double_count = stringcount * 2;
213 char **stringlist;
214 char *p;
215
216 for (i = 0; i < double_count; i += 2)
217 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
218
219 stringlist = (char **)(pcre_malloc)(size);
220 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
221
222 *listptr = (const char **)stringlist;
223 p = (char *)(stringlist + stringcount + 1);
224
225 for (i = 0; i < double_count; i += 2)
226 {
227 int len = ovector[i+1] - ovector[i];
228 memcpy(p, subject + ovector[i], len);
229 *stringlist++ = p;
230 p += len;
231 *p++ = 0;
232 }
233
234 *stringlist = NULL;
235 return 0;
236 }
237
238
239
240 /*************************************************
241 * Free store obtained by get_substring_list *
242 *************************************************/
243
244 /* This function exists for the benefit of people calling PCRE from non-C
245 programs that can call its functions, but not free() or (pcre_free)() directly.
246
247 Argument: the result of a previous pcre_get_substring_list()
248 Returns: nothing
249 */
250
251 void
252 pcre_free_substring_list(const char **pointer)
253 {
254 (pcre_free)((void *)pointer);
255 }
256
257
258
259 /*************************************************
260 * Copy captured string to new store *
261 *************************************************/
262
263 /* This function copies a single captured substring into a piece of new
264 store
265
266 Arguments:
267 subject the subject string that was matched
268 ovector pointer to the offsets table
269 stringcount the number of substrings that were captured
270 (i.e. the yield of the pcre_exec call, unless
271 that was zero, in which case it should be 1/3
272 of the offset table size)
273 stringnumber the number of the required substring
274 stringptr where to put a pointer to the substring
275
276 Returns: if successful:
277 the length of the string, not including the zero that
278 is put on the end; can be zero
279 if not successful:
280 PCRE_ERROR_NOMEMORY (-6) failed to get store
281 PCRE_ERROR_NOSUBSTRING (-7) substring not present
282 */
283
284 int
285 pcre_get_substring(const char *subject, int *ovector, int stringcount,
286 int stringnumber, const char **stringptr)
287 {
288 int yield;
289 char *substring;
290 if (stringnumber < 0 || stringnumber >= stringcount)
291 return PCRE_ERROR_NOSUBSTRING;
292 stringnumber *= 2;
293 yield = ovector[stringnumber+1] - ovector[stringnumber];
294 substring = (char *)(pcre_malloc)(yield + 1);
295 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
296 memcpy(substring, subject + ovector[stringnumber], yield);
297 substring[yield] = 0;
298 *stringptr = substring;
299 return yield;
300 }
301
302
303
304 /*************************************************
305 * Copy named captured string to new store *
306 *************************************************/
307
308 /* This function copies a single captured substring, identified by name, into
309 new store.
310
311 Arguments:
312 code the compiled regex
313 subject the subject string that was matched
314 ovector pointer to the offsets table
315 stringcount the number of substrings that were captured
316 (i.e. the yield of the pcre_exec call, unless
317 that was zero, in which case it should be 1/3
318 of the offset table size)
319 stringname the name of the required substring
320 stringptr where to put the pointer
321
322 Returns: if successful:
323 the length of the copied string, not including the zero
324 that is put on the end; can be zero
325 if not successful:
326 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
327 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
328 */
329
330 int
331 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
332 int stringcount, const char *stringname, const char **stringptr)
333 {
334 int n = pcre_get_stringnumber(code, stringname);
335 if (n <= 0) return n;
336 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
337 }
338
339
340
341
342 /*************************************************
343 * Free store obtained by get_substring *
344 *************************************************/
345
346 /* This function exists for the benefit of people calling PCRE from non-C
347 programs that can call its functions, but not free() or (pcre_free)() directly.
348
349 Argument: the result of a previous pcre_get_substring()
350 Returns: nothing
351 */
352
353 void
354 pcre_free_substring(const char *pointer)
355 {
356 (pcre_free)((void *)pointer);
357 }
358
359 /* End of get.c */