/* $Cambridge: exim/src/src/pcre/pcre_get.c,v 1.6 2007/11/12 13:02:19 nm4 Exp $ */

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2007 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
52 #include "pcre_internal.h"
55 /*************************************************
56 * Find number for named string *
57 *************************************************/
59 /* This function is used by the get_first_set() function below, as well
60 as being generally available. It assumes that names are unique.
63 code the compiled regex
64 stringname the name whose number is required
66 Returns: the number of the named parentheses, or a negative number
67 (PCRE_ERROR_NOSUBSTRING) if not found
71 pcre_get_stringnumber(const pcre
*code
, const char *stringname
)
78 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
80 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
82 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
84 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
90 int mid
= (top
+ bot
) / 2;
91 uschar
*entry
= nametable
+ entrysize
*mid
;
92 int c
= strcmp(stringname
, (char *)(entry
+ 2));
93 if (c
== 0) return (entry
[0] << 8) + entry
[1];
94 if (c
> 0) bot
= mid
+ 1; else top
= mid
;
97 return PCRE_ERROR_NOSUBSTRING
;
102 /*************************************************
103 * Find (multiple) entries for named string *
104 *************************************************/
106 /* This is used by the get_first_set() function below, as well as being
107 generally available. It is used when duplicated names are permitted.
110 code the compiled regex
111 stringname the name whose entries required
112 firstptr where to put the pointer to the first entry
113 lastptr where to put the pointer to the last entry
115 Returns: the length of each entry, or a negative number
116 (PCRE_ERROR_NOSUBSTRING) if not found
120 pcre_get_stringtable_entries(const pcre
*code
, const char *stringname
,
121 char **firstptr
, char **lastptr
)
126 uschar
*nametable
, *lastentry
;
128 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
130 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
132 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
134 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
137 lastentry
= nametable
+ entrysize
* (top
- 1);
141 int mid
= (top
+ bot
) / 2;
142 uschar
*entry
= nametable
+ entrysize
*mid
;
143 int c
= strcmp(stringname
, (char *)(entry
+ 2));
146 uschar
*first
= entry
;
147 uschar
*last
= entry
;
148 while (first
> nametable
)
150 if (strcmp(stringname
, (char *)(first
- entrysize
+ 2)) != 0) break;
153 while (last
< lastentry
)
155 if (strcmp(stringname
, (char *)(last
+ entrysize
+ 2)) != 0) break;
158 *firstptr
= (char *)first
;
159 *lastptr
= (char *)last
;
162 if (c
> 0) bot
= mid
+ 1; else top
= mid
;
165 return PCRE_ERROR_NOSUBSTRING
;
170 /*************************************************
171 * Find first set of multiple named strings *
172 *************************************************/
174 /* This function allows for duplicate names in the table of named substrings.
175 It returns the number of the first one that was set in a pattern match.
178 code the compiled regex
179 stringname the name of the capturing substring
180 ovector the vector of matched substrings
182 Returns: the number of the first that is set,
183 or the number of the last one if none are set,
184 or a negative number on error
188 get_first_set(const pcre
*code
, const char *stringname
, int *ovector
)
190 const real_pcre
*re
= (const real_pcre
*)code
;
194 if ((re
->options
& PCRE_DUPNAMES
) == 0 && (re
->flags
& PCRE_JCHANGED
) == 0)
195 return pcre_get_stringnumber(code
, stringname
);
196 entrysize
= pcre_get_stringtable_entries(code
, stringname
, &first
, &last
);
197 if (entrysize
<= 0) return entrysize
;
198 for (entry
= (uschar
*)first
; entry
<= (uschar
*)last
; entry
+= entrysize
)
200 int n
= (entry
[0] << 8) + entry
[1];
201 if (ovector
[n
*2] >= 0) return n
;
203 return (first
[0] << 8) + first
[1];
209 /*************************************************
210 * Copy captured string to given buffer *
211 *************************************************/
213 /* This function copies a single captured substring into a given buffer.
214 Note that we use memcpy() rather than strncpy() in case there are binary zeros
218 subject the subject string that was matched
219 ovector pointer to the offsets table
220 stringcount the number of substrings that were captured
221 (i.e. the yield of the pcre_exec call, unless
222 that was zero, in which case it should be 1/3
223 of the offset table size)
224 stringnumber the number of the required substring
225 buffer where to put the substring
226 size the size of the buffer
228 Returns: if successful:
229 the length of the copied string, not including the zero
230 that is put on the end; can be zero
232 PCRE_ERROR_NOMEMORY (-6) buffer too small
233 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
237 pcre_copy_substring(const char *subject
, int *ovector
, int stringcount
,
238 int stringnumber
, char *buffer
, int size
)
241 if (stringnumber
< 0 || stringnumber
>= stringcount
)
242 return PCRE_ERROR_NOSUBSTRING
;
244 yield
= ovector
[stringnumber
+1] - ovector
[stringnumber
];
245 if (size
< yield
+ 1) return PCRE_ERROR_NOMEMORY
;
246 memcpy(buffer
, subject
+ ovector
[stringnumber
], yield
);
253 /*************************************************
254 * Copy named captured string to given buffer *
255 *************************************************/
257 /* This function copies a single captured substring into a given buffer,
258 identifying it by name. If the regex permits duplicate names, the first
259 substring that is set is chosen.
262 code the compiled regex
263 subject the subject string that was matched
264 ovector pointer to the offsets table
265 stringcount the number of substrings that were captured
266 (i.e. the yield of the pcre_exec call, unless
267 that was zero, in which case it should be 1/3
268 of the offset table size)
269 stringname the name of the required substring
270 buffer where to put the substring
271 size the size of the buffer
273 Returns: if successful:
274 the length of the copied string, not including the zero
275 that is put on the end; can be zero
277 PCRE_ERROR_NOMEMORY (-6) buffer too small
278 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
282 pcre_copy_named_substring(const pcre
*code
, const char *subject
, int *ovector
,
283 int stringcount
, const char *stringname
, char *buffer
, int size
)
285 int n
= get_first_set(code
, stringname
, ovector
);
286 if (n
<= 0) return n
;
287 return pcre_copy_substring(subject
, ovector
, stringcount
, n
, buffer
, size
);
292 /*************************************************
293 * Copy all captured strings to new store *
294 *************************************************/
296 /* This function gets one chunk of store and builds a list of pointers and all
297 of the captured substrings in it. A NULL pointer is put on the end of the list.
300 subject the subject string that was matched
301 ovector pointer to the offsets table
302 stringcount the number of substrings that were captured
303 (i.e. the yield of the pcre_exec call, unless
304 that was zero, in which case it should be 1/3
305 of the offset table size)
306 listptr set to point to the list of pointers
308 Returns: if successful: 0
310 PCRE_ERROR_NOMEMORY (-6) failed to get store
314 pcre_get_substring_list(const char *subject
, int *ovector
, int stringcount
,
315 const char ***listptr
)
318 int size
= sizeof(char *);
319 int double_count
= stringcount
* 2;
323 for (i
= 0; i
< double_count
; i
+= 2)
324 size
+= sizeof(char *) + ovector
[i
+1] - ovector
[i
] + 1;
326 stringlist
= (char **)(pcre_malloc
)(size
);
327 if (stringlist
== NULL
) return PCRE_ERROR_NOMEMORY
;
329 *listptr
= (const char **)stringlist
;
330 p
= (char *)(stringlist
+ stringcount
+ 1);
332 for (i
= 0; i
< double_count
; i
+= 2)
334 int len
= ovector
[i
+1] - ovector
[i
];
335 memcpy(p
, subject
+ ovector
[i
], len
);
347 /*************************************************
348 * Free store obtained by get_substring_list *
349 *************************************************/
351 /* This function exists for the benefit of people calling PCRE from non-C
352 programs that can call its functions, but not free() or (pcre_free)() directly.
354 Argument: the result of a previous pcre_get_substring_list()
359 pcre_free_substring_list(const char **pointer
)
361 (pcre_free
)((void *)pointer
);
366 /*************************************************
367 * Copy captured string to new store *
368 *************************************************/
370 /* This function copies a single captured substring into a piece of new
374 subject the subject string that was matched
375 ovector pointer to the offsets table
376 stringcount the number of substrings that were captured
377 (i.e. the yield of the pcre_exec call, unless
378 that was zero, in which case it should be 1/3
379 of the offset table size)
380 stringnumber the number of the required substring
381 stringptr where to put a pointer to the substring
383 Returns: if successful:
384 the length of the string, not including the zero that
385 is put on the end; can be zero
387 PCRE_ERROR_NOMEMORY (-6) failed to get store
388 PCRE_ERROR_NOSUBSTRING (-7) substring not present
392 pcre_get_substring(const char *subject
, int *ovector
, int stringcount
,
393 int stringnumber
, const char **stringptr
)
397 if (stringnumber
< 0 || stringnumber
>= stringcount
)
398 return PCRE_ERROR_NOSUBSTRING
;
400 yield
= ovector
[stringnumber
+1] - ovector
[stringnumber
];
401 substring
= (char *)(pcre_malloc
)(yield
+ 1);
402 if (substring
== NULL
) return PCRE_ERROR_NOMEMORY
;
403 memcpy(substring
, subject
+ ovector
[stringnumber
], yield
);
404 substring
[yield
] = 0;
405 *stringptr
= substring
;
411 /*************************************************
412 * Copy named captured string to new store *
413 *************************************************/
415 /* This function copies a single captured substring, identified by name, into
416 new store. If the regex permits duplicate names, the first substring that is
420 code the compiled regex
421 subject the subject string that was matched
422 ovector pointer to the offsets table
423 stringcount the number of substrings that were captured
424 (i.e. the yield of the pcre_exec call, unless
425 that was zero, in which case it should be 1/3
426 of the offset table size)
427 stringname the name of the required substring
428 stringptr where to put the pointer
430 Returns: if successful:
431 the length of the copied string, not including the zero
432 that is put on the end; can be zero
434 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
435 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
439 pcre_get_named_substring(const pcre
*code
, const char *subject
, int *ovector
,
440 int stringcount
, const char *stringname
, const char **stringptr
)
442 int n
= get_first_set(code
, stringname
, ovector
);
443 if (n
<= 0) return n
;
444 return pcre_get_substring(subject
, ovector
, stringcount
, n
, stringptr
);
450 /*************************************************
451 * Free store obtained by get_substring *
452 *************************************************/
454 /* This function exists for the benefit of people calling PCRE from non-C
455 programs that can call its functions, but not free() or (pcre_free)() directly.
457 Argument: the result of a previous pcre_get_substring()
462 pcre_free_substring(const char *pointer
)
464 (pcre_free
)((void *)pointer
);
/* End of pcre_get.c */