Commit | Line | Data |
---|---|---|
47db1125 | 1 | /* $Cambridge: exim/src/src/pcre/pcre_newline.c,v 1.3 2007/11/12 13:02:20 nm4 Exp $ */ |
6bf342e1 PH |
2 | |
3 | /************************************************* | |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
7 | /* PCRE is a library of functions to support regular expressions whose syntax | |
8 | and semantics are as close as possible to those of the Perl 5 language. | |
9 | ||
10 | Written by Philip Hazel | |
64f2600a | 11 | Copyright (c) 1997-2007 University of Cambridge |
6bf342e1 PH |
12 | |
13 | ----------------------------------------------------------------------------- | |
14 | Redistribution and use in source and binary forms, with or without | |
15 | modification, are permitted provided that the following conditions are met: | |
16 | ||
17 | * Redistributions of source code must retain the above copyright notice, | |
18 | this list of conditions and the following disclaimer. | |
19 | ||
20 | * Redistributions in binary form must reproduce the above copyright | |
21 | notice, this list of conditions and the following disclaimer in the | |
22 | documentation and/or other materials provided with the distribution. | |
23 | ||
24 | * Neither the name of the University of Cambridge nor the names of its | |
25 | contributors may be used to endorse or promote products derived from | |
26 | this software without specific prior written permission. | |
27 | ||
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | POSSIBILITY OF SUCH DAMAGE. | |
39 | ----------------------------------------------------------------------------- | |
40 | */ | |
41 | ||
42 | ||
43 | /* This module contains internal functions for testing newlines when more than | |
44 | one kind of newline is to be recognized. When a newline is found, its length is | |
45 | returned. In principle, we could implement several newline "types", each | |
46 | referring to a different set of newline characters. At present, PCRE supports | |
64f2600a PH |
47 | only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, |
48 | and NLTYPE_ANY. The full list of Unicode newline characters is taken from | |
6bf342e1 PH |
49 | http://unicode.org/unicode/reports/tr18/. */ |
50 | ||
51 | ||
47db1125 NM |
52 | #ifdef HAVE_CONFIG_H |
53 | #include "config.h" | |
54 | #endif | |
55 | ||
6bf342e1 PH |
56 | #include "pcre_internal.h" |
57 | ||
58 | ||
59 | ||
60 | /************************************************* | |
61 | * Check for newline at given position * | |
62 | *************************************************/ | |
63 | ||
64 | /* It is guaranteed that the initial value of ptr is less than the end of the | |
65 | string that is being processed. | |
66 | ||
67 | Arguments: | |
68 | ptr pointer to possible newline | |
64f2600a | 69 | type the newline type |
6bf342e1 PH |
70 | endptr pointer to the end of the string |
71 | lenptr where to return the length | |
72 | utf8 TRUE if in utf8 mode | |
73 | ||
74 | Returns: TRUE or FALSE | |
75 | */ | |
76 | ||
77 | BOOL | |
64f2600a PH |
78 | _pcre_is_newline(const uschar *ptr, int type, const uschar *endptr, |
79 | int *lenptr, BOOL utf8) | |
6bf342e1 PH |
80 | { |
81 | int c; | |
82 | if (utf8) { GETCHAR(c, ptr); } else c = *ptr; | |
64f2600a PH |
83 | |
84 | if (type == NLTYPE_ANYCRLF) switch(c) | |
85 | { | |
86 | case 0x000a: *lenptr = 1; return TRUE; /* LF */ | |
87 | case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; | |
88 | return TRUE; /* CR */ | |
89 | default: return FALSE; | |
90 | } | |
91 | ||
92 | /* NLTYPE_ANY */ | |
93 | ||
94 | else switch(c) | |
6bf342e1 PH |
95 | { |
96 | case 0x000a: /* LF */ | |
97 | case 0x000b: /* VT */ | |
98 | case 0x000c: *lenptr = 1; return TRUE; /* FF */ | |
99 | case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; | |
100 | return TRUE; /* CR */ | |
101 | case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ | |
102 | case 0x2028: /* LS */ | |
103 | case 0x2029: *lenptr = 3; return TRUE; /* PS */ | |
104 | default: return FALSE; | |
105 | } | |
106 | } | |
107 | ||
108 | ||
109 | ||
110 | /************************************************* | |
111 | * Check for newline at previous position * | |
112 | *************************************************/ | |
113 | ||
114 | /* It is guaranteed that the initial value of ptr is greater than the start of | |
115 | the string that is being processed. | |
116 | ||
117 | Arguments: | |
118 | ptr pointer to possible newline | |
64f2600a | 119 | type the newline type |
6bf342e1 PH |
120 | startptr pointer to the start of the string |
121 | lenptr where to return the length | |
122 | utf8 TRUE if in utf8 mode | |
123 | ||
124 | Returns: TRUE or FALSE | |
125 | */ | |
126 | ||
127 | BOOL | |
64f2600a PH |
128 | _pcre_was_newline(const uschar *ptr, int type, const uschar *startptr, |
129 | int *lenptr, BOOL utf8) | |
6bf342e1 PH |
130 | { |
131 | int c; | |
132 | ptr--; | |
47db1125 | 133 | #ifdef SUPPORT_UTF8 |
6bf342e1 PH |
134 | if (utf8) |
135 | { | |
136 | BACKCHAR(ptr); | |
137 | GETCHAR(c, ptr); | |
138 | } | |
139 | else c = *ptr; | |
47db1125 NM |
140 | #else /* no UTF-8 support */ |
141 | c = *ptr; | |
142 | #endif /* SUPPORT_UTF8 */ | |
64f2600a PH |
143 | |
144 | if (type == NLTYPE_ANYCRLF) switch(c) | |
145 | { | |
146 | case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | |
147 | return TRUE; /* LF */ | |
148 | case 0x000d: *lenptr = 1; return TRUE; /* CR */ | |
149 | default: return FALSE; | |
150 | } | |
151 | ||
152 | else switch(c) | |
6bf342e1 PH |
153 | { |
154 | case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | |
155 | return TRUE; /* LF */ | |
156 | case 0x000b: /* VT */ | |
157 | case 0x000c: /* FF */ | |
158 | case 0x000d: *lenptr = 1; return TRUE; /* CR */ | |
159 | case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ | |
160 | case 0x2028: /* LS */ | |
161 | case 0x2029: *lenptr = 3; return TRUE; /* PS */ | |
162 | default: return FALSE; | |
163 | } | |
164 | } | |
165 | ||
166 | /* End of pcre_newline.c */ |