Commit | Line | Data |
---|---|---|
6bf342e1 | 1 | /* $Cambridge: exim/src/src/pcre/pcre_tables.c,v 1.4 2007/01/23 15:08:45 ph10 Exp $ */ |
8ac170f3 PH |
2 | |
3 | /************************************************* | |
4 | * Perl-Compatible Regular Expressions * | |
5 | *************************************************/ | |
6 | ||
7 | /* PCRE is a library of functions to support regular expressions whose syntax | |
8 | and semantics are as close as possible to those of the Perl 5 language. | |
9 | ||
10 | Written by Philip Hazel | |
aa41d2de | 11 | Copyright (c) 1997-2006 University of Cambridge |
8ac170f3 PH |
12 | |
13 | ----------------------------------------------------------------------------- | |
14 | Redistribution and use in source and binary forms, with or without | |
15 | modification, are permitted provided that the following conditions are met: | |
16 | ||
17 | * Redistributions of source code must retain the above copyright notice, | |
18 | this list of conditions and the following disclaimer. | |
19 | ||
20 | * Redistributions in binary form must reproduce the above copyright | |
21 | notice, this list of conditions and the following disclaimer in the | |
22 | documentation and/or other materials provided with the distribution. | |
23 | ||
24 | * Neither the name of the University of Cambridge nor the names of its | |
25 | contributors may be used to endorse or promote products derived from | |
26 | this software without specific prior written permission. | |
27 | ||
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
38 | POSSIBILITY OF SUCH DAMAGE. | |
39 | ----------------------------------------------------------------------------- | |
40 | */ | |
41 | ||
42 | ||
43 | /* This module contains some fixed tables that are used by more than one of the | |
aa41d2de PH |
44 | PCRE code modules. The tables are also #included by the pcretest program, which |
45 | uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name | |
46 | clashes with the library. */ | |
8ac170f3 PH |
47 | |
48 | ||
49 | #include "pcre_internal.h" | |
50 | ||
51 | ||
52 | /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that | |
aa41d2de | 53 | the definition is next to the definition of the opcodes in pcre_internal.h. */ |
8ac170f3 PH |
54 | |
55 | const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; | |
56 | ||
57 | ||
58 | ||
59 | /************************************************* | |
60 | * Tables for UTF-8 support * | |
61 | *************************************************/ | |
62 | ||
/* Breakpoints for the different numbers of bytes in a UTF-8 character, held in
increasing order, one entry per byte count. */

const int _pcre_utf8_table1[] = {
  0x7f,
  0x7ff,
  0xffff,
  0x1fffff,
  0x3ffffff,
  0x7fffffff
};

/* Number of entries in _pcre_utf8_table1, computed from the array itself. */

const int _pcre_utf8_table1_size =
  sizeof(_pcre_utf8_table1)/sizeof(_pcre_utf8_table1[0]);
70 | ||
/* Both of the following tables are indexed by the number of additional bytes
in a character. The first gives the indicator bits to set in the first byte. */

const int _pcre_utf8_table2[] = {
  0,       /* 0 additional bytes */
  0xc0,    /* 1 */
  0xe0,    /* 2 */
  0xf0,    /* 3 */
  0xf8,    /* 4 */
  0xfc     /* 5 */
};

/* The second gives the mask for the data bits of the first byte. */

const int _pcre_utf8_table3[] = {
  0xff,    /* 0 additional bytes */
  0x1f,    /* 1 */
  0x0f,    /* 2 */
  0x07,    /* 3 */
  0x03,    /* 4 */
  0x01     /* 5 */
};
76 | ||
6bf342e1 PH |
77 | /* Table of the number of extra bytes, indexed by the first byte masked with |
78 | 0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ | |
8ac170f3 PH |
79 | |
80 | const uschar _pcre_utf8_table4[] = { | |
81 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
82 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
83 | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
84 | 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; | |
85 | ||
aa41d2de PH |
86 | /* This table translates Unicode property names into type and code values. It |
87 | is searched by binary chop, so must be in collating sequence of name. */ | |
8ac170f3 PH |
88 | |
89 | const ucp_type_table _pcre_utt[] = { | |
aa41d2de PH |
90 | { "Any", PT_ANY, 0 }, |
91 | { "Arabic", PT_SC, ucp_Arabic }, | |
92 | { "Armenian", PT_SC, ucp_Armenian }, | |
6bf342e1 | 93 | { "Balinese", PT_SC, ucp_Balinese }, |
aa41d2de PH |
94 | { "Bengali", PT_SC, ucp_Bengali }, |
95 | { "Bopomofo", PT_SC, ucp_Bopomofo }, | |
96 | { "Braille", PT_SC, ucp_Braille }, | |
97 | { "Buginese", PT_SC, ucp_Buginese }, | |
98 | { "Buhid", PT_SC, ucp_Buhid }, | |
99 | { "C", PT_GC, ucp_C }, | |
100 | { "Canadian_Aboriginal", PT_SC, ucp_Canadian_Aboriginal }, | |
101 | { "Cc", PT_PC, ucp_Cc }, | |
102 | { "Cf", PT_PC, ucp_Cf }, | |
103 | { "Cherokee", PT_SC, ucp_Cherokee }, | |
104 | { "Cn", PT_PC, ucp_Cn }, | |
105 | { "Co", PT_PC, ucp_Co }, | |
106 | { "Common", PT_SC, ucp_Common }, | |
107 | { "Coptic", PT_SC, ucp_Coptic }, | |
108 | { "Cs", PT_PC, ucp_Cs }, | |
6bf342e1 | 109 | { "Cuneiform", PT_SC, ucp_Cuneiform }, |
aa41d2de PH |
110 | { "Cypriot", PT_SC, ucp_Cypriot }, |
111 | { "Cyrillic", PT_SC, ucp_Cyrillic }, | |
112 | { "Deseret", PT_SC, ucp_Deseret }, | |
113 | { "Devanagari", PT_SC, ucp_Devanagari }, | |
114 | { "Ethiopic", PT_SC, ucp_Ethiopic }, | |
115 | { "Georgian", PT_SC, ucp_Georgian }, | |
116 | { "Glagolitic", PT_SC, ucp_Glagolitic }, | |
117 | { "Gothic", PT_SC, ucp_Gothic }, | |
118 | { "Greek", PT_SC, ucp_Greek }, | |
119 | { "Gujarati", PT_SC, ucp_Gujarati }, | |
120 | { "Gurmukhi", PT_SC, ucp_Gurmukhi }, | |
121 | { "Han", PT_SC, ucp_Han }, | |
122 | { "Hangul", PT_SC, ucp_Hangul }, | |
123 | { "Hanunoo", PT_SC, ucp_Hanunoo }, | |
124 | { "Hebrew", PT_SC, ucp_Hebrew }, | |
125 | { "Hiragana", PT_SC, ucp_Hiragana }, | |
126 | { "Inherited", PT_SC, ucp_Inherited }, | |
127 | { "Kannada", PT_SC, ucp_Kannada }, | |
128 | { "Katakana", PT_SC, ucp_Katakana }, | |
129 | { "Kharoshthi", PT_SC, ucp_Kharoshthi }, | |
130 | { "Khmer", PT_SC, ucp_Khmer }, | |
131 | { "L", PT_GC, ucp_L }, | |
132 | { "L&", PT_LAMP, 0 }, | |
133 | { "Lao", PT_SC, ucp_Lao }, | |
134 | { "Latin", PT_SC, ucp_Latin }, | |
135 | { "Limbu", PT_SC, ucp_Limbu }, | |
136 | { "Linear_B", PT_SC, ucp_Linear_B }, | |
137 | { "Ll", PT_PC, ucp_Ll }, | |
138 | { "Lm", PT_PC, ucp_Lm }, | |
139 | { "Lo", PT_PC, ucp_Lo }, | |
140 | { "Lt", PT_PC, ucp_Lt }, | |
141 | { "Lu", PT_PC, ucp_Lu }, | |
142 | { "M", PT_GC, ucp_M }, | |
143 | { "Malayalam", PT_SC, ucp_Malayalam }, | |
144 | { "Mc", PT_PC, ucp_Mc }, | |
145 | { "Me", PT_PC, ucp_Me }, | |
146 | { "Mn", PT_PC, ucp_Mn }, | |
147 | { "Mongolian", PT_SC, ucp_Mongolian }, | |
148 | { "Myanmar", PT_SC, ucp_Myanmar }, | |
149 | { "N", PT_GC, ucp_N }, | |
150 | { "Nd", PT_PC, ucp_Nd }, | |
151 | { "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue }, | |
6bf342e1 | 152 | { "Nko", PT_SC, ucp_Nko }, |
aa41d2de PH |
153 | { "Nl", PT_PC, ucp_Nl }, |
154 | { "No", PT_PC, ucp_No }, | |
155 | { "Ogham", PT_SC, ucp_Ogham }, | |
156 | { "Old_Italic", PT_SC, ucp_Old_Italic }, | |
157 | { "Old_Persian", PT_SC, ucp_Old_Persian }, | |
158 | { "Oriya", PT_SC, ucp_Oriya }, | |
159 | { "Osmanya", PT_SC, ucp_Osmanya }, | |
160 | { "P", PT_GC, ucp_P }, | |
161 | { "Pc", PT_PC, ucp_Pc }, | |
162 | { "Pd", PT_PC, ucp_Pd }, | |
163 | { "Pe", PT_PC, ucp_Pe }, | |
164 | { "Pf", PT_PC, ucp_Pf }, | |
6bf342e1 PH |
165 | { "Phags_Pa", PT_SC, ucp_Phags_Pa }, |
166 | { "Phoenician", PT_SC, ucp_Phoenician }, | |
aa41d2de PH |
167 | { "Pi", PT_PC, ucp_Pi }, |
168 | { "Po", PT_PC, ucp_Po }, | |
169 | { "Ps", PT_PC, ucp_Ps }, | |
170 | { "Runic", PT_SC, ucp_Runic }, | |
171 | { "S", PT_GC, ucp_S }, | |
172 | { "Sc", PT_PC, ucp_Sc }, | |
173 | { "Shavian", PT_SC, ucp_Shavian }, | |
174 | { "Sinhala", PT_SC, ucp_Sinhala }, | |
175 | { "Sk", PT_PC, ucp_Sk }, | |
176 | { "Sm", PT_PC, ucp_Sm }, | |
177 | { "So", PT_PC, ucp_So }, | |
178 | { "Syloti_Nagri", PT_SC, ucp_Syloti_Nagri }, | |
179 | { "Syriac", PT_SC, ucp_Syriac }, | |
180 | { "Tagalog", PT_SC, ucp_Tagalog }, | |
181 | { "Tagbanwa", PT_SC, ucp_Tagbanwa }, | |
182 | { "Tai_Le", PT_SC, ucp_Tai_Le }, | |
183 | { "Tamil", PT_SC, ucp_Tamil }, | |
184 | { "Telugu", PT_SC, ucp_Telugu }, | |
185 | { "Thaana", PT_SC, ucp_Thaana }, | |
186 | { "Thai", PT_SC, ucp_Thai }, | |
187 | { "Tibetan", PT_SC, ucp_Tibetan }, | |
188 | { "Tifinagh", PT_SC, ucp_Tifinagh }, | |
189 | { "Ugaritic", PT_SC, ucp_Ugaritic }, | |
190 | { "Yi", PT_SC, ucp_Yi }, | |
191 | { "Z", PT_GC, ucp_Z }, | |
192 | { "Zl", PT_PC, ucp_Zl }, | |
193 | { "Zp", PT_PC, ucp_Zp }, | |
194 | { "Zs", PT_PC, ucp_Zs } | |
8ac170f3 PH |
195 | }; |
196 | ||
197 | const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); | |
198 | ||
199 | /* End of pcre_tables.c */ |