| 1 | /* $Cambridge: exim/src/src/pcre/pcre_tables.c,v 1.1 2005/06/15 08:57:10 ph10 Exp $ */ |
| 2 | |
| 3 | /************************************************* |
| 4 | * Perl-Compatible Regular Expressions * |
| 5 | *************************************************/ |
| 6 | |
| 7 | /* PCRE is a library of functions to support regular expressions whose syntax |
| 8 | and semantics are as close as possible to those of the Perl 5 language. |
| 9 | |
| 10 | Written by Philip Hazel |
| 11 | Copyright (c) 1997-2005 University of Cambridge |
| 12 | |
| 13 | ----------------------------------------------------------------------------- |
| 14 | Redistribution and use in source and binary forms, with or without |
| 15 | modification, are permitted provided that the following conditions are met: |
| 16 | |
| 17 | * Redistributions of source code must retain the above copyright notice, |
| 18 | this list of conditions and the following disclaimer. |
| 19 | |
| 20 | * Redistributions in binary form must reproduce the above copyright |
| 21 | notice, this list of conditions and the following disclaimer in the |
| 22 | documentation and/or other materials provided with the distribution. |
| 23 | |
| 24 | * Neither the name of the University of Cambridge nor the names of its |
| 25 | contributors may be used to endorse or promote products derived from |
| 26 | this software without specific prior written permission. |
| 27 | |
| 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 38 | POSSIBILITY OF SUCH DAMAGE. |
| 39 | ----------------------------------------------------------------------------- |
| 40 | */ |
| 41 | |
| 42 | |
| 43 | /* This module contains some fixed tables that are used by more than one of the |
| 44 | PCRE code modules. */ |
| 45 | |
| 46 | |
| 47 | #include "pcre_internal.h" |
| 48 | |
| 49 | |
| 50 | /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
| 51 | the definition is next to the definition of the opcodes in internal.h. */ |
| 52 | |
| 53 | const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; |
| 54 | |
| 55 | |
| 56 | |
| 57 | /************************************************* |
| 58 | * Tables for UTF-8 support * |
| 59 | *************************************************/ |
| 60 | |
/* Breakpoints for the different numbers of bytes in a UTF-8 character: entry
[i] is the largest value that can be encoded in i+1 bytes (0x7f for one byte,
up to 0x7fffffff for six bytes). */

const int _pcre_utf8_table1[] = {
  0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff
};

/* Number of entries in _pcre_utf8_table1. The divisor is the size of an
element of the array itself, not a named type, so the count stays right if the
element type is ever changed. The cast makes the size_t-to-int narrowing
explicit. */

const int _pcre_utf8_table1_size =
  (int)(sizeof(_pcre_utf8_table1)/sizeof(_pcre_utf8_table1[0]));
| 68 | |
/* Templates for the first byte of a UTF-8 character. Both tables are indexed
by the number of additional bytes that follow the first one:

  _pcre_utf8_table2  the indicator bits to set in the first byte;
  _pcre_utf8_table3  the mask for the data bits held in the first byte. */

const int _pcre_utf8_table2[] = {
  0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};

const int _pcre_utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};
| 74 | |
| 75 | /* Table of the number of extra characters, indexed by the first character |
| 76 | masked with 0x3f. The highest number for a valid UTF-8 character is in fact |
| 77 | 0x3d. */ |
| 78 | |
| 79 | const uschar _pcre_utf8_table4[] = { |
| 80 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 81 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 82 | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
| 83 | 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
| 84 | |
| 85 | /* This table translates Unicode property names into code values for the |
| 86 | ucp_findchar() function. It is used by pcretest as well as by the library |
| 87 | functions. */ |
| 88 | |
| 89 | const ucp_type_table _pcre_utt[] = { |
| 90 | { "C", 128 + ucp_C }, |
| 91 | { "Cc", ucp_Cc }, |
| 92 | { "Cf", ucp_Cf }, |
| 93 | { "Cn", ucp_Cn }, |
| 94 | { "Co", ucp_Co }, |
| 95 | { "Cs", ucp_Cs }, |
| 96 | { "L", 128 + ucp_L }, |
| 97 | { "Ll", ucp_Ll }, |
| 98 | { "Lm", ucp_Lm }, |
| 99 | { "Lo", ucp_Lo }, |
| 100 | { "Lt", ucp_Lt }, |
| 101 | { "Lu", ucp_Lu }, |
| 102 | { "M", 128 + ucp_M }, |
| 103 | { "Mc", ucp_Mc }, |
| 104 | { "Me", ucp_Me }, |
| 105 | { "Mn", ucp_Mn }, |
| 106 | { "N", 128 + ucp_N }, |
| 107 | { "Nd", ucp_Nd }, |
| 108 | { "Nl", ucp_Nl }, |
| 109 | { "No", ucp_No }, |
| 110 | { "P", 128 + ucp_P }, |
| 111 | { "Pc", ucp_Pc }, |
| 112 | { "Pd", ucp_Pd }, |
| 113 | { "Pe", ucp_Pe }, |
| 114 | { "Pf", ucp_Pf }, |
| 115 | { "Pi", ucp_Pi }, |
| 116 | { "Po", ucp_Po }, |
| 117 | { "Ps", ucp_Ps }, |
| 118 | { "S", 128 + ucp_S }, |
| 119 | { "Sc", ucp_Sc }, |
| 120 | { "Sk", ucp_Sk }, |
| 121 | { "Sm", ucp_Sm }, |
| 122 | { "So", ucp_So }, |
| 123 | { "Z", 128 + ucp_Z }, |
| 124 | { "Zl", ucp_Zl }, |
| 125 | { "Zp", ucp_Zp }, |
| 126 | { "Zs", ucp_Zs } |
| 127 | }; |
| 128 | |
| 129 | const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); |
| 130 | |
| 131 | /* End of pcre_tables.c */ |