Commit | Line | Data |
---|---|---|
6a488035 TO |
1 | <?xml version="1.0" ?> |
2 | <!-- | |
3 | Licensed to the Apache Software Foundation (ASF) under one or more | |
4 | contributor license agreements. See the NOTICE file distributed with | |
5 | this work for additional information regarding copyright ownership. | |
6 | The ASF licenses this file to You under the Apache License, Version 2.0 | |
7 | (the "License"); you may not use this file except in compliance with | |
8 | the License. You may obtain a copy of the License at | |
9 | ||
10 | http://www.apache.org/licenses/LICENSE-2.0 | |
11 | ||
12 | Unless required by applicable law or agreed to in writing, software | |
13 | distributed under the License is distributed on an "AS IS" BASIS, | |
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 | See the License for the specific language governing permissions and | |
16 | limitations under the License. | |
17 | --> | |
18 | ||
19 | <!-- This is the Solr schema file. This file should be named "schema.xml" and | |
20 | should be in the conf directory under the solr home | |
31037a42 | 21 | (i.e. ./solr/conf/schema.xml by default) |
6a488035 TO |
22 | or located where the classloader for the Solr webapp can find it. |
23 | ||
24 | For more information on how to customize this file, please see | |
25 | http://wiki.apache.org/solr/SchemaXml | |
26 | --> | |
27 | ||
28 | <schema name="civicrm_contact" version="1.1"> | |
29 | <!-- attribute "name" is the name of this schema and is only used for display purposes. | |
30 | Applications should change this to reflect the nature of the search collection. | |
31 | version="1.1" is Solr's version number for the schema syntax and semantics. It should | |
32 | not normally be changed by applications. | |
33 | 1.0: multiValued attribute did not exist, all fields are multiValued by nature | |
34 | 1.1: multiValued attribute introduced, false by default --> | |
35 | ||
36 | <types> | |
37 | <!-- field type definitions. The "name" attribute is | |
38 | just a label to be used by field definitions. The "class" | |
39 | attribute and any other attributes determine the real | |
40 | behavior of the fieldtype. | |
41 | Class names starting with "solr" refer to java classes in the | |
42 | org.apache.solr.analysis package. | |
43 | --> | |
44 | ||
31037a42 | 45 | <!-- The StrField type is not analyzed, but indexed/stored verbatim. |
6a488035 TO |
46 | - StrField and TextField support an optional compressThreshold which |
47 | limits compression (if enabled in the derived fields) to values which | |
48 | exceed a certain size (in characters). | |
49 | --> | |
50 | <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> | |
51 | ||
52 | <!-- boolean type: "true" or "false" --> | |
53 | <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/> | |
54 | ||
55 | <!-- The optional sortMissingLast and sortMissingFirst attributes are | |
56 | currently supported on types that are sorted internally as strings. | |
57 | - If sortMissingLast="true", then a sort on this field will cause documents | |
58 | without the field to come after documents with the field, | |
59 | regardless of the requested sort order (asc or desc). | |
60 | - If sortMissingFirst="true", then a sort on this field will cause documents | |
61 | without the field to come before documents with the field, | |
62 | regardless of the requested sort order. | |
63 | - If sortMissingLast="false" and sortMissingFirst="false" (the default), | |
64 | then default lucene sorting will be used which places docs without the | |
65 | field first in an ascending sort and last in a descending sort. | |
31037a42 | 66 | --> |
6a488035 TO |
67 | |
68 | ||
69 | <!-- numeric field types that store and index the text | |
70 | value verbatim (and hence don't support range queries, since the | |
71 | lexicographic ordering isn't equal to the numeric ordering) --> | |
72 | <fieldtype name="integer" class="solr.IntField" omitNorms="true"/> | |
73 | <fieldtype name="long" class="solr.LongField" omitNorms="true"/> | |
74 | <fieldtype name="float" class="solr.FloatField" omitNorms="true"/> | |
75 | <fieldtype name="double" class="solr.DoubleField" omitNorms="true"/> | |
76 | ||
77 | ||
78 | <!-- Numeric field types that manipulate the value into | |
79 | a string value that isn't human-readable in its internal form, | |
80 | but with a lexicographic ordering the same as the numeric ordering, | |
81 | so that range queries work correctly. --> | |
82 | <fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/> | |
83 | <fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/> | |
84 | <fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/> | |
85 | <fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/> | |
86 | ||
87 | ||
88 | <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and | |
89 | is a more restricted form of the canonical representation of dateTime | |
31037a42 | 90 | http://www.w3.org/TR/xmlschema-2/#dateTime |
6a488035 TO |
91 | The trailing "Z" designates UTC time and is mandatory. |
92 | Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z | |
93 | All other components are mandatory. | |
94 | ||
95 | Expressions can also be used to denote calculations that should be | |
96 | performed relative to "NOW" to determine the value, e.g.: | |
97 | ||
98 | NOW/HOUR | |
99 | ... Round to the start of the current hour | |
100 | NOW-1DAY | |
101 | ... Exactly 1 day prior to now | |
102 | NOW/DAY+6MONTHS+3DAYS | |
103 | ... 6 months and 3 days in the future from the start of | |
104 | the current day | |
31037a42 | 105 | |
6a488035 TO |
106 | Consult the DateField javadocs for more information. |
107 | --> | |
108 | <fieldtype name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/> | |
109 | ||
110 | <!-- solr.TextField allows the specification of custom text analyzers | |
111 | specified as a tokenizer and a list of token filters. Different | |
112 | analyzers may be specified for indexing and querying. | |
113 | ||
114 | The optional positionIncrementGap puts space between multiple fields of | |
115 | this type on the same document, with the purpose of preventing false phrase | |
116 | matching across fields. | |
117 | ||
118 | For more info on customizing your analyzer chain, please see | |
119 | http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters | |
120 | --> | |
121 | ||
122 | <!-- One can also specify an existing Analyzer class that has a | |
123 | default constructor via the class attribute on the analyzer element | |
124 | <fieldtype name="text_greek" class="solr.TextField"> | |
125 | <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> | |
126 | </fieldtype> | |
127 | --> | |
128 | ||
129 | <!-- A text field that only splits on whitespace for exact matching of words --> | |
130 | <fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="100"> | |
131 | <analyzer> | |
132 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
133 | </analyzer> | |
134 | </fieldtype> | |
135 | ||
136 | <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of | |
137 | words on case-change, alpha numeric boundaries, and non-alphanumeric chars, | |
138 | so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi". | |
139 | Synonyms and stopwords are customized by external files, and stemming is enabled. | |
140 | Duplicate tokens at the same position (which may result from Stemmed Synonyms or | |
141 | WordDelim parts) are removed. | |
142 | --> | |
143 | <fieldtype name="text" class="solr.TextField" positionIncrementGap="100"> | |
144 | <analyzer type="index"> <!-- index-time chain: the synonym filter below is commented out, and word/number parts are also catenated (catenateWords="1", catenateNumbers="1") --> | |
145 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
146 | <!-- in this example, we will only use synonyms at query time | |
147 | <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | |
148 | --> | |
149 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> | |
150 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/> | |
151 | <filter class="solr.LowerCaseFilterFactory"/> | |
152 | <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> | |
153 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |
154 | </analyzer> | |
155 | <analyzer type="query"> <!-- query-time chain: same filters as the index chain plus SynonymFilterFactory (synonyms.txt, expand="true"); catenation is switched off here (catenateWords="0", catenateNumbers="0") --> | |
156 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
157 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | |
158 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> | |
159 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/> | |
160 | <filter class="solr.LowerCaseFilterFactory"/> | |
161 | <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> | |
162 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |
163 | </analyzer> | |
164 | </fieldtype> | |
165 | ||
166 | ||
167 | <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names, | |
168 | but may be good for SKUs. Can insert dashes in the wrong place and still match. --> | |
169 | <fieldtype name="textTight" class="solr.TextField" positionIncrementGap="100" > | |
170 | <analyzer> <!-- one analyzer element with no type attribute, unlike the separate index/query analyzers of the "text" type; presumably used for both indexing and querying (confirm against the Solr docs) --> | |
171 | <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
172 | <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <!-- expand="false" here, whereas the "text" query analyzer uses expand="true" --> | |
173 | <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> | |
174 | <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <!-- part generation is off and catenation is on: the reverse of the "text" query analyzer settings --> | |
175 | <filter class="solr.LowerCaseFilterFactory"/> | |
176 | <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> | |
177 | <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |
178 | </analyzer> | |
179 | </fieldtype> | |
180 | ||
181 | </types> | |
182 | ||
183 | ||
184 | <fields> | |
185 | <!-- Valid attributes for fields: | |
186 | name: mandatory - the name for the field | |
187 | type: mandatory - the name of a previously defined type from the <types> section | |
188 | indexed: true if this field should be indexed (searchable or sortable) | |
189 | stored: true if this field should be retrievable | |
190 | compressed: [false] if this field should be stored using gzip compression | |
191 | (this will only apply if the field type is compressible; among | |
192 | the standard field types, only TextField and StrField are) | |
193 | multiValued: true if this field may contain multiple values per document | |
194 | omitNorms: (expert) set to true to omit the norms associated with | |
195 | this field (this disables length normalization and index-time | |
196 | boosting for the field, and saves some memory). Only full-text | |
197 | fields or fields that need an index-time boost need norms. | |
198 | --> | |
199 | ||
200 | <field name="id" type="sint" indexed="true" stored="true" /> | |
201 | ||
202 | <!-- fields from civicrm_contact --> | |
203 | <field name="sort_name" type="text" indexed="true" stored="true" /> | |
204 | <field name="display_name" type="text" indexed="true" stored="true" /> | |
205 | <field name="nick_name" type="text" indexed="true" stored="false"/> | |
206 | <field name="legal_identifier" type="text" indexed="true" stored="false"/> | |
207 | <field name="external_identifier" type="text" indexed="true" stored="false"/> | |
208 | <field name="contact_source" type="text" indexed="true" stored="false"/> | |
209 | <field name="contact_type" type="text" indexed="true" stored="false"/> | |
210 | ||
211 | <!-- fields from civicrm_individual --> | |
212 | <field name="first_name" type="text" indexed="true" stored="false"/> | |
213 | <field name="last_name" type="text" indexed="true" stored="false"/> | |
214 | <field name="middle_name" type="text" indexed="true" stored="false"/> | |
215 | <field name="job_title" type="text" indexed="true" stored="false"/> | |
216 | <field name="birth_date" type="date" indexed="true" stored="false"/> | |
217 | <field name="deceased_date" type="date" indexed="true" stored="false"/> | |
218 | <field name="gender" type="text" indexed="true" stored="false"/> | |
219 | ||
220 | <!-- fields from civicrm_household --> | |
221 | <field name="household_name" type="text" indexed="true" stored="false"/> | |
222 | ||
223 | <!-- fields from civicrm_organization --> | |
224 | <field name="organization_name" type="text" indexed="true" stored="false"/> | |
225 | <field name="legal_name" type="text" indexed="true" stored="false"/> | |
226 | <field name="sic_code" type="text" indexed="true" stored="false"/> | |
227 | ||
228 | <!-- fields from civicrm_location, civicrm_address, civicrm_phone, civicrm_email --> | |
229 | <field name="location_name" type="text" indexed="true" stored="false" multiValued="true"/> | |
230 | <field name="street_address" type="text" indexed="true" stored="false" multiValued="true"/> | |
231 | <field name="supplemental_address_1" type="text" indexed="true" stored="false" multiValued="true"/> | |
232 | <field name="supplemental_address_2" type="text" indexed="true" stored="false" multiValued="true"/> | |
233 | <field name="city" type="text" indexed="true" stored="false" multiValued="true"/> | |
234 | ||
235 | <field name="county" type="text" indexed="true" stored="false" multiValued="true"/> | |
236 | <field name="state" type="text" indexed="true" stored="false" multiValued="true"/> | |
237 | <field name="postal_code" type="text" indexed="true" stored="false" multiValued="true"/> | |
238 | <field name="country" type="text" indexed="true" stored="false" multiValued="true"/> | |
239 | <field name="location_note" type="text" indexed="true" stored="false" multiValued="true"/> | |
240 | <field name="email" type="text" indexed="true" stored="false" multiValued="true"/> | |
241 | <field name="phone" type="text" indexed="true" stored="false" multiValued="true"/> | |
242 | <field name="im" type="text" indexed="true" stored="false" multiValued="true"/> | |
243 | ||
244 | <field name="note_subject" type="text" indexed="true" stored="false" multiValued="true"/> | |
245 | <field name="note_body" type="text" indexed="true" stored="false" multiValued="true"/> | |
246 | ||
247 | <!-- catchall field, containing all other searchable text fields (implemented | |
248 | via copyField further on in this schema) --> | |
249 | <field name="text" type="text" indexed="true" stored="false" multiValued="true"/> | |
250 | <field name="address" type="text" indexed="true" stored="false" multiValued="true"/> | |
251 | <field name="note" type="text" indexed="true" stored="false" multiValued="true"/> | |
252 | ||
253 | <!-- Dynamic field definitions. If a field name is not found, dynamicFields | |
254 | will be used if the name matches any of the patterns. | |
255 | RESTRICTION: the glob-like pattern in the name attribute must have | |
256 | a "*" only at the start or the end. | |
257 | EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) | |
258 | Longer patterns will be matched first. If equal-size patterns | |
259 | both match, the first appearing in the schema will be used. --> | |
260 | <dynamicField name="*_i" type="sint" indexed="true" stored="true"/> | |
261 | <dynamicField name="*_s" type="string" indexed="true" stored="true"/> | |
262 | <dynamicField name="*_l" type="slong" indexed="true" stored="true"/> | |
263 | <dynamicField name="*_t" type="text" indexed="true" stored="true"/> | |
264 | <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> | |
265 | <dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/> | |
266 | <dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/> | |
267 | <dynamicField name="*_dt" type="date" indexed="true" stored="true"/> | |
268 | </fields> | |
269 | ||
270 | <!-- field to use to determine and enforce document uniqueness. --> | |
271 | <uniqueKey>id</uniqueKey> | |
272 | ||
273 | <!-- field for the QueryParser to use when an explicit fieldname is absent --> | |
274 | <defaultSearchField>text</defaultSearchField> | |
275 | ||
276 | <!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> | |
277 | <solrQueryParser defaultOperator="OR"/> | |
278 | ||
279 | <!-- copyField commands copy one field to another at the time a document | |
280 | is added to the index. It's used either to index the same field differently, | |
281 | or to add multiple fields to the same field for easier/faster searching. --> | |
282 | <copyField source="note_subject" dest="note"/> <!-- group 1: both note fields are aggregated into the multiValued "note" field --> | |
283 | <copyField source="note_body" dest="note"/> | |
284 | ||
285 | <copyField source="street_address" dest="address"/> <!-- group 2: address parts plus email, phone and im are aggregated into the multiValued "address" field. NOTE(review): location_name is declared alongside these fields but is not copied here; confirm that is intentional --> | |
286 | <copyField source="supplemental_address_1" dest="address"/> | |
287 | <copyField source="supplemental_address_2" dest="address"/> | |
288 | <copyField source="city" dest="address"/> | |
289 | <copyField source="county" dest="address"/> | |
290 | <copyField source="state" dest="address"/> | |
291 | <copyField source="postal_code" dest="address"/> | |
292 | <copyField source="country" dest="address"/> | |
293 | <copyField source="location_note" dest="address"/> | |
294 | <copyField source="email" dest="address"/> | |
295 | <copyField source="phone" dest="address"/> | |
296 | <copyField source="im" dest="address"/> | |
297 | ||
298 | <copyField source="sort_name" dest="text"/> <!-- group 3: sources for the "text" catch-all, which is also the defaultSearchField. NOTE(review): display_name, contact_type, first_name, last_name, middle_name, gender, organization_name and location_name are text fields with no copyField into "text"; confirm these omissions are intentional (sort_name may already carry the names) --> | |
299 | <copyField source="nick_name" dest="text"/> | |
300 | <copyField source="legal_identifier" dest="text"/> | |
301 | <copyField source="external_identifier" dest="text"/> | |
302 | <copyField source="contact_source" dest="text"/> | |
303 | <copyField source="job_title" dest="text"/> | |
304 | <copyField source="household_name" dest="text"/> | |
305 | <copyField source="legal_name" dest="text"/> | |
306 | <copyField source="sic_code" dest="text"/> | |
307 | <copyField source="note_subject" dest="text"/> | |
308 | <copyField source="note_body" dest="text"/> | |
309 | <copyField source="street_address" dest="text"/> | |
310 | <copyField source="supplemental_address_1" dest="text"/> | |
311 | <copyField source="supplemental_address_2" dest="text"/> | |
312 | <copyField source="city" dest="text"/> | |
313 | <copyField source="county" dest="text"/> | |
314 | <copyField source="state" dest="text"/> | |
315 | <copyField source="postal_code" dest="text"/> | |
316 | <copyField source="country" dest="text"/> | |
317 | <copyField source="location_note" dest="text"/> | |
318 | <copyField source="email" dest="text"/> | |
319 | <copyField source="phone" dest="text"/> | |
320 | <copyField source="im" dest="text"/> | |
321 | ||
322 | ||
323 | <!-- Similarity is the scoring routine for each document vs. a query. | |
324 | A custom similarity may be specified here, but the default is fine | |
325 | for most applications. --> | |
326 | <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> --> | |
327 | ||
328 | </schema> |