Merge pull request #15826 from seamuslee001/dev_core_183_dedupe
[civicrm-core.git] / tools / scripts / solr / createSolrJSON.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This code is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 * Create a xml file for a set of contact ID's in a format digestible
14 * by Solr
15 */
16
17 require_once '../../../civicrm.settings.php';
18 require_once 'CRM/Core/Config.php';
19
20 define('CHUNK_SIZE', 128);
21
22 /**
23 * Split a large array of contactIDs into more manageable smaller chunks
24 * @param $contactIDs
25 * @return array
26 */
27 function &splitContactIDs(&$contactIDs) {
28 // contactIDs could be a real large array, so we split it up into
29 // smaller chunks and then general xml for each chunk
30 $chunks = [];
31 $current = 0;
32 $chunks[$current] = [];
33 $count = 0;
34
35 foreach ($contactIDs as $cid) {
36 $chunks[$current][] = $cid;
37 $count++;
38
39 if ($count == CHUNK_SIZE) {
40 $current++;
41 $chunks[$current] = [];
42 $count = 0;
43 }
44 }
45
46 if (empty($chunks[$current])) {
47 unset($chunks[$current]);
48 }
49
50 return $chunks;
51 }
52
53 /**
54 * Given an array of values, generate the JSON in the Solr format
55 * @param $values
56 * @return string
57 */
58 function &generateSolrJSON($values) {
59 $result = "[";
60 foreach ($values as $cid => $tokens) {
61 if (empty($tokens)) {
62 continue;
63 }
64
65 $result .= "\n {\n \"contact_id\" : \"$cid\",";
66
67 foreach ($tokens as $n => $v) {
68 if (is_array($v)) {
69 $str = [];
70 foreach ($v as $el) {
71 $el = escapeJsonString($el);
72 $str[] = "\"$el\"";
73 }
74 $string = implode(",", $str);
75 $result .= "\n \"{$n}\" : [$string],";
76 }
77 else {
78 $v = escapeJsonString($v);
79 $result .= "\n \"{$n}\" : \"{$v}\",";
80 }
81 }
82
83 // remove the last comma
84 $result = rtrim($result, ",");
85
86 $result .= "\n },";
87 }
88 // remove the last comma
89 $result = rtrim($result, ",");
90
91 $result .= "\n]\n";
92
93
94 return $result;
95 }
96
97 /**
98 * @param $value
99 *
100 * @return mixed
101 */
102 function escapeJsonString($value) {
103 $escapers = ["\\", "/", "\"", "\n", "\r", "\t", "\x08", "\x0c"];
104 $replacements = ["\\\\", "\\/", "\\\"", "\\n", "\\r", "\\t", "\\f", "\\b"];
105 return str_replace($escapers, $replacements, $value);
106 }
107
108 /**
109 * Given a set of contact IDs get the values
110 * @param $contactIDs
111 * @param $values
112 * @return array
113 */
114 function getValues(&$contactIDs, &$values) {
115 $values = [];
116
117 foreach ($contactIDs as $cid) {
118 $values[$cid] = [];
119 }
120
121 getContactInfo($contactIDs, $values);
122 getAddressInfo($contactIDs, $values);
123 getPhoneInfo($contactIDs, $values);
124 getEmailInfo($contactIDs, $values);
125 getNoteInfo($contactIDs, $values);
126
127 return $values;
128 }
129
130 /**
131 * @param $contactIDs
132 * @param $values
133 * @param $tableName
134 * @param $fields
135 * @param $whereField
136 * @param null $additionalWhereCond
137 */
138 function getTableInfo(&$contactIDs, &$values, $tableName, &$fields, $whereField, $additionalWhereCond = NULL) {
139 $selectString = implode(',', array_keys($fields));
140 $idString = implode(',', $contactIDs);
141
142 $sql = "
143 SELECT $selectString, $whereField as contact_id
144 FROM $tableName
145 WHERE $whereField IN ( $idString )
146 ";
147
148 if ($additionalWhereCond) {
149 $sql .= " AND $additionalWhereCond";
150 }
151
152 $dao = &CRM_Core_DAO::executeQuery($sql);
153 while ($dao->fetch()) {
154 foreach ($fields as $fld => $name) {
155 $name = $name ? $name : $fld;
156 appendValue($values, $dao->contact_id, $name, $dao->$fld);
157 }
158 }
159 }
160
161 /**
162 * @param $contactIDs
163 * @param $values
164 */
165 function getContactInfo(&$contactIDs, &$values) {
166 $fields = [
167 'sort_name' => NULL,
168 'display_name' => NULL,
169 'contact_type' => NULL,
170 'legal_identifier' => NULL,
171 'external_identifier' => NULL,
172 'first_name' => NULL,
173 'last_name' => NULL,
174 'middle_name' => NULL,
175 'household_name' => NULL,
176 'organization_name' => NULL,
177 'legal_name' => NULL,
178 'job_title' => NULL,
179 ];
180 getTableInfo($contactIDs, $values, 'civicrm_contact', $fields, 'id');
181 }
182
183 /**
184 * @param $contactIDs
185 * @param $values
186 */
187 function getNoteInfo(&$contactIDs, &$values) {
188 $ids = implode(',', $contactIDs);
189
190 $sql = "
191 SELECT
192 entity_id as contact_id,
193 note as note, subject as subject
194 FROM civicrm_note
195 WHERE entity_id IN ( $ids )
196 AND entity_table = 'civicrm_contact'
197 ";
198
199 $dao = &CRM_Core_DAO::executeQuery($sql);
200 while ($dao->fetch()) {
201 $note = empty($dao->subject) ? '' : "{$dao->subject}: ";
202 $note .= empty($dao->note) ? '' : $dao->note;
203
204 appendValue($values, $dao->contact_id, 'note', $note);
205 }
206 }
207
208 /**
209 * @param $contactIDs
210 * @param $values
211 */
212 function getPhoneInfo(&$contactIDs, &$values) {
213 $ids = implode(',', $contactIDs);
214
215 $sql = "
216 SELECT
217 c.id as contact_id,
218 l.name as location_type,
219 p.phone as phone,
220 v.label as phone_type
221 FROM civicrm_contact c
222 INNER JOIN civicrm_phone p ON p.contact_id = c.id
223 LEFT JOIN civicrm_location_type l ON p.location_type_id = l.id
224 LEFT JOIN civicrm_option_group g ON g.name = 'phone_type'
225 LEFT JOIN civicrm_option_value v ON v.option_group_id = g.id AND p.phone_type_id = v.value
226 WHERE c.id IN ( $ids )
227 AND p.phone IS NOT NULL
228 ";
229
230 $dao = &CRM_Core_DAO::executeQuery($sql);
231 while ($dao->fetch()) {
232 $phone = '';
233
234 if (!empty($dao->location_type)) {
235 $phone = "{$dao->location_type}: ";
236 }
237
238 $phone .= $dao->phone;
239
240 if (!empty($dao->phone_type)) {
241 $phone .= " ({$dao->phone_type})";
242 }
243
244 appendValue($values, $dao->contact_id, 'phone', $phone);
245 }
246 }
247
248 /**
249 * @param $contactIDs
250 * @param $values
251 */
252 function getEmailInfo(&$contactIDs, &$values) {
253 $ids = implode(',', $contactIDs);
254
255 $sql = "
256 SELECT
257 c.id as contact_id,
258 l.name as location_type,
259 e.email as email
260 FROM civicrm_contact c
261 INNER JOIN civicrm_email e ON e.contact_id = c.id
262 LEFT JOIN civicrm_location_type l ON e.location_type_id = l.id
263 WHERE c.id IN ( $ids )
264 AND e.email IS NOT NULL
265 ";
266
267 $dao = &CRM_Core_DAO::executeQuery($sql);
268 while ($dao->fetch()) {
269 $email = '';
270
271 if (!empty($dao->location_type)) {
272 $email = "{$dao->location_type}: ";
273 }
274
275 $email .= $dao->email;
276 appendValue($values, $dao->contact_id, 'email', $email);
277 }
278 }
279
280 /**
281 * @param $contactIDs
282 * @param $values
283 */
284 function getAddressInfo(&$contactIDs, &$values) {
285 $ids = implode(',', $contactIDs);
286
287 $sql = "
288 SELECT c.id as contact_id, l.name as location_type,
289 a.street_address, a.supplemental_address_1, a.supplemental_address_2,
290 a.supplemental_address_3,
291 a.city, a.postal_code,
292 s.name as state, co.name as country
293 FROM civicrm_contact c
294 INNER JOIN civicrm_address a ON a.contact_id = c.id
295 LEFT JOIN civicrm_location_type l ON a.location_type_id = l.id
296 LEFT JOIN civicrm_state_province s ON a.state_province_id = s.id
297 LEFT JOIN civicrm_country co ON a.country_id = co.id
298 WHERE c.id IN ( $ids )
299 ";
300
301 $fields = [
302 'location_type',
303 'street_address',
304 'supplemental_address_1',
305 'supplemental_address_2',
306 'supplemental_address_3',
307 'city',
308 'postal_code',
309 'state',
310 'country',
311 ];
312 $dao = &CRM_Core_DAO::executeQuery($sql);
313 while ($dao->fetch()) {
314 $address = '';
315 foreach ($fields as $fld) {
316 if (empty($dao->$fld)) {
317 continue;
318 }
319
320 $address .= ($fld == 'location_type') ? "{$dao->$fld}: " : " {$dao->$fld},";
321 appendValue($values, $dao->contact_id, $fld, $dao->$fld);
322 }
323
324 if (!empty($address)) {
325 $address = rtrim($address, ",");
326 appendValue($values, $dao->contact_id, 'address', $address);
327 }
328 }
329 }
330
331 /**
332 * @param $values
333 * @param $contactID
334 * @param $name
335 * @param $value
336 */
337 function appendValue(&$values, $contactID, $name, $value) {
338 if (empty($value)) {
339 return;
340 }
341
342 if (!isset($values[$contactID][$name])) {
343 $values[$contactID][$name] = $value;
344 }
345 else {
346 if (!is_array($values[$contactID][$name])) {
347 $save = $values[$contactID][$name];
348 $values[$contactID][$name] = [];
349 $values[$contactID][$name][] = $save;
350 }
351 $values[$contactID][$name][] = $value;
352 }
353 }
354
355 /**
356 * @param $contactIDs
357 */
358 function run(&$contactIDs) {
359 $chunks = &splitContactIDs($contactIDs);
360
361 foreach ($chunks as $chunk) {
362 $values = [];
363 getValues($chunk, $values);
364 $xml = &generateSolrJSON($values);
365 echo $xml;
366 }
367 }
368
369 $config = CRM_Core_Config::singleton();
370 $config->userFramework = 'Soap';
371 $config->userFrameworkClass = 'CRM_Utils_System_Soap';
372 $config->userHookClass = 'CRM_Utils_Hook_Soap';
373
374 $sql = <<<EOT
375 SELECT id
376 FROM civicrm_contact
377 EOT;
378 $dao = &CRM_Core_DAO::executeQuery($sql);
379
380
381 $contactIDs = [];
382 while ($dao->fetch()) {
383 $contactIDs[] = $dao->id;
384 }
385
386 run($contactIDs);
387