Merge pull request #18799 from deb1990/CPS-332-support-new-button-markup
[civicrm-core.git] / CRM / Dedupe / BAO / RuleGroup.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
bc77d7c0 4 | Copyright CiviCRM LLC. All rights reserved. |
6a488035 5 | |
bc77d7c0
TO
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
6a488035 9 +--------------------------------------------------------------------+
d25dd0ee 10 */
6a488035
TO
11
12/**
13 *
14 * @package CRM
ca5cec67 15 * @copyright CiviCRM LLC https://civicrm.org/licensing
6a488035
TO
16 */
17
18/**
19 * The CiviCRM duplicate discovery engine is based on an
20 * algorithm designed by David Strauss <david@fourkitchens.com>.
21 */
22class CRM_Dedupe_BAO_RuleGroup extends CRM_Dedupe_DAO_RuleGroup {
23
24 /**
100fef9d 25 * Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
518fa0ee 26 * @var array
6a488035 27 */
518fa0ee 28 public $contactIds = [];
6a488035 29
4c8b4719 30 /**
31 * Set the contact IDs to restrict the dedupe to.
32 *
33 * @param array $contactIds
34 */
35 public function setContactIds($contactIds) {
36 $this->contactIds = $contactIds;
37 }
38
6a488035 39 /**
100fef9d 40 * Params to dedupe against (queries against the whole contact set otherwise)
518fa0ee 41 * @var array
6a488035 42 */
518fa0ee 43 public $params = [];
6a488035
TO
44
45 /**
fe482240 46 * If there are no rules in rule group.
518fa0ee 47 * @var bool
6a488035 48 */
518fa0ee 49 public $noRules = FALSE;
6a488035 50
887c0ec2 51 protected $temporaryTables = [];
52
6a488035
TO
53 /**
54 * Return a structure holding the supported tables, fields and their titles
55 *
98997235
TO
56 * @param string $requestedType
57 * The requested contact type.
6a488035 58 *
a6c01b45
CW
59 * @return array
60 * a table-keyed array of field-keyed arrays holding supported fields' titles
6a488035 61 */
49cb3722 62 public static function supportedFields($requestedType) {
6a488035
TO
63 static $fields = NULL;
64 if (!$fields) {
65 // this is needed, as we're piggy-backing importableFields() below
be2fb01f 66 $replacements = [
6a488035
TO
67 'civicrm_country.name' => 'civicrm_address.country_id',
68 'civicrm_county.name' => 'civicrm_address.county_id',
69 'civicrm_state_province.name' => 'civicrm_address.state_province_id',
70 'gender.label' => 'civicrm_contact.gender_id',
71 'individual_prefix.label' => 'civicrm_contact.prefix_id',
72 'individual_suffix.label' => 'civicrm_contact.suffix_id',
73 'addressee.label' => 'civicrm_contact.addressee_id',
74 'email_greeting.label' => 'civicrm_contact.email_greeting_id',
75 'postal_greeting.label' => 'civicrm_contact.postal_greeting_id',
698095e2 76 'civicrm_phone.phone' => 'civicrm_phone.phone_numeric',
be2fb01f 77 ];
6a488035 78 // the table names we support in dedupe rules - a filter for importableFields()
be2fb01f 79 $supportedTables = [
353ffa53
TO
80 'civicrm_address',
81 'civicrm_contact',
82 'civicrm_email',
83 'civicrm_im',
84 'civicrm_note',
85 'civicrm_openid',
86 'civicrm_phone',
be2fb01f 87 ];
6a488035 88
be2fb01f 89 foreach (['Individual', 'Organization', 'Household'] as $ctype) {
6a488035
TO
90 // take the table.field pairs and their titles from importableFields() if the table is supported
91 foreach (CRM_Contact_BAO_Contact::importableFields($ctype) as $iField) {
92 if (isset($iField['where'])) {
93 $where = $iField['where'];
94 if (isset($replacements[$where])) {
95 $where = $replacements[$where];
96 }
97 list($table, $field) = explode('.', $where);
98 if (!in_array($table, $supportedTables)) {
99 continue;
100 }
101 $fields[$ctype][$table][$field] = $iField['title'];
102 }
103 }
49cb3722 104 // Note that most of the fields available come from 'importable fields' -
105 // I thought about making this field 'importable' but it felt like there might be unknown consequences
106 // so I opted for just adding it in & securing it with a unit test.
1dba397e 107 /// Example usage of sort_name - It is possible to alter sort name via hook so 2 organization names might differ as in
49cb3722 108 // Justice League vs The Justice League but these could have the same sort_name if 'the the'
109 // exension is installed (https://github.com/eileenmcnaughton/org.wikimedia.thethe)
110 $fields[$ctype]['civicrm_contact']['sort_name'] = ts('Sort Name');
6a488035 111 // add custom data fields
0b330e6d 112 foreach (CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1) as $key => $cg) {
6a488035
TO
113 if (!is_int($key)) {
114 continue;
115 }
116 foreach ($cg['fields'] as $cf) {
117 $fields[$ctype][$cg['table_name']][$cf['column_name']] = $cf['label'];
118 }
119 }
120 }
121 }
1cf05c3e 122 CRM_Utils_Hook::dupeQuery(CRM_Core_DAO::$_nullObject, 'supportedFields', $fields);
49cb3722 123 return !empty($fields[$requestedType]) ? $fields[$requestedType] : [];
6a488035
TO
124 }
125
126 /**
127 * Return the SQL query for dropping the temporary table.
128 */
00be9182 129 public function tableDropQuery() {
6a488035
TO
130 return 'DROP TEMPORARY TABLE IF EXISTS dedupe';
131 }
132
133 /**
134 * Return a set of SQL queries whose cummulative weights will mark matched
135 * records for the RuleGroup::threasholdQuery() to retrieve.
136 */
00be9182 137 public function tableQuery() {
6a488035
TO
138 // make sure we've got a fetched dbrecord, not sure if this is enforced
139 if (!$this->name == NULL || $this->is_reserved == NULL) {
140 $this->find(TRUE);
141 }
142
143 // Reserved Rule Groups can optionally get special treatment by
144 // implementing an optimization class and returning a query array.
145 if ($this->is_reserved &&
146 CRM_Utils_File::isIncludable("CRM/Dedupe/BAO/QueryBuilder/{$this->name}.php")
147 ) {
6a488035 148 $command = empty($this->params) ? 'internal' : 'record';
be2fb01f 149 $queries = call_user_func(["CRM_Dedupe_BAO_QueryBuilder_{$this->name}", $command], $this);
6a488035
TO
150 }
151 else {
152 // All other rule groups have queries generated by the member dedupe
153 // rules defined in the administrative interface.
154
155 // Find all rules contained by this script sorted by weight so that
156 // their execution can be short circuited on RuleGroup::fillTable()
157 $bao = new CRM_Dedupe_BAO_Rule();
158 $bao->dedupe_rule_group_id = $this->id;
159 $bao->orderBy('rule_weight DESC');
160 $bao->find();
161
162 // Generate a SQL query for each rule in the rule group that is
163 // tailored to respect the param and contactId options provided.
be2fb01f 164 $queries = [];
6a488035
TO
165 while ($bao->fetch()) {
166 $bao->contactIds = $this->contactIds;
167 $bao->params = $this->params;
168
169 // Skipping empty rules? Empty rules shouldn't exist; why check?
170 if ($query = $bao->sql()) {
171 $queries["{$bao->rule_table}.{$bao->rule_field}.{$bao->rule_weight}"] = $query;
172 }
173 }
174 }
175
176 // if there are no rules in this rule group
177 // add an empty query fulfilling the pattern
178 if (!$queries) {
6a488035 179 $this->noRules = TRUE;
be2fb01f 180 return [];
6a488035
TO
181 }
182
183 return $queries;
184 }
185
00be9182 186 public function fillTable() {
6a488035
TO
187 // get the list of queries handy
188 $tableQueries = $this->tableQuery();
189
190 if ($this->params && !$this->noRules) {
887c0ec2 191 $this->temporaryTables['dedupe'] = CRM_Utils_SQL_TempTable::build()
192 ->setCategory('dedupe')
193 ->createWithColumns("id1 int, weight int, UNIQUE UI_id1 (id1)")->getName();
1dba397e
SL
194 $dedupeCopyTemporaryTableObject = CRM_Utils_SQL_TempTable::build()
195 ->setCategory('dedupe');
196 $this->temporaryTables['dedupe_copy'] = $dedupeCopyTemporaryTableObject->getName();
887c0ec2 197 $insertClause = "INSERT INTO {$this->temporaryTables['dedupe']} (id1, weight)";
3dc13e46 198 $groupByClause = "GROUP BY id1, weight";
1dba397e 199 $dupeCopyJoin = " JOIN {$this->temporaryTables['dedupe_copy']} ON {$this->temporaryTables['dedupe_copy']}.id1 = t1.column WHERE ";
6a488035
TO
200 }
201 else {
887c0ec2 202 $this->temporaryTables['dedupe'] = CRM_Utils_SQL_TempTable::build()
203 ->setCategory('dedupe')
204 ->createWithColumns("id1 int, id2 int, weight int, UNIQUE UI_id1_id2 (id1, id2)")->getName();
1dba397e
SL
205 $dedupeCopyTemporaryTableObject = CRM_Utils_SQL_TempTable::build()
206 ->setCategory('dedupe');
207 $this->temporaryTables['dedupe_copy'] = $dedupeCopyTemporaryTableObject->getName();
887c0ec2 208 $insertClause = "INSERT INTO {$this->temporaryTables['dedupe']} (id1, id2, weight)";
3dc13e46 209 $groupByClause = "GROUP BY id1, id2, weight";
1dba397e 210 $dupeCopyJoin = " JOIN {$this->temporaryTables['dedupe_copy']} ON {$this->temporaryTables['dedupe_copy']}.id1 = t1.column AND {$this->temporaryTables['dedupe_copy']}.id2 = t2.column WHERE ";
6a488035
TO
211 }
212 $patternColumn = '/t1.(\w+)/';
be2fb01f 213 $exclWeightSum = [];
6a488035 214
6a488035
TO
215 CRM_Utils_Hook::dupeQuery($this, 'table', $tableQueries);
216
217 while (!empty($tableQueries)) {
218 list($isInclusive, $isDie) = self::isQuerySetInclusive($tableQueries, $this->threshold, $exclWeightSum);
219
220 if ($isInclusive) {
221 // order queries by table count
222 self::orderByTableCount($tableQueries);
223
224 $weightSum = array_sum($exclWeightSum);
225 $searchWithinDupes = !empty($exclWeightSum) ? 1 : 0;
226
227 while (!empty($tableQueries)) {
228 // extract the next query ( and weight ) to be executed
229 $fieldWeight = array_keys($tableQueries);
230 $fieldWeight = $fieldWeight[0];
353ffa53 231 $query = array_shift($tableQueries);
6a488035
TO
232
233 if ($searchWithinDupes) {
9742d8f3
SL
234 // drop dedupe_copy table just in case if its already there.
235 $dedupeCopyTemporaryTableObject->drop();
6a488035 236 // get prepared to search within already found dupes if $searchWithinDupes flag is set
1dba397e 237 $dedupeCopyTemporaryTableObject->createWithQuery("SELECT * FROM {$this->temporaryTables['dedupe']} WHERE weight >= {$weightSum}");
6a488035
TO
238
239 preg_match($patternColumn, $query, $matches);
240 $query = str_replace(' WHERE ', str_replace('column', $matches[1], $dupeCopyJoin), $query);
0226a9f3
AH
241
242 // CRM-19612: If there's a union, there will be two WHEREs, and you
243 // can't use the temp table twice.
1dba397e 244 if (preg_match('/' . $this->temporaryTables['dedupe_copy'] . '[\S\s]*(union)[\S\s]*' . $this->temporaryTables['dedupe_copy'] . '/i', $query, $matches, PREG_OFFSET_CAPTURE)) {
0226a9f3 245 // Make a second temp table:
1dba397e
SL
246 $this->temporaryTables['dedupe_copy_2'] = CRM_Utils_SQL_TempTable::build()
247 ->setCategory('dedupe')
248 ->createWithQuery("SELECT * FROM {$this->temporaryTables['dedupe']} WHERE weight >= {$weightSum}")
249 ->getName();
0226a9f3
AH
250 // After the union, use that new temp table:
251 $part1 = substr($query, 0, $matches[1][1]);
1dba397e 252 $query = $part1 . str_replace($this->temporaryTables['dedupe_copy'], $this->temporaryTables['dedupe_copy_2'], substr($query, $matches[1][1]));
0226a9f3 253 }
6a488035
TO
254 }
255 $searchWithinDupes = 1;
256
257 // construct and execute the intermediate query
258 $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
411f09c9 259 $dao = CRM_Core_DAO::executeQuery($query);
6a488035
TO
260
261 // FIXME: we need to be more acurate with affected rows, especially for insert vs duplicate insert.
262 // And that will help optimize further.
263 $affectedRows = $dao->affectedRows();
6a488035
TO
264
265 // In an inclusive situation, failure of any query means no further processing -
266 if ($affectedRows == 0) {
267 // reset to make sure no further execution is done.
be2fb01f 268 $tableQueries = [];
6a488035
TO
269 break;
270 }
271 $weightSum = substr($fieldWeight, strrpos($fieldWeight, '.') + 1) + $weightSum;
272 }
273 // An exclusive situation -
274 }
275 elseif (!$isDie) {
276 // since queries are already sorted by weights, we can continue as is
277 $fieldWeight = array_keys($tableQueries);
278 $fieldWeight = $fieldWeight[0];
353ffa53
TO
279 $query = array_shift($tableQueries);
280 $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
411f09c9 281 $dao = CRM_Core_DAO::executeQuery($query);
6a488035
TO
282 if ($dao->affectedRows() >= 1) {
283 $exclWeightSum[] = substr($fieldWeight, strrpos($fieldWeight, '.') + 1);
284 }
6a488035
TO
285 }
286 else {
287 // its a die situation
288 break;
289 }
290 }
291 }
292
e0ef6999 293 /**
4f1f1f2a
CW
294 * Function to determine if a given query set contains inclusive or exclusive set of weights.
295 * The function assumes that the query set is already ordered by weight in desc order.
e0ef6999
EM
296 * @param $tableQueries
297 * @param $threshold
298 * @param array $exclWeightSum
299 *
300 * @return array
301 */
be2fb01f
CW
302 public static function isQuerySetInclusive($tableQueries, $threshold, $exclWeightSum = []) {
303 $input = [];
6a488035
TO
304 foreach ($tableQueries as $key => $query) {
305 $input[] = substr($key, strrpos($key, '.') + 1);
306 }
307
308 if (!empty($exclWeightSum)) {
309 $input = array_merge($input, $exclWeightSum);
310 rsort($input);
311 }
312
313 if (count($input) == 1) {
be2fb01f 314 return [FALSE, $input[0] < $threshold];
6a488035
TO
315 }
316
317 $totalCombinations = 0;
318 for ($i = 0; $i < count($input); $i++) {
be2fb01f 319 $combination = [$input[$i]];
6a488035
TO
320 if (array_sum($combination) >= $threshold) {
321 $totalCombinations++;
322 continue;
323 }
324 for ($j = $i + 1; $j < count($input); $j++) {
325 $combination[] = $input[$j];
326 if (array_sum($combination) >= $threshold) {
327 $totalCombinations++;
328 }
329 }
330 }
be2fb01f 331 return [$totalCombinations == 1, $totalCombinations <= 0];
6a488035
TO
332 }
333
e0ef6999 334 /**
fe482240 335 * sort queries by number of records for the table associated with them.
e0ef6999
EM
336 * @param $tableQueries
337 */
00be9182 338 public static function orderByTableCount(&$tableQueries) {
be2fb01f 339 static $tableCount = [];
6a488035 340
be2fb01f 341 $tempArray = [];
6a488035
TO
342 foreach ($tableQueries as $key => $query) {
343 $table = explode(".", $key);
344 $table = $table[0];
345 if (!array_key_exists($table, $tableCount)) {
346 $query = "SELECT COUNT(*) FROM {$table}";
347 $tableCount[$table] = CRM_Core_DAO::singleValueQuery($query);
348 }
349 $tempArray[$key] = $tableCount[$table];
350 }
351
352 asort($tempArray);
353 foreach ($tempArray as $key => $count) {
354 $tempArray[$key] = $tableQueries[$key];
355 }
356 $tableQueries = $tempArray;
357 }
358
359 /**
360 * Return the SQL query for getting only the interesting results out of the dedupe table.
361 *
362 * @$checkPermission boolean $params a flag to indicate if permission should be considered.
363 * default is to always check permissioning but public pages for example might not want
364 * permission to be checked for anonymous users. Refer CRM-6211. We might be beaking
365 * Multi-Site dedupe for public pages.
ea3ddccf 366 *
367 * @param bool $checkPermission
368 *
369 * @return string
6a488035 370 */
00be9182 371 public function thresholdQuery($checkPermission = TRUE) {
6a488035 372 $this->_aclFrom = '';
cb5d0cde 373 $aclWhere = '';
6a488035
TO
374
375 if ($this->params && !$this->noRules) {
376 if ($checkPermission) {
cb5d0cde 377 list($this->_aclFrom, $aclWhere) = CRM_Contact_BAO_Contact_Permission::cacheClause('civicrm_contact');
378 $aclWhere = $aclWhere ? "AND {$aclWhere}" : '';
6a488035 379 }
887c0ec2 380 $query = "SELECT {$this->temporaryTables['dedupe']}.id1 as id
381 FROM {$this->temporaryTables['dedupe']} JOIN civicrm_contact ON {$this->temporaryTables['dedupe']}.id1 = civicrm_contact.id {$this->_aclFrom}
cb5d0cde 382 WHERE contact_type = '{$this->contact_type}' AND is_deleted = 0 $aclWhere
6a488035
TO
383 AND weight >= {$this->threshold}";
384 }
385 else {
cb5d0cde 386 $aclWhere = '';
6a488035 387 if ($checkPermission) {
cb5d0cde 388 list($this->_aclFrom, $aclWhere) = CRM_Contact_BAO_Contact_Permission::cacheClause(['c1', 'c2']);
389 $aclWhere = $aclWhere ? "AND {$aclWhere}" : '';
6a488035 390 }
887c0ec2 391 $query = "SELECT IF({$this->temporaryTables['dedupe']}.id1 < {$this->temporaryTables['dedupe']}.id2, {$this->temporaryTables['dedupe']}.id1, {$this->temporaryTables['dedupe']}.id2) as id1,
392 IF({$this->temporaryTables['dedupe']}.id1 < {$this->temporaryTables['dedupe']}.id2, {$this->temporaryTables['dedupe']}.id2, {$this->temporaryTables['dedupe']}.id1) as id2, {$this->temporaryTables['dedupe']}.weight
393 FROM {$this->temporaryTables['dedupe']} JOIN civicrm_contact c1 ON {$this->temporaryTables['dedupe']}.id1 = c1.id
394 JOIN civicrm_contact c2 ON {$this->temporaryTables['dedupe']}.id2 = c2.id {$this->_aclFrom}
395 LEFT JOIN civicrm_dedupe_exception exc ON {$this->temporaryTables['dedupe']}.id1 = exc.contact_id1 AND {$this->temporaryTables['dedupe']}.id2 = exc.contact_id2
d4c8a770 396 WHERE c1.contact_type = '{$this->contact_type}' AND
cb5d0cde 397 c2.contact_type = '{$this->contact_type}'
398 AND c1.is_deleted = 0 AND c2.is_deleted = 0
399 {$aclWhere}
6a488035
TO
400 AND weight >= {$this->threshold} AND exc.contact_id1 IS NULL";
401 }
402
403 CRM_Utils_Hook::dupeQuery($this, 'threshold', $query);
404 return $query;
405 }
406
407 /**
dc195289 408 * find fields related to a rule group.
6a488035 409 *
c490a46a 410 * @param array $params
77b97be7 411 *
a6c01b45
CW
412 * @return array
413 * (rule field => weight) array and threshold associated to rule group
6a488035 414 */
00be9182 415 public static function dedupeRuleFieldsWeight($params) {
353ffa53 416 $rgBao = new CRM_Dedupe_BAO_RuleGroup();
6a488035 417 $rgBao->contact_type = $params['contact_type'];
b53cbfbc 418 if (!empty($params['id'])) {
03390e26 419 // accept an ID if provided
420 $rgBao->id = $params['id'];
421 }
422 else {
423 $rgBao->used = $params['used'];
424 }
6a488035
TO
425 $rgBao->find(TRUE);
426
427 $ruleBao = new CRM_Dedupe_BAO_Rule();
428 $ruleBao->dedupe_rule_group_id = $rgBao->id;
429 $ruleBao->find();
be2fb01f 430 $ruleFields = [];
6a488035 431 while ($ruleBao->fetch()) {
0ca748fb
JM
432 $field_name = $ruleBao->rule_field;
433 if ($field_name == 'phone_numeric') {
434 $field_name = 'phone';
435 }
436 $ruleFields[$field_name] = $ruleBao->rule_weight;
6a488035
TO
437 }
438
be2fb01f 439 return [$ruleFields, $rgBao->threshold];
6a488035
TO
440 }
441
03390e26 442 /**
fe482240 443 * Get all of the combinations of fields that would work with a rule.
ad37ac8e 444 *
445 * @param array $rgFields
446 * @param int $threshold
447 * @param array $combos
448 * @param array $running
03390e26 449 */
be2fb01f 450 public static function combos($rgFields, $threshold, &$combos, $running = []) {
03390e26 451 foreach ($rgFields as $rgField => $weight) {
452 unset($rgFields[$rgField]);
453 $diff = $threshold - $weight;
454 $runningnow = $running;
455 $runningnow[] = $rgField;
456 if ($diff > 0) {
457 self::combos($rgFields, $diff, $combos, $runningnow);
458 }
459 else {
460 $combos[] = $runningnow;
461 }
462 }
463 }
464
6a488035
TO
465 /**
466 * Get an array of rule group id to rule group name
467 * for all th groups for that contactType. If contactType
468 * not specified, do it for all
469 *
98997235
TO
470 * @param string $contactType
471 * Individual, Household or Organization.
6a488035 472 *
6a488035 473 *
a6c01b45
CW
474 * @return array
475 * id => "nice name" of rule group
6a488035 476 */
00be9182 477 public static function getByType($contactType = NULL) {
6a488035
TO
478 $dao = new CRM_Dedupe_DAO_RuleGroup();
479
480 if ($contactType) {
481 $dao->contact_type = $contactType;
482 }
483
484 $dao->find();
be2fb01f 485 $result = [];
6a488035 486 while ($dao->fetch()) {
389bcebf 487 $title = !empty($dao->title) ? $dao->title : (!empty($dao->name) ? $dao->name : $dao->contact_type);
01a93ecd
DL
488
489 $name = "$title - {$dao->used}";
6a488035
TO
490 $result[$dao->id] = $name;
491 }
492 return $result;
493 }
96025800 494
2ae26001 495 /**
496 * Get the cached contact type for a particular rule group.
497 *
498 * @param int $rule_group_id
499 *
500 * @return string
501 */
502 public static function getContactTypeForRuleGroup($rule_group_id) {
503 if (!isset(\Civi::$statics[__CLASS__]) || !isset(\Civi::$statics[__CLASS__]['rule_groups'])) {
be2fb01f 504 \Civi::$statics[__CLASS__]['rule_groups'] = [];
2ae26001 505 }
506 if (empty(\Civi::$statics[__CLASS__]['rule_groups'][$rule_group_id])) {
507 \Civi::$statics[__CLASS__]['rule_groups'][$rule_group_id]['contact_type'] = CRM_Core_DAO::getFieldValue(
508 'CRM_Dedupe_DAO_RuleGroup',
509 $rule_group_id,
510 'contact_type'
511 );
512 }
513
514 return \Civi::$statics[__CLASS__]['rule_groups'][$rule_group_id]['contact_type'];
515 }
516
6a488035 517}