3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
19 * The CiviCRM duplicate discovery engine is based on an
20 * algorithm designed by David Strauss <david@fourkitchens.com>.
22 class CRM_Dedupe_BAO_DedupeRuleGroup
extends CRM_Dedupe_DAO_DedupeRuleGroup
{
25 * Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
28 public $contactIds = [];
31 * Set the contact IDs to restrict the dedupe to.
33 * @param array $contactIds
35 public function setContactIds($contactIds) {
36 $this->contactIds
= $contactIds;
40 * Params to dedupe against (queries against the whole contact set otherwise)
46 * If there are no rules in rule group.
49 public $noRules = FALSE;
51 protected $temporaryTables = [];
54 * Return a structure holding the supported tables, fields and their titles
56 * @param string $requestedType
57 * The requested contact type.
60 * a table-keyed array of field-keyed arrays holding supported fields' titles
62 public static function supportedFields($requestedType) {
63 static $fields = NULL;
65 // this is needed, as we're piggy-backing importableFields() below
67 'civicrm_country.name' => 'civicrm_address.country_id',
68 'civicrm_county.name' => 'civicrm_address.county_id',
69 'civicrm_state_province.name' => 'civicrm_address.state_province_id',
70 'gender.label' => 'civicrm_contact.gender_id',
71 'individual_prefix.label' => 'civicrm_contact.prefix_id',
72 'individual_suffix.label' => 'civicrm_contact.suffix_id',
73 'addressee.label' => 'civicrm_contact.addressee_id',
74 'email_greeting.label' => 'civicrm_contact.email_greeting_id',
75 'postal_greeting.label' => 'civicrm_contact.postal_greeting_id',
76 'civicrm_phone.phone' => 'civicrm_phone.phone_numeric',
78 // the table names we support in dedupe rules - a filter for importableFields()
90 foreach (CRM_Contact_BAO_ContactType
::basicTypes() as $ctype) {
91 // take the table.field pairs and their titles from importableFields() if the table is supported
92 foreach (CRM_Contact_BAO_Contact
::importableFields($ctype) as $iField) {
93 if (isset($iField['where'])) {
94 $where = $iField['where'];
95 if (isset($replacements[$where])) {
96 $where = $replacements[$where];
98 list($table, $field) = explode('.', $where);
99 if (!in_array($table, $supportedTables)) {
102 $fields[$ctype][$table][$field] = $iField['title'];
105 // Note that most of the fields available come from 'importable fields' -
106 // I thought about making this field 'importable' but it felt like there might be unknown consequences
107 // so I opted for just adding it in & securing it with a unit test.
108 /// Example usage of sort_name - It is possible to alter sort name via hook so 2 organization names might differ as in
109 // Justice League vs The Justice League but these could have the same sort_name if 'the the'
110 // exension is installed (https://github.com/eileenmcnaughton/org.wikimedia.thethe)
111 $fields[$ctype]['civicrm_contact']['sort_name'] = ts('Sort Name');
112 // add custom data fields
113 foreach (CRM_Core_BAO_CustomGroup
::getTree($ctype, NULL, NULL, -1) as $key => $cg) {
117 foreach ($cg['fields'] as $cf) {
118 $fields[$ctype][$cg['table_name']][$cf['column_name']] = $cg['title'] . ' : ' . $cf['label'];
123 CRM_Utils_Hook
::dupeQuery(CRM_Core_DAO
::$_nullObject, 'supportedFields', $fields);
124 return !empty($fields[$requestedType]) ?
$fields[$requestedType] : [];
128 * Return the SQL query for dropping the temporary table.
130 public function tableDropQuery() {
131 return 'DROP TEMPORARY TABLE IF EXISTS dedupe';
135 * Return a set of SQL queries whose cummulative weights will mark matched
136 * records for the RuleGroup::threasholdQuery() to retrieve.
138 public function tableQuery() {
139 // make sure we've got a fetched dbrecord, not sure if this is enforced
140 if (!$this->name
== NULL ||
$this->is_reserved
== NULL) {
144 // Reserved Rule Groups can optionally get special treatment by
145 // implementing an optimization class and returning a query array.
146 if ($this->is_reserved
&&
147 CRM_Utils_File
::isIncludable("CRM/Dedupe/BAO/QueryBuilder/{$this->name}.php")
149 $command = empty($this->params
) ?
'internal' : 'record';
150 $queries = call_user_func(["CRM_Dedupe_BAO_QueryBuilder_{$this->name}", $command], $this);
153 // All other rule groups have queries generated by the member dedupe
154 // rules defined in the administrative interface.
156 // Find all rules contained by this script sorted by weight so that
157 // their execution can be short circuited on RuleGroup::fillTable()
158 $bao = new CRM_Dedupe_BAO_DedupeRule();
159 $bao->dedupe_rule_group_id
= $this->id
;
160 $bao->orderBy('rule_weight DESC');
163 // Generate a SQL query for each rule in the rule group that is
164 // tailored to respect the param and contactId options provided.
166 while ($bao->fetch()) {
167 $bao->contactIds
= $this->contactIds
;
168 $bao->params
= $this->params
;
170 // Skipping empty rules? Empty rules shouldn't exist; why check?
171 if ($query = $bao->sql()) {
172 $queries["{$bao->rule_table}.{$bao->rule_field}.{$bao->rule_weight}"] = $query;
177 // if there are no rules in this rule group
178 // add an empty query fulfilling the pattern
180 $this->noRules
= TRUE;
187 public function fillTable() {
188 // get the list of queries handy
189 $tableQueries = $this->tableQuery();
191 if ($this->params
&& !$this->noRules
) {
192 $this->temporaryTables
['dedupe'] = CRM_Utils_SQL_TempTable
::build()
193 ->setCategory('dedupe')
194 ->createWithColumns("id1 int, weight int, UNIQUE UI_id1 (id1)")->getName();
195 $dedupeCopyTemporaryTableObject = CRM_Utils_SQL_TempTable
::build()
196 ->setCategory('dedupe');
197 $this->temporaryTables
['dedupe_copy'] = $dedupeCopyTemporaryTableObject->getName();
198 $insertClause = "INSERT INTO {$this->temporaryTables['dedupe']} (id1, weight)";
199 $groupByClause = "GROUP BY id1, weight";
200 $dupeCopyJoin = " JOIN {$this->temporaryTables['dedupe_copy']} ON {$this->temporaryTables['dedupe_copy']}.id1 = t1.column WHERE ";
203 $this->temporaryTables
['dedupe'] = CRM_Utils_SQL_TempTable
::build()
204 ->setCategory('dedupe')
205 ->createWithColumns("id1 int, id2 int, weight int, UNIQUE UI_id1_id2 (id1, id2)")->getName();
206 $dedupeCopyTemporaryTableObject = CRM_Utils_SQL_TempTable
::build()
207 ->setCategory('dedupe');
208 $this->temporaryTables
['dedupe_copy'] = $dedupeCopyTemporaryTableObject->getName();
209 $insertClause = "INSERT INTO {$this->temporaryTables['dedupe']} (id1, id2, weight)";
210 $groupByClause = "GROUP BY id1, id2, weight";
211 $dupeCopyJoin = " JOIN {$this->temporaryTables['dedupe_copy']} ON {$this->temporaryTables['dedupe_copy']}.id1 = t1.column AND {$this->temporaryTables['dedupe_copy']}.id2 = t2.column WHERE ";
213 $patternColumn = '/t1.(\w+)/';
216 CRM_Utils_Hook
::dupeQuery($this, 'table', $tableQueries);
218 while (!empty($tableQueries)) {
219 list($isInclusive, $isDie) = self
::isQuerySetInclusive($tableQueries, $this->threshold
, $exclWeightSum);
222 // order queries by table count
223 self
::orderByTableCount($tableQueries);
225 $weightSum = array_sum($exclWeightSum);
226 $searchWithinDupes = !empty($exclWeightSum) ?
1 : 0;
228 while (!empty($tableQueries)) {
229 // extract the next query ( and weight ) to be executed
230 $fieldWeight = array_keys($tableQueries);
231 $fieldWeight = $fieldWeight[0];
232 $query = array_shift($tableQueries);
234 if ($searchWithinDupes) {
235 // drop dedupe_copy table just in case if its already there.
236 $dedupeCopyTemporaryTableObject->drop();
237 // get prepared to search within already found dupes if $searchWithinDupes flag is set
238 $dedupeCopyTemporaryTableObject->createWithQuery("SELECT * FROM {$this->temporaryTables['dedupe']} WHERE weight >= {$weightSum}");
240 preg_match($patternColumn, $query, $matches);
241 $query = str_replace(' WHERE ', str_replace('column', $matches[1], $dupeCopyJoin), $query);
243 // CRM-19612: If there's a union, there will be two WHEREs, and you
244 // can't use the temp table twice.
245 if (preg_match('/' . $this->temporaryTables
['dedupe_copy'] . '[\S\s]*(union)[\S\s]*' . $this->temporaryTables
['dedupe_copy'] . '/i', $query, $matches, PREG_OFFSET_CAPTURE
)) {
246 // Make a second temp table:
247 $this->temporaryTables
['dedupe_copy_2'] = CRM_Utils_SQL_TempTable
::build()
248 ->setCategory('dedupe')
249 ->createWithQuery("SELECT * FROM {$this->temporaryTables['dedupe']} WHERE weight >= {$weightSum}")
251 // After the union, use that new temp table:
252 $part1 = substr($query, 0, $matches[1][1]);
253 $query = $part1 . str_replace($this->temporaryTables
['dedupe_copy'], $this->temporaryTables
['dedupe_copy_2'], substr($query, $matches[1][1]));
256 $searchWithinDupes = 1;
258 // construct and execute the intermediate query
259 $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
260 $dao = CRM_Core_DAO
::executeQuery($query);
262 // FIXME: we need to be more acurate with affected rows, especially for insert vs duplicate insert.
263 // And that will help optimize further.
264 $affectedRows = $dao->affectedRows();
266 // In an inclusive situation, failure of any query means no further processing -
267 if ($affectedRows == 0) {
268 // reset to make sure no further execution is done.
272 $weightSum = substr($fieldWeight, strrpos($fieldWeight, '.') +
1) +
$weightSum;
274 // An exclusive situation -
277 // since queries are already sorted by weights, we can continue as is
278 $fieldWeight = array_keys($tableQueries);
279 $fieldWeight = $fieldWeight[0];
280 $query = array_shift($tableQueries);
281 $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
282 $dao = CRM_Core_DAO
::executeQuery($query);
283 if ($dao->affectedRows() >= 1) {
284 $exclWeightSum[] = substr($fieldWeight, strrpos($fieldWeight, '.') +
1);
288 // its a die situation
295 * Function to determine if a given query set contains inclusive or exclusive set of weights.
296 * The function assumes that the query set is already ordered by weight in desc order.
297 * @param $tableQueries
299 * @param array $exclWeightSum
303 public static function isQuerySetInclusive($tableQueries, $threshold, $exclWeightSum = []) {
305 foreach ($tableQueries as $key => $query) {
306 $input[] = substr($key, strrpos($key, '.') +
1);
309 if (!empty($exclWeightSum)) {
310 $input = array_merge($input, $exclWeightSum);
314 if (count($input) == 1) {
315 return [FALSE, $input[0] < $threshold];
318 $totalCombinations = 0;
319 for ($i = 0; $i < count($input); $i++
) {
320 $combination = [$input[$i]];
321 if (array_sum($combination) >= $threshold) {
322 $totalCombinations++
;
325 for ($j = $i +
1; $j < count($input); $j++
) {
326 $combination[] = $input[$j];
327 if (array_sum($combination) >= $threshold) {
328 $totalCombinations++
;
332 return [$totalCombinations == 1, $totalCombinations <= 0];
336 * sort queries by number of records for the table associated with them.
337 * @param $tableQueries
339 public static function orderByTableCount(&$tableQueries) {
340 static $tableCount = [];
343 foreach ($tableQueries as $key => $query) {
344 $table = explode(".", $key);
346 if (!array_key_exists($table, $tableCount)) {
347 $query = "SELECT COUNT(*) FROM {$table}";
348 $tableCount[$table] = CRM_Core_DAO
::singleValueQuery($query);
350 $tempArray[$key] = $tableCount[$table];
354 foreach ($tempArray as $key => $count) {
355 $tempArray[$key] = $tableQueries[$key];
357 $tableQueries = $tempArray;
361 * Return the SQL query for getting only the interesting results out of the dedupe table.
363 * @$checkPermission boolean $params a flag to indicate if permission should be considered.
364 * default is to always check permissioning but public pages for example might not want
365 * permission to be checked for anonymous users. Refer CRM-6211. We might be beaking
366 * Multi-Site dedupe for public pages.
368 * @param bool $checkPermission
372 public function thresholdQuery($checkPermission = TRUE) {
373 $this->_aclFrom
= '';
376 if ($this->params
&& !$this->noRules
) {
377 if ($checkPermission) {
378 list($this->_aclFrom
, $aclWhere) = CRM_Contact_BAO_Contact_Permission
::cacheClause('civicrm_contact');
379 $aclWhere = $aclWhere ?
"AND {$aclWhere}" : '';
381 $query = "SELECT {$this->temporaryTables['dedupe']}.id1 as id
382 FROM {$this->temporaryTables['dedupe']} JOIN civicrm_contact ON {$this->temporaryTables['dedupe']}.id1 = civicrm_contact.id {$this->_aclFrom}
383 WHERE contact_type = '{$this->contact_type}' AND is_deleted = 0 $aclWhere
384 AND weight >= {$this->threshold}";
388 if ($checkPermission) {
389 list($this->_aclFrom
, $aclWhere) = CRM_Contact_BAO_Contact_Permission
::cacheClause(['c1', 'c2']);
390 $aclWhere = $aclWhere ?
"AND {$aclWhere}" : '';
392 $query = "SELECT IF({$this->temporaryTables['dedupe']}.id1 < {$this->temporaryTables['dedupe']}.id2, {$this->temporaryTables['dedupe']}.id1, {$this->temporaryTables['dedupe']}.id2) as id1,
393 IF({$this->temporaryTables['dedupe']}.id1 < {$this->temporaryTables['dedupe']}.id2, {$this->temporaryTables['dedupe']}.id2, {$this->temporaryTables['dedupe']}.id1) as id2, {$this->temporaryTables['dedupe']}.weight
394 FROM {$this->temporaryTables['dedupe']} JOIN civicrm_contact c1 ON {$this->temporaryTables['dedupe']}.id1 = c1.id
395 JOIN civicrm_contact c2 ON {$this->temporaryTables['dedupe']}.id2 = c2.id {$this->_aclFrom}
396 LEFT JOIN civicrm_dedupe_exception exc ON {$this->temporaryTables['dedupe']}.id1 = exc.contact_id1 AND {$this->temporaryTables['dedupe']}.id2 = exc.contact_id2
397 WHERE c1.contact_type = '{$this->contact_type}' AND
398 c2.contact_type = '{$this->contact_type}'
399 AND c1.is_deleted = 0 AND c2.is_deleted = 0
401 AND weight >= {$this->threshold} AND exc.contact_id1 IS NULL";
404 CRM_Utils_Hook
::dupeQuery($this, 'threshold', $query);
409 * find fields related to a rule group.
411 * @param array $params
414 * (rule field => weight) array and threshold associated to rule group
416 public static function dedupeRuleFieldsWeight($params) {
417 $rgBao = new CRM_Dedupe_BAO_DedupeRuleGroup();
418 $rgBao->contact_type
= $params['contact_type'];
419 if (!empty($params['id'])) {
420 // accept an ID if provided
421 $rgBao->id
= $params['id'];
424 $rgBao->used
= $params['used'];
428 $ruleBao = new CRM_Dedupe_BAO_DedupeRule();
429 $ruleBao->dedupe_rule_group_id
= $rgBao->id
;
432 while ($ruleBao->fetch()) {
433 $field_name = $ruleBao->rule_field
;
434 if ($field_name == 'phone_numeric') {
435 $field_name = 'phone';
437 $ruleFields[$field_name] = $ruleBao->rule_weight
;
440 return [$ruleFields, $rgBao->threshold
];
444 * Get all of the combinations of fields that would work with a rule.
446 * @param array $rgFields
447 * @param int $threshold
448 * @param array $combos
449 * @param array $running
451 public static function combos($rgFields, $threshold, &$combos, $running = []) {
452 foreach ($rgFields as $rgField => $weight) {
453 unset($rgFields[$rgField]);
454 $diff = $threshold - $weight;
455 $runningnow = $running;
456 $runningnow[] = $rgField;
458 self
::combos($rgFields, $diff, $combos, $runningnow);
461 $combos[] = $runningnow;
467 * Get an array of rule group id to rule group name
468 * for all th groups for that contactType. If contactType
469 * not specified, do it for all
471 * @param string $contactType
472 * Individual, Household or Organization.
476 * id => "nice name" of rule group
478 public static function getByType($contactType = NULL) {
479 $dao = new CRM_Dedupe_DAO_DedupeRuleGroup();
482 $dao->contact_type
= $contactType;
487 while ($dao->fetch()) {
488 $title = !empty($dao->title
) ?
$dao->title
: (!empty($dao->name
) ?
$dao->name
: $dao->contact_type
);
490 $name = "$title - {$dao->used}";
491 $result[$dao->id
] = $name;
497 * Get the cached contact type for a particular rule group.
499 * @param int $rule_group_id
503 public static function getContactTypeForRuleGroup($rule_group_id) {
504 if (!isset(\Civi
::$statics[__CLASS__
]) ||
!isset(\Civi
::$statics[__CLASS__
]['rule_groups'])) {
505 \Civi
::$statics[__CLASS__
]['rule_groups'] = [];
507 if (empty(\Civi
::$statics[__CLASS__
]['rule_groups'][$rule_group_id])) {
508 \Civi
::$statics[__CLASS__
]['rule_groups'][$rule_group_id]['contact_type'] = CRM_Core_DAO
::getFieldValue(
509 'CRM_Dedupe_DAO_DedupeRuleGroup',
515 return \Civi
::$statics[__CLASS__
]['rule_groups'][$rule_group_id]['contact_type'];