3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.7 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2015 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
31 * @copyright CiviCRM LLC (c) 2004-2015
37 * The CiviCRM duplicate discovery engine is based on an
38 * algorithm designed by David Strauss <david@fourkitchens.com>.
40 class CRM_Dedupe_BAO_RuleGroup
extends CRM_Dedupe_DAO_RuleGroup
{
43 * Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
45 var $contactIds = array();
48 * Params to dedupe against (queries against the whole contact set otherwise)
50 var $params = array();
53 * If there are no rules in rule group.
58 * Return a structure holding the supported tables, fields and their titles
60 * @param string $requestedType
61 * The requested contact type.
64 * a table-keyed array of field-keyed arrays holding supported fields' titles
66 public static function &supportedFields($requestedType) {
67 static $fields = NULL;
69 // this is needed, as we're piggy-backing importableFields() below
70 $replacements = array(
71 'civicrm_country.name' => 'civicrm_address.country_id',
72 'civicrm_county.name' => 'civicrm_address.county_id',
73 'civicrm_state_province.name' => 'civicrm_address.state_province_id',
74 'gender.label' => 'civicrm_contact.gender_id',
75 'individual_prefix.label' => 'civicrm_contact.prefix_id',
76 'individual_suffix.label' => 'civicrm_contact.suffix_id',
77 'addressee.label' => 'civicrm_contact.addressee_id',
78 'email_greeting.label' => 'civicrm_contact.email_greeting_id',
79 'postal_greeting.label' => 'civicrm_contact.postal_greeting_id',
80 'civicrm_phone.phone' => 'civicrm_phone.phone_numeric',
82 // the table names we support in dedupe rules - a filter for importableFields()
83 $supportedTables = array(
93 foreach (array('Individual', 'Organization', 'Household') as $ctype) {
94 // take the table.field pairs and their titles from importableFields() if the table is supported
95 foreach (CRM_Contact_BAO_Contact
::importableFields($ctype) as $iField) {
96 if (isset($iField['where'])) {
97 $where = $iField['where'];
98 if (isset($replacements[$where])) {
99 $where = $replacements[$where];
101 list($table, $field) = explode('.', $where);
102 if (!in_array($table, $supportedTables)) {
105 $fields[$ctype][$table][$field] = $iField['title'];
108 // add custom data fields
109 foreach (CRM_Core_BAO_CustomGroup
::getTree($ctype, CRM_Core_DAO
::$_nullObject, NULL, -1) as $key => $cg) {
113 foreach ($cg['fields'] as $cf) {
114 $fields[$ctype][$cg['table_name']][$cf['column_name']] = $cf['label'];
119 CRM_Utils_Hook
::dupeQuery(CRM_Core_DAO
::$_nullObject, 'supportedFields', $fields);
120 return $fields[$requestedType];
124 * Return the SQL query for dropping the temporary table.
126 public function tableDropQuery() {
127 return 'DROP TEMPORARY TABLE IF EXISTS dedupe';
131 * Return a set of SQL queries whose cummulative weights will mark matched
132 * records for the RuleGroup::threasholdQuery() to retrieve.
134 public function tableQuery() {
135 // make sure we've got a fetched dbrecord, not sure if this is enforced
136 if (!$this->name
== NULL ||
$this->is_reserved
== NULL) {
140 // Reserved Rule Groups can optionally get special treatment by
141 // implementing an optimization class and returning a query array.
142 if ($this->is_reserved
&&
143 CRM_Utils_File
::isIncludable("CRM/Dedupe/BAO/QueryBuilder/{$this->name}.php")
145 include_once "CRM/Dedupe/BAO/QueryBuilder/{$this->name}.php";
146 $class = "CRM_Dedupe_BAO_QueryBuilder_{$this->name}";
147 $command = empty($this->params
) ?
'internal' : 'record';
148 $queries = call_user_func(array($class, $command), $this);
151 // All other rule groups have queries generated by the member dedupe
152 // rules defined in the administrative interface.
154 // Find all rules contained by this script sorted by weight so that
155 // their execution can be short circuited on RuleGroup::fillTable()
156 $bao = new CRM_Dedupe_BAO_Rule();
157 $bao->dedupe_rule_group_id
= $this->id
;
158 $bao->orderBy('rule_weight DESC');
161 // Generate a SQL query for each rule in the rule group that is
162 // tailored to respect the param and contactId options provided.
164 while ($bao->fetch()) {
165 $bao->contactIds
= $this->contactIds
;
166 $bao->params
= $this->params
;
168 // Skipping empty rules? Empty rules shouldn't exist; why check?
169 if ($query = $bao->sql()) {
170 $queries["{$bao->rule_table}.{$bao->rule_field}.{$bao->rule_weight}"] = $query;
175 // if there are no rules in this rule group
176 // add an empty query fulfilling the pattern
178 $queries = array('SELECT 0 id1, 0 id2, 0 weight LIMIT 0');
179 $this->noRules
= TRUE;
185 public function fillTable() {
186 // get the list of queries handy
187 $tableQueries = $this->tableQuery();
189 if ($this->params
&& !$this->noRules
) {
190 $tempTableQuery = "CREATE TEMPORARY TABLE dedupe (id1 int, weight int, UNIQUE UI_id1 (id1)) ENGINE=MyISAM";
191 $insertClause = "INSERT INTO dedupe (id1, weight)";
192 $groupByClause = "GROUP BY id1";
193 $dupeCopyJoin = " JOIN dedupe_copy ON dedupe_copy.id1 = t1.column WHERE ";
196 $tempTableQuery = "CREATE TEMPORARY TABLE dedupe (id1 int, id2 int, weight int, UNIQUE UI_id1_id2 (id1, id2)) ENGINE=MyISAM";
197 $insertClause = "INSERT INTO dedupe (id1, id2, weight)";
198 $groupByClause = "GROUP BY id1, id2";
199 $dupeCopyJoin = " JOIN dedupe_copy ON dedupe_copy.id1 = t1.column AND dedupe_copy.id2 = t2.column WHERE ";
201 $patternColumn = '/t1.(\w+)/';
202 $exclWeightSum = array();
205 $dao = new CRM_Core_DAO();
206 $dao->query($tempTableQuery);
208 CRM_Utils_Hook
::dupeQuery($this, 'table', $tableQueries);
210 while (!empty($tableQueries)) {
211 list($isInclusive, $isDie) = self
::isQuerySetInclusive($tableQueries, $this->threshold
, $exclWeightSum);
214 // order queries by table count
215 self
::orderByTableCount($tableQueries);
217 $weightSum = array_sum($exclWeightSum);
218 $searchWithinDupes = !empty($exclWeightSum) ?
1 : 0;
220 while (!empty($tableQueries)) {
221 // extract the next query ( and weight ) to be executed
222 $fieldWeight = array_keys($tableQueries);
223 $fieldWeight = $fieldWeight[0];
224 $query = array_shift($tableQueries);
226 if ($searchWithinDupes) {
227 // get prepared to search within already found dupes if $searchWithinDupes flag is set
228 $dao->query("DROP TEMPORARY TABLE IF EXISTS dedupe_copy");
229 $dao->query("CREATE TEMPORARY TABLE dedupe_copy SELECT * FROM dedupe WHERE weight >= {$weightSum}");
232 preg_match($patternColumn, $query, $matches);
233 $query = str_replace(' WHERE ', str_replace('column', $matches[1], $dupeCopyJoin), $query);
235 $searchWithinDupes = 1;
237 // construct and execute the intermediate query
238 $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
241 // FIXME: we need to be more acurate with affected rows, especially for insert vs duplicate insert.
242 // And that will help optimize further.
243 $affectedRows = $dao->affectedRows();
246 // In an inclusive situation, failure of any query means no further processing -
247 if ($affectedRows == 0) {
248 // reset to make sure no further execution is done.
249 $tableQueries = array();
252 $weightSum = substr($fieldWeight, strrpos($fieldWeight, '.') +
1) +
$weightSum;
254 // An exclusive situation -
257 // since queries are already sorted by weights, we can continue as is
258 $fieldWeight = array_keys($tableQueries);
259 $fieldWeight = $fieldWeight[0];
260 $query = array_shift($tableQueries);
261 $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
263 if ($dao->affectedRows() >= 1) {
264 $exclWeightSum[] = substr($fieldWeight, strrpos($fieldWeight, '.') +
1);
269 // its a die situation
276 * Function to determine if a given query set contains inclusive or exclusive set of weights.
277 * The function assumes that the query set is already ordered by weight in desc order.
278 * @param $tableQueries
280 * @param array $exclWeightSum
284 public static function isQuerySetInclusive($tableQueries, $threshold, $exclWeightSum = array()) {
286 foreach ($tableQueries as $key => $query) {
287 $input[] = substr($key, strrpos($key, '.') +
1);
290 if (!empty($exclWeightSum)) {
291 $input = array_merge($input, $exclWeightSum);
295 if (count($input) == 1) {
296 return array(FALSE, $input[0] < $threshold);
299 $totalCombinations = 0;
300 for ($i = 0; $i < count($input); $i++
) {
301 $combination = array($input[$i]);
302 if (array_sum($combination) >= $threshold) {
303 $totalCombinations++
;
306 for ($j = $i +
1; $j < count($input); $j++
) {
307 $combination[] = $input[$j];
308 if (array_sum($combination) >= $threshold) {
309 $totalCombinations++
;
313 return array($totalCombinations == 1, $totalCombinations <= 0);
317 * sort queries by number of records for the table associated with them.
318 * @param $tableQueries
320 public static function orderByTableCount(&$tableQueries) {
321 static $tableCount = array();
323 $tempArray = array();
324 foreach ($tableQueries as $key => $query) {
325 $table = explode(".", $key);
327 if (!array_key_exists($table, $tableCount)) {
328 $query = "SELECT COUNT(*) FROM {$table}";
329 $tableCount[$table] = CRM_Core_DAO
::singleValueQuery($query);
331 $tempArray[$key] = $tableCount[$table];
335 foreach ($tempArray as $key => $count) {
336 $tempArray[$key] = $tableQueries[$key];
338 $tableQueries = $tempArray;
342 * Return the SQL query for getting only the interesting results out of the dedupe table.
344 * @$checkPermission boolean $params a flag to indicate if permission should be considered.
345 * default is to always check permissioning but public pages for example might not want
346 * permission to be checked for anonymous users. Refer CRM-6211. We might be beaking
347 * Multi-Site dedupe for public pages.
349 * @param bool $checkPermission
353 public function thresholdQuery($checkPermission = TRUE) {
354 $this->_aclFrom
= '';
355 // CRM-6603: anonymous dupechecks side-step ACLs
356 $this->_aclWhere
= ' AND is_deleted = 0 ';
358 if ($this->params
&& !$this->noRules
) {
359 if ($checkPermission) {
360 list($this->_aclFrom
, $this->_aclWhere
) = CRM_Contact_BAO_Contact_Permission
::cacheClause('civicrm_contact');
361 $this->_aclWhere
= $this->_aclWhere ?
"AND {$this->_aclWhere}" : '';
363 $query = "SELECT dedupe.id1 as id
364 FROM dedupe JOIN civicrm_contact ON dedupe.id1 = civicrm_contact.id {$this->_aclFrom}
365 WHERE contact_type = '{$this->contact_type}' {$this->_aclWhere}
366 AND weight >= {$this->threshold}";
369 $this->_aclWhere
= ' AND c1.is_deleted = 0 AND c2.is_deleted = 0';
370 if ($checkPermission) {
371 list($this->_aclFrom
, $this->_aclWhere
) = CRM_Contact_BAO_Contact_Permission
::cacheClause(array('c1', 'c2'));
372 $this->_aclWhere
= $this->_aclWhere ?
"AND {$this->_aclWhere}" : '';
374 $query = "SELECT dedupe.id1, dedupe.id2, dedupe.weight
375 FROM dedupe JOIN civicrm_contact c1 ON dedupe.id1 = c1.id
376 JOIN civicrm_contact c2 ON dedupe.id2 = c2.id {$this->_aclFrom}
377 LEFT JOIN civicrm_dedupe_exception exc ON dedupe.id1 = exc.contact_id1 AND dedupe.id2 = exc.contact_id2
378 WHERE c1.contact_type = '{$this->contact_type}' AND
379 c2.contact_type = '{$this->contact_type}' {$this->_aclWhere}
380 AND weight >= {$this->threshold} AND exc.contact_id1 IS NULL";
383 CRM_Utils_Hook
::dupeQuery($this, 'threshold', $query);
388 * find fields related to a rule group.
390 * @param array $params
393 * (rule field => weight) array and threshold associated to rule group
395 public static function dedupeRuleFieldsWeight($params) {
396 $rgBao = new CRM_Dedupe_BAO_RuleGroup();
397 $rgBao->contact_type
= $params['contact_type'];
398 if (!empty($params['id'])) {
399 // accept an ID if provided
400 $rgBao->id
= $params['id'];
403 $rgBao->used
= $params['used'];
407 $ruleBao = new CRM_Dedupe_BAO_Rule();
408 $ruleBao->dedupe_rule_group_id
= $rgBao->id
;
410 $ruleFields = array();
411 while ($ruleBao->fetch()) {
412 $ruleFields[$ruleBao->rule_field
] = $ruleBao->rule_weight
;
415 return array($ruleFields, $rgBao->threshold
);
419 * Get all of the combinations of fields that would work with a rule.
421 * @param array $rgFields
422 * @param int $threshold
423 * @param array $combos
424 * @param array $running
426 public static function combos($rgFields, $threshold, &$combos, $running = array()) {
427 foreach ($rgFields as $rgField => $weight) {
428 unset($rgFields[$rgField]);
429 $diff = $threshold - $weight;
430 $runningnow = $running;
431 $runningnow[] = $rgField;
433 self
::combos($rgFields, $diff, $combos, $runningnow);
436 $combos[] = $runningnow;
442 * Get an array of rule group id to rule group name
443 * for all th groups for that contactType. If contactType
444 * not specified, do it for all
446 * @param string $contactType
447 * Individual, Household or Organization.
451 * id => "nice name" of rule group
453 public static function getByType($contactType = NULL) {
454 $dao = new CRM_Dedupe_DAO_RuleGroup();
457 $dao->contact_type
= $contactType;
462 while ($dao->fetch()) {
463 $title = !empty($dao->title
) ?
$dao->title
: (!empty($dao->name
) ?
$dao->name
: $dao->contact_type
);
465 $name = "$title - {$dao->used}";
466 $result[$dao->id
] = $name;