fix notices and strict warnings from PHP 5.4 in running webtests
[civicrm-core.git] / CRM / Dedupe / BAO / RuleGroup.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.3 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2013 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26*/
27
28/**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2013
32 * $Id$
33 *
34 */
35
36/**
37 * The CiviCRM duplicate discovery engine is based on an
38 * algorithm designed by David Strauss <david@fourkitchens.com>.
39 */
40class CRM_Dedupe_BAO_RuleGroup extends CRM_Dedupe_DAO_RuleGroup {
41
/**
 * Contact ids used to limit the SQL queries; when empty the queries run
 * against the whole database.
 *
 * @var array
 */
public $contactIds = array();

/**
 * Params to dedupe against; when empty the queries run against the whole
 * contact set.
 *
 * @var array
 */
public $params = array();

/**
 * Flag set to TRUE by tableQuery() when the rule group produced no queries.
 *
 * @var bool
 */
public $noRules = FALSE;
56
/**
 * Return a structure holding the supported tables, fields and their titles.
 *
 * The map for all three contact types (Individual, Organization, Household)
 * is built on the first call and kept in a static variable for later calls.
 *
 * @param string $requestedType the requested contact type
 *
 * @return array a table-keyed array of field-keyed arrays holding supported fields' titles;
 *   an empty array when the requested type is not one of the known contact types
 */
static function &supportedFields($requestedType) {
  static $fields = NULL;
  if (!$fields) {
    // this is needed, as we're piggy-backing importableFields() below
    $replacements = array(
      'civicrm_country.name' => 'civicrm_address.country_id',
      'civicrm_county.name' => 'civicrm_address.county_id',
      'civicrm_state_province.name' => 'civicrm_address.state_province_id',
      'gender.label' => 'civicrm_contact.gender_id',
      'individual_prefix.label' => 'civicrm_contact.prefix_id',
      'individual_suffix.label' => 'civicrm_contact.suffix_id',
      'addressee.label' => 'civicrm_contact.addressee_id',
      'email_greeting.label' => 'civicrm_contact.email_greeting_id',
      'postal_greeting.label' => 'civicrm_contact.postal_greeting_id',
    );
    // the table names we support in dedupe rules - a filter for importableFields()
    $supportedTables = array(
      'civicrm_address', 'civicrm_contact', 'civicrm_email',
      'civicrm_im', 'civicrm_note', 'civicrm_openid', 'civicrm_phone',
    );

    // Build into a local first so that a failure half way through does not
    // leave a partially filled map in the static cache.
    $built = array();
    foreach (array('Individual', 'Organization', 'Household') as $ctype) {
      $built[$ctype] = array();

      // take the table.field pairs and their titles from importableFields() if the table is supported
      foreach (CRM_Contact_BAO_Contact::importableFields($ctype) as $iField) {
        if (!isset($iField['where'])) {
          continue;
        }
        $where = $iField['where'];
        if (isset($replacements[$where])) {
          $where = $replacements[$where];
        }
        // A where clause that is not of the "table.field" form cannot be
        // mapped; skip it rather than raising an undefined-offset notice.
        $parts = explode('.', $where);
        if (count($parts) < 2) {
          continue;
        }
        list($table, $field) = $parts;
        if (!in_array($table, $supportedTables)) {
          continue;
        }
        $built[$ctype][$table][$field] = $iField['title'];
      }

      // add custom data fields
      foreach (CRM_Core_BAO_CustomGroup::getTree($ctype, CRM_Core_DAO::$_nullObject, NULL, -1) as $key => $cg) {
        // only integer keys are treated as custom groups; groups without fields add nothing
        if (!is_int($key) || empty($cg['fields'])) {
          continue;
        }
        foreach ($cg['fields'] as $cf) {
          $built[$ctype][$cg['table_name']][$cf['column_name']] = $cf['label'];
        }
      }
    }
    $fields = $built;
  }

  // The method returns by reference, so an unknown key would otherwise be
  // created silently with a NULL value; hand back an empty array instead.
  if (!isset($fields[$requestedType])) {
    $fields[$requestedType] = array();
  }
  return $fields[$requestedType];
}
114
/**
 * Build the SQL statement that drops the temporary dedupe table.
 *
 * @return string the DROP statement
 */
function tableDropQuery() {
  $dropSql = 'DROP TEMPORARY TABLE IF EXISTS dedupe';
  return $dropSql;
}
121
/**
 * Return a set of SQL queries whose cumulative weights will mark matched
 * records for the RuleGroup::thresholdQuery() to retrieve.
 *
 * Side effect: sets $this->noRules to TRUE when no query could be produced.
 *
 * @return array the queries to run; queries generated from the rules of this
 *   group are keyed by "rule_table.rule_field.rule_weight"
 */
function tableQuery() {
  // Make sure we've got a fetched dbrecord, not sure if this is enforced.
  // The guard used to read "!$this->name == NULL", which PHP evaluates as
  // "(!$this->name) == NULL" because "!" binds tighter than "==", i.e. it was
  // true for records that DO have a name. Test for a missing name instead.
  if ($this->name == NULL || $this->is_reserved == NULL) {
    $this->find(TRUE);
  }

  // Reserved Rule Groups can optionally get special treatment by
  // implementing an optimization class and returning a query array.
  if ($this->is_reserved &&
    CRM_Utils_File::isIncludable("CRM/Dedupe/BAO/QueryBuilder/{$this->name}.php")
  ) {
    include_once "CRM/Dedupe/BAO/QueryBuilder/{$this->name}.php";
    $class = "CRM_Dedupe_BAO_QueryBuilder_{$this->name}";
    // 'record' when deduping against given params, 'internal' otherwise
    $command = empty($this->params) ? 'internal' : 'record';
    $queries = call_user_func(array($class, $command), $this);
  }
  else {
    // All other rule groups have queries generated by the member dedupe
    // rules defined in the administrative interface.

    // Find all rules contained by this script sorted by weight so that
    // their execution can be short circuited on RuleGroup::fillTable()
    $bao = new CRM_Dedupe_BAO_Rule();
    $bao->dedupe_rule_group_id = $this->id;
    $bao->orderBy('rule_weight DESC');
    $bao->find();

    // Generate a SQL query for each rule in the rule group that is
    // tailored to respect the param and contactId options provided.
    $queries = array();
    while ($bao->fetch()) {
      $bao->contactIds = $this->contactIds;
      $bao->params = $this->params;

      // Skipping empty rules? Empty rules shouldn't exist; why check?
      if ($query = $bao->sql()) {
        $queries["{$bao->rule_table}.{$bao->rule_field}.{$bao->rule_weight}"] = $query;
      }
    }
  }

  // if there are no rules in this rule group
  // add an empty query fulfilling the pattern
  if (!$queries) {
    $queries = array('SELECT 0 id1, 0 id2, 0 weight LIMIT 0');
    $this->noRules = TRUE;
  }

  return $queries;
}
176
/**
 * Create the temporary "dedupe" table and fill it by running the queries
 * returned by tableQuery(), summing up the weights of matching rows.
 *
 * When deduping against params (and the group has rules) the table holds
 * (id1, weight); otherwise it holds (id1, id2, weight). The query list is
 * passed through the dupeQuery hook before it is executed.
 */
function fillTable() {
  // get the list of queries handy
  $tableQueries = $this->tableQuery();

  if ($this->params && !$this->noRules) {
    $tempTableQuery = "CREATE TEMPORARY TABLE dedupe (id1 int, weight int, UNIQUE UI_id1 (id1)) ENGINE=MyISAM";
    $insertClause = "INSERT INTO dedupe (id1, weight)";
    $groupByClause = "GROUP BY id1";
    $dupeCopyJoin = " JOIN dedupe_copy ON dedupe_copy.id1 = t1.column WHERE ";
  }
  else {
    $tempTableQuery = "CREATE TEMPORARY TABLE dedupe (id1 int, id2 int, weight int, UNIQUE UI_id1_id2 (id1, id2)) ENGINE=MyISAM";
    $insertClause = "INSERT INTO dedupe (id1, id2, weight)";
    $groupByClause = "GROUP BY id1, id2";
    $dupeCopyJoin = " JOIN dedupe_copy ON dedupe_copy.id1 = t1.column AND dedupe_copy.id2 = t2.column WHERE ";
  }
  // Captures the first column referenced through the t1 alias. The dot is
  // escaped so that only a literal "t1." qualifies (it used to match any
  // character after "t1").
  $patternColumn = '/t1\.(\w+)/';
  $exclWeightSum = array();

  // create temp table
  $dao = new CRM_Core_DAO();
  $dao->query($tempTableQuery);

  CRM_Utils_Hook::dupeQuery($this, 'table', $tableQueries);

  while (!empty($tableQueries)) {
    list($isInclusive, $isDie) = self::isQuerySetInclusive($tableQueries, $this->threshold, $exclWeightSum);

    if ($isInclusive) {
      // order queries by table count
      self::orderByTableCount($tableQueries);

      $weightSum = array_sum($exclWeightSum);
      $searchWithinDupes = !empty($exclWeightSum) ? 1 : 0;

      while (!empty($tableQueries)) {
        // extract the next query ( and weight ) to be executed
        $fieldWeight = array_keys($tableQueries);
        $fieldWeight = $fieldWeight[0];
        $query = array_shift($tableQueries);

        if ($searchWithinDupes) {
          // get prepared to search within already found dupes if $searchWithinDupes flag is set
          $dao->query("DROP TEMPORARY TABLE IF EXISTS dedupe_copy");
          $dao->query("CREATE TEMPORARY TABLE dedupe_copy SELECT * FROM dedupe WHERE weight >= {$weightSum}");
          $dao->free();

          // Only restrict the query to the already found dupes when a t1
          // column could actually be extracted; reading $matches[1] without
          // a match raised a notice and produced a broken JOIN clause.
          // NOTE(review): an unrestricted query presumably only adds rows
          // that stay below the threshold - confirm.
          if (preg_match($patternColumn, $query, $matches)) {
            $query = str_replace(' WHERE ', str_replace('column', $matches[1], $dupeCopyJoin), $query);
          }
        }
        $searchWithinDupes = 1;

        // construct and execute the intermediate query
        $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
        $dao->query($query);

        // FIXME: we need to be more accurate with affected rows, especially for insert vs duplicate insert.
        // And that will help optimize further.
        $affectedRows = $dao->affectedRows();
        $dao->free();

        // In an inclusive situation, failure of any query means no further processing -
        if ($affectedRows == 0) {
          // reset to make sure no further execution is done.
          $tableQueries = array();
          break;
        }
        // the weight is the part of the query key after its last dot
        $weightSum = substr($fieldWeight, strrpos($fieldWeight, '.') + 1) + $weightSum;
      }
      // An exclusive situation -
    }
    elseif (!$isDie) {
      // since queries are already sorted by weights, we can continue as is
      $fieldWeight = array_keys($tableQueries);
      $fieldWeight = $fieldWeight[0];
      $query = array_shift($tableQueries);
      $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)";
      $dao->query($query);
      if ($dao->affectedRows() >= 1) {
        // remember the weight of every query that matched something
        $exclWeightSum[] = substr($fieldWeight, strrpos($fieldWeight, '.') + 1);
      }
      $dao->free();
    }
    else {
      // its a die situation
      break;
    }
  }
}
267
/**
 * Determine if a given query set contains an inclusive or exclusive set of weights.
 *
 * The weight of a query is the part of its array key after the last dot. The
 * function assumes that the query set is already ordered by weight in desc order.
 *
 * @param array $tableQueries queries keyed by a string ending in ".weight"
 * @param int $threshold the weight sum that has to be reached
 * @param array $exclWeightSum weights of already executed queries, merged into the set
 *
 * @return array array($isInclusive, $isDie)
 */
static function isQuerySetInclusive($tableQueries, $threshold, $exclWeightSum = array()) {
  // pull the weight out of every query key
  $weights = array();
  foreach (array_keys($tableQueries) as $queryKey) {
    $weights[] = substr($queryKey, strrpos($queryKey, '.') + 1);
  }

  // already collected weights take part as well; keep the list in desc order
  if (!empty($exclWeightSum)) {
    $weights = array_merge($weights, $exclWeightSum);
    rsort($weights);
  }

  $weightCount = count($weights);
  if ($weightCount == 1) {
    return array(FALSE, $weights[0] < $threshold);
  }

  // count the runs (a weight followed by its successors) that reach the threshold
  $reaching = 0;
  foreach ($weights as $start => $firstWeight) {
    $run = array($firstWeight);
    if (array_sum($run) >= $threshold) {
      // a single weight is enough, longer runs from here are not looked at
      $reaching++;
      continue;
    }
    foreach (array_slice($weights, $start + 1) as $nextWeight) {
      $run[] = $nextWeight;
      if (array_sum($run) >= $threshold) {
        $reaching++;
      }
    }
  }

  $isInclusive = ($reaching == 1);
  $isDie = ($reaching <= 0);
  return array($isInclusive, $isDie);
}
302
/**
 * Sort queries by the number of records in the table associated with them,
 * smallest table first.
 *
 * The table name is the part of the query key before the first dot. Row
 * counts are looked up once per table and remembered in a static cache.
 *
 * @param array $tableQueries queries keyed by "table. ..." - reordered in place
 */
static function orderByTableCount(&$tableQueries) {
  static $tableCount = array();

  // map every query key to the row count of its table
  $countByKey = array();
  foreach (array_keys($tableQueries) as $key) {
    list($table) = explode(".", $key);
    if (!array_key_exists($table, $tableCount)) {
      $tableCount[$table] = CRM_Core_DAO::singleValueQuery("SELECT COUNT(*) FROM {$table}");
    }
    $countByKey[$key] = $tableCount[$table];
  }

  // order the keys by row count, then rebuild the query list in that order
  asort($countByKey);
  $ordered = array();
  foreach (array_keys($countByKey) as $key) {
    $ordered[$key] = $tableQueries[$key];
  }
  $tableQueries = $ordered;
}
324
/**
 * Return the SQL query for getting only the interesting results out of the dedupe table.
 *
 * The finished query is passed through the dupeQuery hook before it is returned.
 *
 * @param bool $checkPermission a flag to indicate if permission should be considered.
 *   Default is to always check permissioning but public pages for example might not want
 *   permission to be checked for anonymous users. Refer CRM-6211. We might be breaking
 *   Multi-Site dedupe for public pages.
 *
 * @return string the threshold query
 */
function thresholdQuery($checkPermission = TRUE) {
  // TRUE when deduping a given set of params, FALSE when comparing contacts with each other
  $againstParams = $this->params && !$this->noRules;

  // CRM-6603: anonymous dupechecks side-step ACLs
  $this->_aclFrom = '';
  $this->_aclWhere = $againstParams
    ? ' AND is_deleted = 0 '
    : ' AND c1.is_deleted = 0 AND c2.is_deleted = 0';

  if ($checkPermission) {
    $aliases = $againstParams ? 'civicrm_contact' : array('c1', 'c2');
    list($aclFrom, $aclWhere) = CRM_Contact_BAO_Contact_Permission::cacheClause($aliases);
    $this->_aclFrom = $aclFrom;
    $this->_aclWhere = $aclWhere ? "AND {$aclWhere}" : '';
  }

  if ($againstParams) {
    $query = "SELECT dedupe.id1 as id
                FROM dedupe JOIN civicrm_contact ON dedupe.id1 = civicrm_contact.id {$this->_aclFrom}
                WHERE contact_type = '{$this->contact_type}' {$this->_aclWhere}
                AND weight >= {$this->threshold}";
  }
  else {
    $query = "SELECT dedupe.id1, dedupe.id2, dedupe.weight
                FROM dedupe JOIN civicrm_contact c1 ON dedupe.id1 = c1.id
                            JOIN civicrm_contact c2 ON dedupe.id2 = c2.id {$this->_aclFrom}
                       LEFT JOIN civicrm_dedupe_exception exc ON dedupe.id1 = exc.contact_id1 AND dedupe.id2 = exc.contact_id2
                WHERE c1.contact_type = '{$this->contact_type}' AND
                      c2.contact_type = '{$this->contact_type}' {$this->_aclWhere}
                      AND weight >= {$this->threshold} AND exc.contact_id1 IS NULL";
  }

  CRM_Utils_Hook::dupeQuery($this, 'threshold', $query);
  return $query;
}
367
/**
 * To find fields related to a rule group.
 *
 * @param array $params contains the rule group properties used to identify
 *   the rule group; the keys 'used' and 'contact_type' are read
 *
 * @return array array((rule field => weight) array, threshold associated to the rule group);
 *   an empty field array and a NULL threshold when no rule group matches
 * @access public
 */
static function dedupeRuleFieldsWeight($params) {
  $rgBao = new CRM_Dedupe_BAO_RuleGroup();
  // read the identifying properties without raising notices for missing keys
  $rgBao->used = isset($params['used']) ? $params['used'] : NULL;
  $rgBao->contact_type = isset($params['contact_type']) ? $params['contact_type'] : NULL;
  $rgBao->find(TRUE);

  // Without a matching rule group there is no id to filter the rules by.
  // NOTE(review): an unset dedupe_rule_group_id presumably leaves the rule
  // lookup below unfiltered, returning rules of unrelated groups - confirm.
  if (empty($rgBao->id)) {
    return array(array(), NULL);
  }

  $ruleBao = new CRM_Dedupe_BAO_Rule();
  $ruleBao->dedupe_rule_group_id = $rgBao->id;
  $ruleBao->find();
  $ruleFields = array();
  while ($ruleBao->fetch()) {
    $ruleFields[$ruleBao->rule_field] = $ruleBao->rule_weight;
  }

  return array($ruleFields, $rgBao->threshold);
}
392
/**
 * Get an array of rule group id to rule group name
 * for all the groups of that contactType. If contactType
 * is not specified, do it for all.
 *
 * @param string $contactType Individual, Household or Organization
 *
 * @static
 *
 * @return array id => "nice name" of rule group
 */
static function getByType($contactType = NULL) {
  $ruleGroup = new CRM_Dedupe_DAO_RuleGroup();
  if ($contactType) {
    $ruleGroup->contact_type = $contactType;
  }
  $ruleGroup->find();

  $names = array();
  while ($ruleGroup->fetch()) {
    // fall back to the contact type for groups without a name
    $prefix = empty($ruleGroup->name) ? $ruleGroup->contact_type : $ruleGroup->name;
    $names[$ruleGroup->id] = "{$prefix} - {$ruleGroup->used}";
  }
  return $names;
}
424}
425