Commit | Line | Data |
---|---|---|
6a488035 TO |
1 | <?php |
2 | /* | |
3 | +--------------------------------------------------------------------+ | |
fee14197 | 4 | | CiviCRM version 5 | |
6a488035 | 5 | +--------------------------------------------------------------------+ |
6b83d5bd | 6 | | Copyright CiviCRM LLC (c) 2004-2019 | |
6a488035 TO |
7 | +--------------------------------------------------------------------+ |
8 | | This file is a part of CiviCRM. | | |
9 | | | | |
10 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
11 | | under the terms of the GNU Affero General Public License | | |
12 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
13 | | | | |
14 | | CiviCRM is distributed in the hope that it will be useful, but | | |
15 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
17 | | See the GNU Affero General Public License for more details. | | |
18 | | | | |
19 | | You should have received a copy of the GNU Affero General Public | | |
20 | | License and the CiviCRM Licensing Exception along | | |
21 | | with this program; if not, contact CiviCRM LLC | | |
22 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
23 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
24 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
25 | +--------------------------------------------------------------------+ | |
d25dd0ee | 26 | */ |
6a488035 TO |
27 | |
28 | /** | |
29 | * | |
30 | * @package CRM | |
6b83d5bd | 31 | * @copyright CiviCRM LLC (c) 2004-2019 |
6a488035 TO |
32 | * $Id$ |
33 | * | |
34 | */ | |
35 | ||
/**
 * The CiviCRM duplicate discovery engine is based on an
 * algorithm designed by David Strauss <david@fourkitchens.com>.
 */
class CRM_Dedupe_Finder {

  /**
   * Find possible duplicate pairs using the given dedupe rule group.
   *
   * The search is limited to dupes of $cids if provided. When no contact ids
   * are given but a search limit is, up to $searchLimit contacts of the rule
   * group's contact type are fetched via the Contact API and used instead.
   *
   * @param int $rgid
   *   Rule group id.
   * @param array $cids
   *   Contact ids to limit the search to.
   * @param bool $checkPermissions
   *   Respect logged in user permissions.
   * @param int $searchLimit
   *   Limit for the number of contacts to be used for comparison.
   *   The search methodology finds all matches for the searchedContacts so this limits
   *   the number of searched contacts, not the matches found.
   *
   * @return array
   *   List of array(cid1, cid2, weight) dupe triples.
   *
   * @throws CiviCRM_API3_Exception
   * @throws Exception
   */
  public static function dupes($rgid, $cids = array(), $checkPermissions = TRUE, $searchLimit = 0) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->id = $rgid;
    $rgBao->contactIds = $cids;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("Dedupe rule not found for selected contacts");
    }

    // No explicit contacts requested: cap the searched contacts at $searchLimit.
    if (empty($rgBao->contactIds) && !empty($searchLimit)) {
      $limitedContacts = civicrm_api3('Contact', 'get', array(
        'return' => 'id',
        'contact_type' => $rgBao->contact_type,
        'options' => array('limit' => $searchLimit),
      ));
      $rgBao->contactIds = array_keys($limitedContacts['values']);
    }

    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($checkPermissions));
    $dupes = array();
    while ($dao->fetch()) {
      $dupes[] = array($dao->id1, $dao->id2, $dao->weight);
    }
    // Clean up the working table built by fillTable().
    $dao->query($rgBao->tableDropQuery());

    return $dupes;
  }

  /**
   * Return an array of possible dupes, based on the provided array of
   * params, using the default rule group for the given contact type and
   * usage.
   *
   * $params['check_permission'] is a boolean flag to indicate if permission
   * should be considered. The default is to always check permissioning, but
   * public pages for example might not want permission to be checked for
   * anonymous users. Refer CRM-6211. We might be breaking Multi-Site dedupe
   * for public pages.
   *
   * @param array $params
   *   Array of params of the form $params[$table][$field] == $value.
   * @param string $ctype
   *   Contact type to match against.
   * @param string $used
   *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
   * @param array $except
   *   Array of contacts that shouldn't be considered dupes.
   * @param int $ruleGroupID
   *   The id of the dedupe rule we should be using. If it is not found for
   *   $ctype, the rule group matching $ctype and $used is used instead.
   *
   * @return array
   *   Matching contact ids.
   */
  public static function dupesByParams(
    $params,
    $ctype,
    $used = 'Unsupervised',
    $except = array(),
    $ruleGroupID = NULL
  ) {
    // If $params is empty there is zero reason to proceed.
    if (!$params) {
      return array();
    }

    // Read the flag BEFORE array_filter(): an explicit FALSE would otherwise
    // be filtered out and silently replaced by the TRUE default.
    $checkPermission = CRM_Utils_Array::value('check_permission', $params, TRUE);

    // This may no longer be required - see https://github.com/civicrm/civicrm-core/pull/13176
    $params = array_filter($params);

    $foundByID = FALSE;
    if ($ruleGroupID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->id = $ruleGroupID;
      $rgBao->contact_type = $ctype;
      if ($rgBao->find(TRUE)) {
        $foundByID = TRUE;
      }
    }

    if (!$foundByID) {
      // Fall back to the rule group for this contact type and usage.
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->contact_type = $ctype;
      $rgBao->used = $used;
      if (!$rgBao->find(TRUE)) {
        CRM_Core_Error::fatal("$used rule for $ctype does not exist");
      }
    }
    $params['check_permission'] = $checkPermission;

    // Strip every non-digit character from the phone number before matching.
    if (isset($params['civicrm_phone']['phone_numeric'])) {
      $orig = $params['civicrm_phone']['phone_numeric'];
      $params['civicrm_phone']['phone_numeric'] = preg_replace('/[^\d]/', '', $orig);
    }

    $rgBao->params = $params;
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($checkPermission));
    $dupes = array();
    while ($dao->fetch()) {
      if (isset($dao->id) && $dao->id) {
        $dupes[] = $dao->id;
      }
    }
    // Clean up the working table built by fillTable().
    $dao->query($rgBao->tableDropQuery());

    return array_diff($dupes, $except);
  }

  /**
   * Find possible duplicate pairs among the members of the given group.
   *
   * @param int $rgid
   *   Rule group id.
   * @param int $gid
   *   Contact group id.
   * @param int $searchLimit
   *   Limit for the number of contacts to be used for comparison.
   *   The search methodology finds all matches for the searchedContacts so this limits
   *   the number of searched contacts, not the matches found.
   *
   * @return array
   *   List of array(cid1, cid2, weight) dupe triples; empty when the group
   *   lookup yields no members.
   */
  public static function dupesInGroup($rgid, $gid, $searchLimit = 0) {
    // The keys of the getMember() result are used as the contact ids to search.
    $cids = array_keys(CRM_Contact_BAO_Group::getMember($gid, TRUE, $searchLimit));
    if (empty($cids)) {
      return array();
    }
    return self::dupes($rgid, $cids);
  }

  /**
   * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
   * object into a valid $params array for dedupe
   *
   * @param array $fields
   *   Contact structure from formRule().
   * @param string $ctype
   *   Contact type of the given contact.
   *
   * @return array
   *   Valid $params array for dedupe, of the form $params[$table][$field] = $value.
   */
  public static function formatParams($fields, $ctype) {
    $flat = array();
    CRM_Utils_Array::flatten($fields, $flat);

    // FIXME: This may no longer be necessary - check inputs
    $replace_these = array(
      'individual_suffix' => 'suffix_id',
      'individual_prefix' => 'prefix_id',
      'gender' => 'gender_id',
    );
    foreach ($replace_these as $name => $target) {
      if (!empty($fields[$name])) {
        $flat[$target] = $flat[$name];
        unset($flat[$name]);
      }
    }

    // handle {birth,deceased}_date
    foreach (array('birth_date', 'deceased_date') as $date) {
      if (!empty($fields[$date])) {
        $flat[$date] = $fields[$date];
        if (is_array($flat[$date])) {
          $flat[$date] = CRM_Utils_Date::format($flat[$date]);
        }
        $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
      }
    }

    if (!empty($flat['contact_source'])) {
      $flat['source'] = $flat['contact_source'];
      unset($flat['contact_source']);
    }

    // handle preferred_communication_method: keep the keys whose value is '1',
    // sorted, and join them wrapped in the value separator.
    if (!empty($fields['preferred_communication_method'])) {
      $methods = array_intersect($fields['preferred_communication_method'], array('1'));
      $methods = array_keys($methods);
      sort($methods);
      if ($methods) {
        $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
      }
    }

    // handle custom data
    $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1);
    CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
    foreach ($tree as $key => $cg) {
      // Only integer-keyed entries are treated as custom groups.
      if (!is_int($key)) {
        continue;
      }
      foreach ($cg['fields'] as $cf) {
        // Guard the lookup so a field without a submitted value yields NULL
        // instead of an undefined-index notice.
        $flat[$cf['column_name']] = isset($cf['customValue']['data']) ? $cf['customValue']['data'] : NULL;
      }
    }

    // if the key is dotted, keep just the last part of it
    foreach ($flat as $key => $value) {
      if (strpos($key, '.') !== FALSE) {
        $parts = explode('.', $key);
        $last = array_pop($parts);
        // make sure the first occurrence is kept, not the last
        if (!isset($flat[$last])) {
          $flat[$last] = $value;
        }
        unset($flat[$key]);
      }
    }

    // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
    // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
    // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
    foreach ($flat as $key => $value) {
      $matches = array();
      // The first alternative must be tried in full before the second so that
      // 'phone-Primary-12' yields 'phone' rather than 'phone-Primary'. It uses
      // \d+ (one or more digits); the former [\d+] matched only a single
      // character and so failed on multi-digit ids.
      if (preg_match('/(.*)-(Primary-\d+)$|(.*)-(\d+|Primary)$/', $key, $matches)) {
        // Drop the empty groups of the alternative that did not match; index 1
        // is then the base field name.
        $return = array_values(array_filter($matches));
        $flat[$return[1]] = $value;
        unset($flat[$key]);
      }
    }

    // Form field names that map onto differently named columns, per table.
    // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
    $renames = array(
      'civicrm_address' => array(
        'address_name' => 'name',
        'country' => 'country_id',
        'state_province' => 'state_province_id',
        'county' => 'county_id',
      ),
      'civicrm_phone' => array(
        'phone' => 'phone_numeric',
      ),
    );

    $params = array();
    $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
    if (is_array($supportedFields)) {
      foreach ($supportedFields as $table => $tableFields) {
        if ($table == 'civicrm_address') {
          // for matching on civicrm_address fields, we also need the location_type_id
          $tableFields['location_type_id'] = '';
        }
        if (isset($renames[$table])) {
          foreach ($renames[$table] as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        foreach ($tableFields as $field => $title) {
          if (!empty($flat[$field])) {
            $params[$table][$field] = $flat[$field];
          }
        }
      }
    }
    return $params;
  }

  /**
   * Parse duplicate pairs into a standardised array and store in the prev_next_cache.
   *
   * @param array $foundDupes
   *   List of array(cid1, cid2, weight) triples, as returned by dupes().
   * @param string $cacheKeyString
   *   Cache key the pairs are stored under. It is interpolated into the SQL
   *   values as-is, so the caller must supply a safe string.
   *
   * @return array
   *   Dupe pairs with the keys
   *   -srcID
   *   -srcName
   *   -dstID
   *   -dstName
   *   -weight
   *   -canMerge
   *   An empty array is returned (and nothing is stored) when $foundDupes is empty.
   *
   * @throws CRM_Core_Exception
   */
  public static function parseAndStoreDupePairs($foundDupes, $cacheKeyString) {
    // Nothing to resolve or cache. An empty id list would also generate the
    // invalid SQL "IN ()" below.
    if (empty($foundDupes)) {
      return array();
    }

    $cids = array();
    foreach ($foundDupes as $dupe) {
      $cids[$dupe[0]] = 1;
      $cids[$dupe[1]] = 1;
    }
    // Ids are interpolated into SQL, so force them to integers.
    $cidString = implode(', ', array_map('intval', array_keys($cids)));

    $dao = CRM_Core_DAO::executeQuery("SELECT id, display_name FROM civicrm_contact WHERE id IN ($cidString) ORDER BY sort_name");
    $displayNames = array();
    while ($dao->fetch()) {
      $displayNames[$dao->id] = $dao->display_name;
    }

    $mainContacts = array();
    $values = array();
    $userId = CRM_Core_Session::getLoggedInContactID();
    foreach ($foundDupes as $dupes) {
      $srcID = $dupes[1];
      $dstID = $dupes[0];
      // The logged in user should never be the src (ie. the contact to be removed).
      if ($srcID == $userId) {
        $srcID = $dstID;
        $dstID = $userId;
      }

      $row = array(
        'dstID' => $dstID,
        // A contact missing from the lookup gets a NULL name rather than a notice.
        'dstName' => isset($displayNames[$dstID]) ? $displayNames[$dstID] : NULL,
        'srcID' => $srcID,
        'srcName' => isset($displayNames[$srcID]) ? $displayNames[$srcID] : NULL,
        'weight' => $dupes[2],
        'canMerge' => TRUE,
      );
      $mainContacts[] = $row;

      $data = CRM_Core_DAO::escapeString(serialize($row));
      // %d keeps the ids numeric inside the generated VALUES tuple.
      $values[] = sprintf(" ( 'civicrm_contact', %d, %d, '%s', '%s' ) ", $dstID, $srcID, $cacheKeyString, $data);
    }
    CRM_Core_BAO_PrevNextCache::setItem($values);
    return $mainContacts;
  }

}