Commit | Line | Data |
---|---|---|
6a488035 TO |
1 | <?php |
2 | /* | |
3 | +--------------------------------------------------------------------+ | |
bc77d7c0 | 4 | | Copyright CiviCRM LLC. All rights reserved. | |
6a488035 | 5 | | | |
bc77d7c0 TO |
6 | | This work is published under the GNU AGPLv3 license with some | |
7 | | permitted exceptions and without any warranty. For full license | | |
8 | | and copyright information, see https://civicrm.org/licensing | | |
6a488035 | 9 | +--------------------------------------------------------------------+ |
d25dd0ee | 10 | */ |
6a488035 TO |
11 | |
12 | /** | |
13 | * | |
14 | * @package CRM | |
ca5cec67 | 15 | * @copyright CiviCRM LLC https://civicrm.org/licensing |
6a488035 TO |
16 | */ |
17 | ||
/**
 * The CiviCRM duplicate discovery engine is based on an
 * algorithm designed by David Strauss <david@fourkitchens.com>.
 */
class CRM_Dedupe_Finder {

  /**
   * Find possible duplicate contact pairs using the given dedupe rule group.
   *
   * The search is limited to dupes of $cids if provided.
   *
   * @param int $rgid
   *   Rule group id.
   * @param array $cids
   *   Contact ids to limit the search to.
   * @param bool $checkPermissions
   *   Respect logged in user permissions.
   *
   * @return array
   *   List of [cid1, cid2, weight] dupe triples, one per row returned by
   *   the rule group's threshold query.
   *
   * @throws \CRM_Core_Exception
   *   When no rule group with the given id can be loaded.
   */
  public static function dupes($rgid, $cids = [], $checkPermissions = TRUE) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->id = $rgid;
    $rgBao->contactIds = $cids;
    if (!$rgBao->find(TRUE)) {
      throw new CRM_Core_Exception('Dedupe rule not found for selected contacts');
    }

    // fillTable() must run before thresholdQuery(); the table is removed
    // again with tableDropQuery() once the rows have been read.
    $rgBao->fillTable();
    $dao = CRM_Core_DAO::executeQuery($rgBao->thresholdQuery($checkPermissions));
    $dupes = [];
    while ($dao->fetch()) {
      $dupes[] = [$dao->id1, $dao->id2, $dao->weight];
    }
    CRM_Core_DAO::executeQuery($rgBao->tableDropQuery());

    return $dupes;
  }

  /**
   * Return an array of possible dupes, based on the provided array of
   * params, using the default rule group for the given contact type and
   * usage.
   *
   * check_permission is a boolean flag (read from $params['check_permission'])
   * to indicate if permission should be considered. The default is to always
   * check permissioning but public pages for example might not want
   * permission to be checked for anonymous users. Refer CRM-6211. We might be
   * breaking Multi-Site dedupe for public pages.
   *
   * @param array $params
   *   Array of params of the form $params[$table][$field] == $value.
   * @param string $ctype
   *   Contact type to match against.
   * @param string $used
   *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
   *   Only consulted when no rule group is found via $ruleGroupID.
   * @param array $except
   *   Array of contacts that shouldn't be considered dupes.
   * @param int $ruleGroupID
   *   The id of the dedupe rule we should be using.
   *
   * @return array
   *   Matching contact ids. Keys are preserved from the internal result list
   *   (array_diff() does not re-index), so they may not be sequential.
   *
   * @throws \CRM_Core_Exception
   *   When neither the requested rule group nor a rule group for
   *   $ctype/$used exists.
   */
  public static function dupesByParams(
    $params,
    $ctype,
    $used = 'Unsupervised',
    $except = [],
    $ruleGroupID = NULL
  ) {
    // If $params is empty there is zero reason to proceed.
    if (!$params) {
      return [];
    }
    $checkPermission = CRM_Utils_Array::value('check_permission', $params, TRUE);
    // This may no longer be required - see https://github.com/civicrm/civicrm-core/pull/13176
    $params = array_filter($params);

    // Prefer the explicitly requested rule group, provided it matches the
    // contact type; otherwise fall back to the rule group for $ctype/$used.
    $foundByID = FALSE;
    if ($ruleGroupID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->id = $ruleGroupID;
      $rgBao->contact_type = $ctype;
      if ($rgBao->find(TRUE)) {
        $foundByID = TRUE;
      }
    }

    if (!$foundByID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->contact_type = $ctype;
      $rgBao->used = $used;
      if (!$rgBao->find(TRUE)) {
        throw new CRM_Core_Exception("$used rule for $ctype does not exist");
      }
    }

    // Reduce the phone value to its digits before matching.
    if (isset($params['civicrm_phone']['phone_numeric'])) {
      $orig = $params['civicrm_phone']['phone_numeric'];
      $params['civicrm_phone']['phone_numeric'] = preg_replace('/[^\d]/', '', $orig);
    }
    $rgBao->params = $params;
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($checkPermission));
    $dupes = [];
    while ($dao->fetch()) {
      // Skip rows without a usable contact id.
      if (!empty($dao->id)) {
        $dupes[] = $dao->id;
      }
    }
    $dao->query($rgBao->tableDropQuery());
    return array_diff($dupes, $except);
  }

  /**
   * Find possible duplicate contact pairs among the members of a group.
   *
   * @param int $rgid
   *   Rule group id.
   * @param int $gid
   *   Contact group id.
   * @param int $searchLimit
   *   Limit for the number of contacts to be used for comparison.
   *   The search methodology finds all matches for the searchedContacts so this limits
   *   the number of searched contacts, not the matches found.
   *
   * @return array
   *   List of [cid1, cid2, weight] dupe triples; empty when the group lookup
   *   returns no members.
   *
   * @throws \CRM_Core_Exception
   */
  public static function dupesInGroup($rgid, $gid, $searchLimit = 0) {
    $cids = array_keys(CRM_Contact_BAO_Group::getMember($gid, TRUE, $searchLimit));
    if (empty($cids)) {
      return [];
    }
    return self::dupes($rgid, $cids);
  }

  /**
   * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
   * object into a valid $params array for dedupe.
   *
   * The input is flattened, a handful of legacy field names are mapped to
   * their current names, dotted keys and location postfixes ("-5",
   * "-Primary", "-Primary-2", "-1-2") are stripped, and the result is
   * regrouped per table according to the fields the dedupe rules support.
   *
   * @param array $fields
   *   Contact structure from formRule().
   * @param string $ctype
   *   Contact type of the given contact.
   *
   * @return array
   *   Valid $params array for dedupe, of the form
   *   $params[$table][$field] = $value. Empty values are omitted.
   *
   * @throws \CRM_Core_Exception
   */
  public static function formatParams($fields, $ctype) {
    $flat = [];
    CRM_Utils_Array::flatten($fields, $flat);

    // FIXME: This may no longer be necessary - check inputs
    $replace_these = [
      'individual_prefix' => 'prefix_id',
      'individual_suffix' => 'suffix_id',
      'gender' => 'gender_id',
    ];
    foreach (['individual_suffix', 'individual_prefix', 'gender'] as $name) {
      if (!empty($fields[$name])) {
        $flat[$replace_these[$name]] = $flat[$name];
        unset($flat[$name]);
      }
    }

    // handle {birth,deceased}_date
    foreach (['birth_date', 'deceased_date'] as $date) {
      if (!empty($fields[$date])) {
        $flat[$date] = $fields[$date];
        // Array-style dates are converted to a string before processing.
        if (is_array($flat[$date])) {
          $flat[$date] = CRM_Utils_Date::format($flat[$date]);
        }
        $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
      }
    }

    if (!empty($flat['contact_source'])) {
      $flat['source'] = $flat['contact_source'];
      unset($flat['contact_source']);
    }

    // handle preferred_communication_method: keep the keys whose value is
    // '1' and join them with the value separator.
    if (!empty($fields['preferred_communication_method'])) {
      $methods = array_intersect($fields['preferred_communication_method'], ['1']);
      $methods = array_keys($methods);
      sort($methods);
      if ($methods) {
        $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
      }
    }

    // handle custom data
    $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1);
    CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
    foreach ($tree as $key => $cg) {
      // Only integer-keyed entries are treated as custom groups.
      if (!is_int($key)) {
        continue;
      }
      foreach ($cg['fields'] as $cf) {
        $flat[$cf['column_name']] = $cf['customValue']['data'] ?? NULL;
      }
    }

    // if the key is dotted, keep just the last part of it
    foreach ($flat as $key => $value) {
      if (substr_count($key, '.')) {
        $last = explode('.', $key);
        $last = array_pop($last);
        // make sure the first occurrence is kept, not the last
        if (!isset($flat[$last])) {
          $flat[$last] = $value;
        }
        unset($flat[$key]);
      }
    }

    // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
    // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
    // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
    foreach ($flat as $key => $value) {
      $matches = [];
      // The first alternative uses \d+ (one or more digits). The previous
      // character class [\d+] matched only a single digit or a literal '+',
      // so e.g. 'phone-Primary-12' was reduced to 'phone-Primary' rather
      // than 'phone'.
      if (preg_match('/(.*)-(Primary-\d+)$|(.*)-(\d+-\d+)$|(.*)-(\d+|Primary)$/', $key, $matches)) {
        // Drop the empty groups of the alternatives that did not match, so
        // index 1 is the base field name.
        $return = array_values(array_filter($matches));
        // make sure the first occurrence is kept, not the last
        $flat[$return[1]] = empty($flat[$return[1]]) ? $value : $flat[$return[1]];
        unset($flat[$key]);
      }
    }

    $params = [];
    $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
    if (is_array($supportedFields)) {
      foreach ($supportedFields as $table => $tableFields) {
        if ($table === 'civicrm_address') {
          // for matching on civicrm_address fields, we also need the location_type_id
          $tableFields['location_type_id'] = '';
          // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
          $fixes = [
            'address_name' => 'name',
            'country' => 'country_id',
            'state_province' => 'state_province_id',
            'county' => 'county_id',
          ];
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        if ($table === 'civicrm_phone') {
          $fixes = [
            'phone' => 'phone_numeric',
          ];
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        foreach ($tableFields as $field => $title) {
          if (!empty($flat[$field])) {
            $params[$table][$field] = $flat[$field];
          }
        }
      }
    }
    return $params;
  }

  /**
   * Parse duplicate pairs into a standardised array and store in the prev_next_cache.
   *
   * @param array $foundDupes
   *   List of [cid1, cid2, weight] triples.
   * @param string $cacheKeyString
   *   Cache key the rows are stored under in civicrm_prevnext_cache.
   *
   * @return array
   *   Dupe pairs with the keys
   *   -srcID
   *   -srcName
   *   -dstID
   *   -dstName
   *   -weight
   *   -canMerge
   *   An empty array is returned (and nothing is queried or stored) when
   *   $foundDupes is empty.
   */
  public static function parseAndStoreDupePairs($foundDupes, $cacheKeyString) {
    // Nothing to do for empty input. Without this guard the lookup below
    // would be built as "WHERE id IN ()", which is not valid SQL.
    if (empty($foundDupes)) {
      return [];
    }

    // Collect the distinct contact ids referenced by any pair.
    $cids = [];
    foreach ($foundDupes as $dupe) {
      $cids[$dupe[0]] = 1;
      $cids[$dupe[1]] = 1;
    }
    // The ids are interpolated into the SQL below, so force them to integers.
    $cidString = implode(', ', array_map('intval', array_keys($cids)));

    $dao = CRM_Core_DAO::executeQuery("SELECT id, display_name FROM civicrm_contact WHERE id IN ($cidString) ORDER BY sort_name");
    $displayNames = [];
    while ($dao->fetch()) {
      $displayNames[$dao->id] = $dao->display_name;
    }

    $mainContacts = [];
    $userId = CRM_Core_Session::getLoggedInContactID();
    foreach ($foundDupes as $dupes) {
      $srcID = $dupes[1];
      $dstID = $dupes[0];
      // The logged in user should never be the src (ie. the contact to be removed).
      if ($srcID == $userId) {
        $srcID = $dstID;
        $dstID = $userId;
      }

      $row = [
        'dstID' => (int) $dstID,
        'dstName' => $displayNames[$dstID],
        'srcID' => (int) $srcID,
        'srcName' => $displayNames[$srcID],
        'weight' => $dupes[2],
        'canMerge' => TRUE,
      ];
      $mainContacts[] = $row;

      // The serialized row is stored alongside the pair.
      CRM_Core_DAO::executeQuery("INSERT INTO civicrm_prevnext_cache (entity_table, entity_id1, entity_id2, cacheKey, data) VALUES
        ('civicrm_contact', %1, %2, %3, %4)", [
          1 => [$dstID, 'Integer'],
          2 => [$srcID, 'Integer'],
          3 => [$cacheKeyString, 'String'],
          4 => [serialize($row), 'String'],
        ]
      );
    }
    return $mainContacts;
  }

}