Merge pull request #18200 from seamuslee001/5.29
[civicrm-core.git] / CRM / Dedupe / Finder.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
bc77d7c0 4 | Copyright CiviCRM LLC. All rights reserved. |
6a488035 5 | |
bc77d7c0
TO
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
6a488035 9 +--------------------------------------------------------------------+
d25dd0ee 10 */
6a488035
TO
11
12/**
13 *
14 * @package CRM
ca5cec67 15 * @copyright CiviCRM LLC https://civicrm.org/licensing
6a488035
TO
16 */
17
/**
 * The CiviCRM duplicate discovery engine is based on an
 * algorithm designed by David Strauss <david@fourkitchens.com>.
 */
class CRM_Dedupe_Finder {

  /**
   * Return the possible dupes found by a rule group - limited to dupes
   * of $cids if provided.
   *
   * @param int $rgid
   *   Rule group id.
   * @param array $cids
   *   Contact ids to limit the search to.
   * @param bool $checkPermissions
   *   Respect logged in user permissions.
   *
   * @return array
   *   Array of (cid1, cid2, weight) dupe triples
   *
   * @throws \CRM_Core_Exception
   *   When no rule group with the given id can be loaded.
   */
  public static function dupes($rgid, $cids = [], $checkPermissions = TRUE) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->id = $rgid;
    $rgBao->contactIds = $cids;
    if (!$rgBao->find(TRUE)) {
      throw new CRM_Core_Exception('Dedupe rule not found for selected contacts');
    }

    // The rule group prepares its working data, we read every row returned
    // by the threshold query, then run the rule group's drop query.
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($checkPermissions));
    $dupes = [];
    while ($dao->fetch()) {
      $dupes[] = [$dao->id1, $dao->id2, $dao->weight];
    }
    $dao->query($rgBao->tableDropQuery());

    return $dupes;
  }

  /**
   * Return an array of possible dupes, based on the provided array of
   * params, using the default rule group for the given contact type and
   * usage.
   *
   * check_permission is a boolean flag to indicate if permission should be considered.
   * default is to always check permissioning but public pages for example might not want
   * permission to be checked for anonymous users. Refer CRM-6211. We might be breaking
   * Multi-Site dedupe for public pages.
   *
   * @param array $params
   *   Array of params of the form $params[$table][$field] == $value.
   *   May also carry a top-level 'check_permission' flag (defaults to TRUE).
   * @param string $ctype
   *   Contact type to match against.
   * @param string $used
   *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
   * @param array $except
   *   Array of contacts that shouldn't be considered dupes.
   * @param int $ruleGroupID
   *   The id of the dedupe rule we should be using.
   *
   * @return array
   *   matching contact ids (keys are preserved from the internal result
   *   list, so they are not necessarily sequential once $except is removed)
   *
   * @throws \CRM_Core_Exception
   *   When neither the requested rule group nor the default one for
   *   $ctype / $used can be found.
   */
  public static function dupesByParams(
    $params,
    $ctype,
    $used = 'Unsupervised',
    $except = [],
    $ruleGroupID = NULL
  ) {
    // If $params is empty there is zero reason to proceed.
    if (!$params) {
      return [];
    }
    // Read the flag before array_filter() below, which would drop a FALSE value.
    $checkPermission = CRM_Utils_Array::value('check_permission', $params, TRUE);
    // This may no longer be required - see https://github.com/civicrm/civicrm-core/pull/13176
    $params = array_filter($params);

    $foundByID = FALSE;
    if ($ruleGroupID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->id = $ruleGroupID;
      $rgBao->contact_type = $ctype;
      if ($rgBao->find(TRUE)) {
        $foundByID = TRUE;
      }
    }

    // Fall back to the rule group registered for this contact type and usage.
    if (!$foundByID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->contact_type = $ctype;
      $rgBao->used = $used;
      if (!$rgBao->find(TRUE)) {
        throw new CRM_Core_Exception("$used rule for $ctype does not exist");
      }
    }

    // phone_numeric is matched on digits only, so strip everything else.
    if (isset($params['civicrm_phone']['phone_numeric'])) {
      $orig = $params['civicrm_phone']['phone_numeric'];
      $params['civicrm_phone']['phone_numeric'] = preg_replace('/[^\d]/', '', $orig);
    }
    $rgBao->params = $params;
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($checkPermission));
    $dupes = [];
    while ($dao->fetch()) {
      if (isset($dao->id) && $dao->id) {
        $dupes[] = $dao->id;
      }
    }
    $dao->query($rgBao->tableDropQuery());
    return array_diff($dupes, $except);
  }

  /**
   * Return the possible dupes among the members of the given group.
   *
   * @param int $rgid
   *   Rule group id.
   * @param int $gid
   *   Contact group id.
   * @param int $searchLimit
   *   Limit for the number of contacts to be used for comparison.
   *   The search methodology finds all matches for the searchedContacts so this limits
   *   the number of searched contacts, not the matches found.
   *
   * @return array
   *   array of (cid1, cid2, weight) dupe triples; empty when the group
   *   lookup returns no members.
   *
   * @throws \CRM_Core_Exception
   */
  public static function dupesInGroup($rgid, $gid, $searchLimit = 0) {
    $cids = array_keys(CRM_Contact_BAO_Group::getMember($gid, TRUE, $searchLimit));
    if (!empty($cids)) {
      return self::dupes($rgid, $cids);
    }
    return [];
  }

  /**
   * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
   * object into a valid $params array for dedupe
   *
   * @param array $fields
   *   Contact structure from formRule().
   * @param string $ctype
   *   Contact type of the given contact.
   *
   * @return array
   *   valid $params array for dedupe, of the form $params[$table][$field]
   *
   * @throws \CRM_Core_Exception
   */
  public static function formatParams($fields, $ctype) {
    $flat = [];
    CRM_Utils_Array::flatten($fields, $flat);

    // FIXME: This may no longer be necessary - check inputs
    $replace_these = [
      'individual_prefix' => 'prefix_id',
      'individual_suffix' => 'suffix_id',
      'gender' => 'gender_id',
    ];
    foreach (['individual_suffix', 'individual_prefix', 'gender'] as $name) {
      if (!empty($fields[$name])) {
        $flat[$replace_these[$name]] = $flat[$name];
        unset($flat[$name]);
      }
    }

    // handle {birth,deceased}_date
    foreach (['birth_date', 'deceased_date'] as $date) {
      if (!empty($fields[$date])) {
        $flat[$date] = $fields[$date];
        if (is_array($flat[$date])) {
          $flat[$date] = CRM_Utils_Date::format($flat[$date]);
        }
        $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
      }
    }

    if (!empty($flat['contact_source'])) {
      $flat['source'] = $flat['contact_source'];
      unset($flat['contact_source']);
    }

    // handle preferred_communication_method
    // Only an array can be intersected; anything else is ignored rather than
    // triggering a TypeError in array_intersect().
    if (!empty($fields['preferred_communication_method']) && is_array($fields['preferred_communication_method'])) {
      // Keep the keys of the entries whose value is '1', i.e. the ticked methods.
      $methods = array_intersect($fields['preferred_communication_method'], ['1']);
      $methods = array_keys($methods);
      sort($methods);
      if ($methods) {
        $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
      }
    }

    // handle custom data
    $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1);
    CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
    foreach ($tree as $key => $cg) {
      // Only integer-keyed entries of the tree are processed as custom groups.
      if (!is_int($key)) {
        continue;
      }
      foreach ($cg['fields'] as $cf) {
        $flat[$cf['column_name']] = $cf['customValue']['data'] ?? NULL;
      }
    }

    // if the key is dotted, keep just the last part of it
    foreach ($flat as $key => $value) {
      if (substr_count($key, '.')) {
        $last = explode('.', $key);
        $last = array_pop($last);
        // make sure the first occurrence is kept, not the last
        if (!isset($flat[$last])) {
          $flat[$last] = $value;
        }
        unset($flat[$key]);
      }
    }

    // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
    // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
    // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
    foreach ($flat as $key => $value) {
      $matches = [];
      // Three suffix shapes are stripped: "-Primary-<n>", "-<n>-<n>" and
      // "-<n>" / "-Primary". The first alternative uses \d+ so multi-digit
      // ids are stripped in full.
      if (preg_match('/(.*)-(Primary-\d+)$|(.*)-(\d+-\d+)$|(.*)-(\d+|Primary)$/', $key, $matches)) {
        // Unused alternatives leave empty groups; after filtering, index 1
        // holds the base field name.
        $return = array_values(array_filter($matches));
        // make sure the first occurrence is kept, not the last
        $flat[$return[1]] = empty($flat[$return[1]]) ? $value : $flat[$return[1]];
        unset($flat[$key]);
      }
    }

    $params = [];
    $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
    if (is_array($supportedFields)) {
      // $tableFields (rather than $fields) so the method parameter is not overwritten.
      foreach ($supportedFields as $table => $tableFields) {
        if ($table === 'civicrm_address') {
          // for matching on civicrm_address fields, we also need the location_type_id
          $tableFields['location_type_id'] = '';
          // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
          $fixes = [
            'address_name' => 'name',
            'country' => 'country_id',
            'state_province' => 'state_province_id',
            'county' => 'county_id',
          ];
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        if ($table === 'civicrm_phone') {
          $fixes = [
            'phone' => 'phone_numeric',
          ];
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        foreach ($tableFields as $field => $title) {
          if (!empty($flat[$field])) {
            $params[$table][$field] = $flat[$field];
          }
        }
      }
    }
    return $params;
  }

  /**
   * Parse duplicate pairs into a standardised array and store in the prev_next_cache.
   *
   * @param array $foundDupes
   *   List of (cid1, cid2, weight) triples.
   * @param string $cacheKeyString
   *   Value written to the cacheKey column of every stored row.
   *
   * @return array
   *   Dupe pairs with the keys
   *   -srcID
   *   -srcName
   *   -dstID
   *   -dstName
   *   -weight
   *   -canMerge
   *   Empty array when $foundDupes is empty.
   */
  public static function parseAndStoreDupePairs($foundDupes, $cacheKeyString) {
    $mainContacts = [];
    // Nothing to look up or store; this also avoids building an invalid
    // "IN ()" clause below.
    if (empty($foundDupes)) {
      return $mainContacts;
    }

    // Collect the distinct contact ids that appear on either side of a pair.
    $cids = [];
    foreach ($foundDupes as $dupe) {
      $cids[$dupe[0]] = 1;
      $cids[$dupe[1]] = 1;
    }
    // The ids are interpolated into the SQL text, so force them to integers.
    $cidString = implode(', ', array_map('intval', array_keys($cids)));

    $dao = CRM_Core_DAO::executeQuery("SELECT id, display_name FROM civicrm_contact WHERE id IN ($cidString) ORDER BY sort_name");
    $displayNames = [];
    while ($dao->fetch()) {
      $displayNames[$dao->id] = $dao->display_name;
    }

    $userId = CRM_Core_Session::getLoggedInContactID();
    foreach ($foundDupes as $dupe) {
      $srcID = $dupe[1];
      $dstID = $dupe[0];
      // The logged in user should never be the src (ie. the contact to be removed).
      if ($srcID == $userId) {
        $srcID = $dstID;
        $dstID = $userId;
      }

      // A contact missing from the lookup above yields a NULL name.
      $row = [
        'dstID' => (int) $dstID,
        'dstName' => $displayNames[$dstID] ?? NULL,
        'srcID' => (int) $srcID,
        'srcName' => $displayNames[$srcID] ?? NULL,
        'weight' => $dupe[2],
        'canMerge' => TRUE,
      ];
      $mainContacts[] = $row;

      CRM_Core_DAO::executeQuery("INSERT INTO civicrm_prevnext_cache (entity_table, entity_id1, entity_id2, cacheKey, data) VALUES
        ('civicrm_contact', %1, %2, %3, %4)", [
          1 => [$dstID, 'Integer'],
          2 => [$srcID, 'Integer'],
          3 => [$cacheKeyString, 'String'],
          4 => [serialize($row), 'String'],
        ]
      );
    }
    return $mainContacts;
  }

}