Merge pull request #16286 from alifrumin/offlineContributor
[civicrm-core.git] / CRM / Dedupe / Finder.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
bc77d7c0 4 | Copyright CiviCRM LLC. All rights reserved. |
6a488035 5 | |
bc77d7c0
TO
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
6a488035 9 +--------------------------------------------------------------------+
d25dd0ee 10 */
6a488035
TO
11
12/**
13 *
14 * @package CRM
ca5cec67 15 * @copyright CiviCRM LLC https://civicrm.org/licensing
6a488035
TO
16 * $Id$
17 *
18 */
19
20/**
21 * The CiviCRM duplicate discovery engine is based on an
22 * algorithm designed by David Strauss <david@fourkitchens.com>.
23 */
24class CRM_Dedupe_Finder {
25
/**
 * Find possible duplicate contact pairs using a given dedupe rule group.
 *
 * The rule group is loaded by id, asked to fill its working table via
 * fillTable(), and every row returned by its threshold query is collected.
 * The rule group's table-drop query is run before returning.
 *
 * @param int $rgid
 *   Rule group id.
 * @param array $cids
 *   Contact ids to limit the search to.
 * @param bool $checkPermissions
 *   Respect logged in user permissions.
 *
 * @return array
 *   List of (cid1, cid2, weight) dupe triples.
 *
 * @throws CRM_Core_Exception
 *   When no rule group can be loaded for the given id.
 */
public static function dupes($rgid, $cids = [], $checkPermissions = TRUE) {
  $ruleGroup = new CRM_Dedupe_BAO_RuleGroup();
  $ruleGroup->id = $rgid;
  $ruleGroup->contactIds = $cids;
  $ruleFound = $ruleGroup->find(TRUE);
  if (!$ruleFound) {
    throw new CRM_Core_Exception('Dedupe rule not found for selected contacts');
  }

  $ruleGroup->fillTable();

  $result = new CRM_Core_DAO();
  $thresholdSql = $ruleGroup->thresholdQuery($checkPermissions);
  $result->query($thresholdSql);

  // Each fetched row describes one candidate pair and its accumulated weight.
  $pairs = [];
  while ($result->fetch()) {
    $pairs[] = [$result->id1, $result->id2, $result->weight];
  }

  // Clean up the working table created by fillTable().
  $dropSql = $ruleGroup->tableDropQuery();
  $result->query($dropSql);

  return $pairs;
}
62
/**
 * Find contacts that look like duplicates of the supplied params.
 *
 * The rule group identified by $ruleGroupID is used when it exists for the
 * given contact type; otherwise the rule group matching the contact type and
 * usage is used.
 *
 * $params['check_permission'] is a boolean flag indicating whether
 * permissions should be considered. The default is to always check
 * permissions, but public pages for example might not want permissions to be
 * checked for anonymous users. Refer CRM-6211. We might be breaking
 * Multi-Site dedupe for public pages.
 *
 * @param array $params
 *   Array of params of the form $params[$table][$field] == $value.
 * @param string $ctype
 *   Contact type to match against.
 * @param string $used
 *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
 * @param array $except
 *   Array of contacts that shouldn't be considered dupes.
 * @param int $ruleGroupID
 *   The id of the dedupe rule we should be using.
 *
 * @return array
 *   Matching contact ids.
 *
 * @throws \CRM_Core_Exception
 *   When no rule group exists for the contact type and usage.
 */
public static function dupesByParams(
  $params,
  $ctype,
  $used = 'Unsupervised',
  $except = [],
  $ruleGroupID = NULL
) {
  // Without any params there is nothing to match against.
  if (!$params) {
    return [];
  }
  $checkPermission = CRM_Utils_Array::value('check_permission', $params, TRUE);
  // This may no longer be required - see https://github.com/civicrm/civicrm-core/pull/13176
  $params = array_filter($params);

  // Prefer the explicitly requested rule group if it can be loaded.
  $ruleGroup = NULL;
  if ($ruleGroupID) {
    $requested = new CRM_Dedupe_BAO_RuleGroup();
    $requested->id = $ruleGroupID;
    $requested->contact_type = $ctype;
    if ($requested->find(TRUE)) {
      $ruleGroup = $requested;
    }
  }

  // Otherwise look the rule group up by contact type and usage.
  if ($ruleGroup === NULL) {
    $ruleGroup = new CRM_Dedupe_BAO_RuleGroup();
    $ruleGroup->contact_type = $ctype;
    $ruleGroup->used = $used;
    if (!$ruleGroup->find(TRUE)) {
      throw new CRM_Core_Exception("$used rule for $ctype does not exist");
    }
  }

  // Reduce the numeric phone value to its digits only.
  if (isset($params['civicrm_phone']['phone_numeric'])) {
    $params['civicrm_phone']['phone_numeric'] = preg_replace(
      '/[^\d]/',
      '',
      $params['civicrm_phone']['phone_numeric']
    );
  }

  $ruleGroup->params = $params;
  $ruleGroup->fillTable();

  $result = new CRM_Core_DAO();
  $result->query($ruleGroup->thresholdQuery($checkPermission));
  $matchedIds = [];
  while ($result->fetch()) {
    if (!empty($result->id)) {
      $matchedIds[] = $result->id;
    }
  }
  $result->query($ruleGroup->tableDropQuery());

  return array_diff($matchedIds, $except);
}
139
/**
 * Find possible duplicate pairs among the members of a contact group.
 *
 * The group's member ids are handed to self::dupes(); an empty group yields
 * an empty result without running the dedupe query.
 *
 * @param int $rgid
 *   Rule group id.
 * @param int $gid
 *   Contact group id.
 * @param int $searchLimit
 *   Limit for the number of contacts to be used for comparison.
 *   The search methodology finds all matches for the searched contacts, so
 *   this limits the number of searched contacts, not the matches found.
 *
 * @return array
 *   Array of (cid1, cid2, weight) dupe triples.
 *
 * @throws \CRM_Core_Exception
 * @throws \CiviCRM_API3_Exception
 */
public static function dupesInGroup($rgid, $gid, $searchLimit = 0) {
  $members = CRM_Contact_BAO_Group::getMember($gid, TRUE, $searchLimit);
  $contactIds = array_keys($members);
  if (empty($contactIds)) {
    return [];
  }
  return self::dupes($rgid, $contactIds);
}
164
6a488035
TO
/**
 * A hackish function needed to massage the CRM_Contact_Form_$ctype::formRule()
 * structure into a valid $params array for dedupe.
 *
 * The submitted values are flattened, a few legacy keys are renamed, dates
 * and custom data are normalised, dotted prefixes and location postfixes are
 * stripped from the keys, and finally only the fields supported by the dedupe
 * rules of the contact type are kept, grouped by table.
 *
 * @param array $fields
 *   Contact structure from formRule().
 * @param string $ctype
 *   Contact type of the given contact.
 *
 * @return array
 *   Valid $params array for dedupe, of the form $params[$table][$field] = $value.
 *
 * @throws \CRM_Core_Exception
 */
public static function formatParams($fields, $ctype) {
  $flat = [];
  CRM_Utils_Array::flatten($fields, $flat);

  // FIXME: This may no longer be necessary - check inputs
  $replace_these = [
    'individual_prefix' => 'prefix_id',
    'individual_suffix' => 'suffix_id',
    'gender' => 'gender_id',
  ];
  foreach (['individual_suffix', 'individual_prefix', 'gender'] as $name) {
    if (!empty($fields[$name])) {
      $flat[$replace_these[$name]] = $flat[$name];
      unset($flat[$name]);
    }
  }

  // handle {birth,deceased}_date
  foreach (['birth_date', 'deceased_date'] as $date) {
    if (!empty($fields[$date])) {
      $flat[$date] = $fields[$date];
      if (is_array($flat[$date])) {
        $flat[$date] = CRM_Utils_Date::format($flat[$date]);
      }
      $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
    }
  }

  if (!empty($flat['contact_source'])) {
    $flat['source'] = $flat['contact_source'];
    unset($flat['contact_source']);
  }

  // handle preferred_communication_method: keep the keys whose value is '1'
  // and store them as a separator-wrapped, sorted list.
  if (!empty($fields['preferred_communication_method'])) {
    $methods = array_intersect($fields['preferred_communication_method'], ['1']);
    $methods = array_keys($methods);
    sort($methods);
    if ($methods) {
      $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
    }
  }

  // handle custom data
  $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1);
  CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
  foreach ($tree as $key => $cg) {
    // Only integer-keyed entries are processed as custom groups.
    if (!is_int($key)) {
      continue;
    }
    foreach ($cg['fields'] as $cf) {
      $flat[$cf['column_name']] = CRM_Utils_Array::value('data', $cf['customValue']);
    }
  }

  // if the key is dotted, keep just the last part of it
  foreach ($flat as $key => $value) {
    if (substr_count($key, '.')) {
      $last = explode('.', $key);
      $last = array_pop($last);
      // make sure the first occurrence is kept, not the last
      if (!isset($flat[$last])) {
        $flat[$last] = $value;
      }
      unset($flat[$key]);
    }
  }

  // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
  // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
  // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
  foreach ($flat as $key => $value) {
    $matches = [];
    // "\d+" is a quantified digit, not a character class, so an id of any
    // length after "Primary-" is stripped (e.g. "phone-Primary-12" => "phone").
    if (preg_match('/(.*)-(Primary-\d+)$|(.*)-(\d+|Primary)$/', $key, $matches)) {
      // Only one alternative captures; dropping the empty groups leaves the
      // base key at index 1 whichever alternative matched.
      $return = array_values(array_filter($matches));
      // make sure the first occurrence is kept, not the last
      $flat[$return[1]] = empty($flat[$return[1]]) ? $value : $flat[$return[1]];
      unset($flat[$key]);
    }
  }

  $params = [];
  $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
  if (is_array($supportedFields)) {
    // $tableFields is deliberately not named $fields so that the method
    // parameter is not overwritten.
    foreach ($supportedFields as $table => $tableFields) {
      if ($table === 'civicrm_address') {
        // for matching on civicrm_address fields, we also need the location_type_id
        $tableFields['location_type_id'] = '';
        // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
        $fixes = [
          'address_name' => 'name',
          'country' => 'country_id',
          'state_province' => 'state_province_id',
          'county' => 'county_id',
        ];
        foreach ($fixes as $orig => $target) {
          if (!empty($flat[$orig])) {
            $params[$table][$target] = $flat[$orig];
          }
        }
      }
      if ($table === 'civicrm_phone') {
        // The submitted phone value is passed on under the phone_numeric key.
        $fixes = [
          'phone' => 'phone_numeric',
        ];
        foreach ($fixes as $orig => $target) {
          if (!empty($flat[$orig])) {
            $params[$table][$target] = $flat[$orig];
          }
        }
      }
      foreach ($tableFields as $field => $title) {
        if (!empty($flat[$field])) {
          $params[$table][$field] = $flat[$field];
        }
      }
    }
  }
  return $params;
}
96025800 301
/**
 * Parse duplicate pairs into a standardised array and store in the prev_next_cache.
 *
 * Display names are looked up for every contact id in the pairs, each pair is
 * turned into a row, and the serialized row is stored via
 * CRM_Core_BAO_PrevNextCache::setItem() under the given cache key.
 *
 * @param array $foundDupes
 *   List of (cid1, cid2, weight) triples.
 * @param string $cacheKeyString
 *   Cache key the pairs are stored under.
 *
 * @return array
 *   Dupe pairs with the keys
 *   -srcID
 *   -srcName
 *   -dstID
 *   -dstName
 *   -weight
 *   -canMerge
 *   An empty array is returned when no pairs are supplied.
 */
public static function parseAndStoreDupePairs($foundDupes, $cacheKeyString) {
  $mainContacts = [];
  // Nothing to parse or cache; also avoids building an invalid "IN ()" clause.
  if (empty($foundDupes)) {
    return $mainContacts;
  }

  // Collect the distinct contact ids; the int cast guarantees that only
  // integers are interpolated into the SQL below.
  $cids = [];
  foreach ($foundDupes as $dupe) {
    $cids[(int) $dupe[0]] = 1;
    $cids[(int) $dupe[1]] = 1;
  }
  $cidString = implode(', ', array_keys($cids));

  $dao = CRM_Core_DAO::executeQuery("SELECT id, display_name FROM civicrm_contact WHERE id IN ($cidString) ORDER BY sort_name");
  $displayNames = [];
  while ($dao->fetch()) {
    $displayNames[$dao->id] = $dao->display_name;
  }

  $userId = CRM_Core_Session::getLoggedInContactID();
  foreach ($foundDupes as $dupe) {
    $srcID = $dupe[1];
    $dstID = $dupe[0];
    // The logged in user should never be the src (ie. the contact to be removed).
    if ($srcID == $userId) {
      $srcID = $dstID;
      $dstID = $userId;
    }

    // A contact missing from the lookup gets a NULL name instead of
    // triggering an undefined index notice.
    $row = [
      'dstID' => $dstID,
      'dstName' => isset($displayNames[$dstID]) ? $displayNames[$dstID] : NULL,
      'srcID' => $srcID,
      'srcName' => isset($displayNames[$srcID]) ? $displayNames[$srcID] : NULL,
      'weight' => $dupe[2],
      'canMerge' => TRUE,
    ];
    $mainContacts[] = $row;

    $data = CRM_Core_DAO::escapeString(serialize($row));
    CRM_Core_BAO_PrevNextCache::setItem('civicrm_contact', $dstID, $srcID, $cacheKeyString, $data);
  }
  return $mainContacts;
}
355
6a488035 356}