APIv4 - smarter parsing of Entity docblocks
[civicrm-core.git] / CRM / Dedupe / Finder.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 */
17
/**
 * The CiviCRM duplicate discovery engine is based on an
 * algorithm designed by David Strauss <david@fourkitchens.com>.
 *
 * This class exposes static entry points that load a dedupe rule group,
 * ask it to fill its working table, read the matches back through the
 * rule group's threshold query and then run the rule group's table-drop
 * query.
 */
class CRM_Dedupe_Finder {

  /**
   * Return the possible duplicate pairs for a rule group, limited to dupes
   * of $cids if provided.
   *
   * @param int $rgid
   *   Rule group id.
   * @param array $cids
   *   Contact ids to limit the search to.
   * @param bool $checkPermissions
   *   Respect logged in user permissions.
   *
   * @return array
   *   List of [cid1, cid2, weight] dupe triples.
   *
   * @throws \CRM_Core_Exception
   *   When no rule group with the given id can be loaded.
   */
  public static function dupes($rgid, $cids = [], $checkPermissions = TRUE) {
    $rgBao = new CRM_Dedupe_BAO_DedupeRuleGroup();
    $rgBao->id = $rgid;
    $rgBao->contactIds = $cids;
    if (!$rgBao->find(TRUE)) {
      throw new CRM_Core_Exception('Dedupe rule not found for selected contacts');
    }

    $rgBao->fillTable();
    $dao = CRM_Core_DAO::executeQuery($rgBao->thresholdQuery($checkPermissions));
    $dupes = [];
    while ($dao->fetch()) {
      $dupes[] = [$dao->id1, $dao->id2, $dao->weight];
    }
    // Clean up the working table created by fillTable().
    CRM_Core_DAO::executeQuery($rgBao->tableDropQuery());

    return $dupes;
  }

  /**
   * Return an array of possible dupes, based on the provided array of
   * params, using the default rule group for the given contact type and
   * usage.
   *
   * $params['check_permission'] is a boolean flag to indicate if permission
   * should be considered. The default is to always check permissions, but
   * public pages for example might not want permission to be checked for
   * anonymous users. Refer CRM-6211. We might be breaking Multi-Site dedupe
   * for public pages.
   *
   * @param array $params
   *   Array of params of the form $params[$table][$field] == $value.
   * @param string $ctype
   *   Contact type to match against.
   * @param string $used
   *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
   *   Only consulted when $ruleGroupID is empty or does not load for $ctype.
   * @param array $except
   *   Array of contacts that shouldn't be considered dupes.
   * @param int|null $ruleGroupID
   *   The id of the dedupe rule we should be using.
   *
   * @return array
   *   Matching contact ids. Entries listed in $except are removed with
   *   array_diff(), so the keys of the result are not necessarily contiguous.
   *
   * @throws \CRM_Core_Exception
   *   When no rule group can be found for the contact type and usage.
   */
  public static function dupesByParams(
    $params,
    $ctype,
    $used = 'Unsupervised',
    $except = [],
    $ruleGroupID = NULL
  ) {
    // If $params is empty there is zero reason to proceed.
    if (!$params) {
      return [];
    }
    $checkPermission = CRM_Utils_Array::value('check_permission', $params, TRUE);
    // This may no longer be required - see https://github.com/civicrm/civicrm-core/pull/13176
    $params = array_filter($params);

    $foundByID = FALSE;
    if ($ruleGroupID) {
      $rgBao = new CRM_Dedupe_BAO_DedupeRuleGroup();
      $rgBao->id = $ruleGroupID;
      $rgBao->contact_type = $ctype;
      if ($rgBao->find(TRUE)) {
        $foundByID = TRUE;
      }
    }

    // Fall back to the rule group registered for this contact type and usage.
    if (!$foundByID) {
      $rgBao = new CRM_Dedupe_BAO_DedupeRuleGroup();
      $rgBao->contact_type = $ctype;
      $rgBao->used = $used;
      if (!$rgBao->find(TRUE)) {
        throw new CRM_Core_Exception("$used rule for $ctype does not exist");
      }
    }

    // Strip every non-digit character so the phone is compared in numeric form.
    if (isset($params['civicrm_phone']['phone_numeric'])) {
      $orig = $params['civicrm_phone']['phone_numeric'];
      $params['civicrm_phone']['phone_numeric'] = preg_replace('/[^\d]/', '', $orig);
    }
    $rgBao->params = $params;
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($checkPermission));
    $dupes = [];
    while ($dao->fetch()) {
      if (isset($dao->id) && $dao->id) {
        $dupes[] = $dao->id;
      }
    }
    // Clean up the working table created by fillTable().
    $dao->query($rgBao->tableDropQuery());
    return array_diff($dupes, $except);
  }

  /**
   * Return the possible duplicate pairs among the members of a group.
   *
   * The member ids are the keys of CRM_Contact_BAO_Group::getMember() and are
   * handed to self::dupes() with its default permission handling.
   *
   * @param int $rgid
   *   Rule group id.
   * @param int $gid
   *   Contact group id.
   * @param int $searchLimit
   *   Limit for the number of contacts to be used for comparison.
   *   The search methodology finds all matches for the searchedContacts so this limits
   *   the number of searched contacts, not the matches found.
   *
   * @return array
   *   List of [cid1, cid2, weight] dupe triples; empty when the group lookup
   *   yields no contact ids.
   *
   * @throws \CRM_Core_Exception
   */
  public static function dupesInGroup($rgid, $gid, $searchLimit = 0) {
    $cids = array_keys(CRM_Contact_BAO_Group::getMember($gid, TRUE, $searchLimit));
    if (!empty($cids)) {
      return self::dupes($rgid, $cids);
    }
    return [];
  }

  /**
   * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
   * object into a valid $params array for dedupe.
   *
   * The form structure is flattened, a handful of legacy field names are
   * mapped to their current names, custom field values are added under their
   * column names, dotted prefixes and location postfixes are stripped from
   * the keys, and finally only the fields supported for dedupe on $ctype are
   * kept, grouped by table.
   *
   * @param array $fields
   *   Contact structure from formRule().
   * @param string $ctype
   *   Contact type of the given contact.
   *
   * @return array
   *   Valid $params array for dedupe, of the form $params[$table][$field].
   *
   * @throws \CRM_Core_Exception
   */
  public static function formatParams($fields, $ctype) {
    $flat = [];
    CRM_Utils_Array::flatten($fields, $flat);

    // FIXME: This may no longer be necessary - check inputs
    $replace_these = [
      'individual_prefix' => 'prefix_id',
      'individual_suffix' => 'suffix_id',
      'gender' => 'gender_id',
    ];
    foreach (['individual_suffix', 'individual_prefix', 'gender'] as $name) {
      if (!empty($fields[$name])) {
        $flat[$replace_these[$name]] = $flat[$name];
        unset($flat[$name]);
      }
    }

    // handle {birth,deceased}_date
    foreach (['birth_date', 'deceased_date'] as $date) {
      if (!empty($fields[$date])) {
        $flat[$date] = $fields[$date];
        if (is_array($flat[$date])) {
          $flat[$date] = CRM_Utils_Date::format($flat[$date]);
        }
        $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
      }
    }

    if (!empty($flat['contact_source'])) {
      $flat['source'] = $flat['contact_source'];
      unset($flat['contact_source']);
    }

    // handle preferred_communication_method: keep the keys whose value is
    // '1' and store them, sorted, wrapped in the value separator.
    if (!empty($fields['preferred_communication_method'])) {
      $methods = array_intersect($fields['preferred_communication_method'], ['1']);
      $methods = array_keys($methods);
      sort($methods);
      if ($methods) {
        $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
      }
    }

    // handle custom data
    $subTypes = $fields['contact_sub_type'] ?? [];
    // Only return custom for subType + unrestricted or return all custom
    // fields.
    $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, NULL, NULL, -1, $subTypes, NULL, TRUE, NULL, TRUE);
    CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
    foreach ($tree as $key => $cg) {
      // Only the integer-keyed entries of the tree are treated as custom groups.
      if (!is_int($key)) {
        continue;
      }
      foreach ($cg['fields'] as $cf) {
        $flat[$cf['column_name']] = $cf['customValue']['data'] ?? NULL;
      }
    }

    // if the key is dotted, keep just the last part of it
    foreach ($flat as $key => $value) {
      if (substr_count($key, '.')) {
        $last = explode('.', $key);
        $last = array_pop($last);
        // make sure the first occurrence is kept, not the last
        if (!isset($flat[$last])) {
          $flat[$last] = $value;
        }
        unset($flat[$key]);
      }
    }

    // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
    // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
    // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
    foreach ($flat as $key => $value) {
      $matches = [];
      // The first alternative uses "\d+" (one or more digits). The former
      // character class "[\d+]" matched a single character only, so a key
      // such as 'phone-Primary-12' was reduced to 'phone-Primary', not 'phone'.
      if (preg_match('/(.*)-(Primary-\d+)$|(.*)-(\d+-\d+)$|(.*)-(\d+|Primary)$/', $key, $matches)) {
        // Drop the empty groups of the alternatives that did not match, so
        // index 1 is always the bare field name.
        $return = array_values(array_filter($matches));
        // make sure the first occurrence is kept, not the last
        $flat[$return[1]] = empty($flat[$return[1]]) ? $value : $flat[$return[1]];
        unset($flat[$key]);
      }
    }

    $params = [];
    $supportedFields = CRM_Dedupe_BAO_DedupeRuleGroup::supportedFields($ctype);
    if (is_array($supportedFields)) {
      foreach ($supportedFields as $table => $tableFields) {
        if ($table === 'civicrm_address') {
          // for matching on civicrm_address fields, we also need the location_type_id
          $tableFields['location_type_id'] = '';
          // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
          $fixes = [
            'address_name' => 'name',
            'country' => 'country_id',
            'state_province' => 'state_province_id',
            'county' => 'county_id',
          ];
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        if ($table === 'civicrm_phone') {
          $fixes = [
            'phone' => 'phone_numeric',
          ];
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        foreach (array_keys($tableFields) as $field) {
          if (!empty($flat[$field])) {
            $params[$table][$field] = $flat[$field];
          }
        }
      }
    }
    return $params;
  }

  /**
   * Parse duplicate pairs into a standardised array and store in the prev_next_cache.
   *
   * One row is inserted into civicrm_prevnext_cache per pair, with the
   * serialized pair details in the data column.
   *
   * @param array $foundDupes
   *   List of [cid1, cid2, weight] triples.
   * @param string $cacheKeyString
   *   Value stored in the cacheKey column of every inserted row.
   *
   * @return array
   *   Dupe pairs with the keys
   *   -srcID
   *   -srcName
   *   -dstID
   *   -dstName
   *   -weight
   *   -canMerge
   *   Empty when $foundDupes is empty.
   */
  public static function parseAndStoreDupePairs($foundDupes, $cacheKeyString) {
    $mainContacts = [];
    // With no pairs there is nothing to look up or store. Returning here also
    // avoids building an invalid "WHERE id IN ()" clause below.
    if (empty($foundDupes)) {
      return $mainContacts;
    }

    // Collect each contact id once, using the ids as array keys.
    $cids = [];
    foreach ($foundDupes as $dupe) {
      $cids[$dupe[0]] = 1;
      $cids[$dupe[1]] = 1;
    }
    $cidString = implode(', ', array_keys($cids));

    $dao = CRM_Core_DAO::executeQuery("SELECT id, display_name FROM civicrm_contact WHERE id IN ($cidString) ORDER BY sort_name");
    $displayNames = [];
    while ($dao->fetch()) {
      $displayNames[$dao->id] = $dao->display_name;
    }

    $userId = CRM_Core_Session::getLoggedInContactID();
    foreach ($foundDupes as $dupes) {
      $srcID = $dupes[1];
      $dstID = $dupes[0];
      // The logged in user should never be the src (ie. the contact to be removed).
      if ($srcID == $userId) {
        $srcID = $dstID;
        $dstID = $userId;
      }

      $mainContacts[] = $row = [
        'dstID' => (int) $dstID,
        'dstName' => $displayNames[$dstID],
        'srcID' => (int) $srcID,
        'srcName' => $displayNames[$srcID],
        'weight' => $dupes[2],
        'canMerge' => TRUE,
      ];

      CRM_Core_DAO::executeQuery("INSERT INTO civicrm_prevnext_cache (entity_table, entity_id1, entity_id2, cacheKey, data) VALUES
        ('civicrm_contact', %1, %2, %3, %4)", [
          1 => [$dstID, 'Integer'],
          2 => [$srcID, 'Integer'],
          3 => [$cacheKeyString, 'String'],
          4 => [serialize($row), 'String'],
        ]
      );
    }
    return $mainContacts;
  }

}