3 +--------------------------------------------------------------------+
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2020 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
31 * @copyright CiviCRM LLC https://civicrm.org/licensing
37 * The CiviCRM duplicate discovery engine is based on an
38 * algorithm designed by David Strauss <david@fourkitchens.com>.
40 class CRM_Dedupe_Finder
{
43 * Return a list of possible dupe pairs as (cid1, cid2, weight) triples
44 * - limited to dupes of $cids if provided.
49 * Contact ids to limit the search to.
51 * @param bool $checkPermissions
52 * Respect logged in user permissions.
55 * Array of (cid1, cid2, weight) dupe triples
59 public static function dupes($rgid, $cids = [], $checkPermissions = TRUE) {
// Runs a dedupe rule group's threshold query and collects every result row
// as an [id1, id2, weight] entry in $dupes.
60 $rgBao = new CRM_Dedupe_BAO_RuleGroup();
// Hand the caller-supplied contact ids to the rule group BAO.
62 $rgBao->contactIds
= $cids;
// A falsy result from find(TRUE) is treated as "rule group not found".
63 if (!$rgBao->find(TRUE)) {
64 throw new CRM_Core_Exception('Dedupe rule not found for selected contacts');
// $checkPermissions is passed straight through to the threshold query builder.
68 $dao = new CRM_Core_DAO();
69 $dao->query($rgBao->thresholdQuery($checkPermissions));
// Append one (id1, id2, weight) triple per fetched row.
71 while ($dao->fetch()) {
72 $dupes[] = [$dao->id1
, $dao->id2
, $dao->weight
];
// Execute the rule group's table-drop query on the same DAO
// (presumably cleaning up a working table - confirm in CRM_Dedupe_BAO_RuleGroup).
74 $dao->query($rgBao->tableDropQuery());
80 * Return an array of possible dupes, based on the provided array of
81 * params, using the default rule group for the given contact type and
84 * check_permission is a boolean flag indicating whether permissions should be considered.
85 * The default is to always check permissions, but public pages, for example, might not want
86 * permissions to be checked for anonymous users. Refer to CRM-6211. We might be breaking
87 * Multi-Site dedupe for public pages.
89 * @param array $params
90 * Array of params of the form $params[$table][$field] == $value.
91 * @param string $ctype
92 * Contact type to match against.
94 * Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
95 * @param array $except
96 * Array of contacts that shouldn't be considered dupes.
97 * @param int $ruleGroupID
98 * The id of the dedupe rule we should be using.
101 * matching contact ids
102 * @throws \CRM_Core_Exception
104 public static function dupesByParams(
107 $used = 'Unsupervised',
111 // If $params is empty there is zero reason to proceed.
// Read the optional 'check_permission' entry from $params, defaulting to TRUE.
115 $checkPermission = CRM_Utils_Array
::value('check_permission', $params, TRUE);
116 // This may no longer be required - see https://github.com/civicrm/civicrm-core/pull/13176
// array_filter() without a callback drops every falsy entry from $params.
117 $params = array_filter($params);
// First lookup: a rule group matching both $ruleGroupID and the contact type.
121 $rgBao = new CRM_Dedupe_BAO_RuleGroup();
122 $rgBao->id
= $ruleGroupID;
123 $rgBao->contact_type
= $ctype;
124 if ($rgBao->find(TRUE)) {
// Second lookup: a fresh BAO keyed on contact type and usage; if this one
// finds nothing, an exception naming the usage and contact type is thrown.
130 $rgBao = new CRM_Dedupe_BAO_RuleGroup();
131 $rgBao->contact_type
= $ctype;
132 $rgBao->used
= $used;
133 if (!$rgBao->find(TRUE)) {
134 throw new CRM_Core_Exception("$used rule for $ctype does not exist");
// Strip every non-digit character from the phone_numeric param before matching.
138 if (isset($params['civicrm_phone']['phone_numeric'])) {
139 $orig = $params['civicrm_phone']['phone_numeric'];
140 $params['civicrm_phone']['phone_numeric'] = preg_replace('/[^\d]/', '', $orig);
// Hand the cleaned params to the rule group, then run its threshold query.
142 $rgBao->params
= $params;
144 $dao = new CRM_Core_DAO();
145 $dao->query($rgBao->thresholdQuery($checkPermission));
// Only rows whose id is set and truthy pass the check below.
147 while ($dao->fetch()) {
148 if (isset($dao->id
) && $dao->id
) {
// Run the table-drop query, then return $dupes minus the values listed in $except.
152 $dao->query($rgBao->tableDropQuery());
153 return array_diff($dupes, $except);
157 * Return a list of (cid1, cid2, weight) dupe triples for contacts in the given group.
164 * @param int $searchLimit
165 * Limit for the number of contacts to be used for comparison.
166 * The search methodology finds all matches for the searchedContacts so this limits
167 * the number of searched contacts, not the matches found.
170 * array of (cid1, cid2, weight) dupe triples
171 * @throws \CiviCRM_API3_Exception
173 public static function dupesInGroup($rgid, $gid, $searchLimit = 0) {
// The array keys of the getMember() result are used as the contact ids to search;
// $searchLimit is forwarded to getMember() as its third argument.
174 $cids = array_keys(CRM_Contact_BAO_Group
::getMember($gid, TRUE, $searchLimit));
// Delegate to dupes(); its $checkPermissions argument is left at its default (TRUE).
176 return self
::dupes($rgid, $cids);
182 * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
183 * object into a valid $params array for dedupe
185 * @param array $fields
186 * Contact structure from formRule().
187 * @param string $ctype
188 * Contact type of the given contact.
191 * valid $params array for dedupe
192 * @throws \CRM_Core_Exception
194 public static function formatParams($fields, $ctype) {
// Flatten $fields into $flat (flatten() presumably fills $flat by reference - confirm),
// normalise a number of keys, then build $params[$table][$field] for dedupe matching.
196 CRM_Utils_Array
::flatten($fields, $flat);
198 // FIXME: This may no longer be necessary - check inputs
200 'individual_prefix' => 'prefix_id',
201 'individual_suffix' => 'suffix_id',
202 'gender' => 'gender_id',
// For each non-empty prefix/suffix/gender field, copy its flattened value onto the mapped *_id key.
204 foreach (['individual_suffix', 'individual_prefix', 'gender'] as $name) {
205 if (!empty($fields[$name])) {
206 $flat[$replace_these[$name]] = $flat[$name];
211 // handle {birth,deceased}_date
216 if (!empty($fields[$date])) {
217 $flat[$date] = $fields[$date];
// Array-valued dates are converted with CRM_Utils_Date::format().
218 if (is_array($flat[$date])) {
219 $flat[$date] = CRM_Utils_Date
::format($flat[$date]);
221 $flat[$date] = CRM_Utils_Date
::processDate($flat[$date]);
// Rename 'contact_source' to 'source'.
225 if (!empty($flat['contact_source'])) {
226 $flat['source'] = $flat['contact_source'];
227 unset($flat['contact_source']);
230 // handle preferred_communication_method
231 if (!empty($fields['preferred_communication_method'])) {
// Keep only the entries whose value is '1'; their array keys are what gets stored.
232 $methods = array_intersect($fields['preferred_communication_method'], ['1']);
233 $methods = array_keys($methods);
// Store the keys as a VALUE_SEPARATOR-delimited string with a leading and trailing separator.
236 $flat['preferred_communication_method'] = CRM_Core_DAO
::VALUE_SEPARATOR
. implode(CRM_Core_DAO
::VALUE_SEPARATOR
, $methods) . CRM_Core_DAO
::VALUE_SEPARATOR
;
240 // handle custom data
241 $tree = CRM_Core_BAO_CustomGroup
::getTree($ctype, NULL, NULL, -1);
242 CRM_Core_BAO_CustomGroup
::postProcess($tree, $fields, TRUE);
243 foreach ($tree as $key => $cg) {
// Map each custom field's column name to the 'data' entry of its customValue.
247 foreach ($cg['fields'] as $cf) {
248 $flat[$cf['column_name']] = CRM_Utils_Array
::value('data', $cf['customValue']);
252 // if the key is dotted, keep just the last part of it
253 foreach ($flat as $key => $value) {
254 if (substr_count($key, '.')) {
255 $last = explode('.', $key);
256 $last = array_pop($last);
257 // make sure the first occurrence is kept, not the last
258 if (!isset($flat[$last])) {
259 $flat[$last] = $value;
265 // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
266 // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
267 // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
268 foreach ($flat as $key => $value) {
// NOTE(review): in the first alternative, `[\d+]` is a character class matching a single
// digit or a literal '+', not one-or-more digits - confirm whether `\d+` was intended.
270 if (preg_match('/(.*)-(Primary-[\d+])$|(.*)-(\d+|Primary)$/', $key, $matches)) {
// array_filter() drops the empty capture groups, so $return[1] is the key without its suffix.
271 $return = array_values(array_filter($matches));
272 // make sure the first occurrence is kept, not the last
273 $flat[$return[1]] = empty($flat[$return[1]]) ?
$value : $flat[$return[1]];
// Build $params[$table][$field] from $flat, driven by the supported fields for this contact type.
279 $supportedFields = CRM_Dedupe_BAO_RuleGroup
::supportedFields($ctype);
280 if (is_array($supportedFields)) {
281 foreach ($supportedFields as $table => $fields) {
282 if ($table == 'civicrm_address') {
283 // for matching on civicrm_address fields, we also need the location_type_id
284 $fields['location_type_id'] = '';
285 // FIXME: we also need to do some hacking for id and name fields, see CRM-3902’s comments
287 'address_name' => 'name',
288 'country' => 'country_id',
289 'state_province' => 'state_province_id',
290 'county' => 'county_id',
// Copy each non-empty source key in $flat to its mapped target key for this table.
292 foreach ($fixes as $orig => $target) {
293 if (!empty($flat[$orig])) {
294 $params[$table][$target] = $flat[$orig];
298 if ($table === 'civicrm_phone') {
300 'phone' => 'phone_numeric',
// A non-empty 'phone' value is stored under 'phone_numeric'.
302 foreach ($fixes as $orig => $target) {
303 if (!empty($flat[$orig])) {
304 $params[$table][$target] = $flat[$orig];
// Finally copy every supported field that has a non-empty value in $flat.
308 foreach ($fields as $field => $title) {
309 if (!empty($flat[$field])) {
310 $params[$table][$field] = $flat[$field];
319 * Parse duplicate pairs into a standardised array and store in the prev_next_cache.
321 * @param array $foundDupes
322 * @param string $cacheKeyString
325 * Dupe pairs with the keys
333 public static function parseAndStoreDupePairs($foundDupes, $cacheKeyString) {
// Looks up display names for the contacts involved, builds one row per dupe pair,
// stores each row in the prev/next cache and returns the collected rows.
335 foreach ($foundDupes as $dupe) {
// The keys of $cids form the id list for the display-name lookup below.
// NOTE(review): $cidString is interpolated directly into the SQL; this is only safe
// if the keys of $cids are always integers - confirm against how $cids is built.
339 $cidString = implode(', ', array_keys($cids));
341 $dao = CRM_Core_DAO
::executeQuery("SELECT id, display_name FROM civicrm_contact WHERE id IN ($cidString) ORDER BY sort_name");
// Index display names by contact id.
343 while ($dao->fetch()) {
344 $displayNames[$dao->id
] = $dao->display_name
;
// The logged-in contact id is compared against each pair's source contact below.
347 $userId = CRM_Core_Session
::getLoggedInContactID();
348 foreach ($foundDupes as $dupes) {
351 // The logged in user should never be the src (ie. the contact to be removed).
352 if ($srcID == $userId) {
// Each row is appended to $mainContacts and also kept in $row for caching.
357 $mainContacts[] = $row = [
359 'dstName' => $displayNames[$dstID],
361 'srcName' => $displayNames[$srcID],
362 'weight' => $dupes[2],
// Serialize and escape the row, then store it in the prev/next cache under $cacheKeyString.
366 $data = CRM_Core_DAO
::escapeString(serialize($row));
367 CRM_Core_BAO_PrevNextCache
::setItem('civicrm_contact', $dstID, $srcID, $cacheKeyString, $data);
369 return $mainContacts;