Merge pull request #5169 from PalanteJon/CRM-15745
[civicrm-core.git] / CRM / Dedupe / Finder.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 /**
37 * The CiviCRM duplicate discovery engine is based on an
38 * algorithm designed by David Strauss <david@fourkitchens.com>.
39 */
class CRM_Dedupe_Finder {

  /**
   * Find possible duplicate contact pairs using the given rule group.
   *
   * The search is limited to dupes of $cids when that list is provided.
   *
   * @param int $rgid
   *   Rule group id.
   * @param array $cids
   *   Contact ids to limit the search to.
   *
   * @return array
   *   Flat list of array(cid1, cid2, weight) dupe triples.
   */
  public static function dupes($rgid, $cids = array()) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->id = $rgid;
    $rgBao->contactIds = $cids;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("Dedupe rule not found for selected contacts");
    }

    // The rule group fills its working table; thresholdQuery() is then run
    // against it and every fetched row becomes one (id1, id2, weight) triple.
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery());
    $dupes = array();
    while ($dao->fetch()) {
      $dupes[] = array($dao->id1, $dao->id2, $dao->weight);
    }
    // Clean up the working table once the results have been read.
    $dao->query($rgBao->tableDropQuery());

    return $dupes;
  }

  /**
   * Return an array of possible dupes, based on the provided array of
   * params, using the default rule group for the given contact type and
   * usage.
   *
   * check_permission is a boolean flag to indicate if permission should be considered.
   * default is to always check permissioning but public pages for example might not want
   * permission to be checked for anonymous users. Refer CRM-6211. We might be breaking
   * Multi-Site dedupe for public pages.
   *
   * @param array $params
   *   Array of params of the form $params[$table][$field] == $value.
   * @param string $ctype
   *   Contact type to match against.
   * @param string $used
   *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
   * @param array $except
   *   Array of contacts that shouldn't be considered dupes.
   * @param int $ruleGroupID
   *   The id of the dedupe rule we should be using.
   *
   * @return array
   *   Matching contact ids, indexed sequentially from 0.
   */
  public static function dupesByParams(
    $params,
    $ctype,
    $used = 'Unsupervised',
    $except = array(),
    $ruleGroupID = NULL
  ) {
    // If $params is empty there is zero reason to proceed.
    if (!$params) {
      return array();
    }

    // Prefer the explicitly requested rule group, but only if it exists for
    // this contact type.
    $foundByID = FALSE;
    if ($ruleGroupID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->id = $ruleGroupID;
      $rgBao->contact_type = $ctype;
      if ($rgBao->find(TRUE)) {
        $foundByID = TRUE;
      }
    }

    // Otherwise fall back to the rule group matching contact type and usage.
    if (!$foundByID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->contact_type = $ctype;
      $rgBao->used = $used;
      if (!$rgBao->find(TRUE)) {
        CRM_Core_Error::fatal("$used rule for $ctype does not exist");
      }
    }
    // Permission checking is on unless the caller explicitly passed a value.
    $params['check_permission'] = CRM_Utils_Array::value('check_permission', $params, TRUE);

    $rgBao->params = $params;
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($params['check_permission']));
    $dupes = array();
    while ($dao->fetch()) {
      // Skip rows without a usable (non-empty) contact id.
      if (isset($dao->id) && $dao->id) {
        $dupes[] = $dao->id;
      }
    }
    $dao->query($rgBao->tableDropQuery());

    // array_diff() preserves keys, so dropping an excluded contact can leave
    // the result without an element 0; re-index so callers can rely on a
    // plain sequential list.
    return array_values(array_diff($dupes, $except));
  }

  /**
   * Find possible duplicate contact pairs among the members of a group.
   *
   * @param int $rgid
   *   Rule group id.
   * @param int $gid
   *   Contact group id (currently, works only with non-smart groups).
   *
   * @return array
   *   Flat list of array(cid1, cid2, weight) dupe triples; empty when the
   *   group has no members.
   */
  public static function dupesInGroup($rgid, $gid) {
    // The member contact ids are the keys of the getMember() result.
    $cids = array_keys(CRM_Contact_BAO_Group::getMember($gid));
    if (!empty($cids)) {
      return self::dupes($rgid, $cids);
    }
    return array();
  }

  /**
   * Return dupes of a given contact, using the default rule group (of a provided usage).
   *
   * @param int $cid
   *   Contact id of the given contact.
   * @param string $used
   *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
   * @param string $ctype
   *   Contact type of the given contact.
   *
   * @return array
   *   Array of dupe contact_ids.
   */
  public static function dupesOfContact($cid, $used = 'Unsupervised', $ctype = NULL) {
    // if not provided, fetch the contact type from the database
    if (!$ctype) {
      $dao = new CRM_Contact_DAO_Contact();
      $dao->id = $cid;
      if (!$dao->find(TRUE)) {
        CRM_Core_Error::fatal("contact id of $cid does not exist");
      }
      $ctype = $dao->contact_type;
    }
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->used = $used;
    $rgBao->contact_type = $ctype;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("$used rule for $ctype does not exist");
    }
    $dupes = self::dupes($rgBao->id, array($cid));

    // Each triple may list $cid on either side; collect the other side.
    $result = array();
    foreach ($dupes as $dupe) {
      if ($dupe[0] == $cid) {
        $result[] = $dupe[1];
      }
      elseif ($dupe[1] == $cid) {
        $result[] = $dupe[0];
      }
    }
    return $result;
  }

  /**
   * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
   * object into a valid $params array for dedupe
   *
   * @param array $fields
   *   Contact structure from formRule().
   * @param string $ctype
   *   Contact type of the given contact.
   *
   * @return array
   *   Valid $params array for dedupe, of the form $params[$table][$field].
   */
  public static function formatParams($fields, $ctype) {
    $flat = array();
    CRM_Utils_Array::flatten($fields, $flat);

    // FIXME: This may no longer be necessary - check inputs
    $replace_these = array(
      'individual_prefix' => 'prefix_id',
      'individual_suffix' => 'suffix_id',
      'gender' => 'gender_id',
    );
    foreach (array('individual_suffix', 'individual_prefix', 'gender') as $name) {
      if (!empty($fields[$name])) {
        $flat[$replace_these[$name]] = $flat[$name];
        unset($flat[$name]);
      }
    }

    // handle {birth,deceased}_date
    foreach (array('birth_date', 'deceased_date') as $date) {
      if (!empty($fields[$date])) {
        $flat[$date] = $fields[$date];
        // An array-style date is first collapsed by CRM_Utils_Date::format().
        if (is_array($flat[$date])) {
          $flat[$date] = CRM_Utils_Date::format($flat[$date]);
        }
        $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
      }
    }

    if (!empty($flat['contact_source'])) {
      $flat['source'] = $flat['contact_source'];
      unset($flat['contact_source']);
    }

    // handle preferred_communication_method
    // Only an array can be processed here; the is_array() guard keeps a
    // scalar value from being passed to array_intersect().
    if (!empty($fields['preferred_communication_method']) && is_array($fields['preferred_communication_method'])) {
      // Keep the entries whose value is '1', then take their keys.
      $methods = array_intersect($fields['preferred_communication_method'], array('1'));
      $methods = array_keys($methods);
      sort($methods);
      if ($methods) {
        $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
      }
    }

    // handle custom data
    $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, CRM_Core_DAO::$_nullObject, NULL, -1);
    CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
    foreach ($tree as $key => $cg) {
      // Only integer-keyed entries of the tree are processed as groups.
      if (!is_int($key)) {
        continue;
      }
      foreach ($cg['fields'] as $cf) {
        $flat[$cf['column_name']] = CRM_Utils_Array::value('data', $cf['customValue']);
      }
    }

    // if the key is dotted, keep just the last part of it
    foreach ($flat as $key => $value) {
      if (substr_count($key, '.')) {
        $last = explode('.', $key);
        $last = array_pop($last);
        // make sure the first occurrence is kept, not the last
        if (!isset($flat[$last])) {
          $flat[$last] = $value;
        }
        unset($flat[$key]);
      }
    }

    // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
    // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
    // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
    foreach ($flat as $key => $value) {
      $matches = array();
      if (preg_match('/(.*)-(\d+|Primary)$/', $key, $matches)) {
        $flat[$matches[1]] = $value;
        unset($flat[$key]);
      }
    }

    $params = array();
    $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
    if (is_array($supportedFields)) {
      // The loop variable is deliberately not called $fields, so the method
      // parameter is not overwritten.
      foreach ($supportedFields as $table => $tableFields) {
        if ($table == 'civicrm_address') {
          // for matching on civicrm_address fields, we also need the location_type_id
          $tableFields['location_type_id'] = '';
          // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
          $fixes = array(
            'address_name' => 'name',
            'country' => 'country_id',
            'state_province' => 'state_province_id',
            'county' => 'county_id',
          );
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        // Copy every supported field that has a non-empty flattened value.
        foreach ($tableFields as $field => $title) {
          if (!empty($flat[$field])) {
            $params[$table][$field] = $flat[$field];
          }
        }
      }
    }
    return $params;
  }

}