CRM-18522 tests to attempt to reproduce the reported sql error.
[civicrm-core.git] / CRM / Dedupe / Finder.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.7 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2016 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2016
32 * $Id$
33 *
34 */
35
36 /**
37 * The CiviCRM duplicate discovery engine is based on an
38 * algorithm designed by David Strauss <david@fourkitchens.com>.
39 */
40 class CRM_Dedupe_Finder {
41
/**
 * Find possible duplicate contact pairs using the given dedupe rule group.
 *
 * The rule group is loaded by id, its fillTable() step is run, and every row
 * produced by its threshold query is collected. The query returned by the
 * rule group's tableDropQuery() is executed before returning.
 *
 * @param int $rgid
 *   Rule group id.
 * @param array $cids
 *   Contact ids to limit the search to.
 * @param bool $checkPermissions
 *   Respect logged in user permissions.
 * @param int $limit
 *   Optional limit. Only used when no contact ids are supplied: the search is
 *   then restricted to at most this many contacts of the rule group's contact
 *   type, as returned by the Contact.get API.
 *
 * @return array
 *   List of array(cid1, cid2, weight) dupe triples.
 *
 * @throws CiviCRM_API3_Exception
 * @throws Exception
 */
public static function dupes($rgid, $cids = array(), $checkPermissions = TRUE, $limit = NULL) {
  $ruleGroup = new CRM_Dedupe_BAO_RuleGroup();
  $ruleGroup->id = $rgid;
  $ruleGroup->contactIds = $cids;
  if (!$ruleGroup->find(TRUE)) {
    CRM_Core_Error::fatal("Dedupe rule not found for selected contacts");
  }

  // Without explicit contact ids, a limit means "only look at this many
  // contacts of the rule group's contact type".
  if (empty($ruleGroup->contactIds) && !empty($limit)) {
    $apiParams = array(
      'return' => 'id',
      'contact_type' => $ruleGroup->contact_type,
      'options' => array('limit' => $limit),
    );
    $apiResult = civicrm_api3('Contact', 'get', $apiParams);
    $ruleGroup->contactIds = array_keys($apiResult['values']);
  }

  $ruleGroup->fillTable();

  $query = new CRM_Core_DAO();
  $thresholdSql = $ruleGroup->thresholdQuery($checkPermissions);
  $query->query($thresholdSql);

  $triples = array();
  while ($query->fetch()) {
    $triples[] = array($query->id1, $query->id2, $query->weight);
  }

  // Clean up whatever fillTable() created, using the rule group's own drop statement.
  $query->query($ruleGroup->tableDropQuery());

  return $triples;
}
91
/**
 * Return an array of possible dupes, based on the provided array of
 * params, using either the rule group identified by $ruleGroupID or the
 * rule group found for the given contact type and usage.
 *
 * $params['check_permission'] is a boolean flag to indicate if permission
 * should be considered. The default is to always check permissions, but public
 * pages for example might not want permission to be checked for anonymous
 * users. Refer CRM-6211. We might be breaking Multi-Site dedupe for public pages.
 *
 * @param array $params
 *   Array of params of the form $params[$table][$field] == $value.
 * @param string $ctype
 *   Contact type to match against.
 * @param string $used
 *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
 *   Only consulted when no rule group is found via $ruleGroupID.
 * @param array $except
 *   Array of contacts that shouldn't be considered dupes.
 * @param int $ruleGroupID
 *   The id of the dedupe rule we should be using.
 *
 * @return array
 *   Matching contact ids, as a list indexed from 0 with the ids in $except
 *   removed. Empty when $params is empty.
 */
public static function dupesByParams(
  $params,
  $ctype,
  $used = 'Unsupervised',
  $except = array(),
  $ruleGroupID = NULL
) {
  // If $params is empty there is zero reason to proceed.
  if (!$params) {
    return array();
  }

  $foundByID = FALSE;
  if ($ruleGroupID) {
    // Prefer the explicitly requested rule group, provided it exists for this contact type.
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->id = $ruleGroupID;
    $rgBao->contact_type = $ctype;
    if ($rgBao->find(TRUE)) {
      $foundByID = TRUE;
    }
  }

  if (!$foundByID) {
    // Fall back to the rule group registered for this contact type and usage.
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->contact_type = $ctype;
    $rgBao->used = $used;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("$used rule for $ctype does not exist");
    }
  }
  $params['check_permission'] = CRM_Utils_Array::value('check_permission', $params, TRUE);

  $rgBao->params = $params;
  $rgBao->fillTable();
  $dao = new CRM_Core_DAO();
  $dao->query($rgBao->thresholdQuery($params['check_permission']));
  $dupes = array();
  while ($dao->fetch()) {
    if (!empty($dao->id)) {
      $dupes[] = $dao->id;
    }
  }
  $dao->query($rgBao->tableDropQuery());

  // array_diff() preserves the keys of its first argument, so removing an
  // excluded contact would leave a gap (e.g. no element 0). Reindex so that
  // callers can safely treat the result as a plain list.
  return array_values(array_diff($dupes, $except));
}
161
/**
 * Find possible duplicate contact pairs among the members of a group.
 *
 * @param int $rgid
 *   Rule group id.
 * @param int $gid
 *   Contact group id (currently, works only with non-smart groups).
 * @param int $limit
 *   Optional value handed to CRM_Contact_BAO_Group::getMember() as its second
 *   argument.
 *
 * @return array
 *   List of array(cid1, cid2, weight) dupe triples; empty when the group
 *   lookup yields no contacts.
 */
public static function dupesInGroup($rgid, $gid, $limit = NULL) {
  // NOTE(review): $limit is passed as the second positional argument of
  // getMember(); confirm that this position really is the limit parameter.
  $members = CRM_Contact_BAO_Group::getMember($gid, $limit);
  $memberIds = array_keys($members);

  if (empty($memberIds)) {
    return array();
  }

  return self::dupes($rgid, $memberIds);
}
181
/**
 * Return the ids of contacts that are possible dupes of one given contact.
 *
 * The rule group is looked up by usage and contact type; the contact type is
 * read from the contact record when it is not supplied.
 *
 * @param int $cid
 *   Contact id of the given contact.
 * @param string $used
 *   Dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General').
 * @param string $ctype
 *   Contact type of the given contact.
 *
 * @return array
 *   List of dupe contact ids, i.e. the "other side" of every pair that
 *   contains $cid.
 */
public static function dupesOfContact($cid, $used = 'Unsupervised', $ctype = NULL) {
  // Look the contact type up in the database when the caller did not pass it.
  if (!$ctype) {
    $contact = new CRM_Contact_DAO_Contact();
    $contact->id = $cid;
    if (!$contact->find(TRUE)) {
      CRM_Core_Error::fatal("contact id of $cid does not exist");
    }
    $ctype = $contact->contact_type;
  }

  $ruleGroup = new CRM_Dedupe_BAO_RuleGroup();
  $ruleGroup->used = $used;
  $ruleGroup->contact_type = $ctype;
  if (!$ruleGroup->find(TRUE)) {
    CRM_Core_Error::fatal("$used rule for $ctype does not exist");
  }

  // Each pair is (cid1, cid2, weight); keep whichever side is not $cid.
  $others = array();
  foreach (self::dupes($ruleGroup->id, array($cid)) as $pair) {
    list($first, $second) = $pair;
    if ($first == $cid) {
      $others[] = $second;
    }
    elseif ($second == $cid) {
      $others[] = $first;
    }
  }

  return $others;
}
225
/**
 * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
 * object into a valid $params array for dedupe.
 *
 * The submitted values are flattened, a handful of legacy field names are
 * mapped to their current names, dotted prefixes and "-<digits>" / "-Primary" /
 * "-Primary-<digits>" postfixes are stripped from the keys, and finally only
 * the fields listed by CRM_Dedupe_BAO_RuleGroup::supportedFields() are kept.
 *
 * @param array $fields
 *   Contact structure from formRule().
 * @param string $ctype
 *   Contact type of the given contact.
 *
 * @return array
 *   Valid $params array for dedupe, of the form $params[$table][$field] = $value.
 *   Only non-empty values are included.
 */
public static function formatParams($fields, $ctype) {
  $flat = array();
  CRM_Utils_Array::flatten($fields, $flat);

  // FIXME: This may no longer be necessary - check inputs
  $replace_these = array(
    'individual_prefix' => 'prefix_id',
    'individual_suffix' => 'suffix_id',
    'gender' => 'gender_id',
  );
  foreach (array('individual_suffix', 'individual_prefix', 'gender') as $name) {
    if (!empty($fields[$name])) {
      $flat[$replace_these[$name]] = $flat[$name];
      unset($flat[$name]);
    }
  }

  // handle {birth,deceased}_date
  foreach (array(
    'birth_date',
    'deceased_date',
  ) as $date) {
    if (!empty($fields[$date])) {
      $flat[$date] = $fields[$date];
      // Array-style dates are first collapsed into a single value.
      if (is_array($flat[$date])) {
        $flat[$date] = CRM_Utils_Date::format($flat[$date]);
      }
      $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
    }
  }

  if (!empty($flat['contact_source'])) {
    $flat['source'] = $flat['contact_source'];
    unset($flat['contact_source']);
  }

  // handle preferred_communication_method: keep the keys of the entries whose
  // value is '1', sorted, wrapped in and joined by the DAO value separator
  if (!empty($fields['preferred_communication_method'])) {
    $methods = array_intersect($fields['preferred_communication_method'], array('1'));
    $methods = array_keys($methods);
    sort($methods);
    if ($methods) {
      $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
    }
  }

  // handle custom data
  $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, CRM_Core_DAO::$_nullObject, NULL, -1);
  CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
  foreach ($tree as $key => $cg) {
    // Only integer-keyed entries of the tree are treated as custom groups.
    if (!is_int($key)) {
      continue;
    }
    foreach ($cg['fields'] as $cf) {
      $flat[$cf['column_name']] = CRM_Utils_Array::value('data', $cf['customValue']);
    }
  }

  // if the key is dotted, keep just the last part of it
  foreach ($flat as $key => $value) {
    if (substr_count($key, '.')) {
      $last = explode('.', $key);
      $last = array_pop($last);
      // make sure the first occurrence is kept, not the last
      if (!isset($flat[$last])) {
        $flat[$last] = $value;
      }
      unset($flat[$key]);
    }
  }

  // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
  // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
  // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
  // NOTE(review): unlike the dotted-key loop above, a later postfixed key
  // overwrites an earlier value for the same base name - confirm this is intended.
  foreach ($flat as $key => $value) {
    $matches = array();
    // "\d+" (one or more digits) rather than the character class "[\d+]",
    // which only accepted a single digit or a literal "+" and therefore
    // mangled keys such as "phone-Primary-12" into "phone-Primary".
    if (preg_match('/(.*)-(Primary-\d+)$|(.*)-(\d+|Primary)$/', $key, $matches)) {
      // Group 3 is only reported when the second alternative matched;
      // otherwise the base name was captured by group 1.
      $base = isset($matches[3]) ? $matches[3] : $matches[1];
      $flat[$base] = $value;
      unset($flat[$key]);
    }
  }

  $params = array();
  $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
  if (is_array($supportedFields)) {
    foreach ($supportedFields as $table => $tableFields) {
      if ($table == 'civicrm_address') {
        // for matching on civicrm_address fields, we also need the location_type_id
        $tableFields['location_type_id'] = '';
        // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
        $fixes = array(
          'address_name' => 'name',
          'country' => 'country_id',
          'state_province' => 'state_province_id',
          'county' => 'county_id',
        );
        foreach ($fixes as $orig => $target) {
          if (!empty($flat[$orig])) {
            $params[$table][$target] = $flat[$orig];
          }
        }
      }
      foreach ($tableFields as $field => $title) {
        if (!empty($flat[$field])) {
          $params[$table][$field] = $flat[$field];
        }
      }
    }
  }
  return $params;
}
350
351 }