HR-402: fix for notices on case status
[civicrm-core.git] / CRM / Dedupe / Finder.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.5 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 /**
37 * The CiviCRM duplicate discovery engine is based on an
38 * algorithm designed by David Strauss <david@fourkitchens.com>.
39 */
40 class CRM_Dedupe_Finder {
41
/**
 * Find pairs of possible duplicate contacts using the given rule group.
 *
 * $cids is handed to the rule group as its contactIds so the search can be
 * limited to dupes of those contacts when the list is provided.
 *
 * @param int $rgid rule group id
 * @param array $cids contact ids to limit the search to
 *
 * @return array list of array(cid1, cid2, weight) dupe triples
 */
static function dupes($rgid, $cids = array()) {
  $ruleGroup = new CRM_Dedupe_BAO_RuleGroup();
  $ruleGroup->id = $rgid;
  $ruleGroup->contactIds = $cids;
  $ruleFound = $ruleGroup->find(TRUE);
  if (!$ruleFound) {
    CRM_Core_Error::fatal("Dedupe rule not found for selected contacts");
  }

  // fillTable() must run before the threshold query is issued.
  $ruleGroup->fillTable();
  $matches = new CRM_Core_DAO();
  $matches->query($ruleGroup->thresholdQuery());

  // Each fetched row contributes one (id1, id2, weight) triple.
  $triples = array();
  while ($matches->fetch()) {
    $triples[] = array($matches->id1, $matches->id2, $matches->weight);
  }

  // Run the rule group's drop query once the rows have been read.
  $matches->query($ruleGroup->tableDropQuery());

  return $triples;
}
71
/**
 * Return the ids of contacts that look like duplicates of the supplied
 * params.
 *
 * The rule group identified by $ruleGroupID is used when it exists for the
 * given contact type; otherwise the rule group matching $ctype and $used is
 * looked up, and a fatal error is raised if that one does not exist either.
 *
 * $params['check_permission'] is a boolean flag indicating whether
 * permission should be considered. It defaults to TRUE (always check
 * permissioning), but public pages for example might not want permission to
 * be checked for anonymous users. Refer CRM-6211. We might be breaking
 * Multi-Site dedupe for public pages.
 *
 * @param array $params array of params of the form $params[$table][$field] == $value
 * @param string $ctype contact type to match against
 * @param string $used dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General')
 * @param array $except array of contacts that shouldn't be considered dupes
 * @param int $ruleGroupID the id of the dedupe rule we should be using
 *
 * @return array matching contact ids, with $except removed, indexed from 0
 */
static function dupesByParams($params,
  $ctype,
  $used = 'Unsupervised',
  $except = array(),
  $ruleGroupID = NULL
) {
  // If $params is empty there is zero reason to proceed.
  if (!$params) {
    return array();
  }

  // Prefer the explicitly requested rule group, provided it exists for
  // this contact type.
  $rgBao = NULL;
  if ($ruleGroupID) {
    $requested = new CRM_Dedupe_BAO_RuleGroup();
    $requested->id = $ruleGroupID;
    $requested->contact_type = $ctype;
    if ($requested->find(TRUE)) {
      $rgBao = $requested;
    }
  }

  // Otherwise fall back to the rule group for this contact type and usage.
  if ($rgBao === NULL) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->contact_type = $ctype;
    $rgBao->used = $used;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("$used rule for $ctype does not exist");
    }
  }
  $params['check_permission'] = CRM_Utils_Array::value('check_permission', $params, TRUE);

  $rgBao->params = $params;
  $rgBao->fillTable();
  $dao = new CRM_Core_DAO();
  $dao->query($rgBao->thresholdQuery($params['check_permission']));
  $dupes = array();
  while ($dao->fetch()) {
    // Skip rows that carry no usable contact id.
    if (!empty($dao->id)) {
      $dupes[] = $dao->id;
    }
  }
  $dao->query($rgBao->tableDropQuery());

  // array_diff() preserves the original keys, which leaves gaps whenever an
  // excluded contact is removed; re-index so callers can rely on a plain
  // 0-based list (e.g. reading element 0 as the first match).
  return array_values(array_diff($dupes, $except));
}
134
/**
 * Find possible duplicate contacts among the members of a contact group.
 *
 * The member ids are the keys of CRM_Contact_BAO_Group::getMember($gid).
 * When there are none, an empty array is returned without running the
 * dedupe search.
 *
 * @param int $rgid rule group id
 * @param int $gid contact group id (currently, works only with non-smart groups)
 *
 * @return array array of (cid1, cid2, weight) dupe triples
 */
static function dupesInGroup($rgid, $gid) {
  $memberIds = array_keys(CRM_Contact_BAO_Group::getMember($gid));

  // Nothing to compare when the group has no members.
  if (empty($memberIds)) {
    return array();
  }

  return self::dupes($rgid, $memberIds);
}
150
/**
 * List the contacts that are possible dupes of one given contact, using the
 * rule group that matches the contact's type and the requested usage.
 *
 * A fatal error is raised when the contact cannot be found (only checked
 * when $ctype is not supplied) or when no matching rule group exists.
 *
 * @param int $cid contact id of the given contact
 * @param string $used dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General')
 * @param string $ctype contact type of the given contact
 *
 * @return array array of dupe contact_ids
 */
static function dupesOfContact($cid, $used = 'Unsupervised', $ctype = NULL) {
  // Look the contact type up in the database when the caller did not pass it.
  if (!$ctype) {
    $contact = new CRM_Contact_DAO_Contact();
    $contact->id = $cid;
    if (!$contact->find(TRUE)) {
      CRM_Core_Error::fatal("contact id of $cid does not exist");
    }
    $ctype = $contact->contact_type;
  }

  $ruleGroup = new CRM_Dedupe_BAO_RuleGroup();
  $ruleGroup->used = $used;
  $ruleGroup->contact_type = $ctype;
  if (!$ruleGroup->find(TRUE)) {
    CRM_Core_Error::fatal("$used rule for $ctype does not exist");
  }

  // Each triple is (cid1, cid2, weight); collect whichever side is not $cid.
  $others = array();
  foreach (self::dupes($ruleGroup->id, array($cid)) as $triple) {
    list($first, $second) = $triple;
    // Loose comparison on purpose: the two sides may differ in type
    // (e.g. a numeric string versus an integer $cid).
    if ($first == $cid) {
      $others[] = $second;
      continue;
    }
    if ($second == $cid) {
      $others[] = $first;
    }
  }

  return $others;
}
190
/**
 * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
 * object into a valid $params array for dedupe.
 *
 * The input is flattened, a handful of legacy field names are mapped to
 * their column names, dates and preferred communication methods are
 * normalised, custom field values are added under their column names, and
 * dotted / "-digit" / "-Primary" key decorations are stripped. Finally only
 * the non-empty fields supported by the dedupe rules for $ctype are kept,
 * grouped by table.
 *
 * @param array $fields contact structure from formRule()
 * @param string $ctype contact type of the given contact
 *
 * @return array valid $params array for dedupe, of the form $params[$table][$field] = $value
 */
static function formatParams($fields, $ctype) {
  $flat = array();
  CRM_Utils_Array::flatten($fields, $flat);

  // FIXME: This may no longer be necessary - check inputs
  $replace_these = array(
    'individual_prefix' => 'prefix_id',
    'individual_suffix' => 'suffix_id',
    'gender' => 'gender_id',
  );
  foreach (array('individual_suffix', 'individual_prefix', 'gender') as $name) {
    // Only rename when the flattened value really exists; a non-scalar
    // $fields[$name] would otherwise trigger an undefined index notice.
    if (!empty($fields[$name]) && isset($flat[$name])) {
      $flat[$replace_these[$name]] = $flat[$name];
      unset($flat[$name]);
    }
  }

  // handle {birth,deceased}_date
  foreach (array('birth_date', 'deceased_date') as $date) {
    if (!empty($fields[$date])) {
      $flat[$date] = $fields[$date];
      // Array-style dates are formatted first, then processed like any other date.
      if (is_array($flat[$date])) {
        $flat[$date] = CRM_Utils_Date::format($flat[$date]);
      }
      $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
    }
  }

  if (!empty($flat['contact_source'])) {
    $flat['source'] = $flat['contact_source'];
    unset($flat['contact_source']);
  }

  // handle preferred_communication_method
  // The value must be an array of method => flag pairs; anything else
  // (NULL, a plain string) is skipped instead of being fed to
  // array_intersect(), which would raise a warning / type error.
  if (isset($fields['preferred_communication_method'])
    && is_array($fields['preferred_communication_method'])
  ) {
    // Keep the keys of the entries whose value is '1', in sorted order.
    $methods = array_intersect($fields['preferred_communication_method'], array('1'));
    $methods = array_keys($methods);
    sort($methods);
    if ($methods) {
      $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
    }
  }

  // handle custom data
  $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, CRM_Core_DAO::$_nullObject, NULL, -1);
  CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
  foreach ($tree as $key => $cg) {
    // Only the integer-keyed entries of the tree are processed here.
    if (!is_int($key)) {
      continue;
    }
    if (empty($cg['fields']) || !is_array($cg['fields'])) {
      continue;
    }
    foreach ($cg['fields'] as $cf) {
      // A field without a customValue entry yields the same NULL as before,
      // just without the undefined index notice.
      $customValue = isset($cf['customValue']) ? $cf['customValue'] : NULL;
      $flat[$cf['column_name']] = CRM_Utils_Array::value('data', $customValue);
    }
  }

  // if the key is dotted, keep just the last part of it
  foreach ($flat as $key => $value) {
    if (substr_count($key, '.')) {
      $last = explode('.', $key);
      $last = array_pop($last);
      // make sure the first occurrence is kept, not the last
      if (!isset($flat[$last])) {
        $flat[$last] = $value;
      }
      unset($flat[$key]);
    }
  }

  // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
  // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
  // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
  foreach ($flat as $key => $value) {
    $matches = array();
    if (preg_match('/(.*)-(\d+|Primary)$/', $key, $matches)) {
      $flat[$matches[1]] = $value;
      unset($flat[$key]);
    }
  }

  $params = array();
  $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
  if (is_array($supportedFields)) {
    // $tableFields (not $fields) so the method parameter is not shadowed.
    foreach ($supportedFields as $table => $tableFields) {
      if ($table == 'civicrm_address') {
        // for matching on civicrm_address fields, we also need the location_type_id
        $tableFields['location_type_id'] = '';
        // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
        $fixes = array(
          'address_name' => 'name',
          'country' => 'country_id',
          'state_province' => 'state_province_id',
          'county' => 'county_id',
        );
        foreach ($fixes as $orig => $target) {
          if (!empty($flat[$orig])) {
            $params[$table][$target] = $flat[$orig];
          }
        }
      }
      // Copy over every supported field that has a non-empty flattened value.
      foreach (array_keys($tableFields) as $field) {
        if (!empty($flat[$field])) {
          $params[$table][$field] = $flat[$field];
        }
      }
    }
  }
  return $params;
}
307 }
308