Commit | Line | Data |
---|---|---|
6a488035 TO |
1 | <?php |
2 | /* | |
3 | +--------------------------------------------------------------------+ | |
06b69b18 | 4 | | CiviCRM version 4.5 | |
6a488035 | 5 | +--------------------------------------------------------------------+ |
06b69b18 | 6 | | Copyright CiviCRM LLC (c) 2004-2014 | |
6a488035 TO |
7 | +--------------------------------------------------------------------+ |
8 | | This file is a part of CiviCRM. | | |
9 | | | | |
10 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
11 | | under the terms of the GNU Affero General Public License | | |
12 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
13 | | | | |
14 | | CiviCRM is distributed in the hope that it will be useful, but | | |
15 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
17 | | See the GNU Affero General Public License for more details. | | |
18 | | | | |
19 | | You should have received a copy of the GNU Affero General Public | | |
20 | | License and the CiviCRM Licensing Exception along | | |
21 | | with this program; if not, contact CiviCRM LLC | | |
22 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
23 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
24 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
25 | +--------------------------------------------------------------------+ | |
26 | */ | |
27 | ||
28 | /** | |
29 | * | |
30 | * @package CRM | |
06b69b18 | 31 | * @copyright CiviCRM LLC (c) 2004-2014 |
6a488035 TO |
32 | * $Id$ |
33 | * | |
34 | */ | |
35 | ||
/**
 * The CiviCRM duplicate discovery engine is based on an
 * algorithm designed by David Strauss <david@fourkitchens.com>.
 *
 * This class is a collection of static entry points that load a dedupe rule
 * group (CRM_Dedupe_BAO_RuleGroup), ask it to build its working table, run its
 * threshold query and translate the rows into plain PHP arrays.
 */
class CRM_Dedupe_Finder {

  /**
   * Find possible duplicate pairs using the given rule group,
   * limited to dupes of $cids if provided.
   *
   * @param int $rgid rule group id
   * @param array $cids contact ids to limit the search to (passed to the rule group as contactIds)
   *
   * @return array numerically indexed list of array(cid1, cid2, weight) dupe triples
   */
  public static function dupes($rgid, $cids = array()) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->id = $rgid;
    $rgBao->contactIds = $cids;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("Dedupe rule not found for selected contacts");
    }

    // Let the rule group prepare its working table, then read the pairs
    // selected by its threshold query.
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery());
    $dupes = array();
    while ($dao->fetch()) {
      $dupes[] = array($dao->id1, $dao->id2, $dao->weight);
    }
    // Run the rule group's clean-up query for the working table.
    $dao->query($rgBao->tableDropQuery());

    return $dupes;
  }

  /**
   * Return an array of possible dupes, based on the provided array of
   * params, using the default rule group for the given contact type and
   * usage.
   *
   * check_permission is a boolean flag to indicate if permission should be considered.
   * default is to always check permissioning but public pages for example might not want
   * permission to be checked for anonymous users. Refer CRM-6211. We might be breaking
   * Multi-Site dedupe for public pages.
   *
   * If $ruleGroupID is given and a rule group with that id exists for $ctype it
   * is used; otherwise the rule group is looked up by $ctype and $used.
   *
   * @param array $params array of params of the form $params[$table][$field] == $value;
   *   may also carry a 'check_permission' flag (defaults to TRUE when absent)
   * @param string $ctype contact type to match against
   * @param string $used dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General')
   * @param array $except array of contacts that shouldn't be considered dupes
   * @param int $ruleGroupID the id of the dedupe rule we should be using
   *
   * @return array matching contact ids, as a sequentially indexed list (0..n-1)
   */
  public static function dupesByParams($params,
    $ctype,
    $used = 'Unsupervised',
    $except = array(),
    $ruleGroupID = NULL
  ) {
    // If $params is empty there is zero reason to proceed.
    if (!$params) {
      return array();
    }

    // Prefer the explicitly requested rule group, provided it matches $ctype.
    $foundByID = FALSE;
    if ($ruleGroupID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->id = $ruleGroupID;
      $rgBao->contact_type = $ctype;
      if ($rgBao->find(TRUE)) {
        $foundByID = TRUE;
      }
    }

    // Fall back to the rule group registered for this contact type and usage.
    if (!$foundByID) {
      $rgBao = new CRM_Dedupe_BAO_RuleGroup();
      $rgBao->contact_type = $ctype;
      $rgBao->used = $used;
      if (!$rgBao->find(TRUE)) {
        CRM_Core_Error::fatal("$used rule for $ctype does not exist");
      }
    }
    $params['check_permission'] = CRM_Utils_Array::value('check_permission', $params, TRUE);

    $rgBao->params = $params;
    $rgBao->fillTable();
    $dao = new CRM_Core_DAO();
    $dao->query($rgBao->thresholdQuery($params['check_permission']));
    $dupes = array();
    while ($dao->fetch()) {
      if (isset($dao->id) && $dao->id) {
        $dupes[] = $dao->id;
      }
    }
    $dao->query($rgBao->tableDropQuery());

    // array_diff() preserves the keys of $dupes, so removing an excluded id
    // would leave a gap (e.g. no index 0). Re-index so callers always receive
    // a plain list.
    return array_values(array_diff($dupes, $except));
  }

  /**
   * Find possible duplicate pairs among the members of the given group.
   *
   * @param int $rgid rule group id
   * @param int $gid contact group id (currently, works only with non-smart groups)
   *
   * @return array list of array(cid1, cid2, weight) dupe triples; empty when the group has no members
   */
  public static function dupesInGroup($rgid, $gid) {
    $cids = array_keys(CRM_Contact_BAO_Group::getMember($gid));
    // An empty id list is never passed on to dupes(): an empty group simply
    // has no dupes.
    if (!empty($cids)) {
      return self::dupes($rgid, $cids);
    }
    return array();
  }

  /**
   * Return dupes of a given contact, using the default rule group (of a provided usage).
   *
   * @param int $cid contact id of the given contact
   * @param string $used dedupe rule group usage ('Unsupervised' or 'Supervised' or 'General')
   * @param string $ctype contact type of the given contact; looked up from the contact record when empty
   *
   * @return array array of dupe contact_ids
   */
  public static function dupesOfContact($cid, $used = 'Unsupervised', $ctype = NULL) {
    // if not provided, fetch the contact type from the database
    if (!$ctype) {
      $dao = new CRM_Contact_DAO_Contact();
      $dao->id = $cid;
      if (!$dao->find(TRUE)) {
        CRM_Core_Error::fatal("contact id of $cid does not exist");
      }
      $ctype = $dao->contact_type;
    }
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->used = $used;
    $rgBao->contact_type = $ctype;
    if (!$rgBao->find(TRUE)) {
      CRM_Core_Error::fatal("$used rule for $ctype does not exist");
    }
    $dupes = self::dupes($rgBao->id, array($cid));

    // get the dupes for this cid: $cid may sit on either side of a pair, so
    // collect whichever side is the other contact. The comparison is
    // deliberately loose so an integer $cid matches a string id from the query.
    $result = array();
    foreach ($dupes as $dupe) {
      if ($dupe[0] == $cid) {
        $result[] = $dupe[1];
      }
      elseif ($dupe[1] == $cid) {
        $result[] = $dupe[0];
      }
    }
    return $result;
  }

  /**
   * A hackish function needed to massage CRM_Contact_Form_$ctype::formRule()
   * object into a valid $params array for dedupe
   *
   * The form values are flattened, a handful of legacy field names are mapped
   * to their current names, dotted and "-<digit>" / "-Primary" suffixed keys are
   * reduced to the bare field name, and finally only the fields reported by
   * CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype) are kept, grouped by table.
   *
   * @param array $fields contact structure from formRule()
   * @param string $ctype contact type of the given contact
   *
   * @return array valid $params array for dedupe, of the form $params[$table][$field] = $value
   */
  public static function formatParams($fields, $ctype) {
    $flat = array();
    CRM_Utils_Array::flatten($fields, $flat);

    // FIXME: This may no longer be necessary - check inputs
    $replace_these = array(
      'individual_prefix' => 'prefix_id',
      'individual_suffix' => 'suffix_id',
      'gender' => 'gender_id',
    );
    foreach (array('individual_suffix', 'individual_prefix', 'gender') as $name) {
      if (!empty($fields[$name])) {
        $flat[$replace_these[$name]] = $flat[$name];
        unset($flat[$name]);
      }
    }

    // handle {birth,deceased}_date: take the raw form value (which may be an
    // array of date parts) rather than the flattened one, and normalise it.
    foreach (array('birth_date', 'deceased_date') as $date) {
      if (!empty($fields[$date])) {
        $flat[$date] = $fields[$date];
        if (is_array($flat[$date])) {
          $flat[$date] = CRM_Utils_Date::format($flat[$date]);
        }
        $flat[$date] = CRM_Utils_Date::processDate($flat[$date]);
      }
    }

    if (!empty($flat['contact_source'])) {
      $flat['source'] = $flat['contact_source'];
      unset($flat['contact_source']);
    }

    // handle preferred_communication_method: keep the keys whose value is '1'
    // and store them as a VALUE_SEPARATOR-delimited, sorted string. A
    // non-array value is left untouched; feeding it to array_intersect() would
    // only raise warnings (or a TypeError on PHP 8) and yield nothing.
    if (!empty($fields['preferred_communication_method'])
      && is_array($fields['preferred_communication_method'])
    ) {
      $methods = array_intersect($fields['preferred_communication_method'], array('1'));
      $methods = array_keys($methods);
      sort($methods);
      if ($methods) {
        $flat['preferred_communication_method'] = CRM_Core_DAO::VALUE_SEPARATOR . implode(CRM_Core_DAO::VALUE_SEPARATOR, $methods) . CRM_Core_DAO::VALUE_SEPARATOR;
      }
    }

    // handle custom data: index every custom field's 'data' by its column name
    $tree = CRM_Core_BAO_CustomGroup::getTree($ctype, CRM_Core_DAO::$_nullObject, NULL, -1);
    CRM_Core_BAO_CustomGroup::postProcess($tree, $fields, TRUE);
    foreach ($tree as $key => $cg) {
      // only integer keys are treated as custom groups; skip any other entry
      if (!is_int($key)) {
        continue;
      }
      foreach ($cg['fields'] as $cf) {
        $flat[$cf['column_name']] = CRM_Utils_Array::value('data', $cf['customValue']);
      }
    }

    // if the key is dotted, keep just the last part of it
    foreach ($flat as $key => $value) {
      if (substr_count($key, '.')) {
        $last = explode('.', $key);
        $last = array_pop($last);
        // make sure the first occurrence is kept, not the last
        if (!isset($flat[$last])) {
          $flat[$last] = $value;
        }
        unset($flat[$key]);
      }
    }

    // drop the -digit (and -Primary, for CRM-3902) postfixes (so event registration's $flat['email-5'] becomes $flat['email'])
    // FIXME: CRM-5026 should be fixed here; the below clobbers all address info; we should split off address fields and match
    // the -digit to civicrm_address.location_type_id and -Primary to civicrm_address.is_primary
    foreach ($flat as $key => $value) {
      $matches = array();
      if (preg_match('/(.*)-(\d+|Primary)$/', $key, $matches)) {
        $flat[$matches[1]] = $value;
        unset($flat[$key]);
      }
    }

    // Keep only the fields the dedupe rules support, grouped by table.
    $params = array();
    $supportedFields = CRM_Dedupe_BAO_RuleGroup::supportedFields($ctype);
    if (is_array($supportedFields)) {
      // $tableFields (not $fields) so the method parameter is not shadowed.
      foreach ($supportedFields as $table => $tableFields) {
        if ($table === 'civicrm_address') {
          // for matching on civicrm_address fields, we also need the location_type_id
          $tableFields['location_type_id'] = '';
          // FIXME: we also need to do some hacking for id and name fields, see CRM-3902's comments
          $fixes = array(
            'address_name' => 'name',
            'country' => 'country_id',
            'state_province' => 'state_province_id',
            'county' => 'county_id',
          );
          foreach ($fixes as $orig => $target) {
            if (!empty($flat[$orig])) {
              $params[$table][$target] = $flat[$orig];
            }
          }
        }
        foreach ($tableFields as $field => $title) {
          if (!empty($flat[$field])) {
            $params[$table][$field] = $flat[$field];
          }
        }
      }
    }
    return $params;
  }
}
307 |