Merge remote-tracking branch 'upstream/4.5' into 4.5-master-2015-01-12-16-09-32
[civicrm-core.git] / CRM / Dedupe / BAO / Rule.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 /**
37 * The CiviCRM duplicate discovery engine is based on an
38 * algorithm designed by David Strauss <david@fourkitchens.com>.
39 */
class CRM_Dedupe_BAO_Rule extends CRM_Dedupe_DAO_Rule {

  /**
   * Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
   *
   * @var array
   */
  public $contactIds = array();

  /**
   * Params to dedupe against (queries against the whole contact set otherwise)
   *
   * Indexed as $params[<rule_table>][<rule_field>].
   *
   * @var array
   */
  public $params = array();

  /**
   * Return the SQL query for the given rule - either for finding matching
   * pairs of contacts, or for matching against the $params variable (if set).
   *
   * In pair mode the rule table is joined to itself (aliases t1 and t2) and
   * the query selects id1, id2 and weight. In params mode only t1 is queried
   * and the query selects id1 and weight.
   *
   * @return string|null
   *   SQL query performing the search, or NULL when $params is set but holds
   *   no entry for this rule's table/field.
   */
  public function sql() {
    if ($this->params &&
      (!array_key_exists($this->rule_table, $this->params) ||
        !array_key_exists($this->rule_field, $this->params[$this->rule_table])
      )
    ) {
      // if params is present and doesn't have an entry for a field, don't construct the clause.
      return NULL;
    }

    // we need to initialise WHERE, ON and USING here, as some table types
    // extend them; $where is an array of required conditions, $on and
    // $using are arrays of required field matchings (for substring and
    // full matches, respectively)
    $where = array();
    $on = array("SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR(t2.{$this->rule_field}, 1, {$this->rule_length})");
    $using = array($this->rule_field);

    switch ($this->rule_table) {
      case 'civicrm_contact':
        $id = 'id';
        // we should restrict by contact type in the first step
        $sql = "SELECT contact_type FROM civicrm_dedupe_rule_group WHERE id = {$this->dedupe_rule_group_id};";
        $ct = CRM_Core_DAO::singleValueQuery($sql);
        $where[] = "t1.contact_type = '{$ct}'";
        if (!$this->params) {
          // t2 only exists in the self-join (pair) query
          $where[] = "t2.contact_type = '{$ct}'";
        }
        break;

      case 'civicrm_address':
        $id = 'contact_id';
        $on[] = 't1.location_type_id = t2.location_type_id';
        $using[] = 'location_type_id';
        // $params may be empty (pair mode) or lack a location type, so the
        // nested key must be probed with empty() rather than read directly.
        if (!empty($this->params['civicrm_address']['location_type_id'])) {
          $locTypeId = CRM_Utils_Type::escape($this->params['civicrm_address']['location_type_id'], 'Integer', FALSE);
          if ($locTypeId) {
            $where[] = "t1.location_type_id = $locTypeId";
          }
        }
        break;

      case 'civicrm_email':
      case 'civicrm_im':
      case 'civicrm_openid':
      case 'civicrm_phone':
        $id = 'contact_id';
        break;

      case 'civicrm_note':
        $id = 'entity_id';
        $where[] = "t1.entity_table = 'civicrm_contact'";
        if (!$this->params) {
          // t2 only exists in the self-join (pair) query
          $where[] = "t2.entity_table = 'civicrm_contact'";
        }
        break;

      default:
        // custom data tables
        if (preg_match('/^civicrm_value_/', $this->rule_table) || preg_match('/^custom_value_/', $this->rule_table)) {
          $id = 'entity_id';
        }
        else {
          CRM_Core_Error::fatal("Unsupported rule_table for civicrm_dedupe_rule.id of {$this->id}");
        }
        break;
    }

    // build SELECT based on the field names containing contact ids;
    // with params there is no second table, so only id1 is selected
    if ($this->params) {
      $select = "t1.$id id1, {$this->rule_weight} weight";
    }
    else {
      $select = "t1.$id id1, t2.$id id2, {$this->rule_weight} weight";
    }

    // build FROM (and WHERE, if it's a parametrised search)
    // based on whether the rule is about substrings or not
    if ($this->params) {
      $from = "{$this->rule_table} t1";
      // NOTE(review): when the param value is unset/NULL the comparison below
      // is made against the literal string 'NULL' - confirm this is intended.
      $str = 'NULL';
      if (isset($this->params[$this->rule_table][$this->rule_field])) {
        $str = CRM_Utils_Type::escape($this->params[$this->rule_table][$this->rule_field], 'String');
      }
      if ($this->rule_length) {
        $where[] = "SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR('$str', 1, {$this->rule_length})";
        $where[] = "t1.{$this->rule_field} IS NOT NULL";
      }
      else {
        $where[] = "t1.{$this->rule_field} = '$str'";
      }
    }
    else {
      if ($this->rule_length) {
        $from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $on) . ")";
      }
      else {
        $from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 USING (" . implode(', ', $using) . ")";
      }
    }

    // finish building WHERE, also limit the results if requested
    if (!$this->params) {
      // t1 < t2 reports each pair once and never pairs a row with itself
      $where[] = "t1.$id < t2.$id";
      $where[] = "t1.{$this->rule_field} IS NOT NULL";
    }
    if ($this->contactIds) {
      $cids = array();
      foreach ($this->contactIds as $cid) {
        $cids[] = CRM_Utils_Type::escape($cid, 'Integer');
      }
      $single = (count($cids) == 1);
      $cidList = implode(',', $cids);
      if ($this->params) {
        // the parametrised query has no t2 alias, so referencing it would
        // produce invalid SQL; restrict the only table present
        $where[] = $single ? "t1.$id = {$cids[0]}" : "t1.$id IN ($cidList)";
      }
      elseif ($single) {
        $where[] = "(t1.$id = {$cids[0]} OR t2.$id = {$cids[0]})";
      }
      else {
        $where[] = "(t1.$id IN ($cidList) OR t2.$id IN ($cidList))";
      }
    }

    return "SELECT $select FROM $from WHERE " . implode(' AND ', $where);
  }

  /**
   * Find fields related to a rule group.
   *
   * @param array $params
   *   Rule group properties used to identify the rule group; the keys
   *   'used' and 'contact_type' are read.
   *
   * @return array
   *   rule_field values of the rules belonging to the matched rule group;
   *   empty when no rule group matches.
   */
  public static function dedupeRuleFields($params) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->used = $params['used'];
    $rgBao->contact_type = $params['contact_type'];
    if (!$rgBao->find(TRUE)) {
      // Without a group id the rule lookup below would be unfiltered and
      // return the fields of every rule in every group.
      return array();
    }

    $ruleBao = new CRM_Dedupe_BAO_Rule();
    $ruleBao->dedupe_rule_group_id = $rgBao->id;
    $ruleBao->find();
    $ruleFields = array();
    while ($ruleBao->fetch()) {
      $ruleFields[] = $ruleBao->rule_field;
    }
    return $ruleFields;
  }

  /**
   * Check that no CRM_Dedupe_DAO_Exception record exists for a pair of contacts.
   *
   * @param int $cid
   * @param int $oid
   *
   * @return bool
   *   TRUE when both ids are given and no exception record is found for the
   *   pair; FALSE otherwise.
   */
  public static function validateContacts($cid, $oid) {
    if (!$cid || !$oid) {
      return FALSE;
    }
    $exception = new CRM_Dedupe_DAO_Exception();
    // the lookup is always made with contact_id1 < contact_id2
    if ($cid > $oid) {
      $exception->contact_id1 = $oid;
      $exception->contact_id2 = $cid;
    }
    else {
      $exception->contact_id1 = $cid;
      $exception->contact_id2 = $oid;
    }

    return !$exception->find(TRUE);
  }

}