[NFC] Preliminary cleanup
[civicrm-core.git] / CRM / Dedupe / BAO / Rule.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 * $Id$
17 *
18 */
19
20 /**
21 * The CiviCRM duplicate discovery engine is based on an
22 * algorithm designed by David Strauss <david@fourkitchens.com>.
23 */
class CRM_Dedupe_BAO_Rule extends CRM_Dedupe_DAO_Rule {

  /**
   * Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
   * @var array
   */
  public $contactIds = [];

  /**
   * Params to dedupe against (queries against the whole contact set otherwise)
   * @var array
   */
  public $params = [];

  /**
   * Return the SQL query for the given rule - either for finding matching
   * pairs of contacts, or for matching against the $params variable (if set).
   *
   * When $params is set the query selects `id1, weight` from a single table
   * alias (t1); otherwise it self-joins the rule table (t1, t2) and selects
   * `id1, id2, weight` with t1's id lower than t2's.
   *
   * @return string|null
   *   SQL query performing the search, or NULL when $params is set but holds
   *   no entry for this rule's table/field.
   *
   * @throws \CRM_Core_Exception
   *   When the rule table is not one of the supported tables.
   * @throws \CiviCRM_API3_Exception
   */
  public function sql() {
    if ($this->params &&
      (!array_key_exists($this->rule_table, $this->params) ||
        !array_key_exists($this->rule_field, $this->params[$this->rule_table])
      )
    ) {
      // if params is present and doesn't have an entry for a field, don't construct the clause.
      return NULL;
    }

    // $where collects the required conditions; $joinExtras collects
    // table-specific conditions that must hold between t1 and t2 whichever
    // join form (substring or full match) ends up being used.
    $where = [];
    $joinExtras = [];

    switch ($this->rule_table) {
      case 'civicrm_contact':
        $id = 'id';
        // we should restrict by contact type in the first step
        $sql = "SELECT contact_type FROM civicrm_dedupe_rule_group WHERE id = {$this->dedupe_rule_group_id};";
        $ct = CRM_Core_DAO::singleValueQuery($sql);
        $where[] = "t1.contact_type = '{$ct}'";
        if (!$this->params) {
          $where[] = "t2.contact_type = '{$ct}'";
        }
        break;

      case 'civicrm_address':
        $id = 'contact_id';
        $joinExtras[] = 't1.location_type_id = t2.location_type_id';
        if (!empty($this->params['civicrm_address']['location_type_id'])) {
          $locTypeId = CRM_Utils_Type::escape($this->params['civicrm_address']['location_type_id'], 'Integer', FALSE);
          if ($locTypeId) {
            $where[] = "t1.location_type_id = $locTypeId";
          }
        }
        break;

      case 'civicrm_email':
      case 'civicrm_im':
      case 'civicrm_openid':
      case 'civicrm_phone':
        $id = 'contact_id';
        break;

      case 'civicrm_note':
        $id = 'entity_id';
        $where[] = "t1.entity_table = 'civicrm_contact'";
        if (!$this->params) {
          $where[] = "t2.entity_table = 'civicrm_contact'";
        }
        break;

      default:
        // custom data tables
        if (preg_match('/^civicrm_value_/', $this->rule_table) || preg_match('/^custom_value_/', $this->rule_table)) {
          $id = 'entity_id';
        }
        else {
          throw new CRM_Core_Exception("Unsupported rule_table for civicrm_dedupe_rule.id of {$this->id}");
        }
        break;
    }

    if ($this->params) {
      // Parametrised search: a single table alias, matched against the
      // supplied value; only id1 is selected.
      $select = "t1.$id id1, {$this->rule_weight} weight";
      $subSelect = 'id1, weight';
      $from = "{$this->rule_table} t1";

      // NOTE(review): when the supplied value is NULL the comparison below is
      // made against the literal string 'NULL' - confirm this is intended.
      $str = 'NULL';
      if (isset($this->params[$this->rule_table][$this->rule_field])) {
        $str = CRM_Utils_Type::escape($this->params[$this->rule_table][$this->rule_field], 'String');
      }
      if ($this->rule_length) {
        $where[] = "SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR('$str', 1, {$this->rule_length})";
        $where[] = "t1.{$this->rule_field} IS NOT NULL";
      }
      else {
        $where[] = "t1.{$this->rule_field} = '$str'";
      }
    }
    else {
      // Pair search: self-join the rule table.
      $select = "t1.$id id1, t2.$id id2, {$this->rule_weight} weight";
      $subSelect = 'id1, id2, weight';

      if ($this->rule_length) {
        // Substring match on the first rule_length characters.
        $on = array_merge(
          ["SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR(t2.{$this->rule_field}, 1, {$this->rule_length})"],
          $joinExtras
        );
        $from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $on) . ")";
      }
      else {
        // Full match on non-empty values.
        $innerJoinClauses = array_merge(
          [
            "t1.{$this->rule_field} IS NOT NULL",
            "t2.{$this->rule_field} IS NOT NULL",
            "t1.{$this->rule_field} = t2.{$this->rule_field}",
          ],
          $this->getNonEmptyFieldClauses(),
          $joinExtras
        );
        $from = "{$this->rule_table} t1 INNER JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $innerJoinClauses) . ")";
      }

      // Each pair is reported once, lower id first.
      $where[] = "t1.$id < t2.$id";
    }

    $query = "SELECT $select FROM $from WHERE " . implode(' AND ', $where);
    if (!$this->contactIds) {
      return $query;
    }

    // Limit the results to the requested contacts.
    $cids = [];
    foreach ($this->contactIds as $cid) {
      $cids[] = CRM_Utils_Type::escape($cid, 'Integer');
    }

    if ($this->params) {
      // The parametrised query has no t2 alias, so only t1 can be restricted
      // (a UNION referencing t2 would not be valid SQL here).
      if (count($cids) == 1) {
        return $query . " AND (t1.$id = {$cids[0]})";
      }
      return $query . " AND t1.$id IN (" . implode(',', $cids) . ")";
    }

    // A pair qualifies when either side is one of the requested contacts:
    // run the base query once restricted on t1 and once restricted on t2.
    $base = $query;
    if (count($cids) == 1) {
      $query = $base . " AND (t1.$id = {$cids[0]}) UNION $base AND t2.$id = {$cids[0]}";
    }
    else {
      $query = $base . " AND t1.$id IN (" . implode(',', $cids) . ")
        UNION $base AND t2.$id IN (" . implode(',', $cids) . ")";
    }
    // The `weight` is ambiguous in the context of the union; put the whole
    // thing in a subquery.
    return "SELECT $subSelect FROM ($query) subunion";
  }

  /**
   * Build the join conditions that exclude "empty" values of the rule field.
   *
   * Date fields are required to be later than '1000-01-01'; every other
   * field type is required to differ from the empty string. A field that is
   * missing from the API field list is treated as a non-date field.
   * NOTE(review): the date form presumably avoids comparing a date column
   * with '' - confirm.
   *
   * @return array
   *   Two SQL conditions, one for t1 and one for t2.
   *
   * @throws \CiviCRM_API3_Exception
   */
  private function getNonEmptyFieldClauses() {
    $entity = CRM_Core_DAO_AllCoreTables::getBriefName(CRM_Core_DAO_AllCoreTables::getClassForTable($this->rule_table));
    $fields = civicrm_api3($entity, 'getfields', ['action' => 'create'])['values'];

    $isDateField = isset($fields[$this->rule_field]['type'])
      && $fields[$this->rule_field]['type'] === CRM_Utils_Type::T_DATE;

    if ($isDateField) {
      return [
        "t1.{$this->rule_field} > '1000-01-01'",
        "t2.{$this->rule_field} > '1000-01-01'",
      ];
    }
    return [
      "t1.{$this->rule_field} <> ''",
      "t2.{$this->rule_field} <> ''",
    ];
  }

  /**
   * Find fields related to a rule group.
   *
   * @param array $params
   *   Contains the rule group properties ('used' and 'contact_type') that
   *   identify the rule group.
   *
   * @return array
   *   Rule fields associated to the rule group; empty when no rule group
   *   matches.
   */
  public static function dedupeRuleFields($params) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->used = $params['used'];
    $rgBao->contact_type = $params['contact_type'];
    if (!$rgBao->find(TRUE)) {
      // No matching rule group: bail out rather than looking rules up with
      // an empty group id.
      return [];
    }

    $ruleBao = new CRM_Dedupe_BAO_Rule();
    $ruleBao->dedupe_rule_group_id = $rgBao->id;
    $ruleBao->find();
    $ruleFields = [];
    while ($ruleBao->fetch()) {
      $ruleFields[] = $ruleBao->rule_field;
    }
    return $ruleFields;
  }

  /**
   * Check whether a pair of contacts is free of a recorded dedupe exception.
   *
   * @param int $cid
   * @param int $oid
   *
   * @return bool|null
   *   NULL when either id is empty, FALSE when a dedupe exception record
   *   exists for the pair, TRUE otherwise.
   */
  public static function validateContacts($cid, $oid) {
    if (!$cid || !$oid) {
      return NULL;
    }
    $exception = new CRM_Dedupe_DAO_Exception();
    // The lookup is done with the lower id in contact_id1.
    if ($cid > $oid) {
      $exception->contact_id1 = $oid;
      $exception->contact_id2 = $cid;
    }
    else {
      $exception->contact_id1 = $cid;
      $exception->contact_id2 = $oid;
    }

    return !$exception->find(TRUE);
  }

}