Merge in 5.20
[civicrm-core.git] / CRM / Dedupe / BAO / Rule.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 * $Id$
17 *
18 */
19
20 /**
21 * The CiviCRM duplicate discovery engine is based on an
22 * algorithm designed by David Strauss <david@fourkitchens.com>.
23 */
class CRM_Dedupe_BAO_Rule extends CRM_Dedupe_DAO_Rule {

  /**
   * Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
   * @var array
   */
  public $contactIds = [];

  /**
   * Params to dedupe against (queries against the whole contact set otherwise)
   * @var array
   */
  public $params = [];

  /**
   * Return the SQL query for the given rule - either for finding matching
   * pairs of contacts, or for matching against the $params variable (if set).
   *
   * Without $params the query self-joins the rule table and selects
   * (id1, id2, weight) for every pair of rows whose rule field matches
   * (only the first rule_length characters are compared when rule_length is
   * set). With $params the query selects (id1, weight) for every row whose
   * rule field matches the value supplied in $params[rule_table][rule_field].
   * When $contactIds is set, the result is limited to rows involving those
   * contacts.
   *
   * @return string|null
   *   SQL query performing the search, or NULL when $params is set but holds
   *   no entry for this rule's table/field.
   */
  public function sql() {
    if ($this->params &&
      (!array_key_exists($this->rule_table, $this->params) ||
        !array_key_exists($this->rule_field, $this->params[$this->rule_table])
      )
    ) {
      // if params is present and doesn't have an entry for a field, don't construct the clause.
      return NULL;
    }

    // we need to initialise WHERE and ON here, as some table types
    // extend them; $where is an array of required conditions, $on and
    // $innerJoinClauses are arrays of required field matchings (for
    // substring and full matches, respectively)
    $where = [];
    $on = ["SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR(t2.{$this->rule_field}, 1, {$this->rule_length})"];

    // Look up the field metadata to find out whether we are comparing dates.
    // NOTE(review): getClassForTable() presumably resolves core tables only,
    // while the switch below also accepts custom-data tables; when no class is
    // resolved there is no API entity to ask, so the lookup is skipped and the
    // field is treated as a non-date field.
    $fields = [];
    $daoClass = CRM_Core_DAO_AllCoreTables::getClassForTable($this->rule_table);
    if ($daoClass) {
      $entity = CRM_Core_DAO_AllCoreTables::getBriefName($daoClass);
      $fields = civicrm_api3($entity, 'getfields', ['action' => 'create'])['values'];
    }
    // The metadata may not be keyed by the column name, so guard the lookup
    // rather than triggering an undefined index notice.
    $isDateField = isset($fields[$this->rule_field]['type'])
      && $fields[$this->rule_field]['type'] === CRM_Utils_Type::T_DATE;

    $innerJoinClauses = [
      "t1.{$this->rule_field} IS NOT NULL",
      "t2.{$this->rule_field} IS NOT NULL",
      "t1.{$this->rule_field} = t2.{$this->rule_field}",
    ];
    if ($isDateField) {
      // Dates are compared against a date literal instead of the empty string.
      $innerJoinClauses[] = "t1.{$this->rule_field} > '1000-01-01'";
      $innerJoinClauses[] = "t2.{$this->rule_field} > '1000-01-01'";
    }
    else {
      $innerJoinClauses[] = "t1.{$this->rule_field} <> ''";
      $innerJoinClauses[] = "t2.{$this->rule_field} <> ''";
    }

    // $id is the name of the column holding the contact id in the rule table.
    switch ($this->rule_table) {
      case 'civicrm_contact':
        $id = 'id';
        //we should restrict by contact type in the first step
        $sql = "SELECT contact_type FROM civicrm_dedupe_rule_group WHERE id = {$this->dedupe_rule_group_id};";
        $ct = CRM_Core_DAO::singleValueQuery($sql);
        $where[] = "t1.contact_type = '{$ct}'";
        if (!$this->params) {
          // t2 only exists in the pair-finding (self-join) query.
          $where[] = "t2.contact_type = '{$ct}'";
        }
        break;

      case 'civicrm_address':
        $id = 'contact_id';
        // Addresses only match other addresses of the same location type.
        $on[] = 't1.location_type_id = t2.location_type_id';
        $innerJoinClauses[] = 't1.location_type_id = t2.location_type_id';
        if (!empty($this->params['civicrm_address']['location_type_id'])) {
          $locTypeId = CRM_Utils_Type::escape($this->params['civicrm_address']['location_type_id'], 'Integer', FALSE);
          if ($locTypeId) {
            $where[] = "t1.location_type_id = $locTypeId";
          }
        }
        break;

      case 'civicrm_email':
      case 'civicrm_im':
      case 'civicrm_openid':
      case 'civicrm_phone':
        $id = 'contact_id';
        break;

      case 'civicrm_note':
        $id = 'entity_id';
        // Only consider notes attached to contacts.
        $where[] = "t1.entity_table = 'civicrm_contact'";
        if (!$this->params) {
          $where[] = "t2.entity_table = 'civicrm_contact'";
        }
        break;

      default:
        // custom data tables
        if (preg_match('/^civicrm_value_/', $this->rule_table) || preg_match('/^custom_value_/', $this->rule_table)) {
          $id = 'entity_id';
        }
        else {
          CRM_Core_Error::fatal("Unsupported rule_table for civicrm_dedupe_rule.id of {$this->id}");
        }
        break;
    }

    // build SELECT based on the field names containing contact ids;
    // a parametrised search has no second contact, so only id1 is selected
    if ($this->params) {
      $select = "t1.$id id1, {$this->rule_weight} weight";
      $subSelect = 'id1, weight';
    }
    else {
      $select = "t1.$id id1, t2.$id id2, {$this->rule_weight} weight";
      $subSelect = 'id1, id2, weight';
    }

    // build FROM (and WHERE, if it's a parametrised search)
    // based on whether the rule is about substrings or not
    if ($this->params) {
      $from = "{$this->rule_table} t1";
      // A NULL param value falls through to this default and is therefore
      // compared as the literal string 'NULL'.
      $str = 'NULL';
      if (isset($this->params[$this->rule_table][$this->rule_field])) {
        $str = CRM_Utils_Type::escape($this->params[$this->rule_table][$this->rule_field], 'String');
      }
      if ($this->rule_length) {
        $where[] = "SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR('$str', 1, {$this->rule_length})";
        $where[] = "t1.{$this->rule_field} IS NOT NULL";
      }
      else {
        $where[] = "t1.{$this->rule_field} = '$str'";
      }
    }
    elseif ($this->rule_length) {
      $from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $on) . ")";
    }
    else {
      $from = "{$this->rule_table} t1 INNER JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $innerJoinClauses) . ")";
    }

    // finish building WHERE, also limit the results if requested
    if (!$this->params) {
      // Report each pair once, with the lower id first.
      $where[] = "t1.$id < t2.$id";
    }
    $query = "SELECT $select FROM $from WHERE " . implode(' AND ', $where);

    if ($this->contactIds) {
      $cids = [];
      foreach ($this->contactIds as $cid) {
        $cids[] = CRM_Utils_Type::escape($cid, 'Integer');
      }

      if ($this->params) {
        // A parametrised search has no t2 alias, so the restriction can only
        // apply to t1; no UNION (and hence no wrapping subquery) is needed.
        if (count($cids) == 1) {
          $query .= " AND (t1.$id = {$cids[0]})";
        }
        else {
          $query .= " AND t1.$id IN (" . implode(',', $cids) . ")";
        }
      }
      else {
        // A pair qualifies when either side is one of the requested contacts.
        if (count($cids) == 1) {
          $query .= " AND (t1.$id = {$cids[0]}) UNION $query AND t2.$id = {$cids[0]}";
        }
        else {
          $query .= " AND t1.$id IN (" . implode(',', $cids) . ")
        UNION $query AND t2.$id IN (" . implode(',', $cids) . ")";
        }
        // The `weight` is ambiguous in the context of the union; put the whole
        // thing in a subquery.
        $query = "SELECT $subSelect FROM ($query) subunion";
      }
    }

    return $query;
  }

  /**
   * Find fields related to a rule group.
   *
   * The rule group is looked up by its 'used' and 'contact_type' properties.
   *
   * @param array $params
   *   Contains the rule group properties ('used', 'contact_type') identifying
   *   the rule group.
   *
   * @return array
   *   Rule field names associated with the rule group; empty when no rule
   *   group matches.
   */
  public static function dedupeRuleFields($params) {
    $rgBao = new CRM_Dedupe_BAO_RuleGroup();
    $rgBao->used = $params['used'];
    $rgBao->contact_type = $params['contact_type'];
    if (!$rgBao->find(TRUE)) {
      // Without a matching group there is no id to filter on; searching for
      // rules anyway would not be limited to a single rule group.
      return [];
    }

    $ruleBao = new CRM_Dedupe_BAO_Rule();
    $ruleBao->dedupe_rule_group_id = $rgBao->id;
    $ruleBao->find();
    $ruleFields = [];
    while ($ruleBao->fetch()) {
      $ruleFields[] = $ruleBao->rule_field;
    }
    return $ruleFields;
  }

  /**
   * Check that no dedupe exception is recorded for a pair of contacts.
   *
   * @param int $cid
   * @param int $oid
   *
   * @return bool|null
   *   TRUE when no exception record exists for the pair, FALSE when one does,
   *   NULL when either id is empty.
   */
  public static function validateContacts($cid, $oid) {
    if (!$cid || !$oid) {
      return NULL;
    }
    $exception = new CRM_Dedupe_DAO_Exception();
    //make sure contact2 > contact1.
    $exception->contact_id1 = min($cid, $oid);
    $exception->contact_id2 = max($cid, $oid);

    return !$exception->find(TRUE);
  }

}