<?php
/*
 +--------------------------------------------------------------------+
 | Copyright CiviCRM LLC. All rights reserved.                        |
 |                                                                    |
 | This work is published under the GNU AGPLv3 license with some      |
 | permitted exceptions and without any warranty. For full license    |
 | and copyright information, see https://civicrm.org/licensing       |
 +--------------------------------------------------------------------+
 */

/**
 *
 * @package CRM
 * @copyright CiviCRM LLC https://civicrm.org/licensing
 */

/**
 * The CiviCRM duplicate discovery engine is based on an
 * algorithm designed by David Strauss <david@fourkitchens.com>.
 */
class CRM_Dedupe_BAO_DedupeRule extends CRM_Dedupe_DAO_DedupeRule {

  /**
   * Ids of the contacts to limit the SQL queries (whole-database queries otherwise)
   * @var array
   */
  public $contactIds = [];

  /**
   * Params to dedupe against (queries against the whole contact set otherwise)
   * @var array
   */
  public $params = [];

  /**
   * Return the SQL query for the given rule - either for finding matching
   * pairs of contacts, or for matching against the $params variable (if set).
   *
   * Without params the query selects (id1, id2, weight) rows from a self-join
   * of the rule table; with params it selects (id1, weight) rows for records
   * whose rule field matches the supplied value. When $contactIds is set the
   * result is limited to rows involving one of those contacts.
   *
   * @return string|null
   *   SQL query performing the search, or NULL when params are present but
   *   hold no entry for this rule's table/field.
   *
   * @throws \CRM_Core_Exception
   * @throws \CiviCRM_API3_Exception
   */
  public function sql() {
    if ($this->params &&
      (!array_key_exists($this->rule_table, $this->params) ||
        !array_key_exists($this->rule_field, $this->params[$this->rule_table])
      )
    ) {
      // if params is present and doesn't have an entry for a field, don't construct the clause.
      return NULL;
    }

    $isParamSearch = (bool) $this->params;

    // $where collects the required conditions; some table types add their
    // own restrictions below.
    $where = [];

    // Work out which column of the rule table holds the contact id.
    switch ($this->rule_table) {
      case 'civicrm_contact':
        $id = 'id';
        // we should restrict by contact type in the first step
        $ct = CRM_Core_DAO::singleValueQuery(
          'SELECT contact_type FROM civicrm_dedupe_rule_group WHERE id = %1',
          [1 => [$this->dedupe_rule_group_id, 'Integer']]
        );
        $where[] = "t1.contact_type = '{$ct}'";
        if (!$isParamSearch) {
          $where[] = "t2.contact_type = '{$ct}'";
        }
        break;

      case 'civicrm_address':
      case 'civicrm_email':
      case 'civicrm_im':
      case 'civicrm_openid':
      case 'civicrm_phone':
      case 'civicrm_website':
        $id = 'contact_id';
        break;

      case 'civicrm_note':
        $id = 'entity_id';
        $where[] = "t1.entity_table = 'civicrm_contact'";
        if (!$isParamSearch) {
          $where[] = "t2.entity_table = 'civicrm_contact'";
        }
        break;

      default:
        // custom data tables
        if (preg_match('/^civicrm_value_/', $this->rule_table) || preg_match('/^custom_value_/', $this->rule_table)) {
          $id = 'entity_id';
        }
        else {
          throw new CRM_Core_Exception("Unsupported rule_table for civicrm_dedupe_rule.id of {$this->id}");
        }
        break;
    }

    // Build SELECT, FROM and the field-matching conditions. A parametrised
    // search reads a single table alias (t1); a pair search self-joins the
    // table as t1/t2.
    if ($isParamSearch) {
      $select = "t1.$id id1, {$this->rule_weight} weight";
      $from = "{$this->rule_table} t1";
      // NOTE(review): when the param value is NULL the comparison below is
      // made against the literal string 'NULL' - confirm this is intended.
      $str = 'NULL';
      if (isset($this->params[$this->rule_table][$this->rule_field])) {
        $str = trim(CRM_Utils_Type::escape($this->params[$this->rule_table][$this->rule_field], 'String'));
      }
      if ($this->rule_length) {
        $where[] = "SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR('$str', 1, {$this->rule_length})";
        $where[] = "t1.{$this->rule_field} IS NOT NULL";
      }
      else {
        $where[] = "t1.{$this->rule_field} = '$str'";
      }
    }
    else {
      $select = "t1.$id id1, t2.$id id2, {$this->rule_weight} weight";
      if ($this->rule_length) {
        // Substring rule: only the first rule_length characters must agree.
        $from = "{$this->rule_table} t1 JOIN {$this->rule_table} t2 ON ("
          . "SUBSTR(t1.{$this->rule_field}, 1, {$this->rule_length}) = SUBSTR(t2.{$this->rule_field}, 1, {$this->rule_length})"
          . ")";
      }
      else {
        $joinClauses = [
          "t1.{$this->rule_field} IS NOT NULL",
          "t2.{$this->rule_field} IS NOT NULL",
          "t1.{$this->rule_field} = t2.{$this->rule_field}",
        ];
        // Text fields additionally must not be empty strings. The field type
        // lookup is only done here because this is the only branch using it.
        if (in_array($this->getFieldType($this->rule_field), CRM_Utils_Type::getTextTypes(), TRUE)) {
          $joinClauses[] = "t1.{$this->rule_field} <> ''";
          $joinClauses[] = "t2.{$this->rule_field} <> ''";
        }
        $from = "{$this->rule_table} t1 INNER JOIN {$this->rule_table} t2 ON (" . implode(' AND ', $joinClauses) . ")";
      }
      // Report each pair once, with the lower id first.
      $where[] = "t1.$id < t2.$id";
    }

    $query = "SELECT $select FROM $from WHERE " . implode(' AND ', $where);

    // Limit the results to the requested contacts, if any.
    if ($this->contactIds) {
      $cids = [];
      foreach ($this->contactIds as $cid) {
        $cids[] = CRM_Utils_Type::escape($cid, 'Integer');
      }
      if (count($cids) === 1) {
        $t1Match = "(t1.$id = {$cids[0]})";
        $t2Match = "t2.$id = {$cids[0]}";
      }
      else {
        $cidList = implode(',', $cids);
        $t1Match = "t1.$id IN ($cidList)";
        $t2Match = "t2.$id IN ($cidList)";
      }

      if ($isParamSearch) {
        // A parametrised search has no t2 alias, so only t1 can be limited.
        $query .= " AND $t1Match";
      }
      else {
        // A listed contact may be on either side of the pair.
        $query = "$query AND $t1Match UNION $query AND $t2Match";
        // The `weight` is ambiguous in the context of the union; put the whole
        // thing in a subquery.
        $query = "SELECT id1, id2, weight FROM ($query) subunion";
      }
    }

    return $query;
  }

  /**
   * Find fields related to a rule group.
   *
   * @param array $params
   *   Rule group properties used to identify the rule group; the 'used' and
   *   'contact_type' keys are read.
   *
   * @return array
   *   Rule field names associated with the rule group ('phone_numeric' is
   *   reported as 'phone'); empty when no rule group matches.
   */
  public static function dedupeRuleFields($params) {
    $rgBao = new CRM_Dedupe_BAO_DedupeRuleGroup();
    $rgBao->used = $params['used'];
    $rgBao->contact_type = $params['contact_type'];
    if (!$rgBao->find(TRUE)) {
      // Without a matching group there is no id to filter on; bail out
      // rather than running an unfiltered rule lookup.
      return [];
    }

    $ruleBao = new CRM_Dedupe_BAO_DedupeRule();
    $ruleBao->dedupe_rule_group_id = $rgBao->id;
    $ruleBao->find();
    $ruleFields = [];
    while ($ruleBao->fetch()) {
      $field_name = $ruleBao->rule_field;
      if ($field_name === 'phone_numeric') {
        $field_name = 'phone';
      }
      $ruleFields[] = $field_name;
    }
    return $ruleFields;
  }

  /**
   * Check that a pair of contacts is not recorded as a dedupe exception.
   *
   * @param int $cid
   * @param int $oid
   *
   * @return bool|null
   *   TRUE when no CRM_Dedupe_DAO_DedupeException record exists for the pair,
   *   FALSE when one does, NULL when either id is empty.
   */
  public static function validateContacts($cid, $oid) {
    if (!$cid || !$oid) {
      return NULL;
    }
    $exception = new CRM_Dedupe_DAO_DedupeException();
    $exception->contact_id1 = $cid;
    $exception->contact_id2 = $oid;
    //make sure contact2 > contact1.
    if ($cid > $oid) {
      $exception->contact_id1 = $oid;
      $exception->contact_id2 = $cid;
    }

    return !$exception->find(TRUE);
  }

  /**
   * Get the type of the given field on this rule's table.
   *
   * @param string $fieldName
   *   Column name on $this->rule_table.
   *
   * @return int|null
   *   The 'type' entry of the field's getfields specification (presumably a
   *   CRM_Utils_Type::T_* constant, as sql() compares it against
   *   CRM_Utils_Type::getTextTypes()); NULL when the field is not in the
   *   specification.
   *
   * @throws \CiviCRM_API3_Exception
   */
  public function getFieldType($fieldName) {
    $entity = CRM_Core_DAO_AllCoreTables::getEntityNameForTable($this->rule_table);
    if (!$entity) {
      // This means we have stored a custom field rather than an entity name in rule_table, figure out the entity.
      $entity = civicrm_api3('CustomGroup', 'getvalue', ['table_name' => $this->rule_table, 'return' => 'extends']);
      if (in_array($entity, ['Individual', 'Household', 'Organization'], TRUE)) {
        $entity = 'Contact';
      }
      // NOTE(review): the lookup filters on column_name only; presumably
      // column names are unique across custom groups - confirm.
      $fieldName = 'custom_' . civicrm_api3('CustomField', 'getvalue', ['column_name' => $fieldName, 'return' => 'id']);
    }
    $fields = civicrm_api3($entity, 'getfields', ['action' => 'create'])['values'];
    return isset($fields[$fieldName]['type']) ? $fields[$fieldName]['type'] : NULL;
  }

}