CRM-18443 add unit test & refactor code into smaller function
[civicrm-core.git] / tests / phpunit / CRM / Dedupe / MergerTest.php
1 <?php
2
/**
 * Class CRM_Dedupe_MergerTest
 *
 * Tests for CRM_Dedupe_Merger: batch merging of duplicate contacts, the list
 * of tables that reference a contact id, and retrieval of duplicate pairs.
 *
 * @group headless
 */
class CRM_Dedupe_MergerTest extends CiviUnitTestCase {

  /**
   * ID of the group created by createDupeContacts() to hold the fixture contacts.
   *
   * @var int
   */
  protected $_groupId;

  /**
   * IDs of the contacts created by createDupeContacts(), keyed 1..n in creation order.
   *
   * @var array
   */
  protected $_contactIds = array();

  /**
   * Contacts created by setupMatchData(): the fixture values plus the created 'id'.
   *
   * Declared explicitly instead of being created on the fly; public matches
   * the visibility an undeclared (dynamic) property would have had.
   *
   * @var array
   */
  public $contacts = array();

  /**
   * Create a dedicated group and nine contacts containing known duplicates.
   *
   * The group ID is stored in $_groupId and the contact IDs in $_contactIds.
   */
  public function createDupeContacts() {
    // create a group to hold contacts, so that dupe checks don't consider any other contacts in the DB
    $params = array(
      'name' => 'Test Dupe Merger Group',
      'title' => 'Test Dupe Merger Group',
      'domain_id' => 1,
      'is_active' => 1,
      'visibility' => 'Public Pages',
      'version' => 3,
    );
    // TODO: This is not an API test!!
    $result = civicrm_api('group', 'create', $params);
    $this->_groupId = $result['id'];

    // Dupe checks are made against the following contact set:
    // FIRST - LAST - EMAIL
    // ---------------------------------
    $rows = array(
      array('robin', 'hood', 'robin@example.com'),
      array('robin', 'hood', 'robin@example.com'),
      array('robin', 'hood', 'hood@example.com'),
      array('robin', 'dale', 'robin@example.com'),
      array('little', 'dale', 'dale@example.com'),
      array('little', 'dale', 'dale@example.com'),
      array('will', 'dale', 'dale@example.com'),
      array('will', 'dale', 'will@example.com'),
      array('will', 'dale', 'will@example.com'),
    );

    $count = 1;
    foreach ($rows as $row) {
      $contactParams = array(
        'first_name' => $row[0],
        'last_name' => $row[1],
        'email' => $row[2],
        'contact_type' => 'Individual',
        'version' => 3,
      );
      $contact = civicrm_api('contact', 'create', $contactParams);
      $this->_contactIds[$count++] = $contact['id'];

      // Put every fixture contact into the dedicated group.
      $grpParams = array(
        'contact_id' => $contact['id'],
        'group_id' => $this->_groupId,
        'version' => 3,
      );
      civicrm_api('group_contact', 'create', $grpParams);
    }
  }

  /**
   * Delete all created contacts and the group that held them.
   */
  public function deleteDupeContacts() {
    foreach ($this->_contactIds as $contactId) {
      $this->contactDelete($contactId);
    }

    // delete dupe group
    $params = array('id' => $this->_groupId, 'version' => 3);
    civicrm_api('group', 'delete', $params);
  }

  /**
   * Find the duplicates in the fixture group and load them into the prev/next cache.
   *
   * Shared set-up for the batch merge tests. Expects createDupeContacts() to
   * have been called. Asserts the number of contacts, the number of pairs
   * found by the finder, and that the cache holds the same number of pairs.
   *
   * @return array
   *   - rule_group_id: ID of the dedupe rule group that was looked up.
   *   - cache_key: prev/next cache key string for the rule group and group.
   *   - select: extra select columns to pass to PrevNextCache::retrieve().
   *   - pairs: the dupe pairs currently held in the prev/next cache.
   */
  private function findAndCacheDupes() {
    // verify that all contacts have been created separately
    $this->assertEquals(9, count($this->_contactIds), 'Check for number of contacts.');

    $dao = new CRM_Dedupe_DAO_RuleGroup();
    $dao->contact_type = 'Individual';
    $dao->name = 'IndividualSupervised';
    $dao->is_default = 1;
    $dao->find(TRUE);

    $foundDupes = CRM_Dedupe_Finder::dupesInGroup($dao->id, $this->_groupId);

    // -------------------------------------------------------------------------
    // Name and Email (reserved) Matches ( 3 pairs )
    // --------------------------------------------------------------------------
    // robin  - hood - robin@example.com
    // robin  - hood - robin@example.com
    // little - dale - dale@example.com
    // little - dale - dale@example.com
    // will   - dale - will@example.com
    // will   - dale - will@example.com
    // so 3 pairs for - first + last + mail
    $this->assertEquals(3, count($foundDupes), 'Check Individual-Supervised dupe rule for dupesInGroup().');

    // Run dedupe finder as the browser would
    $_SERVER['REQUEST_METHOD'] = 'GET'; //avoid invalid key error
    $object = new CRM_Contact_Page_DedupeFind();
    $object->set('gid', $this->_groupId);
    $object->set('rgid', $dao->id);
    $object->set('action', CRM_Core_Action::UPDATE);
    // Errors raised while running the page are deliberately suppressed, as before.
    @$object->run();

    // Retrieve pairs from prev next cache table
    $select = array('pn.is_selected' => 'is_selected');
    $cacheKeyString = "merge Individual_{$dao->id}_{$this->_groupId}";
    $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);

    $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');

    return array(
      'rule_group_id' => $dao->id,
      'cache_key' => $cacheKeyString,
      'select' => $select,
      'pairs' => $pnDupePairs,
    );
  }

  /**
   * Test the batch merge of only the pairs marked as selected.
   */
  public function testBatchMergeSelectedDuplicates() {
    $this->createDupeContacts();
    $cache = $this->findAndCacheDupes();
    $pnDupePairs = $cache['pairs'];

    // mark first two pairs as selected
    CRM_Core_DAO::singleValueQuery("UPDATE civicrm_prevnext_cache SET is_selected = 1 WHERE id IN ({$pnDupePairs[0]['prevnext_id']}, {$pnDupePairs[1]['prevnext_id']})");

    $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cache['cache_key'], NULL, NULL, 0, 0, $cache['select']);
    $this->assertEquals(1, $pnDupePairs[0]['is_selected'], 'Check if first record in dupe pairs is marked as selected.');
    // Check the second record (index 1); index 0 is already covered above.
    // NOTE(review): assumes retrieve() returns pairs in the same order as the first call - confirm.
    $this->assertEquals(1, $pnDupePairs[1]['is_selected'], 'Check if second record in dupe pairs is marked as selected.');

    // batch merge selected dupes
    $result = CRM_Dedupe_Merger::batchMerge($cache['rule_group_id'], $this->_groupId, 'safe', TRUE, 5, 1);
    $this->assertEquals(2, count($result['merged']), 'Check number of merged pairs.');

    // retrieve pairs from prev next cache table
    $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cache['cache_key'], NULL, NULL, 0, 0, $cache['select']);
    $this->assertEquals(1, count($pnDupePairs), 'Check number of remaining dupe pairs in prev next cache.');

    $this->deleteDupeContacts();
  }

  /**
   * Test the batch merge of all duplicate pairs.
   */
  public function testBatchMergeAllDuplicates() {
    $this->createDupeContacts();
    $cache = $this->findAndCacheDupes();

    // batch merge all dupes
    $result = CRM_Dedupe_Merger::batchMerge($cache['rule_group_id'], $this->_groupId, 'safe', TRUE, 5, 2);
    $this->assertEquals(3, count($result['merged']), 'Check number of merged pairs.');

    // retrieve pairs from prev next cache table
    $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cache['cache_key'], NULL, NULL, 0, 0, $cache['select']);
    $this->assertEquals(0, count($pnDupePairs), 'Check number of remaining dupe pairs in prev next cache.');

    $this->deleteDupeContacts();
  }

  /**
   * The goal of this function is to test that all required tables are returned.
   *
   * CRM_Dedupe_Merger::cidRefs() is compared against both the statically
   * maintained list and the list calculated from the schema.
   */
  public function testGetCidRefs() {
    $this->entityCustomGroupWithSingleFieldCreate(__FUNCTION__, 'Contacts');
    $this->assertEquals(array_merge($this->getStaticCIDRefs(), $this->getHackedInCIDRef()), CRM_Dedupe_Merger::cidRefs());
    $this->assertEquals(array_merge($this->getCalculatedCIDRefs(), $this->getHackedInCIDRef()), CRM_Dedupe_Merger::cidRefs());
  }

  /**
   * Get the list of not-really-cid-refs that are currently hacked in.
   *
   * This is hacked into getCIDs function.
   *
   * @return array
   *   Column names keyed by table name.
   */
  public function getHackedInCIDRef() {
    return array(
      'civicrm_entity_tag' => array('entity_id'),
    );
  }

  /**
   * Test function that gets duplicate pairs.
   *
   * It turns out there are 2 code paths retrieving this data so my initial focus is on ensuring
   * they match.
   */
  public function testGetMatches() {
    $this->setupMatchData();
    $pairs = CRM_Dedupe_Merger::getDuplicatePairs(
      1,
      NULL,
      TRUE,
      25,
      FALSE
    );

    // Only the two identical 'Mickey Mouse' contacts are expected to pair up.
    $this->assertEquals(array(
      0 => array(
        'srcID' => $this->contacts[0]['id'],
        'srcName' => 'Mr. Mickey Mouse II',
        'dstID' => $this->contacts[1]['id'],
        'dstName' => 'Mr. Mickey Mouse II',
        'weight' => 20,
        'canMerge' => TRUE,
      ),
    ), $pairs);
  }

  /**
   * Create the contacts used by testGetMatches().
   *
   * Each created contact is appended to $this->contacts as its fixture
   * values plus the 'id' returned by individualCreate().
   */
  public function setupMatchData() {
    $fixtures = array(
      array(
        'first_name' => 'Mickey',
        'last_name' => 'Mouse',
        'email' => 'mickey@mouse.com',
      ),
      array(
        'first_name' => 'Mickey',
        'last_name' => 'Mouse',
        'email' => 'mickey@mouse.com',
      ),
      array(
        'first_name' => 'Minnie',
        'last_name' => 'Mouse',
        'email' => 'mickey@mouse.com',
      ),
    );
    foreach ($fixtures as $fixture) {
      $contactID = $this->individualCreate($fixture);
      $this->contacts[] = array_merge($fixture, array('id' => $contactID));
    }
  }

  /**
   * Get the list of tables that refer to the CID.
   *
   * This is a statically maintained list (kept in this test).
   *
   * There is also a check against an automated list but having both seems to add extra stability to me. They do
   * not change often.
   *
   * @return array
   *   Lists of contact-referencing column names, keyed by table name.
   */
  public function getStaticCIDRefs() {
    return array(
      'civicrm_acl_cache' => array('contact_id'),
      'civicrm_acl_contact_cache' => array('user_id', 'contact_id'),
      'civicrm_action_log' => array('contact_id'),
      'civicrm_activity_contact' => array('contact_id'),
      'civicrm_address' => array('contact_id'),
      'civicrm_batch' => array('created_id', 'modified_id'),
      'civicrm_campaign' => array('created_id', 'last_modified_id'),
      'civicrm_case_contact' => array('contact_id'),
      'civicrm_contact' => array('primary_contact_id', 'employer_id'),
      'civicrm_contribution' => array('contact_id'),
      'civicrm_contribution_page' => array('created_id'),
      'civicrm_contribution_recur' => array('contact_id'),
      'civicrm_contribution_soft' => array('contact_id'),
      'civicrm_custom_group' => array('created_id'),
      'civicrm_dashboard_contact' => array('contact_id'),
      'civicrm_dedupe_exception' => array('contact_id1', 'contact_id2'),
      'civicrm_domain' => array('contact_id'),
      'civicrm_email' => array('contact_id'),
      'civicrm_event' => array('created_id'),
      'civicrm_event_carts' => array('user_id'),
      'civicrm_financial_account' => array('contact_id'),
      'civicrm_financial_item' => array('contact_id'),
      'civicrm_grant' => array('contact_id'),
      'civicrm_group' => array('created_id', 'modified_id'),
      'civicrm_group_contact' => array('contact_id'),
      'civicrm_group_contact_cache' => array('contact_id'),
      'civicrm_group_organization' => array('organization_id'),
      'civicrm_im' => array('contact_id'),
      'civicrm_log' => array('modified_id'),
      'civicrm_mailing' => array('created_id', 'scheduled_id', 'approver_id'),
      'civicrm_mailing_abtest' => array('created_id'),
      'civicrm_mailing_event_queue' => array('contact_id'),
      'civicrm_mailing_event_subscribe' => array('contact_id'),
      'civicrm_mailing_recipients' => array('contact_id'),
      'civicrm_membership' => array('contact_id'),
      'civicrm_membership_log' => array('modified_id'),
      'civicrm_membership_type' => array('member_of_contact_id'),
      'civicrm_note' => array('contact_id'),
      'civicrm_openid' => array('contact_id'),
      // transferred_to_contact_id: CRM-16761
      'civicrm_participant' => array('contact_id', 'transferred_to_contact_id'),
      'civicrm_payment_token' => array('contact_id', 'created_id'),
      'civicrm_pcp' => array('contact_id'),
      'civicrm_phone' => array('contact_id'),
      'civicrm_pledge' => array('contact_id'),
      'civicrm_print_label' => array('created_id'),
      'civicrm_relationship' => array('contact_id_a', 'contact_id_b'),
      'civicrm_report_instance' => array('created_id', 'owner_id'),
      'civicrm_setting' => array('contact_id', 'created_id'),
      'civicrm_subscription_history' => array('contact_id'),
      'civicrm_survey' => array('created_id', 'last_modified_id'),
      'civicrm_tag' => array('created_id'),
      'civicrm_uf_group' => array('created_id'),
      'civicrm_uf_match' => array('contact_id'),
      // NOTE(review): presumably the custom table created in testGetCidRefs() - confirm.
      'civicrm_value_testgetcidref_1' => array('entity_id'),
      'civicrm_website' => array('contact_id'),
    );
  }

  /**
   * Get a list of CIDs that is calculated off the schema.
   *
   * Note this is an expensive and table locking query. Should be safe in tests though.
   *
   * @return array
   *   Lists of column names that reference civicrm_contact.id, keyed by table name.
   */
  public function getCalculatedCIDRefs() {
    $cidRefs = array();
    $sql = "
      SELECT
        table_name,
        column_name
      FROM information_schema.key_column_usage
      WHERE
        referenced_table_schema = database() AND
        referenced_table_name = 'civicrm_contact' AND
        referenced_column_name = 'id';
      ";
    $dao = CRM_Core_DAO::executeQuery($sql);
    while ($dao->fetch()) {
      $cidRefs[$dao->table_name][] = $dao->column_name;
    }
    // Do specific re-ordering changes to make this the same as the ref validated one.
    // The above query orders by FK alphabetically.
    // There might be cleverer ways to do this but it shouldn't change much.
    $cidRefs['civicrm_contact'][0] = 'primary_contact_id';
    $cidRefs['civicrm_contact'][1] = 'employer_id';
    $cidRefs['civicrm_acl_contact_cache'][0] = 'user_id';
    $cidRefs['civicrm_acl_contact_cache'][1] = 'contact_id';
    $cidRefs['civicrm_mailing'][0] = 'created_id';
    $cidRefs['civicrm_mailing'][1] = 'scheduled_id';
    $cidRefs['civicrm_mailing'][2] = 'approver_id';
    return $cidRefs;
  }

}