Merge pull request #8645 from eileenmcnaughton/group_query
[civicrm-core.git] / tests / phpunit / CRM / Dedupe / MergerTest.php
1 <?php
2
3 /**
4 * Class CRM_Dedupe_MergerTest
5 * @group headless
6 */
7 class CRM_Dedupe_MergerTest extends CiviUnitTestCase {
8
/**
 * ID of the group created by createDupeContacts().
 *
 * The dedupe searches in the batch merge tests are restricted to this group.
 *
 * @var int
 */
protected $_groupId;

/**
 * IDs of the contacts created by createDupeContacts(), keyed from 1 in creation order.
 *
 * @var array
 */
protected $_contactIds = array();

/**
 * Contacts created by setupMatchData().
 *
 * Each entry holds the fixture parameters plus the created contact's 'id'.
 * Declared explicitly so the tests do not depend on a dynamically created
 * property.
 *
 * @var array
 */
protected $contacts = array();
11
/**
 * Clean up after each test.
 *
 * Hands the contact, group-contact and group tables to quickCleanup() and
 * then defers to the parent tear-down.
 */
public function tearDown() {
  $tables = array(
    'civicrm_contact',
    'civicrm_group_contact',
    'civicrm_group',
  );
  $this->quickCleanup($tables);
  parent::tearDown();
}
16
/**
 * Create a group containing nine Individual contacts with overlapping names and emails.
 *
 * The created group ID is stored in $this->_groupId and the contact IDs are
 * appended to $this->_contactIds, keyed from 1 in creation order. Every contact
 * is added to the group so that dupe checks restricted to the group do not
 * consider any other contacts in the DB.
 */
public function createDupeContacts() {
  // Create a group to hold contacts, so that dupe checks don't consider any other contacts in the DB.
  $group = $this->callAPISuccess('group', 'create', array(
    'name' => 'Test Dupe Merger Group',
    'title' => 'Test Dupe Merger Group',
    'domain_id' => 1,
    'is_active' => 1,
    'visibility' => 'Public Pages',
  ));
  $this->_groupId = $group['id'];

  // Contact data set. Dupe checks are made against the following contacts:
  // FIRST - LAST - EMAIL
  // ---------------------------------
  // robin - hood - robin@example.com
  // robin - hood - robin@example.com
  // robin - hood - hood@example.com
  // robin - dale - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will - dale - dale@example.com
  // will - dale - will@example.com
  // will - dale - will@example.com
  $fixtures = array(
    array('robin', 'hood', 'robin@example.com'),
    array('robin', 'hood', 'robin@example.com'),
    array('robin', 'hood', 'hood@example.com'),
    array('robin', 'dale', 'robin@example.com'),
    array('little', 'dale', 'dale@example.com'),
    array('little', 'dale', 'dale@example.com'),
    array('will', 'dale', 'dale@example.com'),
    array('will', 'dale', 'will@example.com'),
    array('will', 'dale', 'will@example.com'),
  );

  $count = 1;
  foreach ($fixtures as $fixture) {
    list($firstName, $lastName, $email) = $fixture;

    // Use callAPISuccess() like the other API calls in this method, rather than
    // calling civicrm_api() and reading 'id' from an unchecked result, so a
    // failed create is reported here instead of surfacing later as a missing id.
    $contact = $this->callAPISuccess('contact', 'create', array(
      'first_name' => $firstName,
      'last_name' => $lastName,
      'email' => $email,
      'contact_type' => 'Individual',
      'version' => 3,
    ));
    $this->_contactIds[$count++] = $contact['id'];

    $this->callAPISuccess('group_contact', 'create', array(
      'contact_id' => $contact['id'],
      'group_id' => $this->_groupId,
      'version' => 3,
    ));
  }
}
115
/**
 * Delete every contact recorded in $this->_contactIds, then the group in $this->_groupId.
 */
public function deleteDupeContacts() {
  // Work on a copy so the recorded IDs themselves are left untouched.
  $remainingIds = $this->_contactIds;
  while ($remainingIds) {
    $this->contactDelete(array_shift($remainingIds));
  }
  $this->groupDelete($this->_groupId);
}
125
/**
 * Test batch merging of only the dupe pairs flagged as selected.
 *
 * Creates the dupe contact set, runs the dedupe finder page to fill the
 * prev-next cache, flags the first two cached pairs as selected, runs the
 * batch merge and checks that two pairs were merged and one pair remains
 * in the cache.
 */
public function testBatchMergeSelectedDuplicates() {
  $this->createDupeContacts();

  // Verify that all contacts have been created separately.
  $this->assertEquals(9, count($this->_contactIds), 'Check for number of contacts.');

  $dao = new CRM_Dedupe_DAO_RuleGroup();
  $dao->contact_type = 'Individual';
  $dao->name = 'IndividualSupervised';
  $dao->is_default = 1;
  $dao->find(TRUE);

  $foundDupes = CRM_Dedupe_Finder::dupesInGroup($dao->id, $this->_groupId);

  // -------------------------------------------------------------------------
  // Name and Email (reserved) Matches ( 3 pairs )
  // --------------------------------------------------------------------------
  // robin - hood - robin@example.com
  // robin - hood - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will - dale - will@example.com
  // will - dale - will@example.com
  // so 3 pairs for - first + last + mail
  $this->assertEquals(3, count($foundDupes), 'Check Individual-Supervised dupe rule for dupesInGroup().');

  // Run dedupe finder as the browser would.
  // Setting the request method avoids an invalid key error.
  $_SERVER['REQUEST_METHOD'] = 'GET';
  $object = new CRM_Contact_Page_DedupeFind();
  $object->set('gid', $this->_groupId);
  $object->set('rgid', $dao->id);
  $object->set('action', CRM_Core_Action::UPDATE);
  $object->setEmbedded(TRUE);
  @$object->run();

  // Retrieve pairs from prev next cache table.
  $select = array('pn.is_selected' => 'is_selected');
  $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId);
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);

  $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');

  // Mark first two pairs as selected.
  CRM_Core_DAO::singleValueQuery("UPDATE civicrm_prevnext_cache SET is_selected = 1 WHERE id IN ({$pnDupePairs[0]['prevnext_id']}, {$pnDupePairs[1]['prevnext_id']})");

  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertEquals(1, $pnDupePairs[0]['is_selected'], 'Check if first record in dupe pairs is marked as selected.');
  // Check the second pair (index 1); index 0 was already verified above.
  $this->assertEquals(1, $pnDupePairs[1]['is_selected'], 'Check if second record in dupe pairs is marked as selected.');

  // Batch merge selected dupes.
  $result = CRM_Dedupe_Merger::batchMerge($dao->id, $this->_groupId, 'safe', TRUE, 5, 1);
  $this->assertEquals(2, count($result['merged']), 'Check number of merged pairs.');

  // Retrieve pairs from prev next cache table.
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertEquals(1, count($pnDupePairs), 'Check number of remaining dupe pairs in prev next cache.');

  $this->deleteDupeContacts();
}
188
/**
 * Test batch merging of all dupe pairs found in the group.
 *
 * Creates the dupe contact set, runs the dedupe finder page to fill the
 * prev-next cache, runs the batch merge without flagging any pair as
 * selected and checks that all three pairs were merged and none remain in
 * the cache.
 */
public function testBatchMergeAllDuplicates() {
  $this->createDupeContacts();

  // Verify that all contacts have been created separately.
  $this->assertEquals(9, count($this->_contactIds), 'Check for number of contacts.');

  $dao = new CRM_Dedupe_DAO_RuleGroup();
  $dao->contact_type = 'Individual';
  $dao->name = 'IndividualSupervised';
  $dao->is_default = 1;
  $dao->find(TRUE);

  $foundDupes = CRM_Dedupe_Finder::dupesInGroup($dao->id, $this->_groupId);

  // -------------------------------------------------------------------------
  // Name and Email (reserved) Matches ( 3 pairs )
  // --------------------------------------------------------------------------
  // robin - hood - robin@example.com
  // robin - hood - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will - dale - will@example.com
  // will - dale - will@example.com
  // so 3 pairs for - first + last + mail
  $this->assertEquals(3, count($foundDupes), 'Check Individual-Supervised dupe rule for dupesInGroup().');

  // Run dedupe finder as the browser would.
  // Setting the request method avoids an invalid key error.
  $_SERVER['REQUEST_METHOD'] = 'GET';
  $object = new CRM_Contact_Page_DedupeFind();
  $object->set('gid', $this->_groupId);
  $object->set('rgid', $dao->id);
  $object->set('action', CRM_Core_Action::UPDATE);
  $object->setEmbedded(TRUE);
  @$object->run();

  // Retrieve pairs from prev next cache table.
  $select = array('pn.is_selected' => 'is_selected');
  $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId);
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);

  $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');

  // Batch merge all dupes.
  // NOTE(review): the final argument is 2 here and 1 in the selected-only test;
  // presumably it controls which cached pairs are merged - confirm against
  // CRM_Dedupe_Merger::batchMerge().
  $result = CRM_Dedupe_Merger::batchMerge($dao->id, $this->_groupId, 'safe', TRUE, 5, 2);
  $this->assertEquals(3, count($result['merged']), 'Check number of merged pairs.');

  // Retrieve pairs from prev next cache table.
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertEquals(0, count($pnDupePairs), 'Check number of remaining dupe pairs in prev next cache.');

  $this->deleteDupeContacts();
}
244
/**
 * Check that CRM_Dedupe_Merger::cidRefs() returns every expected table.
 *
 * The result is compared twice: once against the hand-maintained list and
 * once against the list calculated from the schema, each combined with the
 * hacked-in entity tag reference.
 */
public function testGetCidRefs() {
  $this->entityCustomGroupWithSingleFieldCreate(__FUNCTION__, 'Contacts');

  $expectedFromStaticList = array_merge($this->getStaticCIDRefs(), $this->getHackedInCIDRef());
  $this->assertEquals($expectedFromStaticList, CRM_Dedupe_Merger::cidRefs());

  $expectedFromSchema = array_merge($this->getCalculatedCIDRefs(), $this->getHackedInCIDRef());
  $this->assertEquals($expectedFromSchema, CRM_Dedupe_Merger::cidRefs());
}
253
/**
 * Get the not-really-cid-refs that are currently hacked in.
 *
 * testGetCidRefs() merges this entry into both expected lists before
 * comparing them with CRM_Dedupe_Merger::cidRefs().
 *
 * @return array
 *   Column names keyed by table name.
 */
public function getHackedInCIDRef() {
  $hackedIn = array();
  $hackedIn['civicrm_entity_tag'] = array('entity_id');
  return $hackedIn;
}
268
/**
 * Test getDuplicatePairs() for the individual fixtures without a group filter.
 *
 * Uses rule group ID 1 and expects the two Mickey contacts and the two
 * Minnie contacts to be returned as pairs.
 */
public function testGetMatches() {
  $this->setupMatchData();

  $actualPairs = CRM_Dedupe_Merger::getDuplicatePairs(1, NULL, TRUE, 25, FALSE);

  $mickeyPair = array(
    'srcID' => $this->contacts[1]['id'],
    'srcName' => 'Mr. Mickey Mouse II',
    'dstID' => $this->contacts[0]['id'],
    'dstName' => 'Mr. Mickey Mouse II',
    'weight' => 20,
    'canMerge' => TRUE,
  );
  $minniePair = array(
    'srcID' => $this->contacts[3]['id'],
    'srcName' => 'Mr. Minnie Mouse II',
    'dstID' => $this->contacts[2]['id'],
    'dstName' => 'Mr. Minnie Mouse II',
    'weight' => 20,
    'canMerge' => TRUE,
  );

  $this->assertEquals(array($mickeyPair, $minniePair), $actualPairs);
}
304
/**
 * Test getDuplicatePairs() for the organization fixtures without a group filter.
 *
 * Note the rule will match on organization_name OR email - hence lots of matches.
 */
public function testGetOrganizationMatches() {
  $this->setupMatchData();
  $ruleGroup = $this->callAPISuccessGetSingle('RuleGroup', array(
    'contact_type' => 'Organization',
    'used' => 'Supervised',
  ));

  $actualPairs = CRM_Dedupe_Merger::getDuplicatePairs($ruleGroup['id'], NULL, TRUE, 25, FALSE);

  $expectedPairs = array();
  $expectedPairs[] = array(
    'srcID' => $this->contacts[5]['id'],
    'srcName' => 'Walt Disney Ltd',
    'dstID' => $this->contacts[4]['id'],
    'dstName' => 'Walt Disney Ltd',
    'weight' => 20,
    'canMerge' => TRUE,
  );
  $expectedPairs[] = array(
    'srcID' => $this->contacts[7]['id'],
    'srcName' => 'Walt Disney',
    'dstID' => $this->contacts[6]['id'],
    'dstName' => 'Walt Disney',
    'weight' => 10,
    'canMerge' => TRUE,
  );
  $expectedPairs[] = array(
    'srcID' => $this->contacts[6]['id'],
    'srcName' => 'Walt Disney',
    'dstID' => $this->contacts[4]['id'],
    'dstName' => 'Walt Disney Ltd',
    'weight' => 10,
    'canMerge' => TRUE,
  );
  $expectedPairs[] = array(
    'srcID' => $this->contacts[6]['id'],
    'srcName' => 'Walt Disney',
    'dstID' => $this->contacts[5]['id'],
    'dstName' => 'Walt Disney Ltd',
    'weight' => 10,
    'canMerge' => TRUE,
  );

  $this->assertEquals($expectedPairs, $actualPairs);
}
357
/**
 * Test getDuplicatePairs() for organizations when restricted to a group.
 *
 * Only the first organization fixture is added to the group, and only pairs
 * that include that contact are expected back.
 */
public function testGetOrganizationMatchesInGroup() {
  $this->setupMatchData();
  $ruleGroup = $this->callAPISuccessGetSingle('RuleGroup', array(
    'contact_type' => 'Organization',
    'used' => 'Supervised',
  ));

  $groupID = $this->groupCreate(array('title' => 'she-mice'));
  $membership = array(
    'group_id' => $groupID,
    'contact_id' => $this->contacts[4]['id'],
  );
  $this->callAPISuccess('GroupContact', 'create', $membership);

  $actualPairs = CRM_Dedupe_Merger::getDuplicatePairs($ruleGroup['id'], $groupID, TRUE, 25, FALSE);

  $expectedPairs = array(
    array(
      'srcID' => $this->contacts[5]['id'],
      'srcName' => 'Walt Disney Ltd',
      'dstID' => $this->contacts[4]['id'],
      'dstName' => 'Walt Disney Ltd',
      'weight' => 20,
      'canMerge' => TRUE,
    ),
    array(
      'srcID' => $this->contacts[6]['id'],
      'srcName' => 'Walt Disney',
      'dstID' => $this->contacts[4]['id'],
      'dstName' => 'Walt Disney Ltd',
      'weight' => 10,
      'canMerge' => TRUE,
    ),
  );
  $this->assertEquals($expectedPairs, $actualPairs);
}
396
/**
 * Test getDuplicatePairs() for individuals when restricted to a group.
 *
 * Only the second Minnie contact is added to the group, and only the
 * Minnie pair is expected back.
 */
public function testGetMatchesInGroup() {
  $this->setupMatchData();

  $groupID = $this->groupCreate(array('title' => 'she-mice'));
  $membership = array(
    'group_id' => $groupID,
    'contact_id' => $this->contacts[3]['id'],
  );
  $this->callAPISuccess('GroupContact', 'create', $membership);

  $actualPairs = CRM_Dedupe_Merger::getDuplicatePairs(1, $groupID, TRUE, 25, FALSE);

  $minniePair = array(
    'srcID' => $this->contacts[3]['id'],
    'srcName' => 'Mr. Minnie Mouse II',
    'dstID' => $this->contacts[2]['id'],
    'dstName' => 'Mr. Minnie Mouse II',
    'weight' => 20,
    'canMerge' => TRUE,
  );
  $this->assertEquals(array($minniePair), $actualPairs);
}
429
/**
 * Create the individual and organization contacts used by the matching tests.
 *
 * Four individuals are created first, then four organizations. Each one is
 * appended to $this->contacts as its fixture parameters plus the new
 * contact's 'id', so indexes 0-3 are individuals and 4-7 organizations.
 */
public function setupMatchData() {
  $individuals = array(
    array('first_name' => 'Mickey', 'last_name' => 'Mouse', 'email' => 'mickey@mouse.com'),
    array('first_name' => 'Mickey', 'last_name' => 'Mouse', 'email' => 'mickey@mouse.com'),
    array('first_name' => 'Minnie', 'last_name' => 'Mouse', 'email' => 'mickey@mouse.com'),
    array('first_name' => 'Minnie', 'last_name' => 'Mouse', 'email' => 'mickey@mouse.com'),
  );
  foreach ($individuals as $individual) {
    $record = $individual;
    $record['id'] = $this->individualCreate($individual);
    $this->contacts[] = $record;
  }

  $organizations = array(
    array('organization_name' => 'Walt Disney Ltd', 'email' => 'walt@disney.com'),
    array('organization_name' => 'Walt Disney Ltd', 'email' => 'walt@disney.com'),
    array('organization_name' => 'Walt Disney', 'email' => 'walt@disney.com'),
    array('organization_name' => 'Walt Disney', 'email' => 'walter@disney.com'),
  );
  foreach ($organizations as $organization) {
    $record = $organization;
    $record['id'] = $this->organizationCreate($organization);
    $this->contacts[] = $record;
  }
}
483
484
/**
 * Get the hand-maintained list of tables and columns that refer to the contact ID.
 *
 * This list is maintained statically in this test. It is checked alongside
 * the list calculated from the schema; keeping both adds stability, and
 * neither changes often.
 *
 * @return array
 *   Lists of column names keyed by table name.
 */
public function getStaticCIDRefs() {
  return array(
    'civicrm_acl_cache' => array('contact_id'),
    'civicrm_acl_contact_cache' => array('user_id', 'contact_id'),
    'civicrm_action_log' => array('contact_id'),
    'civicrm_activity_contact' => array('contact_id'),
    'civicrm_address' => array('contact_id'),
    'civicrm_batch' => array('created_id', 'modified_id'),
    'civicrm_campaign' => array('created_id', 'last_modified_id'),
    'civicrm_case_contact' => array('contact_id'),
    'civicrm_contact' => array('primary_contact_id', 'employer_id'),
    'civicrm_contribution' => array('contact_id'),
    'civicrm_contribution_page' => array('created_id'),
    'civicrm_contribution_recur' => array('contact_id'),
    'civicrm_contribution_soft' => array('contact_id'),
    'civicrm_custom_group' => array('created_id'),
    'civicrm_dashboard_contact' => array('contact_id'),
    'civicrm_dedupe_exception' => array('contact_id1', 'contact_id2'),
    'civicrm_domain' => array('contact_id'),
    'civicrm_email' => array('contact_id'),
    'civicrm_event' => array('created_id'),
    'civicrm_event_carts' => array('user_id'),
    'civicrm_financial_account' => array('contact_id'),
    'civicrm_financial_item' => array('contact_id'),
    'civicrm_grant' => array('contact_id'),
    'civicrm_group' => array('created_id', 'modified_id'),
    'civicrm_group_contact' => array('contact_id'),
    'civicrm_group_contact_cache' => array('contact_id'),
    'civicrm_group_organization' => array('organization_id'),
    'civicrm_im' => array('contact_id'),
    'civicrm_log' => array('modified_id'),
    'civicrm_mailing' => array('created_id', 'scheduled_id', 'approver_id'),
    'civicrm_mailing_abtest' => array('created_id'),
    'civicrm_mailing_event_queue' => array('contact_id'),
    'civicrm_mailing_event_subscribe' => array('contact_id'),
    'civicrm_mailing_recipients' => array('contact_id'),
    'civicrm_membership' => array('contact_id'),
    'civicrm_membership_log' => array('modified_id'),
    'civicrm_membership_type' => array('member_of_contact_id'),
    'civicrm_note' => array('contact_id'),
    'civicrm_openid' => array('contact_id'),
    // The transferred_to_contact_id column was added for CRM-16761.
    'civicrm_participant' => array('contact_id', 'transferred_to_contact_id'),
    'civicrm_payment_token' => array('contact_id', 'created_id'),
    'civicrm_pcp' => array('contact_id'),
    'civicrm_phone' => array('contact_id'),
    'civicrm_pledge' => array('contact_id'),
    'civicrm_print_label' => array('created_id'),
    'civicrm_relationship' => array('contact_id_a', 'contact_id_b'),
    'civicrm_report_instance' => array('created_id', 'owner_id'),
    'civicrm_setting' => array('contact_id', 'created_id'),
    'civicrm_subscription_history' => array('contact_id'),
    'civicrm_survey' => array('created_id', 'last_modified_id'),
    'civicrm_tag' => array('created_id'),
    'civicrm_uf_group' => array('created_id'),
    'civicrm_uf_match' => array('contact_id'),
    'civicrm_value_testgetcidref_1' => array('entity_id'),
    'civicrm_website' => array('contact_id'),
  );
}
676
/**
 * Get the list of contact ID references as calculated from the schema.
 *
 * Reads information_schema.key_column_usage for every column in the current
 * database that references civicrm_contact.id and groups the column names
 * by table name.
 *
 * Note this is an expensive and table locking query. Should be safe in tests though.
 *
 * @return array
 *   Lists of column names keyed by table name.
 */
public function getCalculatedCIDRefs() {
  $query = "
    SELECT
      table_name,
      column_name
    FROM information_schema.key_column_usage
    WHERE
      referenced_table_schema = database() AND
      referenced_table_name = 'civicrm_contact' AND
      referenced_column_name = 'id';
  ";
  $rows = CRM_Core_DAO::executeQuery($query);

  $refsByTable = array();
  while ($rows->fetch()) {
    $refsByTable[$rows->table_name][] = $rows->column_name;
  }

  // Do specific re-ordering changes to make this the same as the ref validated one.
  // The above query orders by FK alphabetically.
  // Each listed position is assigned individually, so any other positions
  // the query produced for these tables are left as they are.
  $positionOverrides = array(
    'civicrm_contact' => array('primary_contact_id', 'employer_id'),
    'civicrm_acl_contact_cache' => array('user_id', 'contact_id'),
    'civicrm_mailing' => array('created_id', 'scheduled_id', 'approver_id'),
  );
  foreach ($positionOverrides as $table => $columns) {
    foreach ($columns as $position => $column) {
      $refsByTable[$table][$position] = $column;
    }
  }

  return $refsByTable;
}
710
711 }