4 * Class CRM_Dedupe_MergerTest
7 class CRM_Dedupe_MergerTest
extends CiviUnitTestCase
{
10 protected $_contactIds = array();
// Teardown hook: hands the contact, group-contact and group table names to quickCleanup().
12 public function tearDown() {
13 $this->quickCleanup(array('civicrm_contact', 'civicrm_group_contact', 'civicrm_group'));
// Fixture builder for the batch-merge tests.
// Creates the 'Test Dupe Merger Group' group and stores its id in $this->_groupId, then creates
// the nine Individual contacts listed below, records each id in $this->_contactIds and adds each
// contact to that group.
17 public function createDupeContacts() {
18 // create a group to hold contacts, so that dupe checks don't consider any other contacts in the DB
20 'name' => 'Test Dupe Merger Group',
21 'title' => 'Test Dupe Merger Group',
24 'visibility' => 'Public Pages',
27 $result = $this->callAPISuccess('group', 'create', $params);
28 $this->_groupId
= $result['id'];
32 // make dupe checks based on the following contact sets:
33 // FIRST - LAST - EMAIL
34 // ---------------------------------
35 // robin - hood - robin@example.com
36 // robin - hood - robin@example.com
37 // robin - hood - hood@example.com
38 // robin - dale - robin@example.com
39 // little - dale - dale@example.com
40 // little - dale - dale@example.com
41 // will - dale - dale@example.com
42 // will - dale - will@example.com
43 // will - dale - will@example.com
46 'first_name' => 'robin',
47 'last_name' => 'hood',
48 'email' => 'robin@example.com',
49 'contact_type' => 'Individual',
52 'first_name' => 'robin',
53 'last_name' => 'hood',
54 'email' => 'robin@example.com',
55 'contact_type' => 'Individual',
58 'first_name' => 'robin',
59 'last_name' => 'hood',
60 'email' => 'hood@example.com',
61 'contact_type' => 'Individual',
64 'first_name' => 'robin',
65 'last_name' => 'dale',
66 'email' => 'robin@example.com',
67 'contact_type' => 'Individual',
70 'first_name' => 'little',
71 'last_name' => 'dale',
72 'email' => 'dale@example.com',
73 'contact_type' => 'Individual',
76 'first_name' => 'little',
77 'last_name' => 'dale',
78 'email' => 'dale@example.com',
79 'contact_type' => 'Individual',
82 'first_name' => 'will',
83 'last_name' => 'dale',
84 'email' => 'dale@example.com',
85 'contact_type' => 'Individual',
88 'first_name' => 'will',
89 'last_name' => 'dale',
90 'email' => 'will@example.com',
91 'contact_type' => 'Individual',
94 'first_name' => 'will',
95 'last_name' => 'dale',
96 'email' => 'will@example.com',
97 'contact_type' => 'Individual',
// Create each contact through the raw civicrm_api() entry point; the API version is set on the
// params explicitly before the call.
// NOTE(review): the civicrm_api() result is used without an error check, unlike the
// callAPISuccess() calls elsewhere in this method - confirm that is intentional.
// NOTE(review): the initial value of $count is not visible in this excerpt.
102 foreach ($params as $param) {
103 $param['version'] = 3;
104 $contact = civicrm_api('contact', 'create', $param);
105 $this->_contactIds
[$count++
] = $contact['id'];
// Add the new contact to the holding group created above.
108 'contact_id' => $contact['id'],
109 'group_id' => $this->_groupId
,
112 $this->callAPISuccess('group_contact', 'create', $grpParams);
117 * Delete all created contacts.
// Calls contactDelete() for every id recorded in $this->_contactIds, then groupDelete() for the
// group id stored in $this->_groupId.
119 public function deleteDupeContacts() {
120 foreach ($this->_contactIds
as $contactId) {
121 $this->contactDelete($contactId);
123 $this->groupDelete($this->_groupId
);
127 * Test the batch merge of selected duplicate pairs.
// Batch-merge test for the "selected pairs only" path.
// Builds the nine-contact fixture, checks that the IndividualSupervised rule finds three duplicate
// pairs in the fixture group, marks the first two cached pairs as selected, runs batchMerge() and
// expects two merged pairs with one pair left in the prev-next cache.
129 public function testBatchMergeSelectedDuplicates() {
130 $this->createDupeContacts();
132 // verify that all contacts have been created separately
133 $this->assertEquals(count($this->_contactIds
), 9, 'Check for number of contacts.');
// Describe the rule group to use (default supervised rule for Individuals).
// NOTE(review): no statement that loads this DAO is visible in this excerpt, yet $dao->id is read
// below - confirm the lookup call exists in the full file.
135 $dao = new CRM_Dedupe_DAO_RuleGroup();
136 $dao->contact_type
= 'Individual';
137 $dao->name
= 'IndividualSupervised';
138 $dao->is_default
= 1;
141 $foundDupes = CRM_Dedupe_Finder
::dupesInGroup($dao->id
, $this->_groupId
);
143 // -------------------------------------------------------------------------
144 // Name and Email (reserved) Matches ( 3 pairs )
145 // --------------------------------------------------------------------------
146 // robin - hood - robin@example.com
147 // robin - hood - robin@example.com
148 // little - dale - dale@example.com
149 // little - dale - dale@example.com
150 // will - dale - will@example.com
151 // will - dale - will@example.com
152 // so 3 pairs for - first + last + mail
153 $this->assertEquals(count($foundDupes), 3, 'Check Individual-Supervised dupe rule for dupesInGroup().');
155 // Run dedupe finder as the browser would
156 $_SERVER['REQUEST_METHOD'] = 'GET'; //avoid invalid key error
157 $object = new CRM_Contact_Page_DedupeFind();
158 $object->set('gid', $this->_groupId
);
159 $object->set('rgid', $dao->id
);
160 $object->set('action', CRM_Core_Action
::UPDATE
);
161 $object->setEmbedded(TRUE);
164 // Retrieve pairs from prev next cache table
165 $select = array('pn.is_selected' => 'is_selected');
166 $cacheKeyString = CRM_Dedupe_Merger
::getMergeCacheKeyString($dao->id
, $this->_groupId
);
167 $pnDupePairs = CRM_Core_BAO_PrevNextCache
::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
169 $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');
171 // mark first two pairs as selected
172 CRM_Core_DAO
::singleValueQuery("UPDATE civicrm_prevnext_cache SET is_selected = 1 WHERE id IN ({$pnDupePairs[0]['prevnext_id']}, {$pnDupePairs[1]['prevnext_id']})");
// Re-read the cache and confirm BOTH updated rows now carry is_selected = 1.
174 $pnDupePairs = CRM_Core_BAO_PrevNextCache
::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
175 $this->assertEquals($pnDupePairs[0]['is_selected'], 1, 'Check if first record in dupe pairs is marked as selected.');
// Index 1, not 0: this assertion is about the second pair (it previously re-checked the first).
176 $this->assertEquals($pnDupePairs[1]['is_selected'], 1, 'Check if second record in dupe pairs is marked as selected.');
178 // batch merge selected dupes
// NOTE(review): the final argument (1 here, 2 in testBatchMergeAllDuplicates) presumably limits
// the merge to selected pairs - confirm against the batchMerge() signature.
179 $result = CRM_Dedupe_Merger
::batchMerge($dao->id
, $this->_groupId
, 'safe', TRUE, 5, 1);
180 $this->assertEquals(count($result['merged']), 2, 'Check number of merged pairs.');
182 // retrieve pairs from prev next cache table
183 $pnDupePairs = CRM_Core_BAO_PrevNextCache
::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
184 $this->assertEquals(count($pnDupePairs), 1, 'Check number of remaining dupe pairs in prev next cache.');
186 $this->deleteDupeContacts();
190 * Test the batch merge of all duplicate pairs.
// Batch-merge test for the "all pairs" path.
// Builds the nine-contact fixture, checks that the IndividualSupervised rule finds three duplicate
// pairs in the fixture group, runs batchMerge() without selecting any pair first and expects all
// three pairs merged with nothing left in the prev-next cache.
192 public function testBatchMergeAllDuplicates() {
193 $this->createDupeContacts();
195 // verify that all contacts have been created separately
196 $this->assertEquals(count($this->_contactIds
), 9, 'Check for number of contacts.');
// Describe the rule group to use (default supervised rule for Individuals).
// NOTE(review): no statement that loads this DAO is visible in this excerpt, yet $dao->id is read
// below - confirm the lookup call exists in the full file.
198 $dao = new CRM_Dedupe_DAO_RuleGroup();
199 $dao->contact_type
= 'Individual';
200 $dao->name
= 'IndividualSupervised';
201 $dao->is_default
= 1;
204 $foundDupes = CRM_Dedupe_Finder
::dupesInGroup($dao->id
, $this->_groupId
);
206 // -------------------------------------------------------------------------
207 // Name and Email (reserved) Matches ( 3 pairs )
208 // --------------------------------------------------------------------------
209 // robin - hood - robin@example.com
210 // robin - hood - robin@example.com
211 // little - dale - dale@example.com
212 // little - dale - dale@example.com
213 // will - dale - will@example.com
214 // will - dale - will@example.com
215 // so 3 pairs for - first + last + mail
216 $this->assertEquals(count($foundDupes), 3, 'Check Individual-Supervised dupe rule for dupesInGroup().');
218 // Run dedupe finder as the browser would
219 $_SERVER['REQUEST_METHOD'] = 'GET'; //avoid invalid key error
220 $object = new CRM_Contact_Page_DedupeFind();
221 $object->set('gid', $this->_groupId
);
222 $object->set('rgid', $dao->id
);
223 $object->set('action', CRM_Core_Action
::UPDATE
);
224 $object->setEmbedded(TRUE);
227 // Retrieve pairs from prev next cache table
228 $select = array('pn.is_selected' => 'is_selected');
229 $cacheKeyString = CRM_Dedupe_Merger
::getMergeCacheKeyString($dao->id
, $this->_groupId
);
230 $pnDupePairs = CRM_Core_BAO_PrevNextCache
::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
232 $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');
234 // batch merge all dupes
// NOTE(review): the final argument (2 here, 1 in testBatchMergeSelectedDuplicates) presumably
// means "do not filter on is_selected" - confirm against the batchMerge() signature.
235 $result = CRM_Dedupe_Merger
::batchMerge($dao->id
, $this->_groupId
, 'safe', TRUE, 5, 2);
236 $this->assertEquals(count($result['merged']), 3, 'Check number of merged pairs.');
238 // retrieve pairs from prev next cache table
239 $pnDupePairs = CRM_Core_BAO_PrevNextCache
::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
240 $this->assertEquals(count($pnDupePairs), 0, 'Check number of remaining dupe pairs in prev next cache.');
242 $this->deleteDupeContacts();
246 * The goal of this function is to test that all required tables are returned.
// Creates a custom group with a single field (named after this test function, extending
// 'Contacts') and then compares CRM_Dedupe_Merger::cidRefs() against two expected lists:
// the hand-maintained one and the one calculated from the schema, each merged with the
// hacked-in refs.
248 public function testGetCidRefs() {
249 $this->entityCustomGroupWithSingleFieldCreate(__FUNCTION__
, 'Contacts');
250 $this->assertEquals(array_merge($this->getStaticCIDRefs(), $this->getHackedInCIDRef()), CRM_Dedupe_Merger
::cidRefs());
251 $this->assertEquals(array_merge($this->getCalculatedCIDRefs(), $this->getHackedInCIDRef()), CRM_Dedupe_Merger
::cidRefs());
255 * Get the list of not-really-cid-refs that are currently hacked in.
257 * This is hacked into getCIDs function.
// Supplies the civicrm_entity_tag entry that testGetCidRefs() merges into both of its expected
// lists. The column list for that table is not visible in this excerpt.
261 public function getHackedInCIDRef() {
263 'civicrm_entity_tag' => array(
270 * Test function that gets duplicate pairs.
272 * It turns out there are 2 code paths retrieving this data so my initial focus is on ensuring
// Creates the match fixtures and checks the pairs returned by
// CRM_Dedupe_Merger::getDuplicatePairs() (arguments not visible in this excerpt).
// Two pairs are expected: contacts[1] -> contacts[0] and contacts[3] -> contacts[2].
275 public function testGetMatches() {
276 $this->setupMatchData();
277 $pairs = CRM_Dedupe_Merger
::getDuplicatePairs(
285 $this->assertEquals(array(
287 'srcID' => $this->contacts
[1]['id'],
288 'srcName' => 'Mr. Mickey Mouse II',
289 'dstID' => $this->contacts
[0]['id'],
290 'dstName' => 'Mr. Mickey Mouse II',
295 'srcID' => $this->contacts
[3]['id'],
296 'srcName' => 'Mr. Minnie Mouse II',
297 'dstID' => $this->contacts
[2]['id'],
298 'dstName' => 'Mr. Minnie Mouse II',
306 * Test function that gets organization pairs.
308 * Note the rule will match on organization_name OR email - hence lots of matches.
// Creates the match fixtures, looks up the Supervised rule group for Organizations and checks the
// pairs returned by CRM_Dedupe_Merger::getDuplicatePairs() (arguments not visible in this
// excerpt). Four pairs are expected among contacts[4]..contacts[7].
310 public function testGetOrganizationMatches() {
311 $this->setupMatchData();
312 $ruleGroups = $this->callAPISuccessGetSingle('RuleGroup', array('contact_type' => 'Organization', 'used' => 'Supervised'));
314 $pairs = CRM_Dedupe_Merger
::getDuplicatePairs(
322 $expectedPairs = array(
324 'srcID' => $this->contacts
[5]['id'],
325 'srcName' => 'Walt Disney Ltd',
326 'dstID' => $this->contacts
[4]['id'],
327 'dstName' => 'Walt Disney Ltd',
332 'srcID' => $this->contacts
[7]['id'],
333 'srcName' => 'Walt Disney',
334 'dstID' => $this->contacts
[6]['id'],
335 'dstName' => 'Walt Disney',
340 'srcID' => $this->contacts
[6]['id'],
341 'srcName' => 'Walt Disney',
342 'dstID' => $this->contacts
[4]['id'],
343 'dstName' => 'Walt Disney Ltd',
348 'srcID' => $this->contacts
[6]['id'],
349 'srcName' => 'Walt Disney',
350 'dstID' => $this->contacts
[5]['id'],
351 'dstName' => 'Walt Disney Ltd',
// Sort both lists with the same comparator so the assertion does not depend on the order in
// which the pairs were returned.
356 usort($pairs, array(__CLASS__
, 'compareDupes'));
357 usort($expectedPairs, array(__CLASS__
, 'compareDupes'));
358 $this->assertEquals($expectedPairs, $pairs);
362 * Function to sort $duplicate records in a stable way.
// usort() comparator for duplicate-pair rows.
// Compares srcName, dstName, srcID and dstID in that order; at the first field whose values
// differ (loose !=) it returns 1 when $a's value is the smaller one and -1 otherwise, so rows
// with larger values sort first.
// The result for rows equal on all four fields is not visible in this excerpt.
368 public static function compareDupes($a, $b) {
369 foreach (array('srcName', 'dstName', 'srcID', 'dstID') as $field) {
370 if ($a[$field] != $b[$field]) {
371 return ($a[$field] < $b[$field]) ?
1 : -1;
378 * Test function that gets organization duplicate pairs.
// Creates the match fixtures, looks up the Supervised rule group for Organizations, creates a
// group titled 'she-mice' containing only contacts[4], and checks the pairs returned by
// CRM_Dedupe_Merger::getDuplicatePairs() (arguments not visible in this excerpt).
// Both expected pairs have contacts[4] as their destination.
380 public function testGetOrganizationMatchesInGroup() {
381 $this->setupMatchData();
382 $ruleGroups = $this->callAPISuccessGetSingle('RuleGroup', array('contact_type' => 'Organization', 'used' => 'Supervised'));
384 $groupID = $this->groupCreate(array('title' => 'she-mice'));
386 $this->callAPISuccess('GroupContact', 'create', array('group_id' => $groupID, 'contact_id' => $this->contacts
[4]['id']));
388 $pairs = CRM_Dedupe_Merger
::getDuplicatePairs(
396 $this->assertEquals(array(
398 'srcID' => $this->contacts
[5]['id'],
399 'srcName' => 'Walt Disney Ltd',
400 'dstID' => $this->contacts
[4]['id'],
401 'dstName' => 'Walt Disney Ltd',
406 'srcID' => $this->contacts
[6]['id'],
407 'srcName' => 'Walt Disney',
408 'dstID' => $this->contacts
[4]['id'],
409 'dstName' => 'Walt Disney Ltd',
417 * Test function that gets duplicate pairs.
419 * It turns out there are 2 code paths retrieving this data so my initial focus is on ensuring
// Creates the match fixtures, creates a group titled 'she-mice' containing only contacts[3], and
// checks the pairs returned by CRM_Dedupe_Merger::getDuplicatePairs() (arguments not visible in
// this excerpt). A single pair is expected: contacts[3] -> contacts[2].
422 public function testGetMatchesInGroup() {
423 $this->setupMatchData();
425 $groupID = $this->groupCreate(array('title' => 'she-mice'));
427 $this->callAPISuccess('GroupContact', 'create', array('group_id' => $groupID, 'contact_id' => $this->contacts
[3]['id']));
429 $pairs = CRM_Dedupe_Merger
::getDuplicatePairs(
437 $this->assertEquals(array(
439 'srcID' => $this->contacts
[3]['id'],
440 'srcName' => 'Mr. Minnie Mouse II',
441 'dstID' => $this->contacts
[2]['id'],
442 'dstName' => 'Mr. Minnie Mouse II',
450 * Set up some contacts for our matching.
// Creates the contacts used by the getDuplicatePairs() tests: four individuals via
// individualCreate(), then four organizations via organizationCreate().
// Each created contact is appended to $this->contacts as its fixture array plus an 'id' key, in
// creation order (individuals first, then organizations).
// NOTE(review): the tests index $this->contacts as 0-3 for individuals and 4-7 for
// organizations, which assumes the property is empty when this method runs - confirm.
452 public function setupMatchData() {
455 'first_name' => 'Mickey',
456 'last_name' => 'Mouse',
457 'email' => 'mickey@mouse.com',
460 'first_name' => 'Mickey',
461 'last_name' => 'Mouse',
462 'email' => 'mickey@mouse.com',
465 'first_name' => 'Minnie',
466 'last_name' => 'Mouse',
467 'email' => 'mickey@mouse.com',
470 'first_name' => 'Minnie',
471 'last_name' => 'Mouse',
472 'email' => 'mickey@mouse.com',
475 foreach ($fixtures as $fixture) {
476 $contactID = $this->individualCreate($fixture);
477 $this->contacts
[] = array_merge($fixture, array('id' => $contactID));
479 $organizationFixtures = array(
481 'organization_name' => 'Walt Disney Ltd',
482 'email' => 'walt@disney.com',
485 'organization_name' => 'Walt Disney Ltd',
486 'email' => 'walt@disney.com',
489 'organization_name' => 'Walt Disney',
490 'email' => 'walt@disney.com',
493 'organization_name' => 'Walt Disney',
494 'email' => 'walter@disney.com',
497 foreach ($organizationFixtures as $fixture) {
498 $contactID = $this->organizationCreate($fixture);
499 $this->contacts
[] = array_merge($fixture, array('id' => $contactID));
505 * Get the list of tables that refer to the CID.
507 * This is a statically maintained list (kept in this test).
509 * There is also a check against an automated list but having both seems to add extra stability to me. They do
// Hand-maintained map keyed by table name, each entry holding an array of column names.
// testGetCidRefs() merges it with the hacked-in refs and compares the result with
// CRM_Dedupe_Merger::cidRefs(). Most column lists are not visible in this excerpt.
// The civicrm_value_testgetcidref_1 entry presumably corresponds to the custom group that
// testGetCidRefs() creates - confirm the generated table name.
512 public function getStaticCIDRefs() {
514 'civicrm_acl_cache' => array(
517 'civicrm_acl_contact_cache' => array(
521 'civicrm_action_log' => array(
524 'civicrm_activity_contact' => array(
527 'civicrm_address' => array(
530 'civicrm_batch' => array(
534 'civicrm_campaign' => array(
536 1 => 'last_modified_id',
538 'civicrm_case_contact' => array(
541 'civicrm_contact' => array(
542 0 => 'primary_contact_id',
545 'civicrm_contribution' => array(
548 'civicrm_contribution_page' => array(
551 'civicrm_contribution_recur' => array(
554 'civicrm_contribution_soft' => array(
557 'civicrm_custom_group' => array(
560 'civicrm_dashboard_contact' => array(
563 'civicrm_dedupe_exception' => array(
567 'civicrm_domain' => array(
570 'civicrm_email' => array(
573 'civicrm_event' => array(
576 'civicrm_event_carts' => array(
579 'civicrm_financial_account' => array(
582 'civicrm_financial_item' => array(
585 'civicrm_grant' => array(
588 'civicrm_group' => array(
592 'civicrm_group_contact' => array(
595 'civicrm_group_contact_cache' => array(
598 'civicrm_group_organization' => array(
599 0 => 'organization_id',
601 'civicrm_im' => array(
604 'civicrm_log' => array(
607 'civicrm_mailing' => array(
612 'civicrm_mailing_abtest' => array(
615 'civicrm_mailing_event_queue' => array(
618 'civicrm_mailing_event_subscribe' => array(
621 'civicrm_mailing_recipients' => array(
624 'civicrm_membership' => array(
627 'civicrm_membership_log' => array(
630 'civicrm_membership_type' => array(
631 0 => 'member_of_contact_id',
633 'civicrm_note' => array(
636 'civicrm_openid' => array(
639 'civicrm_participant' => array(
641 1 => 'transferred_to_contact_id', //CRM-16761
643 'civicrm_payment_token' => array(
647 'civicrm_pcp' => array(
650 'civicrm_phone' => array(
653 'civicrm_pledge' => array(
656 'civicrm_print_label' => array(
659 'civicrm_relationship' => array(
663 'civicrm_report_instance' => array(
667 'civicrm_setting' => array(
671 'civicrm_subscription_history' => array(
674 'civicrm_survey' => array(
676 1 => 'last_modified_id',
678 'civicrm_tag' => array(
681 'civicrm_uf_group' => array(
684 'civicrm_uf_match' => array(
687 'civicrm_value_testgetcidref_1' => array(
690 'civicrm_website' => array(
697 * Get a list of CIDs that is calculated off the schema.
699 * Note this is an expensive and table locking query. Should be safe in tests though.
701 public function getCalculatedCIDRefs() {
707 FROM information_schema.key_column_usage
709 referenced_table_schema = database() AND
710 referenced_table_name = 'civicrm_contact' AND
711 referenced_column_name = 'id';
713 $dao = CRM_Core_DAO
::executeQuery($sql);
714 while ($dao->fetch()) {
715 $cidRefs[$dao->table_name
][] = $dao->column_name
;
717 // Do specific re-ordering changes to make this the same as the ref validated one.
718 // The above query orders by FK alphabetically.
719 // There might be cleverer ways to do this but it shouldn't change much.
720 $cidRefs['civicrm_contact'][0] = 'primary_contact_id';
721 $cidRefs['civicrm_contact'][1] = 'employer_id';
722 $cidRefs['civicrm_acl_contact_cache'][0] = 'user_id';
723 $cidRefs['civicrm_acl_contact_cache'][1] = 'contact_id';
724 $cidRefs['civicrm_mailing'][0] = 'created_id';
725 $cidRefs['civicrm_mailing'][1] = 'scheduled_id';
726 $cidRefs['civicrm_mailing'][2] = 'approver_id';