Add tests for getDuplicatePairs function
[civicrm-core.git] / tests / phpunit / CRM / Dedupe / MergerTest.php
1 <?php
2
3 /**
4 * Class CRM_Dedupe_MergerTest
5 * @group headless
6 */
7 class CRM_Dedupe_MergerTest extends CiviUnitTestCase {
8
/**
 * ID of the group created by createDupeContacts() to hold the dupe contacts.
 *
 * @var int
 */
protected $_groupId;

/**
 * IDs of the contacts created by createDupeContacts(), keyed from 1 in
 * creation order.
 *
 * @var array
 */
protected $_contactIds = array();

/**
 * Contacts created by setupMatchData().
 *
 * Each entry is the fixture parameters plus an 'id' key. Declared explicitly
 * so the tests do not rely on a dynamically created property; it is public
 * because that is the visibility the dynamic property had.
 *
 * @var array
 */
public $contacts = array();
11
/**
 * Clean up the contact and group tables, then run the parent tear-down.
 */
public function tearDown() {
  $tables = array('civicrm_contact', 'civicrm_group_contact', 'civicrm_group');
  $this->quickCleanup($tables);
  parent::tearDown();
}
16
/**
 * Create a dedicated group and nine Individual contacts containing duplicates.
 *
 * The group ID is stored in $this->_groupId and the contact IDs are stored in
 * $this->_contactIds, keyed from 1 in creation order. Every contact is added
 * to the group.
 *
 * API calls go through callAPISuccess() (the helper already used by
 * testGetMatchesInGroup()) so that a failed create fails the test immediately
 * instead of leaving IDs undefined.
 */
public function createDupeContacts() {
  // Create a group to hold the contacts, so that dupe checks don't consider
  // any other contacts in the DB.
  $group = $this->callAPISuccess('group', 'create', array(
    'name' => 'Test Dupe Merger Group',
    'title' => 'Test Dupe Merger Group',
    'domain_id' => 1,
    'is_active' => 1,
    'visibility' => 'Public Pages',
  ));
  $this->_groupId = $group['id'];

  // Contact data set. Dupe checks are based on the following contact sets:
  // FIRST  - LAST - EMAIL
  // ---------------------------------
  // robin  - hood - robin@example.com
  // robin  - hood - robin@example.com
  // robin  - hood - hood@example.com
  // robin  - dale - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will   - dale - dale@example.com
  // will   - dale - will@example.com
  // will   - dale - will@example.com
  $rows = array(
    array('robin', 'hood', 'robin@example.com'),
    array('robin', 'hood', 'robin@example.com'),
    array('robin', 'hood', 'hood@example.com'),
    array('robin', 'dale', 'robin@example.com'),
    array('little', 'dale', 'dale@example.com'),
    array('little', 'dale', 'dale@example.com'),
    array('will', 'dale', 'dale@example.com'),
    array('will', 'dale', 'will@example.com'),
    array('will', 'dale', 'will@example.com'),
  );

  // Keys start at 1, matching the numbering of the table above.
  $count = 1;
  foreach ($rows as $row) {
    $contact = $this->callAPISuccess('contact', 'create', array(
      'first_name' => $row[0],
      'last_name' => $row[1],
      'email' => $row[2],
      'contact_type' => 'Individual',
    ));
    $this->_contactIds[$count++] = $contact['id'];

    $this->callAPISuccess('group_contact', 'create', array(
      'contact_id' => $contact['id'],
      'group_id' => $this->_groupId,
    ));
  }
}
116
/**
 * Delete every contact recorded in $this->_contactIds, then the dupe group.
 */
public function deleteDupeContacts() {
  foreach ($this->_contactIds as $createdContactId) {
    $this->contactDelete($createdContactId);
  }

  // Remove the group that held the dupe contacts.
  civicrm_api('group', 'delete', array('id' => $this->_groupId, 'version' => 3));
}
129
/**
 * Test batch merging of only the duplicate pairs flagged as selected.
 *
 * Creates the dupe fixture, populates the prev/next cache through the dedupe
 * find page, flags the first two cached pairs as selected, batch merges and
 * then checks that two pairs were merged and one pair remains in the cache.
 */
public function testBatchMergeSelectedDuplicates() {
  $this->createDupeContacts();

  // Verify that all contacts have been created separately.
  $this->assertCount(9, $this->_contactIds, 'Check for number of contacts.');

  $dao = new CRM_Dedupe_DAO_RuleGroup();
  $dao->contact_type = 'Individual';
  $dao->name = 'IndividualSupervised';
  $dao->is_default = 1;
  $dao->find(TRUE);

  $foundDupes = CRM_Dedupe_Finder::dupesInGroup($dao->id, $this->_groupId);

  // -------------------------------------------------------------------------
  // Name and Email (reserved) Matches ( 3 pairs )
  // -------------------------------------------------------------------------
  // robin  - hood - robin@example.com
  // robin  - hood - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will   - dale - will@example.com
  // will   - dale - will@example.com
  // so 3 pairs for - first + last + mail
  $this->assertCount(3, $foundDupes, 'Check Individual-Supervised dupe rule for dupesInGroup().');

  // Run dedupe finder as the browser would.
  // Setting the request method avoids an invalid key error.
  $_SERVER['REQUEST_METHOD'] = 'GET';
  $object = new CRM_Contact_Page_DedupeFind();
  $object->set('gid', $this->_groupId);
  $object->set('rgid', $dao->id);
  $object->set('action', CRM_Core_Action::UPDATE);
  // NOTE(review): errors from run() are deliberately suppressed here, as in
  // the original test - confirm which notices the page emits before removing.
  @$object->run();

  // Retrieve pairs from prev next cache table.
  $select = array('pn.is_selected' => 'is_selected');
  $cacheKeyString = "merge Individual_{$dao->id}_{$this->_groupId}";
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);

  $this->assertCount(count($foundDupes), $pnDupePairs, 'Check number of dupe pairs in prev next cache.');

  // Mark first two pairs as selected.
  CRM_Core_DAO::singleValueQuery("UPDATE civicrm_prevnext_cache SET is_selected = 1 WHERE id IN ({$pnDupePairs[0]['prevnext_id']}, {$pnDupePairs[1]['prevnext_id']})");

  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertEquals(1, $pnDupePairs[0]['is_selected'], 'Check if first record in dupe pairs is marked as selected.');
  // The second assertion must look at the second pair (index 1), not the first again.
  $this->assertEquals(1, $pnDupePairs[1]['is_selected'], 'Check if second record in dupe pairs is marked as selected.');

  // Batch merge selected dupes.
  // NOTE(review): the final argument (1) presumably limits the merge to pairs
  // flagged is_selected - confirm against CRM_Dedupe_Merger::batchMerge().
  $result = CRM_Dedupe_Merger::batchMerge($dao->id, $this->_groupId, 'safe', TRUE, 5, 1);
  $this->assertCount(2, $result['merged'], 'Check number of merged pairs.');

  // Retrieve pairs from prev next cache table.
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertCount(1, $pnDupePairs, 'Check number of remaining dupe pairs in prev next cache.');

  $this->deleteDupeContacts();
}
191
/**
 * Test batch merging of all duplicate pairs, selected or not.
 *
 * Creates the dupe fixture, populates the prev/next cache through the dedupe
 * find page, batch merges and then checks that all three pairs were merged
 * and that no pairs remain in the cache.
 */
public function testBatchMergeAllDuplicates() {
  $this->createDupeContacts();

  // Verify that all contacts have been created separately.
  $this->assertCount(9, $this->_contactIds, 'Check for number of contacts.');

  $dao = new CRM_Dedupe_DAO_RuleGroup();
  $dao->contact_type = 'Individual';
  $dao->name = 'IndividualSupervised';
  $dao->is_default = 1;
  $dao->find(TRUE);

  $foundDupes = CRM_Dedupe_Finder::dupesInGroup($dao->id, $this->_groupId);

  // -------------------------------------------------------------------------
  // Name and Email (reserved) Matches ( 3 pairs )
  // -------------------------------------------------------------------------
  // robin  - hood - robin@example.com
  // robin  - hood - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will   - dale - will@example.com
  // will   - dale - will@example.com
  // so 3 pairs for - first + last + mail
  $this->assertCount(3, $foundDupes, 'Check Individual-Supervised dupe rule for dupesInGroup().');

  // Run dedupe finder as the browser would.
  // Setting the request method avoids an invalid key error.
  $_SERVER['REQUEST_METHOD'] = 'GET';
  $object = new CRM_Contact_Page_DedupeFind();
  $object->set('gid', $this->_groupId);
  $object->set('rgid', $dao->id);
  $object->set('action', CRM_Core_Action::UPDATE);
  // NOTE(review): errors from run() are deliberately suppressed here, as in
  // the original test - confirm which notices the page emits before removing.
  @$object->run();

  // Retrieve pairs from prev next cache table.
  $select = array('pn.is_selected' => 'is_selected');
  $cacheKeyString = "merge Individual_{$dao->id}_{$this->_groupId}";
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);

  $this->assertCount(count($foundDupes), $pnDupePairs, 'Check number of dupe pairs in prev next cache.');

  // Batch merge all dupes.
  // NOTE(review): the final argument (2) presumably means "merge every pair
  // regardless of selection" - confirm against CRM_Dedupe_Merger::batchMerge().
  $result = CRM_Dedupe_Merger::batchMerge($dao->id, $this->_groupId, 'safe', TRUE, 5, 2);
  $this->assertCount(3, $result['merged'], 'Check number of merged pairs.');

  // Retrieve pairs from prev next cache table.
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertCount(0, $pnDupePairs, 'Check number of remaining dupe pairs in prev next cache.');

  $this->deleteDupeContacts();
}
246
/**
 * Check that CRM_Dedupe_Merger::cidRefs() returns every expected table.
 *
 * The result is compared against both the hand-maintained list and the list
 * calculated from the schema, each extended with the hacked-in reference.
 */
public function testGetCidRefs() {
  $this->entityCustomGroupWithSingleFieldCreate(__FUNCTION__, 'Contacts');

  $hackedIn = $this->getHackedInCIDRef();

  $expectedFromStaticList = array_merge($this->getStaticCIDRefs(), $hackedIn);
  $this->assertEquals($expectedFromStaticList, CRM_Dedupe_Merger::cidRefs());

  $expectedFromSchema = array_merge($this->getCalculatedCIDRefs(), $hackedIn);
  $this->assertEquals($expectedFromSchema, CRM_Dedupe_Merger::cidRefs());
}
255
/**
 * Get the not-really-cid-refs that are currently hacked in.
 *
 * These are hacked into the getCIDs function, so the expected lists in
 * testGetCidRefs() have to include them too.
 *
 * @return array
 *   Column names keyed by table name.
 */
public function getHackedInCIDRef() {
  $hackedInRefs = array();
  $hackedInRefs['civicrm_entity_tag'] = array('entity_id');
  return $hackedInRefs;
}
270
/**
 * Test the function that gets duplicate pairs, without a group filter.
 *
 * There are 2 code paths retrieving this data, so the initial focus is on
 * ensuring that they match.
 */
public function testGetMatches() {
  $this->setupMatchData();

  // Contacts 0 & 1 (Mickey) and 2 & 3 (Minnie) are the expected pairs.
  $expectedPairs = array(
    array(
      'srcID' => $this->contacts[0]['id'],
      'srcName' => 'Mr. Mickey Mouse II',
      'dstID' => $this->contacts[1]['id'],
      'dstName' => 'Mr. Mickey Mouse II',
      'weight' => 20,
      'canMerge' => TRUE,
    ),
    array(
      'srcID' => $this->contacts[2]['id'],
      'srcName' => 'Mr. Minnie Mouse II',
      'dstID' => $this->contacts[3]['id'],
      'dstName' => 'Mr. Minnie Mouse II',
      'weight' => 20,
      'canMerge' => TRUE,
    ),
  );

  // NULL as the second argument: no group restriction (compare with
  // testGetMatchesInGroup(), which passes a group ID in that position).
  $pairs = CRM_Dedupe_Merger::getDuplicatePairs(1, NULL, TRUE, 25, FALSE);

  $this->assertEquals($expectedPairs, $pairs);
}
306
/**
 * Test the function that gets duplicate pairs when a group ID is supplied.
 *
 * Only the last fixture contact is added to the group. The expected result is
 * a single pair with that contact as the source and the other Minnie contact
 * as the destination.
 */
public function testGetMatchesInGroup() {
  $this->setupMatchData();

  $groupID = $this->groupCreate(array('title' => 'she-mice'));
  $groupMembership = array(
    'group_id' => $groupID,
    'contact_id' => $this->contacts[3]['id'],
  );
  $this->callAPISuccess('GroupContact', 'create', $groupMembership);

  $pairs = CRM_Dedupe_Merger::getDuplicatePairs(1, $groupID, TRUE, 25, FALSE);

  $expectedPairs = array(
    array(
      'srcID' => $this->contacts[3]['id'],
      'srcName' => 'Mr. Minnie Mouse II',
      'dstID' => $this->contacts[2]['id'],
      'dstName' => 'Mr. Minnie Mouse II',
      'weight' => 20,
      'canMerge' => TRUE,
    ),
  );
  $this->assertEquals($expectedPairs, $pairs);
}
339
/**
 * Create the four contacts used by the duplicate-pair tests.
 *
 * Two 'Mickey Mouse' and two 'Minnie Mouse' individuals are created, all with
 * the same email address. Each one is appended to $this->contacts as its
 * fixture parameters plus the new contact's 'id'.
 */
public function setupMatchData() {
  $firstNames = array('Mickey', 'Mickey', 'Minnie', 'Minnie');

  foreach ($firstNames as $firstName) {
    $contact = array(
      'first_name' => $firstName,
      'last_name' => 'Mouse',
      'email' => 'mickey@mouse.com',
    );
    // The ID is appended after creation so it is not passed to individualCreate().
    $newContactID = $this->individualCreate($contact);
    $contact['id'] = $newContactID;
    $this->contacts[] = $contact;
  }
}
371
372
/**
 * Get the list of tables (and their columns) that refer to the contact ID.
 *
 * This list is maintained by hand in this test. testGetCidRefs() also checks
 * against a list calculated from the schema; having both seems to add extra
 * stability, and they do not change often.
 *
 * @return array
 *   Lists of referencing column names, keyed by table name.
 */
public function getStaticCIDRefs() {
  $refs = array(
    'civicrm_acl_cache' => array('contact_id'),
    'civicrm_acl_contact_cache' => array('user_id', 'contact_id'),
    'civicrm_action_log' => array('contact_id'),
    'civicrm_activity_contact' => array('contact_id'),
    'civicrm_address' => array('contact_id'),
    'civicrm_batch' => array('created_id', 'modified_id'),
    'civicrm_campaign' => array('created_id', 'last_modified_id'),
    'civicrm_case_contact' => array('contact_id'),
    'civicrm_contact' => array('primary_contact_id', 'employer_id'),
    'civicrm_contribution' => array('contact_id'),
    'civicrm_contribution_page' => array('created_id'),
    'civicrm_contribution_recur' => array('contact_id'),
    'civicrm_contribution_soft' => array('contact_id'),
    'civicrm_custom_group' => array('created_id'),
    'civicrm_dashboard_contact' => array('contact_id'),
    'civicrm_dedupe_exception' => array('contact_id1', 'contact_id2'),
    'civicrm_domain' => array('contact_id'),
    'civicrm_email' => array('contact_id'),
    'civicrm_event' => array('created_id'),
    'civicrm_event_carts' => array('user_id'),
    'civicrm_financial_account' => array('contact_id'),
    'civicrm_financial_item' => array('contact_id'),
    'civicrm_grant' => array('contact_id'),
    'civicrm_group' => array('created_id', 'modified_id'),
    'civicrm_group_contact' => array('contact_id'),
    'civicrm_group_contact_cache' => array('contact_id'),
    'civicrm_group_organization' => array('organization_id'),
    'civicrm_im' => array('contact_id'),
    'civicrm_log' => array('modified_id'),
    'civicrm_mailing' => array('created_id', 'scheduled_id', 'approver_id'),
    'civicrm_mailing_abtest' => array('created_id'),
    'civicrm_mailing_event_queue' => array('contact_id'),
    'civicrm_mailing_event_subscribe' => array('contact_id'),
    'civicrm_mailing_recipients' => array('contact_id'),
    'civicrm_membership' => array('contact_id'),
    'civicrm_membership_log' => array('modified_id'),
    'civicrm_membership_type' => array('member_of_contact_id'),
    'civicrm_note' => array('contact_id'),
    'civicrm_openid' => array('contact_id'),
    // transferred_to_contact_id: see CRM-16761.
    'civicrm_participant' => array('contact_id', 'transferred_to_contact_id'),
    'civicrm_payment_token' => array('contact_id', 'created_id'),
    'civicrm_pcp' => array('contact_id'),
    'civicrm_phone' => array('contact_id'),
    'civicrm_pledge' => array('contact_id'),
    'civicrm_print_label' => array('created_id'),
    'civicrm_relationship' => array('contact_id_a', 'contact_id_b'),
    'civicrm_report_instance' => array('created_id', 'owner_id'),
    'civicrm_setting' => array('contact_id', 'created_id'),
    'civicrm_subscription_history' => array('contact_id'),
    'civicrm_survey' => array('created_id', 'last_modified_id'),
    'civicrm_tag' => array('created_id'),
    'civicrm_uf_group' => array('created_id'),
    'civicrm_uf_match' => array('contact_id'),
    'civicrm_value_testgetcidref_1' => array('entity_id'),
    'civicrm_website' => array('contact_id'),
  );
  return $refs;
}
564
/**
 * Get the list of contact ID references as calculated from the schema.
 *
 * Note this is an expensive and table locking query. Should be safe in tests
 * though.
 *
 * @return array
 *   Lists of referencing column names, keyed by table name.
 */
public function getCalculatedCIDRefs() {
  $query = "
    SELECT
      table_name,
      column_name
    FROM information_schema.key_column_usage
    WHERE
      referenced_table_schema = database() AND
      referenced_table_name = 'civicrm_contact' AND
      referenced_column_name = 'id';
  ";

  $cidRefs = array();
  $row = CRM_Core_DAO::executeQuery($query);
  while ($row->fetch()) {
    $cidRefs[$row->table_name][] = $row->column_name;
  }

  // Do specific re-ordering changes to make this the same as the ref
  // validated one. The above query orders by FK alphabetically.
  // There might be cleverer ways to do this but it shouldn't change much.
  $columnOrderOverrides = array(
    'civicrm_contact' => array('primary_contact_id', 'employer_id'),
    'civicrm_acl_contact_cache' => array('user_id', 'contact_id'),
    'civicrm_mailing' => array('created_id', 'scheduled_id', 'approver_id'),
  );
  foreach ($columnOrderOverrides as $table => $orderedColumns) {
    // Assign position by position so any further columns are left untouched.
    foreach ($orderedColumns as $position => $column) {
      $cidRefs[$table][$position] = $column;
    }
  }

  return $cidRefs;
}
598
599 }