Merge pull request #8157 from eileenmcnaughton/CRM-18424
[civicrm-core.git] / tests / phpunit / CRM / Dedupe / MergerTest.php
1 <?php
2
3 /**
4 * Class CRM_Dedupe_MergerTest
5 * @group headless
6 */
7 class CRM_Dedupe_MergerTest extends CiviUnitTestCase {
8
9 protected $_groupId;
10 protected $_contactIds = array();
11
/**
 * Clean up the contact and group tables via quickCleanup(), then defer to the parent tear down.
 */
public function tearDown() {
  $tablesToClean = array('civicrm_contact', 'civicrm_group_contact', 'civicrm_group');
  $this->quickCleanup($tablesToClean);
  parent::tearDown();
}
16
/**
 * Create a group holding nine contacts whose names and emails deliberately overlap.
 *
 * The id of the new group is stored in $this->_groupId and the ids of the
 * contacts in $this->_contactIds, keyed from 1 in creation order.
 *
 * Every API call goes through callAPISuccess() so that a failed create stops
 * the test here rather than surfacing later as a wrong duplicate count.
 */
public function createDupeContacts() {
  // Create a group to hold the contacts, so that dupe checks don't consider any other contacts in the DB.
  $group = $this->callAPISuccess('group', 'create', array(
    'name' => 'Test Dupe Merger Group',
    'title' => 'Test Dupe Merger Group',
    'domain_id' => 1,
    'is_active' => 1,
    'visibility' => 'Public Pages',
  ));
  $this->_groupId = $group['id'];

  // Contact data set; dupe checks are made against the following contacts:
  // FIRST NAME, LAST NAME, EMAIL
  $people = array(
    array('robin', 'hood', 'robin@example.com'),
    array('robin', 'hood', 'robin@example.com'),
    array('robin', 'hood', 'hood@example.com'),
    array('robin', 'dale', 'robin@example.com'),
    array('little', 'dale', 'dale@example.com'),
    array('little', 'dale', 'dale@example.com'),
    array('will', 'dale', 'dale@example.com'),
    array('will', 'dale', 'will@example.com'),
    array('will', 'dale', 'will@example.com'),
  );

  // Contact ids are recorded with 1-based keys, matching the original numbering.
  $count = 1;
  foreach ($people as $person) {
    list($firstName, $lastName, $email) = $person;
    $contact = $this->callAPISuccess('contact', 'create', array(
      'first_name' => $firstName,
      'last_name' => $lastName,
      'email' => $email,
      'contact_type' => 'Individual',
    ));
    $this->_contactIds[$count++] = $contact['id'];

    // Add the contact to the dedupe group.
    $this->callAPISuccess('group_contact', 'create', array(
      'contact_id' => $contact['id'],
      'group_id' => $this->_groupId,
    ));
  }
}
116
/**
 * Remove every contact recorded in $this->_contactIds, then the group that held them.
 */
public function deleteDupeContacts() {
  foreach ($this->_contactIds as $id) {
    $this->contactDelete($id);
  }

  // The dupe group goes last, after its contacts.
  civicrm_api('group', 'delete', array('id' => $this->_groupId, 'version' => 3));
}
129
/**
 * Test that a batch merge limited to selected pairs only merges those pairs.
 *
 * Three duplicate pairs are found in the test group. Two of them are flagged
 * as selected in the prevnext cache; after the merge two pairs should have
 * been merged and one pair should remain in the cache.
 */
public function testBatchMergeSelectedDuplicates() {
  $this->createDupeContacts();

  // verify that all contacts have been created separately
  $this->assertEquals(9, count($this->_contactIds), 'Check for number of contacts.');

  $dao = new CRM_Dedupe_DAO_RuleGroup();
  $dao->contact_type = 'Individual';
  $dao->name = 'IndividualSupervised';
  $dao->is_default = 1;
  $dao->find(TRUE);

  $foundDupes = CRM_Dedupe_Finder::dupesInGroup($dao->id, $this->_groupId);

  // -------------------------------------------------------------------------
  // Name and Email (reserved) Matches ( 3 pairs )
  // --------------------------------------------------------------------------
  // robin  - hood - robin@example.com
  // robin  - hood - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will   - dale - will@example.com
  // will   - dale - will@example.com
  // so 3 pairs for - first + last + mail
  $this->assertEquals(3, count($foundDupes), 'Check Individual-Supervised dupe rule for dupesInGroup().');

  // Run dedupe finder as the browser would
  $_SERVER['REQUEST_METHOD'] = 'GET'; //avoid invalid key error
  $object = new CRM_Contact_Page_DedupeFind();
  $object->set('gid', $this->_groupId);
  $object->set('rgid', $dao->id);
  $object->set('action', CRM_Core_Action::UPDATE);
  $object->setEmbedded(TRUE);
  // NOTE(review): errors from run() are suppressed here; the reason is not visible in this file.
  @$object->run();

  // Retrieve pairs from prev next cache table
  $select = array('pn.is_selected' => 'is_selected');
  $cacheKeyString = "merge Individual_{$dao->id}_{$this->_groupId}";
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);

  $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');

  // mark first two pairs as selected
  CRM_Core_DAO::singleValueQuery("UPDATE civicrm_prevnext_cache SET is_selected = 1 WHERE id IN ({$pnDupePairs[0]['prevnext_id']}, {$pnDupePairs[1]['prevnext_id']})");

  // Re-read the cache and check BOTH flagged pairs (index 0 and index 1).
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertEquals(1, $pnDupePairs[0]['is_selected'], 'Check if first record in dupe pairs is marked as selected.');
  $this->assertEquals(1, $pnDupePairs[1]['is_selected'], 'Check if second record in dupe pairs is marked as selected.');

  // batch merge selected dupes
  // (final argument is 1 here; testBatchMergeAllDuplicates passes 2 to merge all pairs)
  $result = CRM_Dedupe_Merger::batchMerge($dao->id, $this->_groupId, 'safe', TRUE, 5, 1);
  $this->assertEquals(2, count($result['merged']), 'Check number of merged pairs.');

  // retrieve pairs from prev next cache table
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertEquals(1, count($pnDupePairs), 'Check number of remaining dupe pairs in prev next cache.');

  $this->deleteDupeContacts();
}
192
/**
 * Test that a batch merge of all duplicates merges every pair found.
 *
 * Three duplicate pairs are found in the test group; after the merge all
 * three should have been merged and the prevnext cache should hold no pairs.
 */
public function testBatchMergeAllDuplicates() {
  $this->createDupeContacts();

  // verify that all contacts have been created separately
  $this->assertEquals(9, count($this->_contactIds), 'Check for number of contacts.');

  $dao = new CRM_Dedupe_DAO_RuleGroup();
  $dao->contact_type = 'Individual';
  $dao->name = 'IndividualSupervised';
  $dao->is_default = 1;
  $dao->find(TRUE);

  $foundDupes = CRM_Dedupe_Finder::dupesInGroup($dao->id, $this->_groupId);

  // -------------------------------------------------------------------------
  // Name and Email (reserved) Matches ( 3 pairs )
  // --------------------------------------------------------------------------
  // robin  - hood - robin@example.com
  // robin  - hood - robin@example.com
  // little - dale - dale@example.com
  // little - dale - dale@example.com
  // will   - dale - will@example.com
  // will   - dale - will@example.com
  // so 3 pairs for - first + last + mail
  $this->assertEquals(3, count($foundDupes), 'Check Individual-Supervised dupe rule for dupesInGroup().');

  // Run dedupe finder as the browser would
  $_SERVER['REQUEST_METHOD'] = 'GET'; //avoid invalid key error
  $object = new CRM_Contact_Page_DedupeFind();
  $object->set('gid', $this->_groupId);
  $object->set('rgid', $dao->id);
  $object->set('action', CRM_Core_Action::UPDATE);
  $object->setEmbedded(TRUE);
  // NOTE(review): errors from run() are suppressed here; the reason is not visible in this file.
  @$object->run();

  // Retrieve pairs from prev next cache table
  $select = array('pn.is_selected' => 'is_selected');
  $cacheKeyString = "merge Individual_{$dao->id}_{$this->_groupId}";
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);

  $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');

  // batch merge all dupes
  // (final argument is 2 here; testBatchMergeSelectedDuplicates passes 1 for selected pairs only)
  $result = CRM_Dedupe_Merger::batchMerge($dao->id, $this->_groupId, 'safe', TRUE, 5, 2);
  $this->assertEquals(3, count($result['merged']), 'Check number of merged pairs.');

  // retrieve pairs from prev next cache table
  $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
  $this->assertEquals(0, count($pnDupePairs), 'Check number of remaining dupe pairs in prev next cache.');

  $this->deleteDupeContacts();
}
248
/**
 * Check that CRM_Dedupe_Merger::cidRefs() returns every expected table.
 *
 * The result is compared twice: once against the hand-maintained list and
 * once against the list calculated from the schema, each with the hacked-in
 * entries appended.
 */
public function testGetCidRefs() {
  // A custom group extending Contacts is created first, named after this test.
  $this->entityCustomGroupWithSingleFieldCreate(__FUNCTION__, 'Contacts');

  $expectedFromStaticList = array_merge($this->getStaticCIDRefs(), $this->getHackedInCIDRef());
  $this->assertEquals($expectedFromStaticList, CRM_Dedupe_Merger::cidRefs());

  $expectedFromSchema = array_merge($this->getCalculatedCIDRefs(), $this->getHackedInCIDRef());
  $this->assertEquals($expectedFromSchema, CRM_Dedupe_Merger::cidRefs());
}
257
/**
 * Get the not-really-cid-refs entries that are currently hacked in.
 *
 * These are appended to both reference lists in testGetCidRefs() before
 * comparing against CRM_Dedupe_Merger::cidRefs().
 *
 * @return array
 *   Column names keyed by table name.
 */
public function getHackedInCIDRef() {
  $hackedInRefs = array();
  $hackedInRefs['civicrm_entity_tag'] = array('entity_id');
  return $hackedInRefs;
}
272
/**
 * Test retrieval of duplicate pairs when no group is passed in.
 *
 * With the four fixture contacts, the two Mickeys and the two Minnies are
 * each expected to come back as one mergeable pair.
 */
public function testGetMatches() {
  $this->setupMatchData();
  $pairs = CRM_Dedupe_Merger::getDuplicatePairs(1, NULL, TRUE, 25, FALSE);

  $mickeyPair = array(
    'srcID' => $this->contacts[0]['id'],
    'srcName' => 'Mr. Mickey Mouse II',
    'dstID' => $this->contacts[1]['id'],
    'dstName' => 'Mr. Mickey Mouse II',
    'weight' => 20,
    'canMerge' => TRUE,
  );
  $minniePair = array(
    'srcID' => $this->contacts[2]['id'],
    'srcName' => 'Mr. Minnie Mouse II',
    'dstID' => $this->contacts[3]['id'],
    'dstName' => 'Mr. Minnie Mouse II',
    'weight' => 20,
    'canMerge' => TRUE,
  );

  $this->assertEquals(array($mickeyPair, $minniePair), $pairs);
}
308
/**
 * Test retrieval of duplicate pairs when a group is passed in.
 *
 * Only the last fixture contact is added to the group, and the single
 * expected pair has that contact as its source.
 */
public function testGetMatchesInGroup() {
  $this->setupMatchData();

  $groupID = $this->groupCreate(array('title' => 'she-mice'));
  $membership = array(
    'group_id' => $groupID,
    'contact_id' => $this->contacts[3]['id'],
  );
  $this->callAPISuccess('GroupContact', 'create', $membership);

  $pairs = CRM_Dedupe_Merger::getDuplicatePairs(1, $groupID, TRUE, 25, FALSE);

  $expectedPairs = array(
    array(
      'srcID' => $this->contacts[3]['id'],
      'srcName' => 'Mr. Minnie Mouse II',
      'dstID' => $this->contacts[2]['id'],
      'dstName' => 'Mr. Minnie Mouse II',
      'weight' => 20,
      'canMerge' => TRUE,
    ),
  );
  $this->assertEquals($expectedPairs, $pairs);
}
341
/**
 * Create the four contacts the match tests rely on.
 *
 * Two Mickeys and two Minnies are created, all with the same last name and
 * email. Each fixture, with its new contact id added under 'id', is appended
 * to $this->contacts.
 */
public function setupMatchData() {
  $firstNames = array('Mickey', 'Mickey', 'Minnie', 'Minnie');
  foreach ($firstNames as $firstName) {
    $fixture = array(
      'first_name' => $firstName,
      'last_name' => 'Mouse',
      'email' => 'mickey@mouse.com',
    );
    $contactID = $this->individualCreate($fixture);
    $fixture['id'] = $contactID;
    $this->contacts[] = $fixture;
  }
}
373
374
/**
 * Get the hand-maintained list of tables and columns that refer to a contact id.
 *
 * This list lives in the test and is checked alongside an automatically
 * calculated list; keeping both is intended to add stability, as the
 * references do not change often.
 *
 * @return array
 *   Lists of column names keyed by table name.
 */
public function getStaticCIDRefs() {
  return array(
    'civicrm_acl_cache' => array('contact_id'),
    'civicrm_acl_contact_cache' => array('user_id', 'contact_id'),
    'civicrm_action_log' => array('contact_id'),
    'civicrm_activity_contact' => array('contact_id'),
    'civicrm_address' => array('contact_id'),
    'civicrm_batch' => array('created_id', 'modified_id'),
    'civicrm_campaign' => array('created_id', 'last_modified_id'),
    'civicrm_case_contact' => array('contact_id'),
    'civicrm_contact' => array('primary_contact_id', 'employer_id'),
    'civicrm_contribution' => array('contact_id'),
    'civicrm_contribution_page' => array('created_id'),
    'civicrm_contribution_recur' => array('contact_id'),
    'civicrm_contribution_soft' => array('contact_id'),
    'civicrm_custom_group' => array('created_id'),
    'civicrm_dashboard_contact' => array('contact_id'),
    'civicrm_dedupe_exception' => array('contact_id1', 'contact_id2'),
    'civicrm_domain' => array('contact_id'),
    'civicrm_email' => array('contact_id'),
    'civicrm_event' => array('created_id'),
    'civicrm_event_carts' => array('user_id'),
    'civicrm_financial_account' => array('contact_id'),
    'civicrm_financial_item' => array('contact_id'),
    'civicrm_grant' => array('contact_id'),
    'civicrm_group' => array('created_id', 'modified_id'),
    'civicrm_group_contact' => array('contact_id'),
    'civicrm_group_contact_cache' => array('contact_id'),
    'civicrm_group_organization' => array('organization_id'),
    'civicrm_im' => array('contact_id'),
    'civicrm_log' => array('modified_id'),
    'civicrm_mailing' => array('created_id', 'scheduled_id', 'approver_id'),
    'civicrm_mailing_abtest' => array('created_id'),
    'civicrm_mailing_event_queue' => array('contact_id'),
    'civicrm_mailing_event_subscribe' => array('contact_id'),
    'civicrm_mailing_recipients' => array('contact_id'),
    'civicrm_membership' => array('contact_id'),
    'civicrm_membership_log' => array('modified_id'),
    'civicrm_membership_type' => array('member_of_contact_id'),
    'civicrm_note' => array('contact_id'),
    'civicrm_openid' => array('contact_id'),
    // transferred_to_contact_id: see CRM-16761
    'civicrm_participant' => array('contact_id', 'transferred_to_contact_id'),
    'civicrm_payment_token' => array('contact_id', 'created_id'),
    'civicrm_pcp' => array('contact_id'),
    'civicrm_phone' => array('contact_id'),
    'civicrm_pledge' => array('contact_id'),
    'civicrm_print_label' => array('created_id'),
    'civicrm_relationship' => array('contact_id_a', 'contact_id_b'),
    'civicrm_report_instance' => array('created_id', 'owner_id'),
    'civicrm_setting' => array('contact_id', 'created_id'),
    'civicrm_subscription_history' => array('contact_id'),
    'civicrm_survey' => array('created_id', 'last_modified_id'),
    'civicrm_tag' => array('created_id'),
    'civicrm_uf_group' => array('created_id'),
    'civicrm_uf_match' => array('contact_id'),
    // presumably the custom value table created by testGetCidRefs() - confirm against the helper
    'civicrm_value_testgetcidref_1' => array('entity_id'),
    'civicrm_website' => array('contact_id'),
  );
}
566
/**
 * Get the list of contact id references as calculated from the schema.
 *
 * Foreign keys pointing at civicrm_contact.id are read from
 * information_schema. Per the original note this is an expensive,
 * table-locking query, which should be safe in tests.
 *
 * @return array
 *   Lists of column names keyed by table name.
 */
public function getCalculatedCIDRefs() {
  $sql = "
    SELECT
      table_name,
      column_name
    FROM information_schema.key_column_usage
    WHERE
      referenced_table_schema = database() AND
      referenced_table_name = 'civicrm_contact' AND
      referenced_column_name = 'id';
  ";
  $refs = array();
  $row = CRM_Core_DAO::executeQuery($sql);
  while ($row->fetch()) {
    $refs[$row->table_name][] = $row->column_name;
  }

  // Specific re-ordering so the result matches the statically maintained list.
  // The query above orders by FK alphabetically. There might be cleverer ways
  // to do this but it shouldn't change much.
  $columnOrderOverrides = array(
    'civicrm_contact' => array('primary_contact_id', 'employer_id'),
    'civicrm_acl_contact_cache' => array('user_id', 'contact_id'),
    'civicrm_mailing' => array('created_id', 'scheduled_id', 'approver_id'),
  );
  foreach ($columnOrderOverrides as $table => $columns) {
    // Assign index by index so any further columns the query found are left in place.
    foreach ($columns as $position => $column) {
      $refs[$table][$position] = $column;
    }
  }
  return $refs;
}
600
601 }