From 2988f5c73d68678aa9010ebe5829bb8b5f06ce35 Mon Sep 17 00:00:00 2001 From: eileen Date: Wed, 27 Apr 2016 13:02:16 +1200 Subject: [PATCH] CRM-18443 add unit test & refactor code into smaller function On starting to dig into the dedupe code I find that there are 2 different paths, one that is used by the batch merge and one used in other cases. I'm hoping to rationalise that & add testing with the focus being on small refactors + added tests. I'm less sure about adding an api as I work into the internals here.... Replact duplicate code with a function to calculate the cacheKey This part of investigation / cleanup for CRM-18443 --- CRM/Contact/Page/AJAX.php | 2 +- CRM/Dedupe/Merger.php | 74 ++++++++++++++++++------- tests/phpunit/CRM/Dedupe/MergerTest.php | 53 ++++++++++++++++++ 3 files changed, 108 insertions(+), 21 deletions(-) diff --git a/CRM/Contact/Page/AJAX.php b/CRM/Contact/Page/AJAX.php index 83786c7f09..0750017da1 100644 --- a/CRM/Contact/Page/AJAX.php +++ b/CRM/Contact/Page/AJAX.php @@ -726,7 +726,7 @@ LIMIT {$offset}, {$rowCount} if ($selected) { $whereClause .= ' AND pn.is_selected = 1'; } - $join .= " LEFT JOIN civicrm_dedupe_exception de ON ( pn.entity_id1 = de.contact_id1 AND pn.entity_id2 = de.contact_id2 )"; + $join .= CRM_Dedupe_Merger::getJoinOnDedupeTable(); $select = array( 'cc1.contact_type' => 'src_contact_type', diff --git a/CRM/Dedupe/Merger.php b/CRM/Dedupe/Merger.php index 99c5e65f03..3558ccd612 100644 --- a/CRM/Dedupe/Merger.php +++ b/CRM/Dedupe/Merger.php @@ -593,29 +593,16 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m * @return array|bool */ public static function batchMerge($rgid, $gid = NULL, $mode = 'safe', $autoFlip = TRUE, $batchLimit = 1, $isSelected = 2) { - $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid); - $join = CRM_Dedupe_Merger::getJoinOnDedupeTable(); - - $where = "de.id IS NULL"; - if ($isSelected === 0 || $isSelected === 1) { - $where .= " AND pn.is_selected = {$isSelected}"; - }// else consider all dupe pairs - $where .= " LIMIT {$batchLimit}"; - $redirectForPerformance = ($batchLimit > 1) ? TRUE : FALSE; - - $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where); - if (empty($dupePairs) && !$redirectForPerformance && $isSelected == 2) { - // If we haven't found any dupes, probably cache is empty. - // Try filling cache and give another try. - CRM_Core_BAO_PrevNextCache::refillCache($rgid, $gid, $cacheKeyString); - $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where); - } + $reloadCacheIfEmpty = (!$redirectForPerformance && $isSelected == 2); + $dupePairs = self::getDuplicatePairs($rgid, $gid, $reloadCacheIfEmpty, $batchLimit, $isSelected); $cacheParams = array( - 'cache_key_string' => $cacheKeyString, - 'join' => $join, - 'where' => $where, + 'cache_key_string' => self::getMergeCacheKeyString($rgid, $gid), + // @todo stop passing these parameters in & instead calculate them in the merge function based + // on the 'real' params like $isRespectExclusions $batchLimit and $isSelected. + 'join' => self::getJoinOnDedupeTable(), + 'where' => self::getWhereString($batchLimit, $isSelected), ); return CRM_Dedupe_Merger::merge($dupePairs, $cacheParams, $mode, $autoFlip, $redirectForPerformance); } @@ -635,6 +622,25 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m "; } + /** + * Get where string for dedupe join. + * + * @param int $batchLimit + * @param bool $isSelected + * + * @return string + */ + protected static function getWhereString($batchLimit, $isSelected) { + $where = "de.id IS NULL"; + if ($isSelected === 0 || $isSelected === 1) { + $where .= " AND pn.is_selected = {$isSelected}"; + } + // else consider all dupe pairs + // @todo Adding limit to Where??!! + $where .= " LIMIT {$batchLimit}"; + return $where; + } + public static function updateMergeStats($cacheKeyString, $result = array()) { // gather latest stats $merged = count($result['merged']); @@ -781,6 +787,7 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m if ($cacheKeyString && !$redirectForPerformance) { // retrieve next pair of dupes + // @todo call getDuplicatePairs. $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $cacheParams['join'], $cacheParams['where'] @@ -1955,6 +1962,33 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m } } + /** + * Get Duplicate Pairs based on a rule for a group. + * + * @param int $rule_group_id + * @param int $group_id + * @param bool $reloadCacheIfEmpty + * @param int $batchLimit + * @param bool $isSelected + * + * @return array + * Array of matches meeting the criteria. + */ + public static function getDuplicatePairs($rule_group_id, $group_id, $reloadCacheIfEmpty, $batchLimit, $isSelected) { + $where = self::getWhereString($batchLimit, $isSelected); + $cacheKeyString = self::getMergeCacheKeyString($rule_group_id, $group_id); + $join = self::getJoinOnDedupeTable(); + $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where); + if (empty($dupePairs) && $reloadCacheIfEmpty) { + // If we haven't found any dupes, probably cache is empty. + // Try filling cache and give another try. + CRM_Core_BAO_PrevNextCache::refillCache($rule_group_id, $group_id, $cacheKeyString); + $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where); + return $dupePairs; + } + return $dupePairs; + } + /** * Get the cache key string for the merge action. * diff --git a/tests/phpunit/CRM/Dedupe/MergerTest.php b/tests/phpunit/CRM/Dedupe/MergerTest.php index 069743f785..185ddec7dd 100644 --- a/tests/phpunit/CRM/Dedupe/MergerTest.php +++ b/tests/phpunit/CRM/Dedupe/MergerTest.php @@ -263,6 +263,59 @@ class CRM_Dedupe_MergerTest extends CiviUnitTestCase { ); } + /** + * Test function that gets duplicate pairs. + * + * It turns out there are 2 code paths retrieving this data so my initial focus is on ensuring + * they match. + */ + public function testGetMatches() { + $this->setupMatchData(); + $pairs = CRM_Dedupe_Merger::getDuplicatePairs( + 1, + NULL, + TRUE, + 25, + FALSE + ); + + $this->assertEquals(array( + 0 => array( + 'srcID' => $this->contacts[0]['id'], + 'srcName' => 'Mr. Mickey Mouse II', + 'dstID' => $this->contacts[1]['id'], + 'dstName' => 'Mr. Mickey Mouse II', + 'weight' => 20, + 'canMerge' => TRUE, + ), + ), $pairs); + } + + public function setupMatchData() { + $fixtures = array( + array( + 'first_name' => 'Mickey', + 'last_name' => 'Mouse', + 'email' => 'mickey@mouse.com', + ), + array( + 'first_name' => 'Mickey', + 'last_name' => 'Mouse', + 'email' => 'mickey@mouse.com', + ), + array( + 'first_name' => 'Minnie', + 'last_name' => 'Mouse', + 'email' => 'mickey@mouse.com', + ), + ); + foreach ($fixtures as $fixture) { + $contactID = $this->individualCreate($fixture); + $this->contacts[] = array_merge($fixture, array('id' => $contactID)); + } + } + + /** * Get the list of tables that refer to the CID. * -- 2.25.1