From 997a03fe8fd367ec54b99aaf08012075fe8b8206 Mon Sep 17 00:00:00 2001 From: eileen Date: Mon, 2 Sep 2019 11:34:02 +1200 Subject: [PATCH] Incorporate searchLimit in dedupe cacheKey When accessing dedupes by the api call or on the dedupe screen it's possible to pass in a searchLimit param. This works like the group limit in that it limits the number of contacts for whom a match is sought. For example if there are 2million contacts in the database and you have a search limit of 0 then it will look for duplicates for all 2 million. (unset is the same as 0). If you have a search limit of 1000 it will look for matches for the first 1000 contacts that match the criteria (criteria could be the group or other criteria passed in via the url although the api is the most obvious way to pass in criteria) Note there is a separate limit (sometimes called batch limit) that limits results from within the found matches. To test the searchLimit it is possible to add limit=5 to the url generated in the url by findContacts. Without this patch changing the limit once the search has been done will not alter the results as the limit is not part of the cachekey - this patch changes that. --- CRM/Contact/Form/Merge.php | 4 ++-- CRM/Contact/Page/AJAX.php | 3 ++- CRM/Contact/Page/DedupeFind.php | 6 ++--- CRM/Contact/Page/DedupeMerge.php | 12 ++++++---- CRM/Core/BAO/PrevNextCache.php | 2 +- CRM/Dedupe/Merger.php | 29 ++++++++++++++++++------- api/v3/Dedupe.php | 8 ++++++- api/v3/Job.php | 8 ++++++- tests/phpunit/CRM/Dedupe/MergerTest.php | 4 ++-- 9 files changed, 53 insertions(+), 23 deletions(-) diff --git a/CRM/Contact/Form/Merge.php b/CRM/Contact/Form/Merge.php index 91f282f6bb..da804021ea 100644 --- a/CRM/Contact/Form/Merge.php +++ b/CRM/Contact/Form/Merge.php @@ -114,7 +114,7 @@ class CRM_Contact_Form_Merge extends CRM_Core_Form { CRM_Core_Session::singleton()->pushUserContext($browseUrl); } - $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $gid, json_decode($this->criteria, TRUE)); + $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $gid, json_decode($this->criteria, TRUE), TRUE, $this->limit); $join = CRM_Dedupe_Merger::getJoinOnDedupeTable(); $where = "de.id IS NULL"; @@ -331,7 +331,7 @@ class CRM_Contact_Form_Merge extends CRM_Core_Form { } if ($this->next && $this->_mergeId) { - $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $this->_gid, json_decode($this->criteria, TRUE)); + $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $this->_gid, json_decode($this->criteria, TRUE), TRUE, $this->limit); $join = CRM_Dedupe_Merger::getJoinOnDedupeTable(); $where = "de.id IS NULL"; diff --git a/CRM/Contact/Page/AJAX.php b/CRM/Contact/Page/AJAX.php index 7dcedcc8cd..3ac0326cf7 100644 --- a/CRM/Contact/Page/AJAX.php +++ b/CRM/Contact/Page/AJAX.php @@ -638,6 +638,7 @@ LIMIT {$offset}, {$rowCount} $gid = CRM_Utils_Request::retrieve('gid', 'Positive'); $rgid = CRM_Utils_Request::retrieve('rgid', 'Positive'); + $limit = CRM_Utils_Request::retrieveValue('limit', 'Positive', 0); $null = NULL; $criteria = CRM_Utils_Request::retrieve('criteria', 'Json', $null, FALSE, '{}'); $selected = CRM_Utils_Request::retrieveValue('selected', 'Boolean'); @@ -646,7 +647,7 @@ LIMIT {$offset}, {$rowCount} } $whereClause = $orderByClause = ''; - $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, json_decode($criteria, TRUE)); + $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, json_decode($criteria, TRUE), TRUE, $limit); $searchRows = []; diff --git a/CRM/Contact/Page/DedupeFind.php b/CRM/Contact/Page/DedupeFind.php index 8b22a7140d..b7a22ea666 100644 --- a/CRM/Contact/Page/DedupeFind.php +++ b/CRM/Contact/Page/DedupeFind.php @@ -105,13 +105,13 @@ class CRM_Contact_Page_DedupeFind extends CRM_Core_Page_Basic { 'reset' => 1, 'rgid' => $rgid, 'gid' => $gid, - 'limit' => $limit, + 'limit' => (int) $limit, 'criteria' => $criteria, ]; $this->assign('urlQuery', CRM_Utils_System::makeQueryString($urlQry)); $this->assign('isSelected', $this->isSelected()); $criteria = json_decode($criteria, TRUE); - $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria); + $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, TRUE, $limit); $this->assign('cacheKey', $cacheKeyString); if ($context == 'search') { @@ -129,7 +129,7 @@ class CRM_Contact_Page_DedupeFind extends CRM_Core_Page_Basic { } elseif ($action & CRM_Core_Action::MAP) { // do a batch merge if requested - $result = CRM_Dedupe_Merger::batchMerge($rgid, $gid, 'safe', 75, 2, $criteria); + $result = CRM_Dedupe_Merger::batchMerge($rgid, $gid, 'safe', 75, 2, $criteria, TRUE, NULL, $limit); $skippedCount = CRM_Utils_Request::retrieve('skipped', 'Positive', $this, FALSE, 0); $skippedCount = $skippedCount + count($result['skipped']); diff --git a/CRM/Contact/Page/DedupeMerge.php b/CRM/Contact/Page/DedupeMerge.php index 1dddf56194..348f65b1d8 100644 --- a/CRM/Contact/Page/DedupeMerge.php +++ b/CRM/Contact/Page/DedupeMerge.php @@ -70,7 +70,7 @@ class CRM_Contact_Page_DedupeMerge extends CRM_Core_Page { ]; $criteria = json_decode($criteria, TRUE); - $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria); + $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, TRUE, $limit); if ($mode == 'aggressive' && !CRM_Core_Permission::check('force merge duplicate contacts')) { CRM_Core_Session::setStatus(ts('You do not have permission to force merge duplicate contact records'), ts('Permission Denied'), 'error'); @@ -98,7 +98,7 @@ class CRM_Contact_Page_DedupeMerge extends CRM_Core_Page { for ($i = 1; $i <= ceil($total / self::BATCHLIMIT); $i++) { $task = new CRM_Queue_Task( ['CRM_Contact_Page_DedupeMerge', 'callBatchMerge'], - [$rgid, $gid, $mode, self::BATCHLIMIT, $onlyProcessSelected, $criteria], + [$rgid, $gid, $mode, self::BATCHLIMIT, $onlyProcessSelected, $criteria, $limit], "Processed " . $i * self::BATCHLIMIT . " pair of duplicates out of " . $total ); @@ -132,11 +132,15 @@ class CRM_Contact_Page_DedupeMerge extends CRM_Core_Page { * @param int $batchLimit * @param int $isSelected * @param array $criteria + * @param int $searchLimit * * @return int + * + * @throws \CRM_Core_Exception + * @throws \CiviCRM_API3_Exception */ - public static function callBatchMerge(CRM_Queue_TaskContext $ctx, $rgid, $gid, $mode = 'safe', $batchLimit, $isSelected, $criteria) { - CRM_Dedupe_Merger::batchMerge($rgid, $gid, $mode, $batchLimit, $isSelected, $criteria, TRUE, FALSE); + public static function callBatchMerge(CRM_Queue_TaskContext $ctx, $rgid, $gid, $mode = 'safe', $batchLimit, $isSelected, $criteria, $searchLimit) { + CRM_Dedupe_Merger::batchMerge($rgid, $gid, $mode, $batchLimit, $isSelected, $criteria, TRUE, FALSE, $searchLimit); return CRM_Queue_Task::TASK_SUCCESS; } diff --git a/CRM/Core/BAO/PrevNextCache.php b/CRM/Core/BAO/PrevNextCache.php index bcc8e650f8..20b9fcfe48 100644 --- a/CRM/Core/BAO/PrevNextCache.php +++ b/CRM/Core/BAO/PrevNextCache.php @@ -429,7 +429,7 @@ WHERE (pn.cachekey $op %1 OR pn.cachekey $op %2) * @throws \CiviCRM_API3_Exception */ public static function refillCache($rgid, $gid, $criteria, $checkPermissions, $searchLimit = 0) { - $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions); + $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions, $searchLimit); // 1. Clear cache if any $sql = "DELETE FROM civicrm_prevnext_cache WHERE cachekey LIKE %1"; diff --git a/CRM/Dedupe/Merger.php b/CRM/Dedupe/Merger.php index 9e501f35f6..a35ae67e79 100644 --- a/CRM/Dedupe/Merger.php +++ b/CRM/Dedupe/Merger.php @@ -691,9 +691,17 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m * If not set explicitly this is calculated but it is preferred that it be set * per comments on isSelected above. * + * @param int $searchLimit + * Limit on number of contacts to search for duplicates for. + * This means that if the limit is 1000 then only duplicates for the first 1000 contacts + * matching criteria will be found and batchMerged (the number of merges could be less than or greater than 100) + * * @return array|bool + * + * @throws \CRM_Core_Exception + * @throws \CiviCRM_API3_Exception */ - public static function batchMerge($rgid, $gid = NULL, $mode = 'safe', $batchLimit = 1, $isSelected = 2, $criteria = [], $checkPermissions = TRUE, $reloadCacheIfEmpty = NULL) { + public static function batchMerge($rgid, $gid = NULL, $mode = 'safe', $batchLimit = 1, $isSelected = 2, $criteria = [], $checkPermissions = TRUE, $reloadCacheIfEmpty = NULL, $searchLimit = 0) { $redirectForPerformance = ($batchLimit > 1) ? TRUE : FALSE; if (!isset($reloadCacheIfEmpty)) { @@ -706,7 +714,7 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m $dupePairs = self::getDuplicatePairs($rgid, $gid, $reloadCacheIfEmpty, $batchLimit, $isSelected, ($mode == 'aggressive'), $criteria, $checkPermissions); $cacheParams = [ - 'cache_key_string' => self::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions), + 'cache_key_string' => self::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions, $searchLimit), // @todo stop passing these parameters in & instead calculate them in the merge function based // on the 'real' params like $isRespectExclusions $batchLimit and $isSelected. 'join' => self::getJoinOnDedupeTable(), @@ -1840,13 +1848,13 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m * @throws \CiviCRM_API3_Exception */ public static function getDuplicatePairs($rule_group_id, $group_id, $reloadCacheIfEmpty, $batchLimit, $isSelected, $includeConflicts = TRUE, $criteria = [], $checkPermissions = TRUE, $searchLimit = 0) { - $dupePairs = self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions); + $dupePairs = self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions, $searchLimit); if (empty($dupePairs) && $reloadCacheIfEmpty) { // If we haven't found any dupes, probably cache is empty. // Try filling cache and give another try. We don't need to specify include conflicts here are there will not be any // until we have done some processing. CRM_Core_BAO_PrevNextCache::refillCache($rule_group_id, $group_id, $criteria, $checkPermissions, $searchLimit); - return self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, FALSE, $criteria, $checkPermissions); + return self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, FALSE, $criteria, $checkPermissions, $searchLimit); } return $dupePairs; } @@ -1859,17 +1867,21 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m * @param array $criteria * Additional criteria to narrow down the merge group. * Currently we are only supporting the key 'contact' within it. - * * @param bool $checkPermissions * Respect the users permissions. + * @param int $searchLimit + * Number of contacts to seek dupes for (we need this because if + * we change it the results won't be refreshed otherwise. Changing the limit + * from 100 to 1000 SHOULD result in a new dedupe search). * * @return string */ - public static function getMergeCacheKeyString($rule_group_id, $group_id, $criteria = [], $checkPermissions = TRUE) { + public static function getMergeCacheKeyString($rule_group_id, $group_id, $criteria, $checkPermissions, $searchLimit) { $contactType = CRM_Dedupe_BAO_RuleGroup::getContactTypeForRuleGroup($rule_group_id); $cacheKeyString = "merge_{$contactType}"; $cacheKeyString .= $rule_group_id ? "_{$rule_group_id}" : '_0'; $cacheKeyString .= $group_id ? "_{$group_id}" : '_0'; + $cacheKeyString .= '_' . (int) $searchLimit; $cacheKeyString .= !empty($criteria) ? md5(serialize($criteria)) : '_0'; if ($checkPermissions) { $contactID = CRM_Core_Session::getLoggedInContactID(); @@ -2487,12 +2499,13 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m * @param bool $includeConflicts * @param array $criteria * @param int $checkPermissions + * @param int $searchLimit * * @return array */ - protected static function getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions) { + protected static function getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions, $searchLimit = 0) { return CRM_Core_BAO_PrevNextCache::retrieve( - self::getMergeCacheKeyString($rule_group_id, $group_id, $criteria, $checkPermissions), + self::getMergeCacheKeyString($rule_group_id, $group_id, $criteria, $checkPermissions, $searchLimit), self::getJoinOnDedupeTable(), self::getWhereString($isSelected), 0, $batchLimit, diff --git a/api/v3/Dedupe.php b/api/v3/Dedupe.php index 0099206918..58041ea3f9 100644 --- a/api/v3/Dedupe.php +++ b/api/v3/Dedupe.php @@ -107,7 +107,8 @@ function civicrm_api3_dedupe_getstatistics($params) { $params['rule_group_id'], CRM_Utils_Array::value('group_id', $params), CRM_Utils_Array::value('criteria', $params, []), - CRM_Utils_Array::value('check_permissions', $params, []) + !empty($params['check_permissions']), + CRM_Utils_Array::value('search_limit', $params, 0) )); return civicrm_api3_create_success($stats); } @@ -135,6 +136,11 @@ function _civicrm_api3_dedupe_getstatistics_spec(&$params) { 'title' => ts('Criteria'), 'description' => ts('Dedupe search criteria, as parsable by v3 Contact.get api'), ]; + $spec['search_limit'] = [ + 'title' => ts('Number of contacts to look for matches for.'), + 'type' => CRM_Utils_Type::T_INT, + 'api.default' => (int) Civi::settings()->get('dedupe_default_limit'), + ]; } diff --git a/api/v3/Job.php b/api/v3/Job.php index 86cfa253d2..4a4b0eca75 100644 --- a/api/v3/Job.php +++ b/api/v3/Job.php @@ -541,7 +541,7 @@ function civicrm_api3_job_process_batch_merge($params) { $gid = CRM_Utils_Array::value('gid', $params); $mode = CRM_Utils_Array::value('mode', $params, 'safe'); - $result = CRM_Dedupe_Merger::batchMerge($rule_group_id, $gid, $mode, 1, 2, CRM_Utils_Array::value('criteria', $params, []), CRM_Utils_Array::value('check_permissions', $params)); + $result = CRM_Dedupe_Merger::batchMerge($rule_group_id, $gid, $mode, 1, 2, CRM_Utils_Array::value('criteria', $params, []), CRM_Utils_Array::value('check_permissions', $params), NULL, $params['search_limit']); return civicrm_api3_create_success($result, $params); } @@ -571,6 +571,12 @@ function _civicrm_api3_job_process_batch_merge_spec(&$params) { 'description' => 'let the api decide which contact to retain and which to delete?', 'type' => CRM_Utils_Type::T_BOOLEAN, ]; + $params['search_limit'] = [ + 'title' => ts('Number of contacts to look for matches for.'), + 'type' => CRM_Utils_Type::T_INT, + 'api.default' => (int) Civi::settings()->get('dedupe_default_limit'), + ]; + } /** diff --git a/tests/phpunit/CRM/Dedupe/MergerTest.php b/tests/phpunit/CRM/Dedupe/MergerTest.php index a5e3c8c67a..0798109eae 100644 --- a/tests/phpunit/CRM/Dedupe/MergerTest.php +++ b/tests/phpunit/CRM/Dedupe/MergerTest.php @@ -175,7 +175,7 @@ class CRM_Dedupe_MergerTest extends CiviUnitTestCase { // Retrieve pairs from prev next cache table $select = ['pn.is_selected' => 'is_selected']; - $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId); + $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId, [], TRUE, 0); $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select); $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.'); @@ -245,7 +245,7 @@ class CRM_Dedupe_MergerTest extends CiviUnitTestCase { // Retrieve pairs from prev next cache table $select = ['pn.is_selected' => 'is_selected']; - $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId); + $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId, [], TRUE, 0); $pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select); $this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.'); -- 2.25.1