When accessing dedupes by the api call or on the dedupe screen it's possible to pass in
a searchLimit param. This works like the group limit in that it limits the number of
contacts for whom a match is sought. For example if there are 2million contacts in the database
and you have a search limit of 0 then it will look for duplicates for all 2 million. (unset
is the same as 0). If you have a search limit of 1000 it will look for matches for the first
1000 contacts that match the criteria (criteria could be the group or other criteria passed in
via the url although the api is the most obvious way to pass in criteria)
Note there is a separate limit (sometimes called batch limit) that limits results from within
the found matches.
To test the searchLimit it is possible to add limit=5 to the url generated in the
url by findContacts. Without this patch changing the limit once the search has been done
will not alter the results as the limit is not part of the cachekey - this patch
changes that.
CRM_Core_Session::singleton()->pushUserContext($browseUrl);
}
- $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $gid, json_decode($this->criteria, TRUE));
+ $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $gid, json_decode($this->criteria, TRUE), TRUE, $this->limit);
$join = CRM_Dedupe_Merger::getJoinOnDedupeTable();
$where = "de.id IS NULL";
}
if ($this->next && $this->_mergeId) {
- $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $this->_gid, json_decode($this->criteria, TRUE));
+ $cacheKey = CRM_Dedupe_Merger::getMergeCacheKeyString($this->_rgid, $this->_gid, json_decode($this->criteria, TRUE), TRUE, $this->limit);
$join = CRM_Dedupe_Merger::getJoinOnDedupeTable();
$where = "de.id IS NULL";
$gid = CRM_Utils_Request::retrieve('gid', 'Positive');
$rgid = CRM_Utils_Request::retrieve('rgid', 'Positive');
+ $limit = CRM_Utils_Request::retrieveValue('limit', 'Positive', 0);
$null = NULL;
$criteria = CRM_Utils_Request::retrieve('criteria', 'Json', $null, FALSE, '{}');
$selected = CRM_Utils_Request::retrieveValue('selected', 'Boolean');
}
$whereClause = $orderByClause = '';
- $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, json_decode($criteria, TRUE));
+ $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, json_decode($criteria, TRUE), TRUE, $limit);
$searchRows = [];
'reset' => 1,
'rgid' => $rgid,
'gid' => $gid,
- 'limit' => $limit,
+ 'limit' => (int) $limit,
'criteria' => $criteria,
];
$this->assign('urlQuery', CRM_Utils_System::makeQueryString($urlQry));
$this->assign('isSelected', $this->isSelected());
$criteria = json_decode($criteria, TRUE);
- $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria);
+ $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, TRUE, $limit);
$this->assign('cacheKey', $cacheKeyString);
if ($context == 'search') {
}
elseif ($action & CRM_Core_Action::MAP) {
// do a batch merge if requested
- $result = CRM_Dedupe_Merger::batchMerge($rgid, $gid, 'safe', 75, 2, $criteria);
+ $result = CRM_Dedupe_Merger::batchMerge($rgid, $gid, 'safe', 75, 2, $criteria, TRUE, NULL, $limit);
$skippedCount = CRM_Utils_Request::retrieve('skipped', 'Positive', $this, FALSE, 0);
$skippedCount = $skippedCount + count($result['skipped']);
];
$criteria = json_decode($criteria, TRUE);
- $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria);
+ $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, TRUE, $limit);
if ($mode == 'aggressive' && !CRM_Core_Permission::check('force merge duplicate contacts')) {
CRM_Core_Session::setStatus(ts('You do not have permission to force merge duplicate contact records'), ts('Permission Denied'), 'error');
for ($i = 1; $i <= ceil($total / self::BATCHLIMIT); $i++) {
$task = new CRM_Queue_Task(
['CRM_Contact_Page_DedupeMerge', 'callBatchMerge'],
- [$rgid, $gid, $mode, self::BATCHLIMIT, $onlyProcessSelected, $criteria],
+ [$rgid, $gid, $mode, self::BATCHLIMIT, $onlyProcessSelected, $criteria, $limit],
"Processed " . $i * self::BATCHLIMIT . " pair of duplicates out of " . $total
);
* @param int $batchLimit
* @param int $isSelected
* @param array $criteria
+ * @param int $searchLimit
*
* @return int
+ *
+ * @throws \CRM_Core_Exception
+ * @throws \CiviCRM_API3_Exception
*/
- public static function callBatchMerge(CRM_Queue_TaskContext $ctx, $rgid, $gid, $mode = 'safe', $batchLimit, $isSelected, $criteria) {
- CRM_Dedupe_Merger::batchMerge($rgid, $gid, $mode, $batchLimit, $isSelected, $criteria, TRUE, FALSE);
+ public static function callBatchMerge(CRM_Queue_TaskContext $ctx, $rgid, $gid, $mode = 'safe', $batchLimit, $isSelected, $criteria, $searchLimit) {
+ CRM_Dedupe_Merger::batchMerge($rgid, $gid, $mode, $batchLimit, $isSelected, $criteria, TRUE, FALSE, $searchLimit);
return CRM_Queue_Task::TASK_SUCCESS;
}
* @throws \CiviCRM_API3_Exception
*/
public static function refillCache($rgid, $gid, $criteria, $checkPermissions, $searchLimit = 0) {
- $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions);
+ $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions, $searchLimit);
// 1. Clear cache if any
$sql = "DELETE FROM civicrm_prevnext_cache WHERE cachekey LIKE %1";
* If not set explicitly this is calculated but it is preferred that it be set
* per comments on isSelected above.
*
+ * @param int $searchLimit
+ * Limit on number of contacts to search for duplicates for.
+ * This means that if the limit is 1000 then only duplicates for the first 1000 contacts
+ * matching criteria will be found and batchMerged (the number of merges could be less than or greater than 100)
+ *
* @return array|bool
+ *
+ * @throws \CRM_Core_Exception
+ * @throws \CiviCRM_API3_Exception
*/
- public static function batchMerge($rgid, $gid = NULL, $mode = 'safe', $batchLimit = 1, $isSelected = 2, $criteria = [], $checkPermissions = TRUE, $reloadCacheIfEmpty = NULL) {
+ public static function batchMerge($rgid, $gid = NULL, $mode = 'safe', $batchLimit = 1, $isSelected = 2, $criteria = [], $checkPermissions = TRUE, $reloadCacheIfEmpty = NULL, $searchLimit = 0) {
$redirectForPerformance = ($batchLimit > 1) ? TRUE : FALSE;
if (!isset($reloadCacheIfEmpty)) {
$dupePairs = self::getDuplicatePairs($rgid, $gid, $reloadCacheIfEmpty, $batchLimit, $isSelected, ($mode == 'aggressive'), $criteria, $checkPermissions);
$cacheParams = [
- 'cache_key_string' => self::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions),
+ 'cache_key_string' => self::getMergeCacheKeyString($rgid, $gid, $criteria, $checkPermissions, $searchLimit),
// @todo stop passing these parameters in & instead calculate them in the merge function based
// on the 'real' params like $isRespectExclusions $batchLimit and $isSelected.
'join' => self::getJoinOnDedupeTable(),
* @throws \CiviCRM_API3_Exception
*/
public static function getDuplicatePairs($rule_group_id, $group_id, $reloadCacheIfEmpty, $batchLimit, $isSelected, $includeConflicts = TRUE, $criteria = [], $checkPermissions = TRUE, $searchLimit = 0) {
- $dupePairs = self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions);
+ $dupePairs = self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions, $searchLimit);
if (empty($dupePairs) && $reloadCacheIfEmpty) {
// If we haven't found any dupes, probably cache is empty.
// Try filling cache and give another try. We don't need to specify include conflicts here are there will not be any
// until we have done some processing.
CRM_Core_BAO_PrevNextCache::refillCache($rule_group_id, $group_id, $criteria, $checkPermissions, $searchLimit);
- return self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, FALSE, $criteria, $checkPermissions);
+ return self::getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, FALSE, $criteria, $checkPermissions, $searchLimit);
}
return $dupePairs;
}
* @param array $criteria
* Additional criteria to narrow down the merge group.
* Currently we are only supporting the key 'contact' within it.
- *
* @param bool $checkPermissions
* Respect the users permissions.
+ * @param int $searchLimit
+ * Number of contacts to seek dupes for (we need this because if
+ * we change it the results won't be refreshed otherwise. Changing the limit
+ * from 100 to 1000 SHOULD result in a new dedupe search).
*
* @return string
*/
- public static function getMergeCacheKeyString($rule_group_id, $group_id, $criteria = [], $checkPermissions = TRUE) {
+ public static function getMergeCacheKeyString($rule_group_id, $group_id, $criteria, $checkPermissions, $searchLimit) {
$contactType = CRM_Dedupe_BAO_RuleGroup::getContactTypeForRuleGroup($rule_group_id);
$cacheKeyString = "merge_{$contactType}";
$cacheKeyString .= $rule_group_id ? "_{$rule_group_id}" : '_0';
$cacheKeyString .= $group_id ? "_{$group_id}" : '_0';
+ $cacheKeyString .= '_' . (int) $searchLimit;
$cacheKeyString .= !empty($criteria) ? md5(serialize($criteria)) : '_0';
if ($checkPermissions) {
$contactID = CRM_Core_Session::getLoggedInContactID();
* @param bool $includeConflicts
* @param array $criteria
* @param int $checkPermissions
+ * @param int $searchLimit
*
* @return array
*/
- protected static function getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions) {
+ protected static function getCachedDuplicateMatches($rule_group_id, $group_id, $batchLimit, $isSelected, $includeConflicts, $criteria, $checkPermissions, $searchLimit = 0) {
return CRM_Core_BAO_PrevNextCache::retrieve(
- self::getMergeCacheKeyString($rule_group_id, $group_id, $criteria, $checkPermissions),
+ self::getMergeCacheKeyString($rule_group_id, $group_id, $criteria, $checkPermissions, $searchLimit),
self::getJoinOnDedupeTable(),
self::getWhereString($isSelected),
0, $batchLimit,
$params['rule_group_id'],
CRM_Utils_Array::value('group_id', $params),
CRM_Utils_Array::value('criteria', $params, []),
- CRM_Utils_Array::value('check_permissions', $params, [])
+ !empty($params['check_permissions']),
+ CRM_Utils_Array::value('search_limit', $params, 0)
));
return civicrm_api3_create_success($stats);
}
'title' => ts('Criteria'),
'description' => ts('Dedupe search criteria, as parsable by v3 Contact.get api'),
];
+ $spec['search_limit'] = [
+ 'title' => ts('Number of contacts to look for matches for.'),
+ 'type' => CRM_Utils_Type::T_INT,
+ 'api.default' => (int) Civi::settings()->get('dedupe_default_limit'),
+ ];
}
$gid = CRM_Utils_Array::value('gid', $params);
$mode = CRM_Utils_Array::value('mode', $params, 'safe');
- $result = CRM_Dedupe_Merger::batchMerge($rule_group_id, $gid, $mode, 1, 2, CRM_Utils_Array::value('criteria', $params, []), CRM_Utils_Array::value('check_permissions', $params));
+ $result = CRM_Dedupe_Merger::batchMerge($rule_group_id, $gid, $mode, 1, 2, CRM_Utils_Array::value('criteria', $params, []), CRM_Utils_Array::value('check_permissions', $params), NULL, $params['search_limit']);
return civicrm_api3_create_success($result, $params);
}
'description' => 'let the api decide which contact to retain and which to delete?',
'type' => CRM_Utils_Type::T_BOOLEAN,
];
+ $params['search_limit'] = [
+ 'title' => ts('Number of contacts to look for matches for.'),
+ 'type' => CRM_Utils_Type::T_INT,
+ 'api.default' => (int) Civi::settings()->get('dedupe_default_limit'),
+ ];
+
}
/**
// Retrieve pairs from prev next cache table
$select = ['pn.is_selected' => 'is_selected'];
- $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId);
+ $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId, [], TRUE, 0);
$pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
$this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');
// Retrieve pairs from prev next cache table
$select = ['pn.is_selected' => 'is_selected'];
- $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId);
+ $cacheKeyString = CRM_Dedupe_Merger::getMergeCacheKeyString($dao->id, $this->_groupId, [], TRUE, 0);
$pnDupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, NULL, NULL, 0, 0, $select);
$this->assertEquals(count($foundDupes), count($pnDupePairs), 'Check number of dupe pairs in prev next cache.');