From 66eceb0bce79bde8c3f10b278050ded75e5c4f98 Mon Sep 17 00:00:00 2001 From: eileen Date: Fri, 13 May 2016 16:17:05 +1200 Subject: [PATCH] CRM-18517 remove code loop on batch merge process By constantly reloading conflicts in batch mode we are also reloading conflicted entries. If we process this list until it is empty it will never be empty --- CRM/Core/BAO/PrevNextCache.php | 43 +++++++++++++++++++++++++--------- CRM/Dedupe/Merger.php | 22 +++++++++++------ 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/CRM/Core/BAO/PrevNextCache.php b/CRM/Core/BAO/PrevNextCache.php index 8231dea31f..f76ec2f943 100644 --- a/CRM/Core/BAO/PrevNextCache.php +++ b/CRM/Core/BAO/PrevNextCache.php @@ -198,18 +198,30 @@ WHERE cacheKey = %3 AND /** * Retrieve from prev-next cache. * + * This function is used from a variety of merge related functions, although + * it would probably be good to converge on calling CRM_Dedupe_Merger::getDuplicatePairs. + * + * We seem to currently be storing stats in this table too & they might make more sense in + * the main cache table. + * * @param string $cacheKey * @param string $join - * @param string $where + * @param string $whereClause * @param int $offset * @param int $rowCount + * @param array $select + * @param string $orderByClause + * @param bool $includeConflicts + * Should we return rows that have already been idenfified as having a conflict. + * When this is TRUE you should be careful you do not set up a loop. * * @param array $select * * @return array */ - public static function retrieve($cacheKey, $join = NULL, $where = NULL, $offset = 0, $rowCount = 0, $select = array()) { + public static function retrieve($cacheKey, $join = NULL, $whereClause = NULL, $offset = 0, $rowCount = 0, $select = array(), $orderByClause = '', $includeConflicts = TRUE) { $selectString = 'pn.*'; + if (!empty($select)) { $aliasArray = array(); foreach ($select as $column => $alias) { @@ -217,20 +229,29 @@ WHERE cacheKey = %3 AND } $selectString .= " , " . implode(' , ', $aliasArray); } - $query = " -SELECT SQL_CALC_FOUND_ROWS {$selectString} -FROM civicrm_prevnext_cache pn - {$join} -WHERE (pn.cacheKey = %1 OR pn.cacheKey = %2) -"; + $params = array( 1 => array($cacheKey, 'String'), - 2 => array("{$cacheKey}_conflicts", 'String'), ); - if ($where) { - $query .= " AND {$where}"; + if (!empty($whereClause)) { + $whereClause = " AND " . $whereClause; + } + if ($includeConflicts) { + $where = ' WHERE (pn.cacheKey = %1 OR pn.cacheKey = %2)' . $whereClause; + $params[2] = array("{$cacheKey}_conflicts", 'String'); } + else { + $where = ' WHERE (pn.cacheKey = %1)' . $whereClause; + } + + $query = " +SELECT SQL_CALC_FOUND_ROWS {$selectString} +FROM civicrm_prevnext_cache pn + {$join} + $where + $orderByClause +"; if ($rowCount) { $offset = CRM_Utils_Type::escape($offset, 'Int'); diff --git a/CRM/Dedupe/Merger.php b/CRM/Dedupe/Merger.php index e1f1bd7343..a65babad76 100644 --- a/CRM/Dedupe/Merger.php +++ b/CRM/Dedupe/Merger.php @@ -595,7 +595,7 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m public static function batchMerge($rgid, $gid = NULL, $mode = 'safe', $autoFlip = TRUE, $batchLimit = 1, $isSelected = 2) { $redirectForPerformance = ($batchLimit > 1) ? TRUE : FALSE; $reloadCacheIfEmpty = (!$redirectForPerformance && $isSelected == 2); - $dupePairs = self::getDuplicatePairs($rgid, $gid, $reloadCacheIfEmpty, $batchLimit, $isSelected); + $dupePairs = self::getDuplicatePairs($rgid, $gid, $reloadCacheIfEmpty, $batchLimit, $isSelected, '', FALSE); $cacheParams = array( 'cache_key_string' => self::getMergeCacheKeyString($rgid, $gid), @@ -790,7 +790,12 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m // @todo call getDuplicatePairs. $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $cacheParams['join'], - $cacheParams['where'] + $cacheParams['where'], + 0, + 0, + array(), + '', + FALSE ); } else { @@ -1970,20 +1975,23 @@ INNER JOIN civicrm_membership membership2 ON membership1.membership_type_id = m * @param bool $reloadCacheIfEmpty * @param int $batchLimit * @param bool $isSelected + * @param array $orderByClause + * @param bool $includeConflicts * * @return array * Array of matches meeting the criteria. */ - public static function getDuplicatePairs($rule_group_id, $group_id, $reloadCacheIfEmpty, $batchLimit, $isSelected) { + public static function getDuplicatePairs($rule_group_id, $group_id, $reloadCacheIfEmpty, $batchLimit, $isSelected, $orderByClause = '', $includeConflicts = TRUE) { $where = self::getWhereString($batchLimit, $isSelected); - $cacheKeyString = self::getMergeCacheKeyString($rule_group_id, $group_id); + $cacheKeyString = self::getMergeCacheKeyString($rule_group_id, $group_id, $includeConflicts); $join = self::getJoinOnDedupeTable(); - $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where); + $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where, 0, 0, array(), $orderByClause, $includeConflicts); if (empty($dupePairs) && $reloadCacheIfEmpty) { // If we haven't found any dupes, probably cache is empty. - // Try filling cache and give another try. + // Try filling cache and give another try. We don't need to specify include conflicts here are there will not be any + // until we have done some processing. CRM_Core_BAO_PrevNextCache::refillCache($rule_group_id, $group_id, $cacheKeyString); - $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where); + $dupePairs = CRM_Core_BAO_PrevNextCache::retrieve($cacheKeyString, $join, $where, 0, 0, array(), $orderByClause, $includeConflicts); return $dupePairs; } return $dupePairs; -- 2.25.1