From bafcc2468c2f7dc899937c64ee56c7f5301a519b Mon Sep 17 00:00:00 2001 From: Eileen McNaughton Date: Wed, 8 Feb 2023 17:56:22 +1300 Subject: [PATCH] Fix performance on deciding which tables to query --- CRM/Core/BAO/SchemaHandler.php | 43 +++++++++++++++++++ CRM/Core/DAO.php | 4 +- CRM/Dedupe/BAO/DedupeRuleGroup.php | 43 ++++++++++--------- .../CRM/Core/BAO/SchemaHandlerTest.php | 19 ++++++++ tests/phpunit/CRM/Dedupe/DedupeFinderTest.php | 2 +- 5 files changed, 89 insertions(+), 22 deletions(-) diff --git a/CRM/Core/BAO/SchemaHandler.php b/CRM/Core/BAO/SchemaHandler.php index a58dd8c482..03f39c2dec 100644 --- a/CRM/Core/BAO/SchemaHandler.php +++ b/CRM/Core/BAO/SchemaHandler.php @@ -906,6 +906,49 @@ MODIFY {$columnName} varchar( $length ) return \Civi::$statics[__CLASS__][__FUNCTION__]; } + /** + * Get estimated number of rows in the given tables. + * + * Note that this query is less precise than SELECT(*) - especially on + * larger tables but performs significantly better. + * See https://dba.stackexchange.com/questions/184685/why-is-count-slow-when-explain-knows-the-answer + * + * @param array $tables + * e.g ['civicrm_contact', 'civicrm_activity'] + * + * @return array + * e.g ['civicrm_contact' => 200000, 'civicrm_activity' => 100000] + */ + public static function getRowCountForTables(array $tables): array { + $cachedResults = Civi::$statics[__CLASS__][__FUNCTION__] ?? []; + // Compile list of tables not already cached. + $tablesToCheck = array_keys(array_diff_key(array_flip($tables), $cachedResults)); + $result = CRM_Core_DAO::executeQuery(' + SELECT TABLE_ROWS as row_count, TABLE_NAME as table_name FROM information_schema.TABLES WHERE + TABLE_NAME IN("' . implode('","', $tablesToCheck) . '") + AND TABLE_SCHEMA = DATABASE()' + ); + while ($result->fetch()) { + $cachedResults[$result->table_name] = (int) $result->row_count; + } + Civi::$statics[__CLASS__][__FUNCTION__] = $cachedResults; + return array_intersect_key($cachedResults, array_fill_keys($tables, TRUE)); + } + + /** + * Get estimated number of rows in the given table. + * + * @see self::getRowCountForTables + * + * @param string $tableName + * + * @return int + * The approximate number of rows in the table. This is also 0 if the table does not exist. + */ + public static function getRowCountForTable(string $tableName): int { + return self::getRowCountForTables([$tableName])[$tableName] ?? 0; + } + /** * Does the database support utf8mb4. * diff --git a/CRM/Core/DAO.php b/CRM/Core/DAO.php index e5e8060e23..b139480626 100644 --- a/CRM/Core/DAO.php +++ b/CRM/Core/DAO.php @@ -1081,9 +1081,11 @@ class CRM_Core_DAO extends DB_DataObject { } /** - * Scans all the tables using a slow query and table name. + * Gets the names of all the tables in the schema. * * @return array + * + * @throws \CRM_Core_Exception */ public static function getTableNames(): array { $dao = CRM_Core_DAO::executeQuery( diff --git a/CRM/Dedupe/BAO/DedupeRuleGroup.php b/CRM/Dedupe/BAO/DedupeRuleGroup.php index 5e6f1d80b7..dc25dc02b4 100644 --- a/CRM/Dedupe/BAO/DedupeRuleGroup.php +++ b/CRM/Dedupe/BAO/DedupeRuleGroup.php @@ -263,7 +263,7 @@ class CRM_Dedupe_BAO_DedupeRuleGroup extends CRM_Dedupe_DAO_DedupeRuleGroup { $query = "{$insertClause} {$query} {$groupByClause} ON DUPLICATE KEY UPDATE weight = weight + VALUES(weight)"; $dao = CRM_Core_DAO::executeQuery($query); - // FIXME: we need to be more acurate with affected rows, especially for insert vs duplicate insert. + // FIXME: we need to be more accurate with affected rows, especially for insert vs duplicate insert. // And that will help optimize further. $affectedRows = $dao->affectedRows(); @@ -337,28 +337,31 @@ class CRM_Dedupe_BAO_DedupeRuleGroup extends CRM_Dedupe_DAO_DedupeRuleGroup { } /** - * sort queries by number of records for the table associated with them. - * @param $tableQueries + * Sort queries by number of records for the table associated with them. + * + * @param array $tableQueries */ - public static function orderByTableCount(&$tableQueries) { - static $tableCount = []; - - $tempArray = []; - foreach ($tableQueries as $key => $query) { - $table = explode(".", $key); - $table = $table[0]; - if (!array_key_exists($table, $tableCount)) { - $query = "SELECT COUNT(*) FROM {$table}"; - $tableCount[$table] = CRM_Core_DAO::singleValueQuery($query); - } - $tempArray[$key] = $tableCount[$table]; - } + public static function orderByTableCount(array &$tableQueries): void { + uksort($tableQueries, 'self::isTableBigger'); + } - asort($tempArray); - foreach ($tempArray as $key => $count) { - $tempArray[$key] = $tableQueries[$key]; + /** + * Is the table extracted from the first string larger than the second string. + * + * @param string $a + * e.g civicrm_contact.first_name + * @param string $b + * e.g civicrm_address.street_address + * + * @return int + */ + private static function isTableBigger(string $a, string $b): int { + $tableA = explode('.', $a)[0]; + $tableB = explode('.', $b)[0]; + if ($tableA === $tableB) { + return 0; } - $tableQueries = $tempArray; + return CRM_Core_BAO_SchemaHandler::getRowCountForTable($tableA) <=> CRM_Core_BAO_SchemaHandler::getRowCountForTable($tableB); } /** diff --git a/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php b/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php index 78fa3f1bc1..bef335ba8a 100644 --- a/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php +++ b/tests/phpunit/CRM/Core/BAO/SchemaHandlerTest.php @@ -132,6 +132,25 @@ class CRM_Core_BAO_SchemaHandlerTest extends CiviUnitTestCase { return $columns; } + /** + * Test the drop index if exists function for a non-existent index. + * + * @throws \CRM_Core_Exception + */ + public function testGetRowCountForTable(): void { + // Hopefully running ANALYZE TABLE will consistently update the 'approximate' values + // so we can test them. + CRM_Core_DAO::singleValueQuery('ANALYZE TABLE civicrm_domain'); + CRM_Core_DAO::singleValueQuery('ANALYZE TABLE civicrm_worldregion'); + CRM_Core_DAO::singleValueQuery('ANALYZE TABLE civicrm_acl'); + $this->assertEquals([ + 'civicrm_worldregion' => 6, + 'civicrm_acl' => 0, + 'civicrm_domain' => 2, + ], CRM_Core_BAO_SchemaHandler::getRowCountForTables(['civicrm_domain', 'civicrm_acl', 'random_name', 'civicrm_worldregion'])); + $this->assertEquals(2, CRM_Core_BAO_SchemaHandler::getRowCountForTable('civicrm_domain')); + } + /** * @param string $tableName * @param string $columnName diff --git a/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php b/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php index 73a6743296..9402348f87 100644 --- a/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php +++ b/tests/phpunit/CRM/Dedupe/DedupeFinderTest.php @@ -42,7 +42,7 @@ class CRM_Dedupe_DedupeFinderTest extends CiviUnitTestCase { /** * Test the unsupervised dedupe rule against a group. * - * @throws \Exception + * @throws \CRM_Core_Exception */ public function testUnsupervisedDupes(): void { // make dupe checks based on following contact sets: -- 2.25.1