From ba89bdbde1aa7f21badab003b89b2cb0052fd175 Mon Sep 17 00:00:00 2001 From: Rich Lott / Artful Robot Date: Sat, 4 Jul 2020 08:10:30 +0100 Subject: [PATCH] Improve efficiency of findFiles --- CRM/Utils/File.php | 25 +++++++++++++++---- .../CRM/common/civicrm.settings.php.template | 21 ++++++++++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/CRM/Utils/File.php b/CRM/Utils/File.php index ff1bccb6bf..67da4b10a6 100644 --- a/CRM/Utils/File.php +++ b/CRM/Utils/File.php @@ -753,9 +753,16 @@ HTACCESS; * @return array(string) */ public static function findFiles($dir, $pattern, $relative = FALSE) { - if (!is_dir($dir)) { + if (!is_dir($dir) || !is_readable($dir)) { return []; } + // Which dirs should we exclude from our searches? + // If not defined, we default to excluding any dirname that begins + // with a . which is the old behaviour and therefore excludes .git/ + $excludeDirsPattern = defined('CIVICRM_EXCLUDE_DIRS_PATTERN') + ? constant('CIVICRM_EXCLUDE_DIRS_PATTERN') + : '@' . preg_quote(DIRECTORY_SEPARATOR) . '\.@'; + $dir = rtrim($dir, '/'); $todos = [$dir]; $result = []; @@ -769,13 +776,21 @@ HTACCESS; } } } + // Find subdirs to recurse into. if ($dh = opendir($subdir)) { while (FALSE !== ($entry = readdir($dh))) { $path = $subdir . DIRECTORY_SEPARATOR . $entry; - if ($entry{0} == '.') { - // ignore - } - elseif (is_dir($path)) { + // Exclude . (self) and .. (parent) to avoid infinite loop. + // Exclude configured exclude dirs. + // Exclude dirs we can't read. + // Exclude anything that's not a dir. + if ( + $entry !== '.' + && $entry !== '..' 
+ && (empty($excludeDirsPattern) || !preg_match($excludeDirsPattern, $path)) + && is_dir($path) + && is_readable($path) + ) { $todos[] = $path; } } diff --git a/templates/CRM/common/civicrm.settings.php.template b/templates/CRM/common/civicrm.settings.php.template index 28102ccd78..81239fa14c 100644 --- a/templates/CRM/common/civicrm.settings.php.template +++ b/templates/CRM/common/civicrm.settings.php.template @@ -502,6 +502,27 @@ if (CIVICRM_UF === 'UnitTests') { // define('CIVICRM_LOG_ROTATESIZE', 0 ); // } +/** + * Which directories should we exclude when scanning the codebase for things + * like extension .info files, or .html partials or .xml files etc. This needs + * to be a valid preg_match() pattern. + * + * If you do not define it, a pattern that excludes dirs starting with a dot is + * used (e.g. to exclude .git/). Adding suitable patterns here can vastly speed + * up your container rebuilds and cache flushes. The pattern is matched against + * the absolute path. Remember to use your system's DIRECTORY_SEPARATOR; the + * examples below assume / + * + * Example: This excludes node_modules (can be huge), various CiviCRM dirs that + * are unlikely to have anything we need to scan inside, and (what could be + * your) Drupal's private file storage area. + * + * '@/(\.|node_modules|js/|css/|bower_components|packages/|vendor/|sites/default/files/private)@' + */ +// if (!defined('CIVICRM_EXCLUDE_DIRS_PATTERN')) { +// define('CIVICRM_EXCLUDE_DIRS_PATTERN', '@/\.@'); +// } + /** * * Do not change anything below this line. Keep as is -- 2.25.1