From 3e5a65dc095428799c37360a4d1a4fa2fe4da17c Mon Sep 17 00:00:00 2001 From: Eileen McNaughton Date: Thu, 7 Sep 2023 09:46:10 +1200 Subject: [PATCH] Move regex processing in EmailProcessor to handling class This is a partial of https://github.com/civicrm/civicrm-core/pull/27319 and includes moving a big chunk of code but no logic changes There is good test cover in CRM_Utils_Mail_EmailProcessorTest I have split this out as it gets some of the 'bulk' out of that PR which has a lot of more complex changes to grok --- CRM/Utils/Mail/EmailProcessor.php | 67 ++----------- CRM/Utils/Mail/IncomingMail.php | 150 ++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 61 deletions(-) create mode 100644 CRM/Utils/Mail/IncomingMail.php diff --git a/CRM/Utils/Mail/EmailProcessor.php b/CRM/Utils/Mail/EmailProcessor.php index bfd6da005e..8cc4df575c 100644 --- a/CRM/Utils/Mail/EmailProcessor.php +++ b/CRM/Utils/Mail/EmailProcessor.php @@ -84,21 +84,6 @@ class CRM_Utils_Mail_EmailProcessor { $createContact = !($dao->is_contact_creation_disabled_if_no_match ?? FALSE); } - $config = CRM_Core_Config::singleton(); - $verpSeparator = preg_quote($config->verpSeparator ?? ''); - $twoDigitStringMin = $verpSeparator . '(\d+)' . $verpSeparator . '(\d+)'; - $twoDigitString = $twoDigitStringMin . $verpSeparator; - - // a common-for-all-actions regex to handle CiviCRM 2.2 address patterns - $regex = '/^' . preg_quote($dao->localpart ?? '') . '(b|c|e|o|r|u)' . $twoDigitString . '([0-9a-f]{16})@' . preg_quote($dao->domain ?? '') . '$/'; - - // a tighter regex for finding bounce info in soft bounces’ mail bodies - $rpRegex = '/Return-Path:\s*' . preg_quote($dao->localpart ?? '') . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . preg_quote($dao->domain ?? '') . '/'; - - // a regex for finding bound info X-Header - $rpXheaderRegex = '/X-CiviMail-Bounce: ' . preg_quote($dao->localpart ?? '') . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . preg_quote($dao->domain ?? '') . '/i'; - // CiviMail in regex and Civimail in header !!! - // retrieve the emails try { $store = CRM_Mailing_MailStore::getStore($dao->name); @@ -113,51 +98,11 @@ class CRM_Utils_Mail_EmailProcessor { // process fifty at a time, CRM-4002 while ($mails = $store->fetchNext(MAIL_BATCH_SIZE)) { foreach ($mails as $key => $mail) { - - // for every addressee: match address elements if it's to CiviMail - $matches = []; - $action = NULL; - - if ($usedfor == 1) { - foreach ($mail->to as $address) { - if (preg_match($regex, ($address->email ?? ''), $matches)) { - [, $action, $job, $queue, $hash] = $matches; - break; - } - } - - // CRM-5471: if $matches is empty, it still might be a soft bounce sent - // to another address, so scan the body for ‘Return-Path: …bounce-pattern…’ - if (!$matches && preg_match($rpRegex, ($mail->generateBody() ?? ''), $matches)) { - [, $action, $job, $queue, $hash] = $matches; - } - - // if $matches is still empty, look for the X-CiviMail-Bounce header - // CRM-9855 - if (!$matches && preg_match($rpXheaderRegex, ($mail->generateBody() ?? ''), $matches)) { - [, $action, $job, $queue, $hash] = $matches; - } - // With Mandrilla, the X-CiviMail-Bounce header is produced by generateBody - // is base64 encoded - // Check all parts - if (!$matches) { - $all_parts = $mail->fetchParts(); - foreach ($all_parts as $k_part => $v_part) { - if ($v_part instanceof ezcMailFile) { - $p_file = $v_part->__get('fileName'); - $c_file = file_get_contents($p_file); - if (preg_match($rpXheaderRegex, ($c_file ?? ''), $matches)) { - [, $action, $job, $queue, $hash] = $matches; - } - } - } - } - - // if all else fails, check Delivered-To for possible pattern - if (!$matches and preg_match($regex, ($mail->getHeader('Delivered-To') ?? ''), $matches)) { - [, $action, $job, $queue, $hash] = $matches; - } - } + $incomingMail = new CRM_Utils_Mail_IncomingMail($mail, (string) $dao->domain, (string) $dao->localpart); + $action = $incomingMail->getAction(); + $job = $incomingMail->getJobID(); + $queue = $incomingMail->getQueueID(); + $hash = $incomingMail->getHash(); // preseve backward compatibility if ($usedfor == 0 || $is_create_activities) { @@ -194,7 +139,7 @@ class CRM_Utils_Mail_EmailProcessor { } // if $matches is empty, this email is not CiviMail-bound - if (!$matches) { + if (!$incomingMail->isVerp() && empty($matches)) { $store->markIgnored($key); continue; } diff --git a/CRM/Utils/Mail/IncomingMail.php b/CRM/Utils/Mail/IncomingMail.php new file mode 100644 index 0000000000..4da3997eee --- /dev/null +++ b/CRM/Utils/Mail/IncomingMail.php @@ -0,0 +1,150 @@ +action; + } + + /** + * @return int|null + */ + public function getQueueID(): ?int { + return $this->queueID; + } + + /** + * @return int|null + */ + public function getJobID(): ?int { + return $this->jobID; + } + + /** + * @return string|null + */ + public function getHash(): ?string { + return $this->hash; + } + + /** + * Is this a verp email. + * + * If the regex didn't find a match then no. + * + * @return bool + */ + public function isVerp(): bool { + return (bool) $this->action; + } + + /** + * @param \ezcMail $mail + * @param string $emailDomain + * @param string $emailLocalPart + * + * @throws \ezcBasePropertyNotFoundException + * @throws \CRM_Core_Exception + */ + public function __construct(ezcMail $mail, string $emailDomain, string $emailLocalPart) { + $this->mail = $mail; + + $verpSeparator = preg_quote(\Civi::settings()->get('verpSeparator') ?: ''); + $emailDomain = preg_quote($emailDomain); + $emailLocalPart = preg_quote($emailLocalPart); + $twoDigitStringMin = $verpSeparator . '(\d+)' . $verpSeparator . '(\d+)'; + $twoDigitString = $twoDigitStringMin . $verpSeparator; + + // a common-for-all-actions regex to handle CiviCRM 2.2 address patterns + $regex = '/^' . $emailLocalPart . '(b|c|e|o|r|u)' . $twoDigitString . '([0-9a-f]{16})@' . $emailDomain . '$/'; + + // a tighter regex for finding bounce info in soft bounces’ mail bodies + $rpRegex = '/Return-Path:\s*' . $emailLocalPart . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . $emailDomain . '/'; + + // a regex for finding bound info X-Header + $rpXHeaderRegex = '/X-CiviMail-Bounce: ' . $emailLocalPart . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . $emailDomain . '/i'; + // CiviMail in regex and Civimail in header !!! + $matches = NULL; + foreach ($this->mail->to as $address) { + if (preg_match($regex, ($address->email ?? ''), $matches)) { + [, $this->action, $this->jobID, $this->queueID, $this->hash] = $matches; + break; + } + } + + // CRM-5471: if $matches is empty, it still might be a soft bounce sent + // to another address, so scan the body for ‘Return-Path: …bounce-pattern…’ + if (!$matches && preg_match($rpRegex, ($mail->generateBody() ?? ''), $matches)) { + [, $this->action, $this->jobID, $this->queueID, $this->hash] = $matches; + } + + // if $matches is still empty, look for the X-CiviMail-Bounce header + // CRM-9855 + if (!$matches && preg_match($rpXHeaderRegex, ($mail->generateBody() ?? ''), $matches)) { + [, $this->action, $this->jobID, $this->queueID, $this->hash] = $matches; + } + // With Mandrill, the X-CiviMail-Bounce header is produced by generateBody + // is base64 encoded + // Check all parts + if (!$matches) { + $all_parts = $mail->fetchParts(); + foreach ($all_parts as $v_part) { + if ($v_part instanceof ezcMailFile) { + $p_file = $v_part->__get('fileName'); + $c_file = file_get_contents($p_file); + if (preg_match($rpXHeaderRegex, ($c_file ?? ''), $matches)) { + [, $this->action, $this->jobID, $this->queueID, $this->hash] = $matches; + } + } + } + } + + // if all else fails, check Delivered-To for possible pattern + if (!$matches && preg_match($regex, ($mail->getHeader('Delivered-To') ?? ''), $matches)) { + [, $this->action, $this->jobID, $this->queueID, $this->hash] = $matches; + } + } + +} -- 2.25.1