Merge pull request #16543 from jmcclelland/issue1589
[civicrm-core.git] / CRM / Utils / Mail / EmailProcessor.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 */
17
// Tunables for inbound mail processing. These are candidates for the settings
// table; until then each one is only defined here when nothing has defined it
// earlier, so a site can override a value without triggering an
// "already defined" warning.
if (!defined('EMAIL_ACTIVITY_TYPE_ID')) {
  // NULL makes _process() look up the 'Inbound Email' activity type instead.
  define('EMAIL_ACTIVITY_TYPE_ID', NULL);
}
if (!defined('MAIL_BATCH_SIZE')) {
  // Number of messages requested from the mail store per fetch.
  define('MAIL_BATCH_SIZE', 50);
}
22
23 /**
24 * Class CRM_Utils_Mail_EmailProcessor.
25 */
26 class CRM_Utils_Mail_EmailProcessor {
27
28 const MIME_MAX_RECURSION = 10;
29
/**
 * Run bounce processing on every default mailbox of the current domain.
 *
 * The default mailbox is the one CiviMail uses for bounces.
 *
 * @param bool $is_create_activities
 *   Should activities be created
 */
public static function processBounces($is_create_activities) {
  $mailSettings = new CRM_Core_DAO_MailSettings();
  $mailSettings->domain_id = CRM_Core_Config::domainID();
  $mailSettings->is_default = TRUE;

  // hand each matching settings row to the shared worker in CiviMail context
  for ($mailSettings->find(); $mailSettings->fetch();) {
    self::_process(TRUE, $mailSettings, $is_create_activities);
  }
}
46
/**
 * Recursively remove stale files below a directory.
 *
 * Entries modified within the last $age seconds are left alone; a directory
 * that is recent is not descended into. Old directories are descended into
 * but are not themselves removed.
 *
 * @param string $dir
 *   Directory to cleanup.
 * @param int $age
 *   Files older than this many seconds will be deleted (default: 60 days).
 */
public static function cleanupDir($dir, $age = 5184000) {
  // nothing to do unless $dir is a directory we can both write and read
  if (!(is_writable($dir) && is_readable($dir) && is_dir($dir))) {
    return;
  }

  foreach (scandir($dir) as $entry) {
    // never follow the self/parent links
    if ($entry == '.' || $entry == '..') {
      continue;
    }

    $path = "$dir/$entry";

    // recently modified entries stay untouched
    if (filemtime($path) > time() - $age) {
      continue;
    }

    // stale entry: descend into directories, delete plain files
    if (is_dir($path)) {
      self::cleanupDir($path, $age);
    }
    else {
      unlink($path);
    }
  }
}
75
/**
 * Run Email-to-Activity processing on every non-default mailbox.
 *
 * Non-default mailboxes are the ones CiviMail does not use for bounces.
 *
 * @return bool
 *   TRUE once at least one mailbox has been processed.
 *
 * @throws CRM_Core_Exception
 *   When the current domain has no non-default mailbox configured.
 */
public static function processActivities() {
  $mailSettings = new CRM_Core_DAO_MailSettings();
  $mailSettings->domain_id = CRM_Core_Config::domainID();
  $mailSettings->is_default = FALSE;
  $mailSettings->find();

  $processedAny = FALSE;
  while ($mailSettings->fetch()) {
    $processedAny = TRUE;
    self::_process(FALSE, $mailSettings, TRUE);
  }

  if ($processedAny) {
    return TRUE;
  }
  throw new CRM_Core_Exception(ts('No mailboxes have been configured for Email to Activity Processing'));
}
98
/**
 * Process the mailbox for all the settings from civicrm_mail_settings.
 *
 * @param bool|string $civiMail
 *   If true, processing is done in CiviMail context, or Activities otherwise.
 */
public static function process($civiMail = TRUE) {
  $dao = new CRM_Core_DAO_MailSettings();
  $dao->domain_id = CRM_Core_Config::domainID();
  $dao->find();

  // _process() declares three required parameters, so the activity flag has
  // to be passed explicitly. Outside CiviMail context this is an Activities
  // run, hence activity creation is requested; non-default mailboxes create
  // activities regardless of this flag.
  $is_create_activities = !$civiMail;

  while ($dao->fetch()) {
    self::_process($civiMail, $dao, $is_create_activities);
  }
}
113
/**
 * Fetch every message from one configured mailbox and act on it.
 *
 * For the default (bounce) mailbox each message is matched against the
 * CiviMail VERP address patterns and the extracted action is forwarded to
 * the corresponding Mailing / MailingGroup API call. For any other mailbox,
 * or when $is_create_activities is set, the message is also filed as an
 * activity. Each message ends up marked as processed or ignored on the store.
 *
 * @param bool|string $civiMail
 *   Context flag supplied by the callers; this method does not read it.
 * @param CRM_Core_DAO_MailSettings $dao
 *   The mailbox settings row to process.
 * @param bool $is_create_activities
 *   Create activities.
 *
 * @throws Exception
 * @throws CRM_Core_Exception
 *   When no Inbound Email activity type can be found or the mail store
 *   cannot be opened.
 */
public static function _process($civiMail, $dao, $is_create_activities) {
  // 0 = activities; 1 = bounce;
  $usedfor = $dao->is_default;

  $emailActivityTypeId
    = (defined('EMAIL_ACTIVITY_TYPE_ID') && EMAIL_ACTIVITY_TYPE_ID)
    ? EMAIL_ACTIVITY_TYPE_ID
    : CRM_Core_PseudoConstant::getKey('CRM_Activity_BAO_Activity', 'activity_type_id', 'Inbound Email');

  if (!$emailActivityTypeId) {
    throw new CRM_Core_Exception(ts('Could not find a valid Activity Type ID for Inbound Email'));
  }

  $config = CRM_Core_Config::singleton();

  // Every dynamic fragment is quoted with the '/' delimiter used by the
  // patterns below, so a slash in the localpart or domain cannot end the
  // pattern early.
  $verpSeparator = preg_quote((string) $config->verpSeparator, '/');
  $localpart = preg_quote((string) $dao->localpart, '/');
  $domain = preg_quote((string) $dao->domain, '/');

  $twoDigitStringMin = $verpSeparator . '(\d+)' . $verpSeparator . '(\d+)';
  $twoDigitString = $twoDigitStringMin . $verpSeparator;
  $threeDigitString = $twoDigitString . '(\d+)' . $verpSeparator;

  // FIXME: legacy regexen to handle CiviCRM 2.1 address patterns, with domain id and possible VERP part
  $commonRegex = '/^' . $localpart . '(b|bounce|c|confirm|o|optOut|r|reply|re|e|resubscribe|u|unsubscribe)' . $threeDigitString . '([0-9a-f]{16})(-.*)?@' . $domain . '$/';
  $subscrRegex = '/^' . $localpart . '(s|subscribe)' . $twoDigitStringMin . '@' . $domain . '$/';

  // a common-for-all-actions regex to handle CiviCRM 2.2 address patterns
  $regex = '/^' . $localpart . '(b|c|e|o|r|u)' . $twoDigitString . '([0-9a-f]{16})@' . $domain . '$/';

  // a tighter regex for finding bounce info in soft bounces' mail bodies
  $rpRegex = '/Return-Path:\s*' . $localpart . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . $domain . '/';

  // a regex for finding bounce info in the X-Header
  // CiviMail in regex and Civimail in header !!!
  $rpXheaderRegex = '/X-CiviMail-Bounce: ' . $localpart . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . $domain . '/i';

  // retrieve the emails
  try {
    $store = CRM_Mailing_MailStore::getStore($dao->name);
  }
  catch (Exception $e) {
    $message = ts('Could not connect to MailStore for ') . $dao->username . '@' . $dao->server . '<p>';
    $message .= ts('Error message: ');
    $message .= '<pre>' . $e->getMessage() . '</pre><p>';
    throw new CRM_Core_Exception($message);
  }

  // process fifty at a time, CRM-4002
  while ($mails = $store->fetchNext(MAIL_BATCH_SIZE)) {
    foreach ($mails as $key => $mail) {

      // for every addressee: match address elements if it's to CiviMail
      $matches = [];
      $action = NULL;
      // reset the per-message identifiers so nothing leaks in from the
      // previous message (the subscribe pattern, for one, sets only $job)
      $job = NULL;
      $queue = NULL;
      $hash = NULL;

      if ($usedfor == 1) {
        foreach ($mail->to as $address) {
          if (preg_match($regex, $address->email, $matches)) {
            list($match, $action, $job, $queue, $hash) = $matches;
            break;
          }
          // FIXME: the below elseifs should be dropped when we drop legacy support
          elseif (preg_match($commonRegex, $address->email, $matches)) {
            list($match, $action, $_, $job, $queue, $hash) = $matches;
            break;
          }
          elseif (preg_match($subscrRegex, $address->email, $matches)) {
            list($match, $action, $_, $job) = $matches;
            break;
          }
        }

        // CRM-5471: if $matches is empty, it still might be a soft bounce sent
        // to another address, so scan the body for 'Return-Path: ...bounce-pattern...'
        if (!$matches && preg_match($rpRegex, $mail->generateBody(), $matches)) {
          list($match, $action, $job, $queue, $hash) = $matches;
        }

        // if $matches is still empty, look for the X-CiviMail-Bounce header
        // CRM-9855
        if (!$matches && preg_match($rpXheaderRegex, $mail->generateBody(), $matches)) {
          list($match, $action, $job, $queue, $hash) = $matches;
        }

        // With Mandrill, the X-CiviMail-Bounce header produced by generateBody
        // is base64 encoded, so check the content of all file parts.
        if (!$matches) {
          foreach ($mail->fetchParts() as $v_part) {
            if ($v_part instanceof ezcMailFile) {
              $c_file = (string) file_get_contents($v_part->__get('fileName'));
              if (preg_match($rpXheaderRegex, $c_file, $matches)) {
                list($match, $action, $job, $queue, $hash) = $matches;
                // Stop at the first hit: preg_match() empties $matches on a
                // miss, so testing a further part would discard this result.
                break;
              }
            }
          }
        }

        // if all else fails, check Delivered-To for possible pattern
        if (!$matches && preg_match($regex, (string) $mail->getHeader('Delivered-To'), $matches)) {
          list($match, $action, $job, $queue, $hash) = $matches;
        }
      }

      // preserve backward compatibility
      if ($usedfor == 0 || $is_create_activities) {
        // if its the activities that needs to be processed ..
        try {
          $mailParams = CRM_Utils_Mail_Incoming::parseMailingObject($mail);
        }
        catch (Exception $e) {
          echo $e->getMessage();
          $store->markIgnored($key);
          continue;
        }

        require_once 'CRM/Utils/DeprecatedUtils.php';
        $params = _civicrm_api3_deprecated_activity_buildmailparams($mailParams, $emailActivityTypeId);

        $params['version'] = 3;
        if (!empty($dao->activity_status)) {
          $params['status_id'] = $dao->activity_status;
        }
        $result = civicrm_api('activity', 'create', $params);

        // from here on $matches only records whether the activity was filed
        if ($result['is_error']) {
          $matches = FALSE;
          echo "Failed Processing: {$mail->subject}. Reason: {$result['error_message']}\n";
        }
        else {
          $matches = TRUE;
          CRM_Utils_Hook::emailProcessor('activity', $params, $mail, $result);
          echo "Processed as Activity: {$mail->subject}\n";
        }
      }

      // if $matches is empty, this email is not CiviMail-bound
      if (!$matches) {
        $store->markIgnored($key);
        continue;
      }

      // get $replyTo from either the Reply-To header or from From
      // FIXME: make sure it works with Reply-Tos containing non-email stuff
      $replyTo = $mail->getHeader('Reply-To') ? $mail->getHeader('Reply-To') : $mail->from->email;

      // handle the action by passing it to the proper API call
      // FIXME: leave only one-letter cases when dropping legacy support
      if (!empty($action)) {
        $result = NULL;

        switch ($action) {
          case 'b':
          case 'bounce':
            $text = '';
            if ($mail->body instanceof ezcMailText) {
              $text = $mail->body->text;
            }
            elseif ($mail->body instanceof ezcMailMultipart) {
              $text = self::getTextFromMultipart($mail->body);
            }
            elseif ($mail->body instanceof ezcMailFile) {
              $text = file_get_contents($mail->body->__get('fileName'));
            }

            if (
              empty($text) &&
              $mail->subject == "Delivery Status Notification (Failure)" &&
              // only bodies that expose parts can be walked; a plain text
              // body with empty text would otherwise be a fatal error here
              is_object($mail->body) &&
              method_exists($mail->body, 'getParts')
            ) {
              // Exchange error - CRM-9361
              foreach ($mail->body->getParts() as $part) {
                if ($part instanceof ezcMailDeliveryStatus) {
                  foreach ($part->recipients as $rec) {
                    if ($rec["Status"] == "5.1.1") {
                      if (isset($rec["Description"])) {
                        $text = $rec["Description"];
                      }
                      else {
                        $text = $rec["Status"] . " Delivery to the following recipients failed";
                      }
                      break;
                    }
                  }
                }
              }
            }

            if (empty($text)) {
              // If bounce processing fails, just take the raw body. Cf. CRM-11046
              $text = $mail->generateBody();

              // if text is still empty, lets fudge a blank text so the api call below will succeed
              if (empty($text)) {
                $text = ts('We could not extract the mail body from this bounce message.');
              }
            }

            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'body' => $text,
              'version' => 3,
              // Setting is_transactional means it will rollback if
              // it crashes part way through creating the bounce.
              // If the api were standard & had a create this would be the
              // default. Adding the standard api & deprecating this one
              // would probably be the
              // most consistent way to address this - but this is
              // a quick hack.
              'is_transactional' => 1,
            ];
            $result = civicrm_api('Mailing', 'event_bounce', $params);
            break;

          case 'c':
          case 'confirm':
            // CRM-7921
            $params = [
              'contact_id' => $job,
              'subscribe_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('Mailing', 'event_confirm', $params);
            break;

          case 'o':
          case 'optOut':
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_domain_unsubscribe', $params);
            break;

          case 'r':
          case 'reply':
            // instead of text and HTML parts (4th and 6th params) send the whole email as the last param
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'bodyTxt' => NULL,
              'replyTo' => $replyTo,
              'bodyHTML' => NULL,
              'fullEmail' => $mail->generate(),
              'version' => 3,
            ];
            $result = civicrm_api('Mailing', 'event_reply', $params);
            break;

          case 'e':
          case 're':
          case 'resubscribe':
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_resubscribe', $params);
            break;

          case 's':
          case 'subscribe':
            $params = [
              'email' => $mail->from->email,
              'group_id' => $job,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_subscribe', $params);
            break;

          case 'u':
          case 'unsubscribe':
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_unsubscribe', $params);
            break;
        }

        if ($result === NULL) {
          // No case matched (the X-header pattern is case-insensitive, so the
          // captured action may differ in case). No API call was made, so the
          // hook must not be fired with $params left over from elsewhere.
          echo "Failed Processing: {$mail->subject}, Action: $action. Reason: unrecognised action\n";
        }
        elseif ($result['is_error']) {
          echo "Failed Processing: {$mail->subject}, Action: $action, Job ID: $job, Queue ID: $queue, Hash: $hash. Reason: {$result['error_message']}\n";
        }
        else {
          CRM_Utils_Hook::emailProcessor('mailing', $params, $mail, $result, $action);
        }
      }

      $store->markProcessed($key);
    }
    // CRM-7356 - used by IMAP only
    $store->expunge();
  }
}
423
/**
 * Find the first usable text inside a multipart body.
 *
 * Report and related multiparts are delegated to their dedicated helpers;
 * any other multipart is scanned part by part, descending into nested
 * multiparts. Descent stops once MIME_MAX_RECURSION levels are reached.
 *
 * @param \ezcMailMultipart $multipart
 * @param int $recursionLevel
 *   How deep the caller already is; 0 for the top-level body.
 *
 * @return string|null
 *   The text found, or NULL when there is none or the depth limit was hit.
 */
protected static function getTextFromMultipart($multipart, $recursionLevel = 0) {
  if ($recursionLevel >= self::MIME_MAX_RECURSION) {
    return NULL;
  }
  $recursionLevel += 1;

  if ($multipart instanceof ezcMailMultipartReport) {
    return self::getTextFromMulipartReport($multipart, $recursionLevel);
  }
  if ($multipart instanceof ezcMailMultipartRelated) {
    return self::getTextFromMultipartRelated($multipart, $recursionLevel);
  }

  $found = NULL;
  foreach ($multipart->getParts() as $child) {
    $isPlainText = isset($child->subType) && $child->subType === 'plain';
    if ($isPlainText) {
      $found = $child->text;
    }
    elseif ($child instanceof ezcMailMultipart) {
      $found = self::getTextFromMultipart($child, $recursionLevel);
    }

    // the first non-empty result wins
    if ($found) {
      break;
    }
  }
  return $found;
}
457
/**
 * Find the first usable text among the related parts of a multipart/related.
 *
 * @param \ezcMailMultipartRelated $related
 * @param int $recursionLevel
 *   Current nesting depth, passed on when descending into nested multiparts.
 *
 * @return string|null
 *   The text found, or NULL when no related part yields any.
 */
protected static function getTextFromMultipartRelated($related, $recursionLevel) {
  $found = NULL;
  foreach ($related->getRelatedParts() as $child) {
    $isPlainText = isset($child->subType) && $child->subType === 'plain';
    if ($isPlainText) {
      $found = $child->text;
    }
    elseif ($child instanceof ezcMailMultipart) {
      $found = self::getTextFromMultipart($child, $recursionLevel);
    }

    // the first non-empty result wins
    if ($found) {
      break;
    }
  }
  return $found;
}
479
/**
 * Extract a failure description from a multipart/report body.
 *
 * A delivery-status machine part is preferred: the first recipient carrying
 * a Diagnostic-Code, a Description or a Status decides the result. Failing
 * that, the machine part's own text is used, then the human readable part.
 *
 * @param \ezcMailMultipartReport $multipart
 * @param int $recursionLevel
 *   Current nesting depth, passed on when the readable part is a multipart.
 *
 * @return string|null
 *   The text found, or NULL when the report offers nothing usable.
 */
protected static function getTextFromMulipartReport($multipart, $recursionLevel) {
  $machinePart = $multipart->getMachinePart();

  if ($machinePart instanceof ezcMailDeliveryStatus) {
    foreach ($machinePart->recipients as $recipient) {
      if (isset($recipient["Diagnostic-Code"])) {
        return $recipient["Diagnostic-Code"];
      }
      if (isset($recipient["Description"])) {
        return $recipient["Description"];
      }
      // no diagnostic info present - try getting the human readable part
      if (isset($recipient["Status"])) {
        $status = $recipient["Status"];
        $readable = $multipart->getReadablePart();
        if ($readable !== NULL && isset($readable->text)) {
          return $status . " " . $readable->text;
        }
        return $status . " Delivery failed but no diagnostic code or description.";
      }
    }
    // a delivery status without any informative recipient yields nothing
    return NULL;
  }

  if ($machinePart !== NULL && isset($machinePart->text)) {
    return $machinePart->text;
  }

  $readable = $multipart->getReadablePart();
  if ($readable === NULL) {
    return NULL;
  }
  if (isset($readable->text)) {
    return $readable->text;
  }
  if ($readable instanceof ezcMailMultipart) {
    return self::getTextFromMultipart($readable, $recursionLevel);
  }
  return NULL;
}
526
527 }