Merge pull request #15800 from eileenmcnaughton/anet_valid
[civicrm-core.git] / CRM / Utils / Mail / EmailProcessor.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 */
17
// These two values are candidates for moving into the settings table.
//
// Each constant is only defined when nothing has defined it earlier, so a
// site-level override does not trigger a "constant already defined" error
// when this file is loaded.
//
// EMAIL_ACTIVITY_TYPE_ID: when left empty (NULL), _process() looks up the
// 'Inbound Email' activity type instead.
if (!defined('EMAIL_ACTIVITY_TYPE_ID')) {
  define('EMAIL_ACTIVITY_TYPE_ID', NULL);
}

// MAIL_BATCH_SIZE: number of messages requested from the mail store per
// fetchNext() call in _process().
if (!defined('MAIL_BATCH_SIZE')) {
  define('MAIL_BATCH_SIZE', 50);
}
22
23 /**
24 * Class CRM_Utils_Mail_EmailProcessor.
25 */
26 class CRM_Utils_Mail_EmailProcessor {
27
28 const MIME_MAX_RECURSION = 10;
29
/**
 * Run bounce processing over every default mailbox of the current domain.
 *
 * Default mailboxes are the ones CiviMail uses for bounces.
 *
 * @param bool $is_create_activities
 *   Should activities be created
 */
public static function processBounces($is_create_activities) {
  $mailSettings = new CRM_Core_DAO_MailSettings();
  $mailSettings->domain_id = CRM_Core_Config::domainID();
  $mailSettings->is_default = TRUE;
  $mailSettings->find();

  // Hand each matching mail-settings row to the shared worker.
  for (; $mailSettings->fetch();) {
    self::_process(TRUE, $mailSettings, $is_create_activities);
  }
}
46
/**
 * Recursively remove stale files below a directory.
 *
 * Entries whose modification time is newer than the cut-off are skipped
 * entirely (a "new" directory is not descended into). Old directories are
 * descended into, but the directories themselves are left in place.
 *
 * @param string $dir
 *   Directory to cleanup.
 * @param int $age
 *   Files older than this many seconds will be deleted (default: 60 days).
 */
public static function cleanupDir($dir, $age = 5184000) {
  // Nothing to do unless $dir is a directory we can both modify and list.
  if (!(is_writable($dir) && is_readable($dir) && is_dir($dir))) {
    return;
  }

  foreach (scandir($dir) as $entry) {
    // Never step onto the directory itself or its parent.
    if ($entry == '.' || $entry == '..') {
      continue;
    }

    $path = "$dir/$entry";

    // Anything modified within the last $age seconds is kept as-is.
    if (filemtime($path) > time() - $age) {
      continue;
    }

    // Stale entry: descend into directories, delete plain files.
    if (is_dir($path)) {
      self::cleanupDir($path, $age);
    }
    else {
      unlink($path);
    }
  }
}
75
/**
 * Run email-to-activity processing over every non-default mailbox.
 *
 * Non-default mailboxes are the ones CiviMail does not use for bounces.
 * When the current domain has no such mailbox, CRM_Core_Error::fatal() is
 * invoked.
 *
 * @return bool
 *   TRUE when at least one mailbox was handed to the processor.
 */
public static function processActivities() {
  $mailSettings = new CRM_Core_DAO_MailSettings();
  $mailSettings->domain_id = CRM_Core_Config::domainID();
  $mailSettings->is_default = FALSE;
  $mailSettings->find();

  $processedAny = FALSE;
  while ($mailSettings->fetch()) {
    self::_process(FALSE, $mailSettings, TRUE);
    $processedAny = TRUE;
  }

  if ($processedAny === FALSE) {
    CRM_Core_Error::fatal(ts('No mailboxes have been configured for Email to Activity Processing'));
  }

  return $processedAny;
}
94
/**
 * Process the mailbox for all the settings from civicrm_mail_settings.
 *
 * Every mail-settings row of the current domain is processed, default and
 * non-default alike.
 *
 * @param bool|string $civiMail
 *   If true, processing is done in CiviMail context, or Activities otherwise.
 */
public static function process($civiMail = TRUE) {
  $dao = new CRM_Core_DAO_MailSettings();
  $dao->domain_id = CRM_Core_Config::domainID();
  $dao->find();

  // _process() declares three required parameters, so the activity flag has
  // to be passed explicitly. "Activities" context means activities are
  // wanted; CiviMail context leaves activity creation to the mailbox type
  // (non-default mailboxes always create activities inside _process()).
  $is_create_activities = !$civiMail;

  while ($dao->fetch()) {
    self::_process($civiMail, $dao, $is_create_activities);
  }
}
109
/**
 * Fetch and handle every message of one configured mailbox.
 *
 * For a default (bounce) mailbox, each message is matched against the
 * CiviMail VERP address patterns (recipient addresses, then a Return-Path
 * line in the body, then an X-CiviMail-Bounce header in the body or in file
 * parts, then the Delivered-To header) and the resulting action is passed to
 * the matching Mailing / MailingGroup API call. For a non-default mailbox,
 * or when $is_create_activities is set, the message is additionally filed as
 * an activity. Messages are marked processed or ignored on the mail store,
 * which is expunged after every batch.
 *
 * @param bool|string $civiMail
 *   Not read by this method; the mailbox role is taken from
 *   $dao->is_default instead.
 * @param CRM_Core_DAO_MailSettings $dao
 *   The mail-settings row describing the mailbox to process.
 * @param bool $is_create_activities
 *   Create activities.
 *
 * @throws Exception
 */
public static function _process($civiMail, $dao, $is_create_activities) {
  // 0 = activities; 1 = bounce;
  $usedfor = $dao->is_default;

  $emailActivityTypeId
    = (defined('EMAIL_ACTIVITY_TYPE_ID') && EMAIL_ACTIVITY_TYPE_ID)
    ? EMAIL_ACTIVITY_TYPE_ID
    : CRM_Core_PseudoConstant::getKey('CRM_Activity_BAO_Activity', 'activity_type_id', 'Inbound Email');

  if (!$emailActivityTypeId) {
    CRM_Core_Error::fatal(ts('Could not find a valid Activity Type ID for Inbound Email'));
  }

  // Every dynamic fragment is quoted with the '/' delimiter used by the
  // patterns below, so a slash in the configuration cannot end a pattern early.
  $config = CRM_Core_Config::singleton();
  $verpSeperator = preg_quote($config->verpSeparator, '/');
  $localpart = preg_quote($dao->localpart, '/');
  $domain = preg_quote($dao->domain, '/');
  $twoDigitStringMin = $verpSeperator . '(\d+)' . $verpSeperator . '(\d+)';
  $twoDigitString = $twoDigitStringMin . $verpSeperator;
  $threeDigitString = $twoDigitString . '(\d+)' . $verpSeperator;

  // FIXME: legacy regexen to handle CiviCRM 2.1 address patterns, with domain id and possible VERP part
  $commonRegex = '/^' . $localpart . '(b|bounce|c|confirm|o|optOut|r|reply|re|e|resubscribe|u|unsubscribe)' . $threeDigitString . '([0-9a-f]{16})(-.*)?@' . $domain . '$/';
  $subscrRegex = '/^' . $localpart . '(s|subscribe)' . $twoDigitStringMin . '@' . $domain . '$/';

  // a common-for-all-actions regex to handle CiviCRM 2.2 address patterns
  $regex = '/^' . $localpart . '(b|c|e|o|r|u)' . $twoDigitString . '([0-9a-f]{16})@' . $domain . '$/';

  // a tighter regex for finding bounce info in soft bounces' mail bodies
  $rpRegex = '/Return-Path:\s*' . $localpart . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . $domain . '/';

  // a regex for finding bounce info in the X-CiviMail-Bounce header;
  // case-insensitive because the header may be spelled "X-Civimail-Bounce"
  $rpXheaderRegex = '/X-CiviMail-Bounce: ' . $localpart . '(b)' . $twoDigitString . '([0-9a-f]{16})@' . $domain . '/i';

  // retrieve the emails
  try {
    $store = CRM_Mailing_MailStore::getStore($dao->name);
  }
  catch (Exception $e) {
    $message = ts('Could not connect to MailStore for ') . $dao->username . '@' . $dao->server . '<p>';
    $message .= ts('Error message: ');
    $message .= '<pre>' . $e->getMessage() . '</pre><p>';
    CRM_Core_Error::fatal($message);
  }

  // process MAIL_BATCH_SIZE messages at a time, CRM-4002
  while ($mails = $store->fetchNext(MAIL_BATCH_SIZE)) {
    foreach ($mails as $key => $mail) {

      // for every addressee: match address elements if it's to CiviMail
      $matches = [];
      $action = NULL;
      // Reset per message so values parsed from a previous message can never
      // leak into this one (the subscribe pattern only yields a job id).
      $job = $queue = $hash = NULL;

      if ($usedfor == 1) {
        foreach ($mail->to as $address) {
          if (preg_match($regex, $address->email, $matches)) {
            list($match, $action, $job, $queue, $hash) = $matches;
            break;
          }
          // FIXME: the below elseifs should be dropped when we drop legacy support
          elseif (preg_match($commonRegex, $address->email, $matches)) {
            list($match, $action, $_, $job, $queue, $hash) = $matches;
            break;
          }
          elseif (preg_match($subscrRegex, $address->email, $matches)) {
            list($match, $action, $_, $job) = $matches;
            break;
          }
        }

        if (!$matches) {
          // Render the body once and reuse it for both body-based checks.
          $rawBody = $mail->generateBody();

          // CRM-5471: it still might be a soft bounce sent to another
          // address, so scan the body for 'Return-Path: ...bounce-pattern...'
          if (preg_match($rpRegex, $rawBody, $matches)) {
            list($match, $action, $job, $queue, $hash) = $matches;
          }
          // otherwise look for the X-CiviMail-Bounce header, CRM-9855
          elseif (preg_match($rpXheaderRegex, $rawBody, $matches)) {
            list($match, $action, $job, $queue, $hash) = $matches;
          }
        }

        // With Mandrill, the X-CiviMail-Bounce header produced by
        // generateBody() is base64 encoded, so check all file parts too.
        if (!$matches) {
          $all_parts = $mail->fetchParts();
          foreach ($all_parts as $k_part => $v_part) {
            if ($v_part instanceof ezcMailFile) {
              $p_file = $v_part->__get('fileName');
              $c_file = file_get_contents($p_file);
              if ($c_file !== FALSE && preg_match($rpXheaderRegex, $c_file, $matches)) {
                list($match, $action, $job, $queue, $hash) = $matches;
                // Stop at the first hit: a later non-matching part would
                // reset $matches to an empty array and discard this match.
                break;
              }
            }
          }
        }

        // if all else fails, check Delivered-To for possible pattern
        if (!$matches and preg_match($regex, $mail->getHeader('Delivered-To'), $matches)) {
          list($match, $action, $job, $queue, $hash) = $matches;
        }
      }

      // preserve backward compatibility
      if ($usedfor == 0 || $is_create_activities) {
        // if its the activities that needs to be processed ..
        try {
          $mailParams = CRM_Utils_Mail_Incoming::parseMailingObject($mail);
        }
        catch (Exception $e) {
          echo $e->getMessage();
          $store->markIgnored($key);
          continue;
        }

        require_once 'CRM/Utils/DeprecatedUtils.php';
        $params = _civicrm_api3_deprecated_activity_buildmailparams($mailParams, $emailActivityTypeId);

        $params['version'] = 3;
        if (!empty($dao->activity_status)) {
          $params['status_id'] = $dao->activity_status;
        }
        $result = civicrm_api('activity', 'create', $params);

        // From here on $matches doubles as the "activity was created" flag.
        if ($result['is_error']) {
          $matches = FALSE;
          echo "Failed Processing: {$mail->subject}. Reason: {$result['error_message']}\n";
        }
        else {
          $matches = TRUE;
          CRM_Utils_Hook::emailProcessor('activity', $params, $mail, $result);
          echo "Processed as Activity: {$mail->subject}\n";
        }
      }

      // if $matches is empty, this email is not CiviMail-bound
      if (!$matches) {
        $store->markIgnored($key);
        continue;
      }

      // get $replyTo from either the Reply-To header or from From
      // FIXME: make sure it works with Reply-Tos containing non-email stuff
      $replyTo = $mail->getHeader('Reply-To') ? $mail->getHeader('Reply-To') : $mail->from->email;

      // handle the action by passing it to the proper API call
      // FIXME: leave only one-letter cases when dropping legacy support
      if (!empty($action)) {
        $result = NULL;

        switch ($action) {
          case 'b':
          case 'bounce':
            $text = '';
            if ($mail->body instanceof ezcMailText) {
              $text = $mail->body->text;
            }
            elseif ($mail->body instanceof ezcMailMultipart) {
              $text = self::getTextFromMultipart($mail->body);
            }
            elseif ($mail->body instanceof ezcMailFile) {
              $text = file_get_contents($mail->body->__get('fileName'));
            }

            if (
              empty($text) &&
              $mail->subject == "Delivery Status Notification (Failure)" &&
              // Only some body types expose getParts(); calling it on any
              // other body would be a fatal error.
              is_object($mail->body) &&
              method_exists($mail->body, 'getParts')
            ) {
              // Exchange error - CRM-9361
              foreach ($mail->body->getParts() as $part) {
                if ($part instanceof ezcMailDeliveryStatus) {
                  foreach ($part->recipients as $rec) {
                    if ($rec["Status"] == "5.1.1") {
                      if (isset($rec["Description"])) {
                        $text = $rec["Description"];
                      }
                      else {
                        $text = $rec["Status"] . " Delivery to the following recipients failed";
                      }
                      break;
                    }
                  }
                }
              }
            }

            if (empty($text)) {
              // If bounce processing fails, just take the raw body. Cf. CRM-11046
              $text = $mail->generateBody();

              // if text is still empty, lets fudge a blank text so the api call below will succeed
              if (empty($text)) {
                $text = ts('We could not extract the mail body from this bounce message.');
              }
            }

            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'body' => $text,
              'version' => 3,
              // Setting is_transactional means it will rollback if
              // it crashes part way through creating the bounce.
              // If the api were standard & had a create this would be the
              // default. Adding the standard api & deprecating this one
              // would probably be the
              // most consistent way to address this - but this is
              // a quick hack.
              'is_transactional' => 1,
            ];
            $result = civicrm_api('Mailing', 'event_bounce', $params);
            break;

          case 'c':
          case 'confirm':
            // CRM-7921
            $params = [
              'contact_id' => $job,
              'subscribe_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('Mailing', 'event_confirm', $params);
            break;

          case 'o':
          case 'optOut':
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_domain_unsubscribe', $params);
            break;

          case 'r':
          case 'reply':
            // instead of text and HTML parts (4th and 6th params) send the whole email as the last param
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'bodyTxt' => NULL,
              'replyTo' => $replyTo,
              'bodyHTML' => NULL,
              'fullEmail' => $mail->generate(),
              'version' => 3,
            ];
            $result = civicrm_api('Mailing', 'event_reply', $params);
            break;

          case 'e':
          case 're':
          case 'resubscribe':
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_resubscribe', $params);
            break;

          case 's':
          case 'subscribe':
            $params = [
              'email' => $mail->from->email,
              'group_id' => $job,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_subscribe', $params);
            break;

          case 'u':
          case 'unsubscribe':
            $params = [
              'job_id' => $job,
              'event_queue_id' => $queue,
              'hash' => $hash,
              'version' => 3,
            ];
            $result = civicrm_api('MailingGroup', 'event_unsubscribe', $params);
            break;
        }

        if ($result['is_error']) {
          echo "Failed Processing: {$mail->subject}, Action: $action, Job ID: $job, Queue ID: $queue, Hash: $hash. Reason: {$result['error_message']}\n";
        }
        else {
          CRM_Utils_Hook::emailProcessor('mailing', $params, $mail, $result, $action);
        }
      }

      $store->markProcessed($key);
    }
    // CRM-7356 - used by IMAP only
    $store->expunge();
  }
}
418
/**
 * Extract a text body from a multipart MIME part.
 *
 * Report and related multiparts are delegated to their dedicated helpers.
 * For any other multipart the parts are walked in order: a part whose
 * subType is 'plain' contributes its text, a nested multipart is searched
 * recursively, and the first non-empty result wins.
 *
 * @param \ezcMailMultipart $multipart
 * @param int $recursionLevel
 *   Current nesting depth; the search gives up once it reaches
 *   self::MIME_MAX_RECURSION.
 *
 * @return string|null
 *   The text found, or NULL when the depth limit was hit or nothing was found.
 */
protected static function getTextFromMultipart($multipart, $recursionLevel = 0) {
  // Depth guard against pathologically nested messages.
  if ($recursionLevel >= self::MIME_MAX_RECURSION) {
    return NULL;
  }
  $depth = $recursionLevel + 1;

  if ($multipart instanceof ezcMailMultipartReport) {
    return self::getTextFromMulipartReport($multipart, $depth);
  }
  if ($multipart instanceof ezcMailMultipartRelated) {
    return self::getTextFromMultipartRelated($multipart, $depth);
  }

  $found = NULL;
  foreach ($multipart->getParts() as $child) {
    $isPlainText = isset($child->subType) && $child->subType === 'plain';
    if ($isPlainText) {
      $found = $child->text;
    }
    elseif ($child instanceof ezcMailMultipart) {
      $found = self::getTextFromMultipart($child, $depth);
    }
    // The first part that yields non-empty text ends the search.
    if ($found) {
      return $found;
    }
  }
  return $found;
}
452
/**
 * Extract a text body from the related parts of a multipart/related part.
 *
 * The related parts are walked in order: a part whose subType is 'plain'
 * contributes its text, a nested multipart is searched via
 * getTextFromMultipart(), and the first non-empty result wins.
 *
 * @param \ezcMailMultipartRelated $related
 * @param int $recursionLevel
 *   Nesting depth, passed on unchanged to getTextFromMultipart().
 *
 * @return string|null
 *   The text found, or NULL when no related part produced any.
 */
protected static function getTextFromMultipartRelated($related, $recursionLevel) {
  $found = NULL;
  foreach ($related->getRelatedParts() as $child) {
    $isPlainText = isset($child->subType) && $child->subType === 'plain';
    if ($isPlainText) {
      $found = $child->text;
    }
    elseif ($child instanceof ezcMailMultipart) {
      $found = self::getTextFromMultipart($child, $recursionLevel);
    }
    if ($found) {
      return $found;
    }
  }
  return $found;
}
474
/**
 * Extract a bounce description from a multipart/report part.
 *
 * When the machine part is a delivery status, the first recipient carrying
 * a Diagnostic-Code, a Description or a Status (checked in that order)
 * decides the result. Otherwise the machine part's own text is used, and
 * failing that the human readable part.
 *
 * @param \ezcMailMultipartReport $multipart
 * @param int $recursionLevel
 *   Nesting depth, passed on unchanged to getTextFromMultipart().
 *
 * @return string|null
 *   The text found, or NULL when the report yields nothing usable.
 */
protected static function getTextFromMulipartReport($multipart, $recursionLevel) {
  $machine = $multipart->getMachinePart();

  if ($machine instanceof ezcMailDeliveryStatus) {
    foreach ($machine->recipients as $recipient) {
      if (isset($recipient["Diagnostic-Code"])) {
        return $recipient["Diagnostic-Code"];
      }
      if (isset($recipient["Description"])) {
        return $recipient["Description"];
      }
      // no diagnostic info present - try getting the human readable part
      if (isset($recipient["Status"])) {
        $status = $recipient["Status"];
        $readable = $multipart->getReadablePart();
        if ($readable !== NULL && isset($readable->text)) {
          return $status . " " . $readable->text;
        }
        return $status . " Delivery failed but no diagnostic code or description.";
      }
    }
    // A delivery status without any usable recipient entry yields nothing.
    return NULL;
  }

  if ($machine !== NULL && isset($machine->text)) {
    return $machine->text;
  }

  // Fall back to the human readable part of the report.
  $readable = $multipart->getReadablePart();
  if ($readable === NULL) {
    return NULL;
  }
  if (isset($readable->text)) {
    return $readable->text;
  }
  if ($readable instanceof ezcMailMultipart) {
    return self::getTextFromMultipart($readable, $recursionLevel);
  }
  return NULL;
}
521
522 }