From 5bfe3f8e5b8889bc8846b46bc8e5a2954719ce76 Mon Sep 17 00:00:00 2001 From: larssandergreen Date: Sat, 30 Sep 2023 22:21:07 -0600 Subject: [PATCH] Switch from html2text to soundasleep/html2text --- CRM/Utils/String.php | 3 +- composer.json | 5 +- composer.lock | 98 +++++++++++-------- .../ClickTracker/TextClickTrackerTest.php | 13 +-- .../CRM/Mailing/BaseMailingSystemTest.php | 30 ++---- tests/phpunit/CRM/Utils/HtmlToTextTest.php | 17 +--- .../CRM/Utils/TokenConsistencyTest.php | 11 +-- .../phpunit/Civi/Token/TokenProcessorTest.php | 2 +- 8 files changed, 78 insertions(+), 101 deletions(-) diff --git a/CRM/Utils/String.php b/CRM/Utils/String.php index 920edf7a84..12bb49df99 100644 --- a/CRM/Utils/String.php +++ b/CRM/Utils/String.php @@ -445,8 +445,7 @@ class CRM_Utils_String { */ public static function htmlToText($html) { $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html); - $converter = new \Html2Text\Html2Text($token_html, ['do_links' => 'table', 'width' => 75]); - $token_text = $converter->getText(); + $token_text = \Soundasleep\Html2Text::convert($token_html, ['ignore_errors' => TRUE]); $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text); return $text; } diff --git a/composer.json b/composer.json index 3e3eb89829..3181dfe49f 100644 --- a/composer.json +++ b/composer.json @@ -102,7 +102,7 @@ "symfony/polyfill-php80": "^1.0", "symfony/polyfill-php81": "^1.0", "symfony/polyfill-php82": "^1.0", - "html2text/html2text": "^4.3.1", + "soundasleep/html2text": "^2.1", "psr/container": "~1.0 || ~2.0", "ext-fileinfo": "*" }, @@ -275,9 +275,6 @@ "Update gitignore to ensure that sites that manage via git don't miss out on the important db.json file": "https://patch-diff.githubusercontent.com/raw/adrienrn/php-mimetyper/pull/15.patch", "Apply patch to fix php8.2 deprecation notice on dynamic property $filename": "https://patch-diff.githubusercontent.com/raw/adrienrn/php-mimetyper/pull/17.patch" }, - "html2text/html2text": { - "Fix deprecation warning in php8.1 on html_entity_decode": "https://raw.githubusercontent.com/civicrm/civicrm-core/e758d20e9f613ca6c4cf652c23d2cd7e5d3af3ce/tools/scripts/composer/html2text_html2_text_php81_deprecation.patch" - }, "pear/db": { "Apply patch to ensure that MySQLI reporting remains the same in php8.1": "https://patch-diff.githubusercontent.com/raw/pear/DB/pull/13.patch", "Apply patch to fix deprecations in php8.2": "https://patch-diff.githubusercontent.com/raw/pear/DB/pull/14.patch", diff --git a/composer.lock b/composer.lock index b4288d6318..072c5ee289 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "377176179275d0aa4262d031e5bc4559", + "content-hash": "d112a69d39ea11b6ad870811b2960200", "packages": [ { "name": "adrienrn/php-mimetyper", @@ -966,47 +966,6 @@ ], "time": "2023-04-17T16:00:37+00:00" }, - { - "name": "html2text/html2text", - "version": "4.3.1", - "source": { - "type": "git", - "url": "https://github.com/mtibben/html2text.git", - "reference": "61ad68e934066a6f8df29a3d23a6460536d0855c" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/mtibben/html2text/zipball/61ad68e934066a6f8df29a3d23a6460536d0855c", - "reference": "61ad68e934066a6f8df29a3d23a6460536d0855c", - "shasum": "" - }, - "require-dev": { - "phpunit/phpunit": "~4" - }, - "suggest": { - "ext-mbstring": "For best performance", - "symfony/polyfill-mbstring": "If you can't install ext-mbstring" - }, - "type": "library", - "autoload": { - "psr-4": { - "Html2Text\\": [ - "src/", - "test/" - ] - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "GPL-2.0-or-later" - ], - "description": "Converts HTML to formatted plain text", - "support": { - "issues": "https://github.com/mtibben/html2text/issues", - "source": "https://github.com/mtibben/html2text/tree/4.3.1" - }, - "time": "2020-04-16T23:44:31+00:00" - }, { "name": "laminas/laminas-escaper", "version": "2.6.1", @@ -3414,6 +3373,61 @@ }, "time": "2022-05-16T07:22:18+00:00" }, + { + "name": "soundasleep/html2text", + "version": "2.1.0", + "source": { + "type": "git", + "url": "https://github.com/soundasleep/html2text.git", + "reference": "83502b6f8f1aaef8e2e238897199d64f284b4af3" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/soundasleep/html2text/zipball/83502b6f8f1aaef8e2e238897199d64f284b4af3", + "reference": "83502b6f8f1aaef8e2e238897199d64f284b4af3", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-libxml": "*", + "php": "^7.3|^8.0" + }, + "require-dev": { + "phpstan/phpstan": "^1.9", + "phpunit/phpunit": "^7.0|^8.0|^9.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Soundasleep\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jevon Wright", + "homepage": "https://jevon.org", + "role": "Developer" + } + ], + "description": "A PHP script to convert HTML into a plain text format", + "homepage": "https://github.com/soundasleep/html2text", + "keywords": [ + "email", + "html", + "php", + "text" + ], + "support": { + "email": "support@jevon.org", + "issues": "https://github.com/soundasleep/html2text/issues", + "source": "https://github.com/soundasleep/html2text/tree/2.1.0" + }, + "time": "2023-01-06T09:28:15+00:00" + }, { "name": "symfony/config", "version": "v4.4.42", diff --git a/ext/flexmailer/tests/phpunit/Civi/FlexMailer/ClickTracker/TextClickTrackerTest.php b/ext/flexmailer/tests/phpunit/Civi/FlexMailer/ClickTracker/TextClickTrackerTest.php index 14da23dd78..fac91ae073 100644 --- a/ext/flexmailer/tests/phpunit/Civi/FlexMailer/ClickTracker/TextClickTrackerTest.php +++ b/ext/flexmailer/tests/phpunit/Civi/FlexMailer/ClickTracker/TextClickTrackerTest.php @@ -45,19 +45,14 @@ class TextClickTrackerTest extends \CiviUnitTestCase { '

Foo

', ]; $exs[] = [ - // Messy looking URL, designed to trip-up quote handling + // Messy looking URL, designed to trip-up quote handling, no tracking as no http '

Foo

', - '

Foo

', + '

Foo

', ]; $exs[] = [ - // Messy looking URL, designed to trip-up quote handling + // Messy looking URL, designed to trip-up quote handling, no tracking as no http '

Foo

', - '

Foo

', - ]; - $exs[] = [ - // Messy looking URL, funny whitespace - '

Foo

', - '

Foo

', + '

Foo

', ]; $exs[] = [ // Messy looking URL, funny whitespace diff --git a/tests/phpunit/CRM/Mailing/BaseMailingSystemTest.php b/tests/phpunit/CRM/Mailing/BaseMailingSystemTest.php index 4844ebcd98..ec0b581f2d 100644 --- a/tests/phpunit/CRM/Mailing/BaseMailingSystemTest.php +++ b/tests/phpunit/CRM/Mailing/BaseMailingSystemTest.php @@ -163,14 +163,7 @@ abstract class CRM_Mailing_BaseMailingSystemTest extends CiviUnitTestCase { // Default header "Sample Header for TEXT formatted content.\n" . // body_html, filtered - "You can go to Google \\[1\\] or opt out \\[2\\]\\.\n" . - "\n" . - "\n" . - "Links:\n" . - "------\n" . - "\\[1\\] http://example.net/first\\?cs=[0-9a-f_]+\n" . - "\\[2\\] http.*civicrm/mailing/optout.*\n" . - "\n" . + "You can go to \\[Google\\]\\(http://example.net/first\?cs=[0-9a-f_]+\\) or \\[opt out\\]\\(http.*civicrm/mailing/optout.*\\)\\.\n" . // Default footer "to unsubscribe: http.*civicrm/mailing/optout" . ";", @@ -217,14 +210,7 @@ abstract class CRM_Mailing_BaseMailingSystemTest extends CiviUnitTestCase { $this->assertMatchesRegularExpression( ";" . // body_html, filtered - "You can go to Google \\[1\\] or opt out \\[2\\]\\.\n" . - "\n" . - "\n" . - "Links:\n" . - "------\n" . - "\\[1\\] http.*(extern/url.php|civicrm/mailing/url)(\?|&)u=\d+&qid=\d+\n" . - "\\[2\\] http.*civicrm/mailing/optout.*\n" . - "\n" . + "You can go to \\[Google\\]\\(http.*(extern/url.php|civicrm/mailing/url)(\?|&)u=\d+&qid=\d+\\) or \\[opt out\\]\\(http.*civicrm/mailing/optout.*\\)\\.\n" . // Default footer "to unsubscribe: http.*civicrm/mailing/optout" . ";", @@ -249,20 +235,20 @@ abstract class CRM_Mailing_BaseMailingSystemTest extends CiviUnitTestCase { $cases[0] = [ '

Foo

', ';

Foo

;', - ';\\[1\\] http://example\.net/;', + ';\\(http://example\.net/\\);', ['url_tracking' => 0], ]; $cases[1] = [ '

Foo

', // FIXME: Legacy tracker adds extra quote after URL ';

Foo

;', - ';\\[1\\] http://example\.net/\?id=\d+;', + ';\\(http://example\.net/\?id=\d+\\);', ['url_tracking' => 0], ]; $cases[2] = [ '

Foo

', ';

Foo

;', - ';\\[1\\] http.*civicrm/mailing/optout.*;', + ';\\(http.*civicrm/mailing/optout.*\\);', ['url_tracking' => 0], ]; $cases[3] = [ @@ -284,13 +270,13 @@ abstract class CRM_Mailing_BaseMailingSystemTest extends CiviUnitTestCase { $cases[5] = [ '

Foo

', ';

Foo

;', - ';\\[1\\] .*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*;', + ';\\(.*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*\\);', ['url_tracking' => 1], ]; $cases['url_trackin_enabled'] = [ '

Foo

', ';

Foo

;', - ';\\[1\\] .*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*&id=\d+.*;', + ';\\(.*(extern/url.php|civicrm/mailing/url)[\?&]u=\d+.*&id=\d+.*\\);', ['url_tracking' => 1], ]; @@ -298,7 +284,7 @@ abstract class CRM_Mailing_BaseMailingSystemTest extends CiviUnitTestCase { // It would be redundant/slow to track the action URLs? '

Foo

', ';

Foo

;', - ';\\[1\\] http.*civicrm/mailing/optout.*;', + ';\\(http.*civicrm/mailing/optout.*\\);', ['url_tracking' => 1], ]; $cases[8] = [ diff --git a/tests/phpunit/CRM/Utils/HtmlToTextTest.php b/tests/phpunit/CRM/Utils/HtmlToTextTest.php index 817f49a721..dcf2f012df 100644 --- a/tests/phpunit/CRM/Utils/HtmlToTextTest.php +++ b/tests/phpunit/CRM/Utils/HtmlToTextTest.php @@ -25,27 +25,18 @@ class CRM_Utils_HtmlToTextTest extends CiviUnitTestCase { $cases[] = [ "\n

\n" . - "This is a paragraph with Bold and italics\n" . + "This is a paragraph with Bold and italics.\n" . "Also some hrefs and a\n" . "few mailto tags.\n" . "This is also a really long long line\n" . "\n", - "This is a paragraph with BOLD and _italics_ Also some hrefs [1] and a few\n" . - "mailto tags. This is also a really long long line\n" . - "\n" . - "Links:\n" . - "------\n" . - "[1] http://www.example.com\n" . - "", + "This is a paragraph with Bold and italics. Also some [hrefs](http://www.example.com)" . + " and a few mailto tags. This is also a really long long line", ]; $cases[] = [ "

\nA token\nis not treated as a relative URL", - "A token [1] is not treated as a relative URL\n" . - "\n" . - "Links:\n" . - "------\n" . - "[1] {action.do_something}\n", + "A [token]({action.do_something}) is not treated as a relative URL", ]; return $cases; diff --git a/tests/phpunit/CRM/Utils/TokenConsistencyTest.php b/tests/phpunit/CRM/Utils/TokenConsistencyTest.php index 7daf6f8b60..6f1716050c 100644 --- a/tests/phpunit/CRM/Utils/TokenConsistencyTest.php +++ b/tests/phpunit/CRM/Utils/TokenConsistencyTest.php @@ -1122,7 +1122,7 @@ United States', $tokenProcessor->getRow(0)->render('message')); ]); $context['eventId'] = $this->eventCreateUnpaid([ 'title' => 'The Webinar', - 'description' => '

Some online webinar thingy.

Attendees will need to install the TeleFoo app.

', + 'description' => '

Some online webinar thingy.

Attendees will need to install the TeleFoo app.

', ])['id']; $messages = $expected = []; @@ -1138,15 +1138,10 @@ United States', $tokenProcessor->getRow(0)->render('message')); $messages['event_text'] = 'You signed up for this event: {event.title}: {event.description}'; $expected['event_text'] = 'You signed up for this event: The Webinar: Some online webinar thingy. -Attendees will need to install the TeleFoo [1] app. - - -Links: ------- -[1] http://telefoo.example.com'; +Attendees will need to install the [TeleFoo](http://telefoo.example.com) app.'; $messages['event_html'] = '

You signed up for this event:

{event.title}

{event.description}'; - $expected['event_html'] = '

You signed up for this event:

The Webinar

Some online webinar thingy.

Attendees will need to install the TeleFoo app.

'; + $expected['event_html'] = '

You signed up for this event:

The Webinar

Some online webinar thingy.

Attendees will need to install the TeleFoo app.

'; $rendered = CRM_Core_TokenSmarty::render($messages, $context); diff --git a/tests/phpunit/Civi/Token/TokenProcessorTest.php b/tests/phpunit/Civi/Token/TokenProcessorTest.php index c2048cc22c..b741e7ef18 100644 --- a/tests/phpunit/Civi/Token/TokenProcessorTest.php +++ b/tests/phpunit/Civi/Token/TokenProcessorTest.php @@ -524,7 +524,7 @@ class TokenProcessorTest extends \CiviUnitTestCase { $testCases['TextMessages with HtmlData'] = [ 'text/plain', [ - 'This is {my_rich_text.and_such}...' => 'This is TESTING & SUCH...', + 'This is {my_rich_text.and_such}...' => 'This is testing & such...', 'This is {my_rich_text.and_such|lower}...' => 'This is testing & such...', 'This is {my_rich_text.and_such|upper}!' => 'This is TESTING & SUCH!', ], -- 2.25.1