From 04560429f529529d3d885e9da531062c2563fdd9 Mon Sep 17 00:00:00 2001
From: Tim Otten
Date: Fri, 3 Mar 2023 00:54:08 -0800
Subject: [PATCH] CRM_Utils_XML - Add method filterMarkupText

---
 NOTE(review): the XMLTest.php hunk below is truncated in this copy. It
 declares 37 lines but only the last 25 survive, and the first of those is
 cut mid-statement. Restore the head of that file from the original commit
 before applying.

 CRM/Utils/XML.php                   | 104 ++++++++++++++++++++++++++++
 tests/phpunit/CRM/Utils/XMLTest.php |  37 ++++++++++
 2 files changed, 141 insertions(+)
 create mode 100644 tests/phpunit/CRM/Utils/XMLTest.php

diff --git a/CRM/Utils/XML.php b/CRM/Utils/XML.php
index 12eb7a629e..e817786236 100644
--- a/CRM/Utils/XML.php
+++ b/CRM/Utils/XML.php
@@ -135,4 +135,108 @@ class CRM_Utils_XML {
     return $arr;
   }
 
+  /**
+   * Apply a filter to the textual parts of the markup.
+   *
+   * Tags pass through untouched; each text run is entity-decoded, filtered, re-encoded.
+   *
+   * @param string $markup
+   *   Ex: '<b>Hello world &amp; universe</b>'
+   * @param callable $filter
+   *   Receives the decoded text and returns its replacement. Ex: 'mb_strtoupper'
+   * @return string
+   *   Ex: '<b>HELLO WORLD &amp; UNIVERSE</b>'
+   */
+  public static function filterMarkupText(string $markup, callable $filter): string {
+    $tokens = static::tokenizeMarkupText($markup);
+    foreach ($tokens as $idx => $tokenRec) {
+      if ($tokenRec[0] === 'text') {
+        $tokens[$idx][1] = htmlentities($filter(html_entity_decode($tokenRec[1])));
+      }
+    }
+    return implode('', array_column($tokens, 1));
+  }
+
+  /**
+   * Split marked-up text into markup and text.
+   *
+   * Scans byte-wise; every delimiter is ASCII, so UTF-8 multi-byte text is kept intact.
+   *
+   * @param string $markup
+   *   Ex: '<a href="#foo">link</a>'
+   * @return array
+   *   Ex: [
+   *     ['node', '<a href="#foo">'],
+   *     ['text', 'link'],
+   *     ['node', '</a>'],
+   *   ]
+   */
+  protected static function tokenizeMarkupText(string $markup): array {
+    $modes = []; /* Stack, top at [0]: text, node, (') quoted attr, (") quoted attr */
+    $tokens = [];
+    $buf = '';
+
+    $startToken = function (string $type) use (&$modes) {
+      array_unshift($modes, $type);
+    };
+
+    $finishToken = function () use (&$tokens, &$buf, &$modes) {
+      $type = array_shift($modes);
+      if ($buf !== '') {
+        $tokens[] = [$type, $buf];
+        $buf = '';
+      }
+    };
+
+    $startToken('text');
+    // Bound by strlen(), not mb_strlen(): $markup[$i] addresses bytes, not characters.
+    for ($i = 0, $len = strlen($markup); $i < $len; $i++) {
+      $ch = $markup[$i];
+      switch ($modes[0] . ' ' . $ch) {
+        // Aside: Our style guide makes this harder to read. It's better with 1-case-per-line.
+        case 'text <':
+          $finishToken();
+          $startToken('node');
+          $buf .= $ch;
+          break;
+
+        case 'node >':
+          $buf .= $ch;
+          $finishToken();
+          $startToken('text');
+          break;
+
+        case "node '":
+          $buf .= $ch;
+          array_unshift($modes, "attr'");
+          break;
+
+        case 'node "':
+          $buf .= $ch;
+          array_unshift($modes, 'attr"');
+          break;
+
+        case "attr' '":
+        case 'attr" "':
+          // Matching close-quote: drop back to the enclosing node.
+          $buf .= $ch;
+          array_shift($modes);
+          break;
+
+        case "attr' \\":
+        case 'attr" \\':
+          // Backslash: copy the following byte (if there is one) without interpreting it.
+          $buf .= $ch . ($markup[++$i] ?? '');
+          break;
+
+        default:
+          $buf .= $ch;
+          break;
+      }
+    }
+    $finishToken();
+
+    return $tokens;
+  }
+
 }
diff --git a/tests/phpunit/CRM/Utils/XMLTest.php b/tests/phpunit/CRM/Utils/XMLTest.php
new file mode 100644
index 0000000000..2f0f250d83
--- /dev/null
+++ b/tests/phpunit/CRM/Utils/XMLTest.php
@@ -0,0 +1,37 @@
+useTransaction();
+    parent::setUp();
+  }
+
+  public function testFilterMarkupTest(): void {
+    $examples = [
+      ['', 'mb_strtoupper', ''],
+      ['Ok', 'mb_strtoupper', 'OK'],
+      ['Ok', 'mb_strtolower', 'ok'],
+      ['This &amp; That', 'mb_strtoupper', 'THIS &amp; THAT'],
+      ['This &amp; That', 'mb_strtolower', 'this &amp; that'],
+      ['OneTwoThree', 'mb_strtoupper', 'ONETWOTHREE'],
+      ['OneTwoThree', 'mb_strtolower', 'onetwothree'],
+      ['The Foo Bar', 'mb_strtoupper', 'THE FOO BAR'],
+      ['The Foo Bar', 'mb_strtolower', 'the foo bar'],
+      ['<b>Café</b>', 'mb_strtoupper', '<b>CAF&Eacute;</b>'],
+    ];
+    foreach ($examples as $example) {
+      [$input, $filter, $expect] = $example;
+      $actual = CRM_Utils_XML::filterMarkupText($input, $filter);
+      $this->assertEquals($expect, $actual, sprintf('Filter "%s" via "%s"', $input, $filter));
+    }
+  }
+
+}
-- 
2.25.1