From 31c6c663033c0be6cc8dad83719102630f8ac59f Mon Sep 17 00:00:00 2001
From: demeritcowboy
Date: Wed, 25 Aug 2021 20:07:54 -0400
Subject: [PATCH] make filename non-english-friendly

---
 CRM/Contact/Form/Task/PDFLetterCommon.php |   4 +-
 CRM/Utils/File.php                        |  32 ++++++
 tests/phpunit/CRM/Utils/FileTest.php      | 113 ++++++++++++++++++++++
 3 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/CRM/Contact/Form/Task/PDFLetterCommon.php b/CRM/Contact/Form/Task/PDFLetterCommon.php
index 3c6b8416d5..9bfcfa7e77 100644
--- a/CRM/Contact/Form/Task/PDFLetterCommon.php
+++ b/CRM/Contact/Form/Task/PDFLetterCommon.php
@@ -230,10 +230,10 @@ class CRM_Contact_Form_Task_PDFLetterCommon extends CRM_Core_Form_Task_PDFLetter
    */
   private static function getFileName(CRM_Core_Form $form) {
     if (!empty($form->getSubmittedValue('pdf_file_name'))) {
-      $fileName = CRM_Utils_String::munge($form->getSubmittedValue('pdf_file_name'), '_', 200);
+      $fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('pdf_file_name'), '_', 200);
     }
     elseif (!empty($form->getSubmittedValue('subject'))) {
-      $fileName = CRM_Utils_String::munge($form->getSubmittedValue('subject'), '_', 200);
+      $fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('subject'), '_', 200);
     }
     else {
       $fileName = 'CiviLetter';
diff --git a/CRM/Utils/File.php b/CRM/Utils/File.php
index 0b81f69606..4fd3706bfa 100644
--- a/CRM/Utils/File.php
+++ b/CRM/Utils/File.php
@@ -459,6 +459,38 @@ class CRM_Utils_File {
     }
   }
 
+  /**
+   * CRM_Utils_String::munge() doesn't handle unicode and needs to be able
+   * to generate valid database tablenames so will sometimes generate a
+   * random string. Here what we want is a human-sensible filename that might
+   * contain unicode.
+   * Note that this does filter out emojis and such, but keeps characters that
+   * are considered alphanumeric in non-english languages.
+   *
+   * @param string $input
+   * @param string $replacementString Character or string to replace invalid characters with. Can be the empty string. It is inserted literally.
+   * @param int $cutoffLength Length (in characters, not bytes) to truncate the result after replacements. 0 disables truncation.
+   * @return string
+   */
+  public static function makeFilenameWithUnicode(string $input, string $replacementString = '_', int $cutoffLength = 63): string {
+    // Use a callback so the replacement is always literal; preg_replace()
+    // would interpret sequences such as "$0" or "\1" as backreferences.
+    $replace = static function () use ($replacementString): string {
+      return $replacementString;
+    };
+    $filename = preg_replace_callback('/\W/u', $replace, $input);
+    if ($filename === NULL) {
+      // With the /u modifier PCRE returns NULL when $input is not valid UTF-8,
+      // which would violate the string return type. Retry byte-wise with an
+      // explicit ASCII class so the offending bytes are replaced as well.
+      $filename = (string) preg_replace_callback('/[^A-Za-z0-9_]/', $replace, $input);
+    }
+    if ($cutoffLength) {
+      return mb_substr($filename, 0, $cutoffLength);
+    }
+    return $filename;
+  }
+
   /**
    * Copies a file
    *
diff --git a/tests/phpunit/CRM/Utils/FileTest.php b/tests/phpunit/CRM/Utils/FileTest.php
index 08b32b905e..94e2618228 100644
--- a/tests/phpunit/CRM/Utils/FileTest.php
+++ b/tests/phpunit/CRM/Utils/FileTest.php
@@ -161,4 +161,117 @@ class CRM_Utils_FileTest extends CiviUnitTestCase {
     unlink($file);
   }
 
+  /**
+   * dataprovider for testMakeFilenameWithUnicode
+   * @return array
+   */
+  public function makeFilenameWithUnicodeProvider(): array {
+    return [
+      // explicit indices to make it easier to see which one failed
+      0 => [
+        'input' => '',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => '',
+      ],
+      1 => [
+        'input' => 'a',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a',
+      ],
+      2 => [
+        'input' => 'a b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a_b',
+      ],
+      3 => [
+        'input' => 'a4b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a4b',
+      ],
+      4 => [
+        'input' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => '_a____________________________b',
+      ],
+      5 => [
+        'input' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
+        'replacementCharacter' => '',
+        'cutoffLength' => NULL,
+        'expected' => '_ab',
+      ],
+      // emojis get replaced, but alphabetic letters in non-english are kept
+      6 => [
+        'input' => 'açbяc😀d',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'açbяc_d',
+      ],
+      7 => [
+        'input' => 'çя😀',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'çя_',
+      ],
+      // test default cutoff
+      8 => [
+        'input' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456',
+      ],
+      9 => [
+        'input' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
+        'replacementCharacter' => '_',
+        'cutoffLength' => 30,
+        'expected' => 'abcdefghijklmnopqrstuvwxyz0123',
+      ],
+      // test cutoff truncates multibyte properly
+      10 => [
+        'input' => 'ДДДДДДДДДДДДДДД',
+        'replacementCharacter' => '',
+        'cutoffLength' => 10,
+        'expected' => 'ДДДДДДДДДД',
+      ],
+      // invalid UTF-8 bytes get replaced instead of causing an error
+      11 => [
+        'input' => "a\xFFb",
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a_b',
+      ],
+      // the replacement is inserted literally, not treated as a backreference
+      12 => [
+        'input' => 'a b',
+        'replacementCharacter' => '$0',
+        'cutoffLength' => NULL,
+        'expected' => 'a$0b',
+      ],
+    ];
+  }
+
+  /**
+   * test makeFilenameWithUnicode
+   * @dataProvider makeFilenameWithUnicodeProvider
+   * @param string $input
+   * @param ?string $replacementCharacter
+   * @param ?int $cutoffLength
+   * @param string $expected
+   */
+  public function testMakeFilenameWithUnicode(string $input, ?string $replacementCharacter, ?int $cutoffLength, string $expected) {
+    if (is_null($replacementCharacter) && is_null($cutoffLength)) {
+      $this->assertSame($expected, CRM_Utils_File::makeFilenameWithUnicode($input));
+    }
+    elseif (is_null($cutoffLength)) {
+      $this->assertSame($expected, CRM_Utils_File::makeFilenameWithUnicode($input, $replacementCharacter));
+    }
+    else {
+      $this->assertSame($expected, CRM_Utils_File::makeFilenameWithUnicode($input, $replacementCharacter, $cutoffLength));
+    }
+  }
+
 }
-- 
2.25.1