From 31c6c663033c0be6cc8dad83719102630f8ac59f Mon Sep 17 00:00:00 2001
From: demeritcowboy <demeritcowboy@hotmail.com>
Date: Wed, 25 Aug 2021 20:07:54 -0400
Subject: [PATCH] make filename non-english-friendly

---
 CRM/Contact/Form/Task/PDFLetterCommon.php |  4 +-
 CRM/Utils/File.php                        | 21 +++++
 tests/phpunit/CRM/Utils/FileTest.php      | 99 +++++++++++++++++++++++
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/CRM/Contact/Form/Task/PDFLetterCommon.php b/CRM/Contact/Form/Task/PDFLetterCommon.php
index 3c6b8416d5..9bfcfa7e77 100644
--- a/CRM/Contact/Form/Task/PDFLetterCommon.php
+++ b/CRM/Contact/Form/Task/PDFLetterCommon.php
@@ -230,10 +230,10 @@ class CRM_Contact_Form_Task_PDFLetterCommon extends CRM_Core_Form_Task_PDFLetter
    */
   private static function getFileName(CRM_Core_Form $form) {
     if (!empty($form->getSubmittedValue('pdf_file_name'))) {
-      $fileName = CRM_Utils_String::munge($form->getSubmittedValue('pdf_file_name'), '_', 200);
+      $fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('pdf_file_name'), '_', 200);
     }
     elseif (!empty($form->getSubmittedValue('subject'))) {
-      $fileName = CRM_Utils_String::munge($form->getSubmittedValue('subject'), '_', 200);
+      $fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('subject'), '_', 200);
     }
     else {
       $fileName = 'CiviLetter';
diff --git a/CRM/Utils/File.php b/CRM/Utils/File.php
index 0b81f69606..4fd3706bfa 100644
--- a/CRM/Utils/File.php
+++ b/CRM/Utils/File.php
@@ -459,6 +459,27 @@ class CRM_Utils_File {
     }
   }
 
+  /**
+   * Make a human-sensible filename that might contain unicode. Unlike
+   * CRM_Utils_String::munge() this keeps characters that are alphanumeric in
+   * non-english languages; emojis, punctuation and whitespace are replaced.
+   * If $input is not valid UTF-8 the unicode regex fails and returns NULL, so
+   * we fall back to a byte-wise ASCII filter rather than hand back NULL.
+   *
+   * @param string $input
+   * @param string $replacementString Character or string to replace invalid characters with. Can be the empty string.
+   * @param int $cutoffLength Length (in characters) to truncate the result to after replacements. 0 disables truncation.
+   * @return string
+   */
+  public static function makeFilenameWithUnicode(string $input, string $replacementString = '_', int $cutoffLength = 63): string {
+    $filename = preg_replace('/\W/u', $replacementString, $input);
+    if ($filename === NULL) {
+      // Malformed UTF-8 input: keep only ASCII word characters.
+      $filename = (string) preg_replace('/[^A-Za-z0-9_]/', $replacementString, $input);
+    }
+    return $cutoffLength ? mb_substr($filename, 0, $cutoffLength) : $filename;
+  }
+
   /**
    * Copies a file
    *
diff --git a/tests/phpunit/CRM/Utils/FileTest.php b/tests/phpunit/CRM/Utils/FileTest.php
index 08b32b905e..94e2618228 100644
--- a/tests/phpunit/CRM/Utils/FileTest.php
+++ b/tests/phpunit/CRM/Utils/FileTest.php
@@ -161,4 +161,103 @@ class CRM_Utils_FileTest extends CiviUnitTestCase {
     unlink($file);
   }
 
+  /**
+   * Data provider for testMakeFilenameWithUnicode (non-ASCII is written as \u{} escapes).
+   * @return array
+   */
+  public function makeFilenameWithUnicodeProvider(): array {
+    return [
+      // explicit indices to make it easier to see which one failed
+      0 => [
+        'string' => '',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => '',
+      ],
+      1 => [
+        'string' => 'a',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a',
+      ],
+      2 => [
+        'string' => 'a b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a_b',
+      ],
+      3 => [
+        'string' => 'a4b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a4b',
+      ],
+      4 => [
+        'string' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => '_a____________________________b',
+      ],
+      5 => [
+        'string' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
+        'replacementCharacter' => '',
+        'cutoffLength' => NULL,
+        'expected' => '_ab',
+      ],
+      // emojis (U+1F600) get replaced, but non-english letters (U+00E7 c-cedilla, U+044F cyrillic ya) are kept
+      6 => [
+        'string' => "a\u{00E7}b\u{044F}c\u{1F600}d",
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => "a\u{00E7}b\u{044F}c_d",
+      ],
+      7 => [
+        'string' => "\u{00E7}\u{044F}\u{1F600}",
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => "\u{00E7}\u{044F}_",
+      ],
+      // test default cutoff
+      8 => [
+        'string' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456',
+      ],
+      9 => [
+        'string' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
+        'replacementCharacter' => '_',
+        'cutoffLength' => 30,
+        'expected' => 'abcdefghijklmnopqrstuvwxyz0123',
+      ],
+      // test cutoff truncates multibyte properly (U+0414 is two bytes in UTF-8)
+      10 => [
+        'string' => str_repeat("\u{0414}", 15),
+        'replacementCharacter' => '',
+        'cutoffLength' => 10,
+        'expected' => str_repeat("\u{0414}", 10),
+      ],
+    ];
+  }
+
+  /**
+   * Runs makeFilenameWithUnicode() on one provider dataset. A NULL cutoffLength
+   * is left out of the call, and a NULL replacementCharacter is left out as
+   * well when the cutoff is also NULL, so the method's defaults get exercised.
+   * @dataProvider makeFilenameWithUnicodeProvider
+   * @param string $input
+   * @param ?string $replacementCharacter
+   * @param ?int $cutoffLength
+   * @param string $expected
+   */
+  public function testMakeFilenameWithUnicode(string $input, ?string $replacementCharacter, ?int $cutoffLength, string $expected) {
+    $args = [$input];
+    if ($cutoffLength !== NULL) {
+      $args = [$input, $replacementCharacter, $cutoffLength];
+    }
+    elseif ($replacementCharacter !== NULL) {
+      $args[] = $replacementCharacter;
+    }
+    $this->assertSame($expected, CRM_Utils_File::makeFilenameWithUnicode(...$args));
+  }
+
 }
-- 
2.25.1