make filename non-english-friendly
author demeritcowboy <demeritcowboy@hotmail.com>
Thu, 26 Aug 2021 00:07:54 +0000 (20:07 -0400)
committer demeritcowboy <demeritcowboy@hotmail.com>
Thu, 26 Aug 2021 00:07:54 +0000 (20:07 -0400)
CRM/Contact/Form/Task/PDFLetterCommon.php
CRM/Utils/File.php
tests/phpunit/CRM/Utils/FileTest.php

index 3c6b8416d5017dfaa9960a7faeeb3ec903fbf8ae..9bfcfa7e7705c7063bb1826c0cf2c9a8ccd2f7b4 100644 (file)
@@ -230,10 +230,10 @@ class CRM_Contact_Form_Task_PDFLetterCommon extends CRM_Core_Form_Task_PDFLetter
    */
   private static function getFileName(CRM_Core_Form $form) {
     if (!empty($form->getSubmittedValue('pdf_file_name'))) {
-      $fileName = CRM_Utils_String::munge($form->getSubmittedValue('pdf_file_name'), '_', 200);
+      $fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('pdf_file_name'), '_', 200);
     }
     elseif (!empty($form->getSubmittedValue('subject'))) {
-      $fileName = CRM_Utils_String::munge($form->getSubmittedValue('subject'), '_', 200);
+      $fileName = CRM_Utils_File::makeFilenameWithUnicode($form->getSubmittedValue('subject'), '_', 200);
     }
     else {
       $fileName = 'CiviLetter';
index 0b81f696063898bf579d4eb2962bfbbd62388bad..4fd3706bfa08e05e91b69546d0517a5db62aa13c 100644 (file)
@@ -459,6 +459,27 @@ class CRM_Utils_File {
     }
   }
 
+  /**
+   * Build a human-sensible filename that might contain unicode.
+   *
+   * CRM_Utils_String::munge() doesn't handle unicode and needs to be able to
+   * generate valid database tablenames so will sometimes generate a random string.
+   * This keeps characters considered alphanumeric in non-english languages and
+   * replaces the rest (emojis too). Invalid UTF-8 input keeps only [A-Za-z0-9_].
+   *
+   * @param string $input
+   * @param string $replacementString Character or string to replace invalid characters with. Can be the empty string.
+   * @param int $cutoffLength Maximum number of characters kept after replacements; 0 means no truncation.
+   * @return string
+   */
+  public static function makeFilenameWithUnicode(string $input, string $replacementString = '_', int $cutoffLength = 63): string {
+    // With /u, preg_replace() returns NULL for invalid UTF-8, which would break the string return type.
+    $filename = preg_replace('/\W/u', $replacementString, $input)
+      ?? preg_replace('/[^A-Za-z0-9_]/', $replacementString, $input);
+    // Truncate by UTF-8 characters regardless of the configured mbstring internal encoding.
+    return $cutoffLength ? mb_substr($filename, 0, $cutoffLength, 'UTF-8') : $filename;
+  }
+
   /**
    * Copies a file
    *
index 08b32b905e91d91f7032c065520272ff6860bd6c..94e26182284542c76b72000852384a9212006cf7 100644 (file)
@@ -161,4 +161,103 @@ class CRM_Utils_FileTest extends CiviUnitTestCase {
     unlink($file);
   }
 
+  /**
+   * Data sets for testMakeFilenameWithUnicode; keys mirror its parameter names, NULL lets the test omit that argument.
+   * @return array
+   */
+  public function makeFilenameWithUnicodeProvider(): array {
+    return [
+      // explicit indices to make it easier to see which one failed
+      0 => [
+        'input' => '',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => '',
+      ],
+      1 => [
+        'input' => 'a',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a',
+      ],
+      2 => [
+        'input' => 'a b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a_b',
+      ],
+      3 => [
+        'input' => 'a4b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'a4b',
+      ],
+      4 => [
+        'input' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => '_a____________________________b',
+      ],
+      5 => [
+        'input' => '_a!@#$%^&*()[]+-=."\'{}<>?/\\|;:b',
+        'replacementCharacter' => '',
+        'cutoffLength' => NULL,
+        'expected' => '_ab',
+      ],
+      // emojis get replaced, but alphabetic letters in non-english are kept
+      6 => [
+        'input' => 'açbяc😀d',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'açbяc_d',
+      ],
+      7 => [
+        'input' => 'çя😀',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'çя_',
+      ],
+      // test default cutoff
+      8 => [
+        'input' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
+        'replacementCharacter' => NULL,
+        'cutoffLength' => NULL,
+        'expected' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456',
+      ],
+      9 => [
+        'input' => 'abcdefghijklmnopqrstuvwxyz0123456789012345678901234567890123456789',
+        'replacementCharacter' => '_',
+        'cutoffLength' => 30,
+        'expected' => 'abcdefghijklmnopqrstuvwxyz0123',
+      ],
+      // test cutoff truncates multibyte properly
+      10 => [
+        'input' => 'ДДДДДДДДДДДДДДД',
+        'replacementCharacter' => '',
+        'cutoffLength' => 10,
+        'expected' => 'ДДДДДДДДДД',
+      ],
+    ];
+  }
+
+  /**
+   * Check makeFilenameWithUnicode() against one data set, passing only the arguments the set supplies.
+   * @dataProvider makeFilenameWithUnicodeProvider
+   * @param string $input Raw text to convert.
+   * @param ?string $replacementCharacter Left out of the call when NULL and no cutoff is given.
+   * @param ?int $cutoffLength Left out of the call when NULL.
+   * @param string $expected Exact filename the call must return.
+   */
+  public function testMakeFilenameWithUnicode(string $input, ?string $replacementCharacter, ?int $cutoffLength, string $expected) {
+    $arguments = [$input];
+    if (!is_null($cutoffLength)) {
+      $arguments[] = $replacementCharacter;
+      $arguments[] = $cutoffLength;
+    }
+    elseif (!is_null($replacementCharacter)) {
+      $arguments[] = $replacementCharacter;
+    }
+    $this->assertSame($expected, CRM_Utils_File::makeFilenameWithUnicode(...$arguments));
+  }
+
 }