From ad7fdc349a3d6313d3d1b69850b26b6fde2dab4c Mon Sep 17 00:00:00 2001
From: "deb.monish" <monish.deb@webaccessglobal.com>
Date: Tue, 14 Jun 2016 17:18:06 +0530
Subject: [PATCH] implement document merge using TbsZip

---
 CRM/Contact/Form/Task/PDFLetterCommon.php |  30 ++++--
 CRM/Utils/PDF/Document.php                | 122 +++++++++++++++-------
 2 files changed, 104 insertions(+), 48 deletions(-)

diff --git a/CRM/Contact/Form/Task/PDFLetterCommon.php b/CRM/Contact/Form/Task/PDFLetterCommon.php
index bafc8b8902..e10478bfba 100644
--- a/CRM/Contact/Form/Task/PDFLetterCommon.php
+++ b/CRM/Contact/Form/Task/PDFLetterCommon.php
@@ -283,10 +283,6 @@ class CRM_Contact_Form_Task_PDFLetterCommon {
         CRM_Core_BAO_MessageTemplate::add($messageTemplate);
       }
     }
-    // extract the content of uploaded document file
-    elseif (!empty($formValues['document_file'])) {
-      list($html_message, $formValues['document_type']) = CRM_Utils_PDF_Document::docReader($formValues['document_file']['name'], $formValues['document_file']['type'], TRUE);
-    }
     elseif (CRM_Utils_Array::value('template', $formValues) > 0) {
       if (!empty($formValues['bind_format']) && $formValues['format_id']) {
         $query = "UPDATE civicrm_msg_template SET pdf_format_id = {$formValues['format_id']} WHERE id = {$formValues['template']}";
@@ -299,8 +295,15 @@ class CRM_Contact_Form_Task_PDFLetterCommon {
       $documentInfo = CRM_Core_BAO_File::getEntityFile('civicrm_msg_template', $formValues['template']);
       foreach ((array) $documentInfo as $info) {
         list($html_message, $formValues['document_type']) = CRM_Utils_PDF_Document::docReader($info['fullPath'], $info['mime_type'], TRUE);
+        $formValues['document_file_path'] = $info['fullPath'];
       }
     }
+    // extract the content of uploaded document file
+    elseif (!empty($formValues['document_file'])) {
+      list($html_message, $formValues['document_type']) = CRM_Utils_PDF_Document::docReader($formValues['document_file']['name'], $formValues['document_file']['type'], TRUE);
+      $formValues['document_file_path'] = $formValues['document_file']['name'];
+    }
+
     if (!empty($formValues['update_format'])) {
       $bao = new CRM_Core_BAO_PdfFormat();
       $bao->savePdfFormat($formValues, $formValues['format_id']);
@@ -338,7 +341,16 @@ class CRM_Contact_Form_Task_PDFLetterCommon {
     $buttonName = $form->controller->getButtonName();
     $skipOnHold = isset($form->skipOnHold) ? $form->skipOnHold : FALSE;
     $skipDeceased = isset($form->skipDeceased) ? $form->skipDeceased : TRUE;
-    $html = array();
+    $html = $document = array();
+
+    // CRM-16725 Skip creation of activities if user is previewing their PDF letter(s)
+    if ($buttonName == '_qf_PDF_submit') {
+      self::createActivities($form, $html_message, $form->_contactIds);
+    }
+
+    if (!empty($formValues['document_file_path'])) {
+      $html_message = CRM_Utils_PDF_Document::doc2Text($formValues['document_file_path'], $formValues['document_type']);
+    }
 
     foreach ($form->_contactIds as $item => $contactId) {
       $params = array('contact_id' => $contactId);
@@ -373,16 +385,14 @@ class CRM_Contact_Form_Task_PDFLetterCommon {
       $html[] = $tokenHtml;
     }
 
-    // CRM-16725 Skip creation of activities if user is previewing their PDF letter(s)
-    if ($buttonName == '_qf_PDF_submit') {
-      self::createActivities($form, $html_message, $form->_contactIds);
-    }
-
     $type = $formValues['document_type'];
 
     if ($type == 'pdf') {
       CRM_Utils_PDF_Utils::html2pdf($html, "CiviLetter.pdf", FALSE, $formValues);
     }
+    elseif (!empty($formValues['document_file_path'])) {
+      CRM_Utils_PDF_Document::printDocuments($formValues['document_file_path'], $html, $type);
+    }
     else {
       CRM_Utils_PDF_Document::html2doc($html, "CiviLetter.$type", $formValues);
     }
diff --git a/CRM/Utils/PDF/Document.php b/CRM/Utils/PDF/Document.php
index ba7c77370d..78d48126b4 100644
--- a/CRM/Utils/PDF/Document.php
+++ b/CRM/Utils/PDF/Document.php
@@ -30,6 +30,9 @@
  * @package CRM
  * @copyright CiviCRM LLC (c) 2004-2015
  */
+
+require_once 'TbsZip/tbszip.php';
+
 class CRM_Utils_PDF_Document {
 
   /**
@@ -70,6 +73,16 @@ class CRM_Utils_PDF_Document {
       $section = $phpWord->addSection($pageStyle + array('breakType' => 'nextPage'));
       \PhpOffice\PhpWord\Shared\Html::addHtml($section, $html);
     }
+
+    self::printDoc($phpWord, $ext, $fileName);
+  }
+
+  /**
+   * @param object|string $phpWord
+   * @param string $ext
+   * @param string $fileName
+   */
+  public static function printDoc($phpWord, $ext, $fileName) {
     $formats = array(
       'docx' => 'Word2007',
       'odt' => 'ODText',
@@ -78,9 +91,12 @@ class CRM_Utils_PDF_Document {
       'pdf' => 'PDF',
     );
 
+    if (realpath($phpWord)) {
+      $phpWord = \PhpOffice\PhpWord\IOFactory::load($phpWord, $formats[$ext]);
+    }
+
     $objWriter = \PhpOffice\PhpWord\IOFactory::createWriter($phpWord, $formats[$ext]);
 
-    // TODO: Split document generation and output into separate functions
     CRM_Utils_System::setHttpHeader('Content-Type', "application/$ext");
     CRM_Utils_System::setHttpHeader('Content-Disposition', 'attachment; filename="' . $fileName . '"');
     $objWriter->save("php://output");
@@ -122,7 +138,7 @@ class CRM_Utils_PDF_Document {
     $phpWord = \PhpOffice\PhpWord\IOFactory::load($path, $fileType);
     $phpWord->save($absPath, 'HTML');
 
-    // return the html content for tokenreplacment and eventually dused for document download
+    // return the html content for token replacement, eventually used for document download
     if ($returnContent) {
       $filename = fopen($absPath, 'r');
       $content = fread($filename, filesize($absPath));
@@ -135,56 +151,86 @@ class CRM_Utils_PDF_Document {
 
   /**
    * Extract content of docx/odt file as text and later used for token replacement
-   * @param string $filePath Document file path
-   * @param string $type File type of document
+   * @param string $filePath  Document file path
+   * @param string $docType  File type of document
+   * @param bool $returnZipObj  Return clsTbsZip object along with content?
    *
-   * @return string
-   *   File content of document as text
+   * @return string|array
+   *   File content of document as text or array of content and clsTbsZip object
    */
-  public static function doc2Text($filePath, $type) {
-    $content = '';
-    $docType = array_search($type, CRM_Core_SelectValues::documentApplicationType());
+  public static function doc2Text($filePath, $docType, $returnZipObj = FALSE) {
+    $dataFile = ($docType == 'docx') ? 'word/document.xml' : 'content.xml';
 
-    // for reference on document entry type check http://phpword.readthedocs.io/en/latest/writersreaders.html
-    $dataFiles = array(
-      'odt' => array(
-        'content.xml',
-        'styles.xml',
-        'Pictures/',
-      ),
+    $zip = new clsTbsZip();
+    $zip->Open($filePath);
+    $content = $zip->FileRead($dataFile);
+
+    if ($returnZipObj) {
+      return array($content, $zip);
+    }
+
+    return $content;
+  }
+
+  /**
+   * Modify contents of docx/odt file(s) and merge them into one final document
+   *
+   * @param string $filePath Document file path
+   * @param array $contents content of formatted/token-replaced document
+   * @param string $docType Document type e.g. odt/docx
+   */
+  public static function printDocuments($filePath, $contents, $docType) {
+    $ooxmlMap = array(
       'docx' => array(
-        'word/document.xml',
-        'word/styles.xml',
-        'docProps/custom.xml',
-        'word/numbering.xml',
-        'word/settings.xml',
-        'word/webSettings.xml',
-        'word/fontTable.xml',
-        'word/theme/theme1.xml',
+        'dataFile' => 'word/document.xml',
+        'startTag' => '<w:body>',
+        // TODO need to provide proper ooxml tag for pagebreak
+        'pageBreak' => '<w:pgMar></w:pgMar>',
+        'endTag' => '</w:body></w:document>',
+      ),
+      'odt' => array(
+        'dataFile' => 'content.xml',
+        'startTag' => '<office:body>',
+        'pageBreak' => '<text:p text:style-name="Standard"></text:p>',
+        'endTag' => '</office:body></office:document-content>',
       ),
     );
 
-    $zip = zip_open($filePath);
+    $dataMap = $ooxmlMap[$docType];
+    list($finalContent, $zip) = self::doc2Text($filePath, $docType, TRUE);
 
-    if (!$zip || is_numeric($zip)) {
-      return $content;
-    }
-
-    while ($zip_entry = zip_read($zip)) {
-      if (zip_entry_open($zip, $zip_entry) == FALSE || !in_array(zip_entry_name($zip_entry), $dataFiles[$docType])) {
+    // token-replaced document contents of each contact will be merged into final document
+    foreach ($contents as $key => $content) {
+      if ($key == 0) {
+        $finalContent = $content;
         continue;
       }
-      $content .= zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
-      zip_entry_close($zip_entry);
+
+      // 1. fetch the start position of document body
+      // 2. later fetch only the body part starting from position $start
+      // 3. replace closing body tag with pageBreak
+      // 4. append the $content to the finalContent
+      $start = strpos($content, $dataMap['startTag']);
+      $content = substr($content, $start);
+      $content = str_replace($dataMap['startTag'], $dataMap['pageBreak'], $content);
+      $finalContent = str_replace($dataMap['endTag'], $content, $finalContent);
     }
 
-    zip_close($zip);
+    // replace the loaded document file content located at $filePath with $finalContent
+    $zip->FileReplace($dataMap['dataFile'], $finalContent, TBSZIP_STRING);
+
+    // get the path of the civicrm upload directory and then construct the file path of the final document
+    $uploadDir = Civi::settings()->get('uploadDir');
+    $absPath = Civi::paths()->getPath($uploadDir) . "CiviLetter.$docType";
 
-    $content = str_replace('</w:r></w:p></w:tc><w:tc>', "&nbsp;&nbsp;", $content);
-    $content = str_replace('</w:r></w:p>', "\r\n", $content);
-    $striped_content = nl2br(strip_tags($content));
+    // cleanup temporary document file created earlier if any
+    if (file_exists($absPath)) {
+      unlink($absPath);
+    }
+    // save the file document in civicrm upload directory, later used to download
+    $zip->Flush(TBSZIP_FILE, $absPath);
 
-    return array($striped_content, $docType);
+    self::printDoc($absPath, $docType, "CiviLetter.$docType");
   }
 
 }
-- 
2.25.1