From ad7fdc349a3d6313d3d1b69850b26b6fde2dab4c Mon Sep 17 00:00:00 2001 From: "deb.monish" Date: Tue, 14 Jun 2016 17:18:06 +0530 Subject: [PATCH] implement document merge using TbsZip --- CRM/Contact/Form/Task/PDFLetterCommon.php | 30 ++++-- CRM/Utils/PDF/Document.php | 122 +++++++++++++++------- 2 files changed, 104 insertions(+), 48 deletions(-) diff --git a/CRM/Contact/Form/Task/PDFLetterCommon.php b/CRM/Contact/Form/Task/PDFLetterCommon.php index bafc8b8902..e10478bfba 100644 --- a/CRM/Contact/Form/Task/PDFLetterCommon.php +++ b/CRM/Contact/Form/Task/PDFLetterCommon.php @@ -283,10 +283,6 @@ class CRM_Contact_Form_Task_PDFLetterCommon { CRM_Core_BAO_MessageTemplate::add($messageTemplate); } } - // extract the content of uploaded document file - elseif (!empty($formValues['document_file'])) { - list($html_message, $formValues['document_type']) = CRM_Utils_PDF_Document::docReader($formValues['document_file']['name'], $formValues['document_file']['type'], TRUE); - } elseif (CRM_Utils_Array::value('template', $formValues) > 0) { if (!empty($formValues['bind_format']) && $formValues['format_id']) { $query = "UPDATE civicrm_msg_template SET pdf_format_id = {$formValues['format_id']} WHERE id = {$formValues['template']}"; @@ -299,8 +295,15 @@ class CRM_Contact_Form_Task_PDFLetterCommon { $documentInfo = CRM_Core_BAO_File::getEntityFile('civicrm_msg_template', $formValues['template']); foreach ((array) $documentInfo as $info) { list($html_message, $formValues['document_type']) = CRM_Utils_PDF_Document::docReader($info['fullPath'], $info['mime_type'], TRUE); + $formValues['document_file_path'] = $info['fullPath']; } } + // extract the content of uploaded document file + elseif (!empty($formValues['document_file'])) { + list($html_message, $formValues['document_type']) = CRM_Utils_PDF_Document::docReader($formValues['document_file']['name'], $formValues['document_file']['type'], TRUE); + $formValues['document_file_path'] = $formValues['document_file']['name']; + } + if (!empty($formValues['update_format'])) { $bao = new CRM_Core_BAO_PdfFormat(); $bao->savePdfFormat($formValues, $formValues['format_id']); @@ -338,7 +341,16 @@ class CRM_Contact_Form_Task_PDFLetterCommon { $buttonName = $form->controller->getButtonName(); $skipOnHold = isset($form->skipOnHold) ? $form->skipOnHold : FALSE; $skipDeceased = isset($form->skipDeceased) ? $form->skipDeceased : TRUE; - $html = array(); + $html = $document = array(); + + // CRM-16725 Skip creation of activities if user is previewing their PDF letter(s) + if ($buttonName == '_qf_PDF_submit') { + self::createActivities($form, $html_message, $form->_contactIds); + } + + if (!empty($formValues['document_file_path'])) { + $html_message = CRM_Utils_PDF_Document::doc2Text($formValues['document_file_path'], $formValues['document_type']); + } foreach ($form->_contactIds as $item => $contactId) { $params = array('contact_id' => $contactId); @@ -373,16 +385,14 @@ class CRM_Contact_Form_Task_PDFLetterCommon { $html[] = $tokenHtml; } - // CRM-16725 Skip creation of activities if user is previewing their PDF letter(s) - if ($buttonName == '_qf_PDF_submit') { - self::createActivities($form, $html_message, $form->_contactIds); - } - $type = $formValues['document_type']; if ($type == 'pdf') { CRM_Utils_PDF_Utils::html2pdf($html, "CiviLetter.pdf", FALSE, $formValues); } + elseif (!empty($formValues['document_file_path'])) { + CRM_Utils_PDF_Document::printDocuments($formValues['document_file_path'], $html, $type); + } else { CRM_Utils_PDF_Document::html2doc($html, "CiviLetter.$type", $formValues); } diff --git a/CRM/Utils/PDF/Document.php b/CRM/Utils/PDF/Document.php index ba7c77370d..78d48126b4 100644 --- a/CRM/Utils/PDF/Document.php +++ b/CRM/Utils/PDF/Document.php @@ -30,6 +30,9 @@ * @package CRM * @copyright CiviCRM LLC (c) 2004-2015 */ + +require_once 'TbsZip/tbszip.php'; + class CRM_Utils_PDF_Document { /** @@ -70,6 +73,16 @@ class CRM_Utils_PDF_Document { $section = $phpWord->addSection($pageStyle + array('breakType' => 'nextPage')); \PhpOffice\PhpWord\Shared\Html::addHtml($section, $html); } + + self::printDoc($phpWord, $ext, $fileName); + } + + /** + * @param object|string $phpWord + * @param string $ext + * @param string $fileName + */ + public static function printDoc($phpWord, $ext, $fileName) { $formats = array( 'docx' => 'Word2007', 'odt' => 'ODText', @@ -78,9 +91,12 @@ class CRM_Utils_PDF_Document { 'pdf' => 'PDF', ); + if (realpath($phpWord)) { + $phpWord = \PhpOffice\PhpWord\IOFactory::load($phpWord, $formats[$ext]); + } + $objWriter = \PhpOffice\PhpWord\IOFactory::createWriter($phpWord, $formats[$ext]); - // TODO: Split document generation and output into separate functions CRM_Utils_System::setHttpHeader('Content-Type', "application/$ext"); CRM_Utils_System::setHttpHeader('Content-Disposition', 'attachment; filename="' . $fileName . '"'); $objWriter->save("php://output"); @@ -122,7 +138,7 @@ class CRM_Utils_PDF_Document { $phpWord = \PhpOffice\PhpWord\IOFactory::load($path, $fileType); $phpWord->save($absPath, 'HTML'); - // return the html content for tokenreplacment and eventually dused for document download + // return the html content for tokenreplacment and eventually used for document download if ($returnContent) { $filename = fopen($absPath, 'r'); $content = fread($filename, filesize($absPath)); @@ -135,56 +151,86 @@ class CRM_Utils_PDF_Document { /** * Extract content of docx/odt file as text and later used for token replacement - * @param string $filePath Document file path - * @param string $type File type of document + * @param string $filePath Document file path + * @param string $docType File type of document + * @param bool $returnZipObj Return clsTbsZip object along with content? * - * @return string - * File content of document as text + * @return string|array + * File content of document as text or array of content and clsTbsZip object */ - public static function doc2Text($filePath, $type) { - $content = ''; - $docType = array_search($type, CRM_Core_SelectValues::documentApplicationType()); + public static function doc2Text($filePath, $docType, $returnZipObj = FALSE) { + $dataFile = ($docType == 'docx') ? 'word/document.xml' : 'content.xml'; - // for reference on document entry type check http://phpword.readthedocs.io/en/latest/writersreaders.html - $dataFiles = array( - 'odt' => array( - 'content.xml', - 'styles.xml', - 'Pictures/', - ), + $zip = new clsTbsZip(); + $zip->Open($filePath); + $content = $zip->FileRead($dataFile); + + if ($returnZipObj) { + return array($content, $zip); + } + + return $content; + } + + /** + * Modify contents of docx/odt file(s) and later merged into one final document + * + * @param string $filePath Document file path + * @param array $contents content of formatted/token-replaced document + * @param string $docType Document type e.g. odt/docx + */ + public static function printDocuments($filePath, $contents, $docType) { + $ooxmlMap = array( 'docx' => array( - 'word/document.xml', - 'word/styles.xml', - 'docProps/custom.xml', - 'word/numbering.xml', - 'word/settings.xml', - 'word/webSettings.xml', - 'word/fontTable.xml', - 'word/theme/theme1.xml', + 'dataFile' => 'word/document.xml', + 'startTag' => '', + // TODO need to provide proper ooxml tag for pagebreak + 'pageBreak' => '', + 'endTag' => '', + ), + 'odt' => array( + 'dataFile' => 'content.xml', + 'startTag' => '', + 'pageBreak' => '', + 'endTag' => '', ), ); - $zip = zip_open($filePath); + $dataMap = $ooxmlMap[$docType]; + list($finalContent, $zip) = self::doc2Text($filePath, $docType, TRUE); - if (!$zip || is_numeric($zip)) { - return $content; - } - - while ($zip_entry = zip_read($zip)) { - if (zip_entry_open($zip, $zip_entry) == FALSE || !in_array(zip_entry_name($zip_entry), $dataFiles[$docType])) { + // token-replaced document contents of each contact will be merged into final document + foreach ($contents as $key => $content) { + if ($key == 0) { + $finalContent = $content; continue; } - $content .= zip_entry_read($zip_entry, zip_entry_filesize($zip_entry)); - zip_entry_close($zip_entry); + + // 1. fetch the start position of document body + // 2. later fetch only the body part starting from position $start + // 3. replace closing body tag with pageBreak + // 4. append the $content to the finalContent + $start = strpos($content, $dataMap['startTag']); + $content = substr($content, $start); + $content = str_replace($dataMap['startTag'], $dataMap['pageBreak'], $content); + $finalContent = str_replace($dataMap['endTag'], $content, $finalContent); } - zip_close($zip); + //replace the loaded document file content located at $filePath with $finaContent + $zip->FileReplace($dataMap['dataFile'], $finalContent, TBSZIP_STRING); + + // get and path of civicrm upload directory and then construct the filepath of final document + $uploadDir = Civi::settings()->get('uploadDir'); + $absPath = Civi::paths()->getPath($uploadDir) . "CiviLetter.$docType"; - $content = str_replace('', "  ", $content); - $content = str_replace('', "\r\n", $content); - $striped_content = nl2br(strip_tags($content)); + // cleanup temporary document file created earlier if any + if (file_exists($absPath)) { + unlink($absPath); + } + // save the file document in civicrm upload directory, later used to download + $zip->Flush(TBSZIP_FILE, $absPath); - return array($striped_content, $docType); + self::printDoc($absPath, $docType, "CiviLetter.$docType"); } } -- 2.25.1