Merge pull request #23185 from eileenmcnaughton/smarty_wiz
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
bc77d7c0 4 | Copyright CiviCRM LLC. All rights reserved. |
6a488035 5 | |
bc77d7c0
TO
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
6a488035 9 +--------------------------------------------------------------------+
d25dd0ee 10 */
6a488035
TO
11
12/**
13 *
14 * @package CRM
ca5cec67 15 * @copyright CiviCRM LLC https://civicrm.org/licensing
6a488035
TO
16 */
17class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
7da04cde 18 const
b0b2638a
DL
19 NUM_ROWS_TO_INSERT = 100;
20
e0ef6999 21 /**
fe482240 22 * Provides information about the data source.
e0ef6999 23 *
a6c01b45
CW
24 * @return array
25 * collection of info about this data source
e0ef6999 26 */
39dc35d4 27 public function getInfo(): array {
be2fb01f 28 return ['title' => ts('Comma-Separated Values (CSV)')];
6a488035
TO
29 }
30
e0ef6999 31 /**
fe482240 32 * Set variables up before form is built.
3bdf1f3a 33 *
34 * @param CRM_Core_Form $form
e0ef6999 35 */
3a05d67e
TO
36 public function preProcess(&$form) {
37 }
6a488035 38
e0ef6999 39 /**
b8c71ffa 40 * This is function is called by the form object to get the DataSource's form snippet.
41 *
42 * It should add all fields necessary to get the data
e0ef6999
EM
43 * uploaded to the temporary table in the DB.
44 *
c490a46a 45 * @param CRM_Core_Form $form
c4382285 46 *
47 * @throws \CRM_Core_Exception
e0ef6999 48 */
00be9182 49 public function buildQuickForm(&$form) {
6a488035
TO
50 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
51
52 $config = CRM_Core_Config::singleton();
53
2e966dd5 54 $uploadFileSize = CRM_Utils_Number::formatUnitSize($config->maxFileSize . 'm', TRUE);
ebcf0a88
JP
55 //Fetch uploadFileSize from php_ini when $config->maxFileSize is set to "no limit".
56 if (empty($uploadFileSize)) {
57 $uploadFileSize = CRM_Utils_Number::formatUnitSize(ini_get('upload_max_filesize'), TRUE);
58 }
6a488035
TO
59 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
60 $form->assign('uploadSize', $uploadSize);
f1644834 61 $form->add('File', 'uploadFile', ts('Import Data File'), NULL, TRUE);
6a488035 62 $form->setMaxFileSize($uploadFileSize);
be2fb01f 63 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', [
971e129b
SL
64 1 => $uploadSize,
65 2 => $uploadFileSize,
66 ]), 'maxfilesize', $uploadFileSize);
6a488035
TO
67 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
68 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
69
70 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
71 }
72
e0ef6999 73 /**
fe482240 74 * Process the form submission.
54957108 75 *
76 * @param array $params
77 * @param string $db
78 * @param \CRM_Core_Form $form
0a66a182 79 *
80 * @throws \CRM_Core_Exception
e0ef6999 81 */
00be9182 82 public function postProcess(&$params, &$db, &$form) {
6a488035 83 $file = $params['uploadFile']['name'];
886013fa 84 $result = self::_CsvToTable(
6a488035
TO
85 $file,
86 CRM_Utils_Array::value('skipColumnHeader', $params, FALSE),
87 CRM_Utils_Array::value('import_table_name', $params),
88 CRM_Utils_Array::value('fieldSeparator', $params, ',')
89 );
90
91 $form->set('originalColHeader', CRM_Utils_Array::value('original_col_header', $result));
eb4ce815 92 $form->set('importTableName', $result['import_table_name']);
6a488035
TO
93 }
94
95 /**
96 * Create a table that matches the CSV file and populate it with the file's contents
97 *
6f69cc11
TO
98 * @param string $file
99 * File name to load.
100 * @param bool $headers
101 * Whether the first row contains headers.
0a66a182 102 * @param string $tableName
6f69cc11
TO
103 * Name of table from which data imported.
104 * @param string $fieldSeparator
b44e3f84 105 * Character that separates the various columns in the file.
6a488035 106 *
0a66a182 107 * @return array
a6c01b45 108 * name of the created table
0a66a182 109 * @throws \CRM_Core_Exception
6a488035 110 */
bd5d7c2b 111 private static function _CsvToTable(
6a488035 112 $file,
3a05d67e 113 $headers = FALSE,
0a66a182 114 $tableName = NULL,
6a488035
TO
115 $fieldSeparator = ','
116 ) {
be2fb01f 117 $result = [];
6a488035
TO
118 $fd = fopen($file, 'r');
119 if (!$fd) {
7980012b 120 throw new CRM_Core_Exception("Could not read $file");
6a488035 121 }
3bf4c8a0 122 if (filesize($file) == 0) {
7980012b 123 throw new CRM_Core_Exception("$file is empty. Please upload a valid file.");
3bf4c8a0 124 }
6a488035 125
6a488035 126 // support tab separated
808ca920 127 if (strtolower($fieldSeparator) === 'tab' ||
128 strtolower($fieldSeparator) === '\t'
6a488035
TO
129 ) {
130 $fieldSeparator = "\t";
131 }
132
133 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
134
135 // create the column names from the CSV header or as col_0, col_1, etc.
136 if ($headers) {
137 //need to get original headers.
138 $result['original_col_header'] = $firstrow;
139
140 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
353ffa53
TO
141 $columns = array_map($strtolower, $firstrow);
142 $columns = str_replace(' ', '_', $columns);
143 $columns = preg_replace('/[^a-z_]/', '', $columns);
6a488035
TO
144
145 // need to take care of null as well as duplicate col names.
146 $duplicateColName = FALSE;
147 if (count($columns) != count(array_unique($columns))) {
148 $duplicateColName = TRUE;
149 }
150
6a488035
TO
151 // need to truncate values per mysql field name length limits
152 // mysql allows 64, but we need to account for appending colKey
153 // CRM-9079
154 foreach ($columns as $colKey => & $colName) {
155 if (strlen($colName) > 58) {
156 $colName = substr($colName, 0, 58);
157 }
158 }
159
160 if (in_array('', $columns) || $duplicateColName) {
161 foreach ($columns as $colKey => & $colName) {
162 if (!$colName) {
163 $colName = "col_$colKey";
164 }
165 elseif ($duplicateColName) {
166 $colName .= "_$colKey";
167 }
168 }
169 }
170
171 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
bd5d7c2b
TO
172 foreach ($columns as & $column) {
173 $column = "`$column`";
3a05d67e 174 }
6a488035
TO
175 }
176 else {
be2fb01f 177 $columns = [];
bd5d7c2b
TO
178 foreach ($firstrow as $i => $_) {
179 $columns[] = "col_$i";
3a05d67e 180 }
6a488035
TO
181 }
182
0a66a182 183 if ($tableName) {
808ca920 184 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
6a488035 185 }
0a66a182 186 $table = CRM_Utils_SQL_TempTable::build()->setDurable();
187 $tableName = $table->getName();
808ca920 188 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
0a66a182 189 $table->createWithColumns(implode(' text, ', $columns) . ' text');
6a488035
TO
190
191 $numColumns = count($columns);
6a488035
TO
192
193 // the proper approach, but some MySQL installs do not have this enabled
194 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
195 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
196 // $db->query($load);
197
198 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
199 if (!$headers) {
200 rewind($fd);
201 }
202
353ffa53 203 $sql = NULL;
6a488035
TO
204 $first = TRUE;
205 $count = 0;
206 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
207 // skip rows that dont match column count, else we get a sql error
208 if (count($row) != $numColumns) {
209 continue;
210 }
460f7b9b 211 // A blank line will be array(0 => NULL)
212 if ($row === [NULL]) {
213 continue;
214 }
6a488035
TO
215
216 if (!$first) {
217 $sql .= ', ';
218 }
219
220 $first = FALSE;
5dcdc4d6
SB
221
222 // CRM-17859 Trim non-breaking spaces from columns.
913e1c96 223 $row = array_map(['CRM_Import_DataSource_CSV', 'trimNonBreakingSpaces'], $row);
be2fb01f 224 $row = array_map(['CRM_Core_DAO', 'escapeString'], $row);
6a488035
TO
225 $sql .= "('" . implode("', '", $row) . "')";
226 $count++;
227
228 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
808ca920 229 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
6a488035 230
353ffa53 231 $sql = NULL;
6a488035
TO
232 $first = TRUE;
233 $count = 0;
234 }
235 }
236
237 if (!empty($sql)) {
808ca920 238 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
6a488035
TO
239 }
240
241 fclose($fd);
242
243 //get the import tmp table name.
0a66a182 244 $result['import_table_name'] = $tableName;
6a488035
TO
245
246 return $result;
247 }
96025800 248
913e1c96 249 /**
250 * Trim non-breaking spaces in a multibyte-safe way.
251 * See also dev/core#2127 - avoid breaking strings ending in à or any other
252 * unicode character sharing the same 0xA0 byte as a non-breaking space.
253 *
254 * @param string $string
255 * @return string The trimmed string
256 */
257 public static function trimNonBreakingSpaces(string $string): string {
258 $encoding = mb_detect_encoding($string, NULL, TRUE);
259 if ($encoding === FALSE) {
260 // This could mean a couple things. One is that the string is
261 // ASCII-encoded but contains a non-breaking space, which causes
262 // php to fail to detect the encoding. So let's just do what we
263 // did before which works in that situation and is at least no
264 // worse in other situations.
265 return trim($string, chr(0xC2) . chr(0xA0));
266 }
267 elseif ($encoding !== 'UTF-8') {
268 $string = mb_convert_encoding($string, 'UTF-8', [$encoding]);
269 }
270 return preg_replace("/^(\u{a0})+|(\u{a0})+$/", '', $string);
271 }
272
6a488035 273}