Merge pull request #23080 from eileenmcnaughton/isset
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 */
17 class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
18 const
19 NUM_ROWS_TO_INSERT = 100;
20
21 /**
22 * Provides information about the data source.
23 *
24 * @return array
25 * collection of info about this data source
26 */
27 public function getInfo() {
28 return ['title' => ts('Comma-Separated Values (CSV)')];
29 }
30
31 /**
32 * Set variables up before form is built.
33 *
34 * @param CRM_Core_Form $form
35 */
36 public function preProcess(&$form) {
37 }
38
39 /**
40 * This is function is called by the form object to get the DataSource's form snippet.
41 *
42 * It should add all fields necessary to get the data
43 * uploaded to the temporary table in the DB.
44 *
45 * @param CRM_Core_Form $form
46 *
47 * @throws \CRM_Core_Exception
48 */
49 public function buildQuickForm(&$form) {
50 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
51
52 $config = CRM_Core_Config::singleton();
53
54 $uploadFileSize = CRM_Utils_Number::formatUnitSize($config->maxFileSize . 'm', TRUE);
55 //Fetch uploadFileSize from php_ini when $config->maxFileSize is set to "no limit".
56 if (empty($uploadFileSize)) {
57 $uploadFileSize = CRM_Utils_Number::formatUnitSize(ini_get('upload_max_filesize'), TRUE);
58 }
59 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
60 $form->assign('uploadSize', $uploadSize);
61 $form->add('File', 'uploadFile', ts('Import Data File'), NULL, TRUE);
62 $form->setMaxFileSize($uploadFileSize);
63 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', [
64 1 => $uploadSize,
65 2 => $uploadFileSize,
66 ]), 'maxfilesize', $uploadFileSize);
67 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
68 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
69
70 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
71 }
72
73 /**
74 * Process the form submission.
75 *
76 * @param array $params
77 * @param string $db
78 * @param \CRM_Core_Form $form
79 *
80 * @throws \CRM_Core_Exception
81 */
82 public function postProcess(&$params, &$db, &$form) {
83 $file = $params['uploadFile']['name'];
84 $result = self::_CsvToTable(
85 $file,
86 CRM_Utils_Array::value('skipColumnHeader', $params, FALSE),
87 CRM_Utils_Array::value('import_table_name', $params),
88 CRM_Utils_Array::value('fieldSeparator', $params, ',')
89 );
90
91 $form->set('originalColHeader', CRM_Utils_Array::value('original_col_header', $result));
92
93 $table = $result['import_table_name'];
94 $importJob = new CRM_Contact_Import_ImportJob($table);
95 $form->set('importTableName', $importJob->getTableName());
96 }
97
98 /**
99 * Create a table that matches the CSV file and populate it with the file's contents
100 *
101 * @param string $file
102 * File name to load.
103 * @param bool $headers
104 * Whether the first row contains headers.
105 * @param string $tableName
106 * Name of table from which data imported.
107 * @param string $fieldSeparator
108 * Character that separates the various columns in the file.
109 *
110 * @return array
111 * name of the created table
112 * @throws \CRM_Core_Exception
113 */
114 private static function _CsvToTable(
115 $file,
116 $headers = FALSE,
117 $tableName = NULL,
118 $fieldSeparator = ','
119 ) {
120 $result = [];
121 $fd = fopen($file, 'r');
122 if (!$fd) {
123 throw new CRM_Core_Exception("Could not read $file");
124 }
125 if (filesize($file) == 0) {
126 throw new CRM_Core_Exception("$file is empty. Please upload a valid file.");
127 }
128
129 // support tab separated
130 if (strtolower($fieldSeparator) === 'tab' ||
131 strtolower($fieldSeparator) === '\t'
132 ) {
133 $fieldSeparator = "\t";
134 }
135
136 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
137
138 // create the column names from the CSV header or as col_0, col_1, etc.
139 if ($headers) {
140 //need to get original headers.
141 $result['original_col_header'] = $firstrow;
142
143 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
144 $columns = array_map($strtolower, $firstrow);
145 $columns = str_replace(' ', '_', $columns);
146 $columns = preg_replace('/[^a-z_]/', '', $columns);
147
148 // need to take care of null as well as duplicate col names.
149 $duplicateColName = FALSE;
150 if (count($columns) != count(array_unique($columns))) {
151 $duplicateColName = TRUE;
152 }
153
154 // need to truncate values per mysql field name length limits
155 // mysql allows 64, but we need to account for appending colKey
156 // CRM-9079
157 foreach ($columns as $colKey => & $colName) {
158 if (strlen($colName) > 58) {
159 $colName = substr($colName, 0, 58);
160 }
161 }
162
163 if (in_array('', $columns) || $duplicateColName) {
164 foreach ($columns as $colKey => & $colName) {
165 if (!$colName) {
166 $colName = "col_$colKey";
167 }
168 elseif ($duplicateColName) {
169 $colName .= "_$colKey";
170 }
171 }
172 }
173
174 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
175 foreach ($columns as & $column) {
176 $column = "`$column`";
177 }
178 }
179 else {
180 $columns = [];
181 foreach ($firstrow as $i => $_) {
182 $columns[] = "col_$i";
183 }
184 }
185
186 if ($tableName) {
187 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
188 }
189 $table = CRM_Utils_SQL_TempTable::build()->setDurable();
190 $tableName = $table->getName();
191 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
192 $table->createWithColumns(implode(' text, ', $columns) . ' text');
193
194 $numColumns = count($columns);
195
196 // the proper approach, but some MySQL installs do not have this enabled
197 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
198 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
199 // $db->query($load);
200
201 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
202 if (!$headers) {
203 rewind($fd);
204 }
205
206 $sql = NULL;
207 $first = TRUE;
208 $count = 0;
209 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
210 // skip rows that dont match column count, else we get a sql error
211 if (count($row) != $numColumns) {
212 continue;
213 }
214 // A blank line will be array(0 => NULL)
215 if ($row === [NULL]) {
216 continue;
217 }
218
219 if (!$first) {
220 $sql .= ', ';
221 }
222
223 $first = FALSE;
224
225 // CRM-17859 Trim non-breaking spaces from columns.
226 $row = array_map(['CRM_Import_DataSource_CSV', 'trimNonBreakingSpaces'], $row);
227 $row = array_map(['CRM_Core_DAO', 'escapeString'], $row);
228 $sql .= "('" . implode("', '", $row) . "')";
229 $count++;
230
231 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
232 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
233
234 $sql = NULL;
235 $first = TRUE;
236 $count = 0;
237 }
238 }
239
240 if (!empty($sql)) {
241 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
242 }
243
244 fclose($fd);
245
246 //get the import tmp table name.
247 $result['import_table_name'] = $tableName;
248
249 return $result;
250 }
251
252 /**
253 * Trim non-breaking spaces in a multibyte-safe way.
254 * See also dev/core#2127 - avoid breaking strings ending in à or any other
255 * unicode character sharing the same 0xA0 byte as a non-breaking space.
256 *
257 * @param string $string
258 * @return string The trimmed string
259 */
260 public static function trimNonBreakingSpaces(string $string): string {
261 $encoding = mb_detect_encoding($string, NULL, TRUE);
262 if ($encoding === FALSE) {
263 // This could mean a couple things. One is that the string is
264 // ASCII-encoded but contains a non-breaking space, which causes
265 // php to fail to detect the encoding. So let's just do what we
266 // did before which works in that situation and is at least no
267 // worse in other situations.
268 return trim($string, chr(0xC2) . chr(0xA0));
269 }
270 elseif ($encoding !== 'UTF-8') {
271 $string = mb_convert_encoding($string, 'UTF-8', [$encoding]);
272 }
273 return preg_replace("/^(\u{a0})+|(\u{a0})+$/", '', $string);
274 }
275
276 }