Merge pull request #23245 from eileenmcnaughton/import_wip
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 */
17 class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
18 const
19 NUM_ROWS_TO_INSERT = 100;
20
21 /**
22 * Form fields declared for this datasource.
23 *
24 * @var string[]
25 */
26 protected $submittableFields = ['skipColumnHeader', 'uploadField'];
27
28 /**
29 * Provides information about the data source.
30 *
31 * @return array
32 * collection of info about this data source
33 */
34 public function getInfo(): array {
35 return ['title' => ts('Comma-Separated Values (CSV)')];
36 }
37
38 /**
39 * Set variables up before form is built.
40 *
41 * @param CRM_Core_Form $form
42 */
43 public function preProcess(&$form) {
44 }
45
46 /**
47 * This is function is called by the form object to get the DataSource's form snippet.
48 *
49 * It should add all fields necessary to get the data
50 * uploaded to the temporary table in the DB.
51 *
52 * @param CRM_Core_Form $form
53 *
54 * @throws \CRM_Core_Exception
55 */
56 public function buildQuickForm(&$form) {
57 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
58
59 $config = CRM_Core_Config::singleton();
60
61 $uploadFileSize = CRM_Utils_Number::formatUnitSize($config->maxFileSize . 'm', TRUE);
62 //Fetch uploadFileSize from php_ini when $config->maxFileSize is set to "no limit".
63 if (empty($uploadFileSize)) {
64 $uploadFileSize = CRM_Utils_Number::formatUnitSize(ini_get('upload_max_filesize'), TRUE);
65 }
66 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
67 $form->assign('uploadSize', $uploadSize);
68 $form->add('File', 'uploadFile', ts('Import Data File'), NULL, TRUE);
69 $form->setMaxFileSize($uploadFileSize);
70 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', [
71 1 => $uploadSize,
72 2 => $uploadFileSize,
73 ]), 'maxfilesize', $uploadFileSize);
74 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
75 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
76
77 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
78 }
79
80 /**
81 * Process the form submission.
82 *
83 * @param array $params
84 * @param string $db
85 * @param \CRM_Core_Form $form
86 *
87 * @throws \CRM_Core_Exception
88 */
89 public function postProcess(&$params, &$db, &$form) {
90 $file = $params['uploadFile']['name'];
91 $result = self::_CsvToTable(
92 $file,
93 CRM_Utils_Array::value('skipColumnHeader', $params, FALSE),
94 CRM_Utils_Array::value('import_table_name', $params),
95 CRM_Utils_Array::value('fieldSeparator', $params, ',')
96 );
97
98 $form->set('originalColHeader', CRM_Utils_Array::value('column_headers', $result));
99 $form->set('importTableName', $result['import_table_name']);
100 $this->dataSourceMetadata = [
101 'table_name' => $result['import_table_name'],
102 'column_headers' => $result['column_headers'] ?? NULL,
103 ];
104 }
105
106 /**
107 * Create a table that matches the CSV file and populate it with the file's contents
108 *
109 * @param string $file
110 * File name to load.
111 * @param bool $headers
112 * Whether the first row contains headers.
113 * @param string $tableName
114 * Name of table from which data imported.
115 * @param string $fieldSeparator
116 * Character that separates the various columns in the file.
117 *
118 * @return array
119 * name of the created table
120 * @throws \CRM_Core_Exception
121 */
122 private static function _CsvToTable(
123 $file,
124 $headers = FALSE,
125 $tableName = NULL,
126 $fieldSeparator = ','
127 ) {
128 $result = [];
129 $fd = fopen($file, 'r');
130 if (!$fd) {
131 throw new CRM_Core_Exception("Could not read $file");
132 }
133 if (filesize($file) == 0) {
134 throw new CRM_Core_Exception("$file is empty. Please upload a valid file.");
135 }
136
137 // support tab separated
138 if (strtolower($fieldSeparator) === 'tab' ||
139 strtolower($fieldSeparator) === '\t'
140 ) {
141 $fieldSeparator = "\t";
142 }
143
144 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
145
146 // create the column names from the CSV header or as col_0, col_1, etc.
147 if ($headers) {
148 //need to get original headers.
149 $result['column_headers'] = $firstrow;
150
151 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
152 $columns = array_map($strtolower, $firstrow);
153 $columns = str_replace(' ', '_', $columns);
154 $columns = preg_replace('/[^a-z_]/', '', $columns);
155
156 // need to take care of null as well as duplicate col names.
157 $duplicateColName = FALSE;
158 if (count($columns) != count(array_unique($columns))) {
159 $duplicateColName = TRUE;
160 }
161
162 // need to truncate values per mysql field name length limits
163 // mysql allows 64, but we need to account for appending colKey
164 // CRM-9079
165 foreach ($columns as $colKey => & $colName) {
166 if (strlen($colName) > 58) {
167 $colName = substr($colName, 0, 58);
168 }
169 }
170
171 if (in_array('', $columns) || $duplicateColName) {
172 foreach ($columns as $colKey => & $colName) {
173 if (!$colName) {
174 $colName = "col_$colKey";
175 }
176 elseif ($duplicateColName) {
177 $colName .= "_$colKey";
178 }
179 }
180 }
181
182 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
183 foreach ($columns as & $column) {
184 $column = "`$column`";
185 }
186 }
187 else {
188 $columns = [];
189 foreach ($firstrow as $i => $_) {
190 $columns[] = "col_$i";
191 }
192 }
193
194 if ($tableName) {
195 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
196 }
197 $table = CRM_Utils_SQL_TempTable::build()->setDurable();
198 $tableName = $table->getName();
199 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
200 $table->createWithColumns(implode(' text, ', $columns) . ' text');
201
202 $numColumns = count($columns);
203
204 // the proper approach, but some MySQL installs do not have this enabled
205 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
206 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
207 // $db->query($load);
208
209 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
210 if (!$headers) {
211 rewind($fd);
212 }
213
214 $sql = NULL;
215 $first = TRUE;
216 $count = 0;
217 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
218 // skip rows that dont match column count, else we get a sql error
219 if (count($row) != $numColumns) {
220 continue;
221 }
222 // A blank line will be array(0 => NULL)
223 if ($row === [NULL]) {
224 continue;
225 }
226
227 if (!$first) {
228 $sql .= ', ';
229 }
230
231 $first = FALSE;
232
233 // CRM-17859 Trim non-breaking spaces from columns.
234 $row = array_map(['CRM_Import_DataSource_CSV', 'trimNonBreakingSpaces'], $row);
235 $row = array_map(['CRM_Core_DAO', 'escapeString'], $row);
236 $sql .= "('" . implode("', '", $row) . "')";
237 $count++;
238
239 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
240 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
241
242 $sql = NULL;
243 $first = TRUE;
244 $count = 0;
245 }
246 }
247
248 if (!empty($sql)) {
249 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
250 }
251
252 fclose($fd);
253
254 //get the import tmp table name.
255 $result['import_table_name'] = $tableName;
256 return $result;
257 }
258
259 /**
260 * Trim non-breaking spaces in a multibyte-safe way.
261 * See also dev/core#2127 - avoid breaking strings ending in à or any other
262 * unicode character sharing the same 0xA0 byte as a non-breaking space.
263 *
264 * @param string $string
265 * @return string The trimmed string
266 */
267 public static function trimNonBreakingSpaces(string $string): string {
268 $encoding = mb_detect_encoding($string, NULL, TRUE);
269 if ($encoding === FALSE) {
270 // This could mean a couple things. One is that the string is
271 // ASCII-encoded but contains a non-breaking space, which causes
272 // php to fail to detect the encoding. So let's just do what we
273 // did before which works in that situation and is at least no
274 // worse in other situations.
275 return trim($string, chr(0xC2) . chr(0xA0));
276 }
277 elseif ($encoding !== 'UTF-8') {
278 $string = mb_convert_encoding($string, 'UTF-8', [$encoding]);
279 }
280 return preg_replace("/^(\u{a0})+|(\u{a0})+$/", '', $string);
281 }
282
283 }