Merge pull request #23744 from eileenmcnaughton/import_csv
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
bc77d7c0 4 | Copyright CiviCRM LLC. All rights reserved. |
6a488035 5 | |
bc77d7c0
TO
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
6a488035 9 +--------------------------------------------------------------------+
d25dd0ee 10 */
6a488035
TO
11
12/**
13 *
14 * @package CRM
ca5cec67 15 * @copyright CiviCRM LLC https://civicrm.org/licensing
6a488035
TO
16 */
17class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
7da04cde 18 const
b0b2638a
DL
19 NUM_ROWS_TO_INSERT = 100;
20
7b057b66
EM
21 /**
22 * Form fields declared for this datasource.
23 *
24 * @var string[]
25 */
26 protected $submittableFields = ['skipColumnHeader', 'uploadField'];
27
e0ef6999 28 /**
fe482240 29 * Provides information about the data source.
e0ef6999 30 *
a6c01b45
CW
31 * @return array
32 * collection of info about this data source
e0ef6999 33 */
39dc35d4 34 public function getInfo(): array {
be2fb01f 35 return ['title' => ts('Comma-Separated Values (CSV)')];
6a488035
TO
36 }
37
e0ef6999 38 /**
b8c71ffa 39 * This is function is called by the form object to get the DataSource's form snippet.
40 *
41 * It should add all fields necessary to get the data
e0ef6999
EM
42 * uploaded to the temporary table in the DB.
43 *
c490a46a 44 * @param CRM_Core_Form $form
c4382285 45 *
46 * @throws \CRM_Core_Exception
e0ef6999 47 */
00be9182 48 public function buildQuickForm(&$form) {
6a488035
TO
49 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
50
d8526720 51 $uploadFileSize = CRM_Utils_Number::formatUnitSize(Civi::settings()->get('maxFileSize') . 'm', TRUE);
ebcf0a88
JP
52 //Fetch uploadFileSize from php_ini when $config->maxFileSize is set to "no limit".
53 if (empty($uploadFileSize)) {
54 $uploadFileSize = CRM_Utils_Number::formatUnitSize(ini_get('upload_max_filesize'), TRUE);
55 }
6a488035
TO
56 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
57 $form->assign('uploadSize', $uploadSize);
f1644834 58 $form->add('File', 'uploadFile', ts('Import Data File'), NULL, TRUE);
6a488035 59 $form->setMaxFileSize($uploadFileSize);
be2fb01f 60 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', [
971e129b
SL
61 1 => $uploadSize,
62 2 => $uploadFileSize,
63 ]), 'maxfilesize', $uploadFileSize);
6a488035
TO
64 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
65 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
66
67 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
68 }
69
e0ef6999 70 /**
1a783cdb 71 * Initialize the datasource, based on the submitted values stored in the user job.
0a66a182 72 *
4a01628c 73 * @throws \API_Exception
0a66a182 74 * @throws \CRM_Core_Exception
e0ef6999 75 */
1a783cdb 76 public function initialize(): void {
fb8d5738 77 $result = $this->csvToTable(
8d88fae0
EM
78 $this->getSubmittedValue('uploadFile')['name'],
79 $this->getSubmittedValue('skipColumnHeader'),
80 $this->getSubmittedValue('fieldSeparator') ?? ','
6a488035 81 );
8d88fae0 82 $this->addTrackingFieldsToTable($result['import_table_name']);
6a488035 83
4a01628c 84 $this->updateUserJobMetadata('DataSource', [
7b057b66 85 'table_name' => $result['import_table_name'],
fb8d5738 86 'column_headers' => $result['column_headers'],
4a01628c
EM
87 'number_of_columns' => $result['number_of_columns'],
88 ]);
6a488035
TO
89 }
90
91 /**
92 * Create a table that matches the CSV file and populate it with the file's contents
93 *
6f69cc11
TO
94 * @param string $file
95 * File name to load.
96 * @param bool $headers
97 * Whether the first row contains headers.
6f69cc11 98 * @param string $fieldSeparator
b44e3f84 99 * Character that separates the various columns in the file.
6a488035 100 *
0a66a182 101 * @return array
a6c01b45 102 * name of the created table
0a66a182 103 * @throws \CRM_Core_Exception
6a488035 104 */
fb8d5738 105 private function csvToTable(
6a488035 106 $file,
3a05d67e 107 $headers = FALSE,
6a488035
TO
108 $fieldSeparator = ','
109 ) {
be2fb01f 110 $result = [];
6a488035
TO
111 $fd = fopen($file, 'r');
112 if (!$fd) {
7980012b 113 throw new CRM_Core_Exception("Could not read $file");
6a488035 114 }
3bf4c8a0 115 if (filesize($file) == 0) {
7980012b 116 throw new CRM_Core_Exception("$file is empty. Please upload a valid file.");
3bf4c8a0 117 }
6a488035 118
6a488035 119 // support tab separated
808ca920 120 if (strtolower($fieldSeparator) === 'tab' ||
121 strtolower($fieldSeparator) === '\t'
6a488035
TO
122 ) {
123 $fieldSeparator = "\t";
124 }
125
126 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
fb8d5738 127 $result['column_headers'] = array_fill(0, count($firstrow), '');
6a488035
TO
128 // create the column names from the CSV header or as col_0, col_1, etc.
129 if ($headers) {
130 //need to get original headers.
7b057b66 131 $result['column_headers'] = $firstrow;
6a488035
TO
132
133 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
353ffa53
TO
134 $columns = array_map($strtolower, $firstrow);
135 $columns = str_replace(' ', '_', $columns);
136 $columns = preg_replace('/[^a-z_]/', '', $columns);
6a488035
TO
137
138 // need to take care of null as well as duplicate col names.
139 $duplicateColName = FALSE;
140 if (count($columns) != count(array_unique($columns))) {
141 $duplicateColName = TRUE;
142 }
143
6a488035
TO
144 // need to truncate values per mysql field name length limits
145 // mysql allows 64, but we need to account for appending colKey
146 // CRM-9079
147 foreach ($columns as $colKey => & $colName) {
148 if (strlen($colName) > 58) {
149 $colName = substr($colName, 0, 58);
150 }
151 }
152
153 if (in_array('', $columns) || $duplicateColName) {
154 foreach ($columns as $colKey => & $colName) {
155 if (!$colName) {
156 $colName = "col_$colKey";
157 }
158 elseif ($duplicateColName) {
159 $colName .= "_$colKey";
160 }
161 }
162 }
163
164 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
bd5d7c2b
TO
165 foreach ($columns as & $column) {
166 $column = "`$column`";
3a05d67e 167 }
6a488035
TO
168 }
169 else {
be2fb01f 170 $columns = [];
bd5d7c2b
TO
171 foreach ($firstrow as $i => $_) {
172 $columns[] = "col_$i";
3a05d67e 173 }
6a488035
TO
174 }
175
0a66a182 176 $table = CRM_Utils_SQL_TempTable::build()->setDurable();
177 $tableName = $table->getName();
808ca920 178 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
0a66a182 179 $table->createWithColumns(implode(' text, ', $columns) . ' text');
6a488035
TO
180
181 $numColumns = count($columns);
6a488035
TO
182
183 // the proper approach, but some MySQL installs do not have this enabled
184 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
185 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
186 // $db->query($load);
187
188 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
189 if (!$headers) {
190 rewind($fd);
191 }
192
353ffa53 193 $sql = NULL;
6a488035
TO
194 $first = TRUE;
195 $count = 0;
196 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
197 // skip rows that dont match column count, else we get a sql error
198 if (count($row) != $numColumns) {
199 continue;
200 }
460f7b9b 201 // A blank line will be array(0 => NULL)
202 if ($row === [NULL]) {
203 continue;
204 }
6a488035
TO
205
206 if (!$first) {
207 $sql .= ', ';
208 }
209
210 $first = FALSE;
5dcdc4d6
SB
211
212 // CRM-17859 Trim non-breaking spaces from columns.
913e1c96 213 $row = array_map(['CRM_Import_DataSource_CSV', 'trimNonBreakingSpaces'], $row);
be2fb01f 214 $row = array_map(['CRM_Core_DAO', 'escapeString'], $row);
6a488035
TO
215 $sql .= "('" . implode("', '", $row) . "')";
216 $count++;
217
218 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
808ca920 219 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
6a488035 220
353ffa53 221 $sql = NULL;
6a488035
TO
222 $first = TRUE;
223 $count = 0;
224 }
225 }
226
227 if (!empty($sql)) {
808ca920 228 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
6a488035
TO
229 }
230
231 fclose($fd);
232
233 //get the import tmp table name.
0a66a182 234 $result['import_table_name'] = $tableName;
4a01628c 235 $result['number_of_columns'] = $numColumns;
6a488035
TO
236 return $result;
237 }
96025800 238
913e1c96 239 /**
240 * Trim non-breaking spaces in a multibyte-safe way.
241 * See also dev/core#2127 - avoid breaking strings ending in à or any other
242 * unicode character sharing the same 0xA0 byte as a non-breaking space.
243 *
244 * @param string $string
245 * @return string The trimmed string
246 */
247 public static function trimNonBreakingSpaces(string $string): string {
248 $encoding = mb_detect_encoding($string, NULL, TRUE);
249 if ($encoding === FALSE) {
250 // This could mean a couple things. One is that the string is
251 // ASCII-encoded but contains a non-breaking space, which causes
252 // php to fail to detect the encoding. So let's just do what we
253 // did before which works in that situation and is at least no
254 // worse in other situations.
255 return trim($string, chr(0xC2) . chr(0xA0));
256 }
257 elseif ($encoding !== 'UTF-8') {
258 $string = mb_convert_encoding($string, 'UTF-8', [$encoding]);
259 }
260 return preg_replace("/^(\u{a0})+|(\u{a0})+$/", '', $string);
261 }
262
6a488035 263}