Commit | Line | Data |
---|---|---|
6a488035 TO |
1 | <?php |
2 | /* | |
3 | +--------------------------------------------------------------------+ | |
bc77d7c0 | 4 | | Copyright CiviCRM LLC. All rights reserved. | |
6a488035 | 5 | | | |
bc77d7c0 TO |
6 | | This work is published under the GNU AGPLv3 license with some | |
7 | | permitted exceptions and without any warranty. For full license | | |
8 | | and copyright information, see https://civicrm.org/licensing | | |
6a488035 | 9 | +--------------------------------------------------------------------+ |
d25dd0ee | 10 | */ |
6a488035 TO |
11 | |
/**
 * CSV implementation of CRM_Import_DataSource.
 *
 * Adds the upload fields to the import form and copies the rows of the
 * uploaded file into a durable temporary table with one text column per
 * CSV column.
 *
 * @package CRM
 * @copyright CiviCRM LLC https://civicrm.org/licensing
 */
class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {

  /**
   * Maximum number of rows sent to the database in a single INSERT statement.
   */
  const
    NUM_ROWS_TO_INSERT = 100;

  /**
   * Form fields declared for this datasource.
   *
   * NOTE(review): buildQuickForm() registers the file element as 'uploadFile'
   * and postProcess() reads $params['uploadFile'], so 'uploadField' looks like
   * a typo. Left unchanged because the code consuming this list is not visible
   * from this file - confirm against CRM_Import_DataSource before renaming.
   *
   * @var string[]
   */
  protected $submittableFields = ['skipColumnHeader', 'uploadField'];

  /**
   * Provides information about the data source.
   *
   * @return array
   *   Collection of info about this data source; currently only a translated
   *   'title'.
   */
  public function getInfo(): array {
    return ['title' => ts('Comma-Separated Values (CSV)')];
  }

  /**
   * Set variables up before form is built.
   *
   * Nothing needs preparing for the CSV data source, so this is a no-op.
   *
   * @param CRM_Core_Form $form
   */
  public function preProcess(&$form) {
  }

  /**
   * This function is called by the form object to get the DataSource's form snippet.
   *
   * It adds all fields necessary to get the data uploaded to the temporary
   * table in the DB: a hidden marker naming this data source, the required
   * file upload element (with size, UTF-8 and "really uploaded" rules) and
   * the "first row contains column headers" checkbox.
   *
   * @param CRM_Core_Form $form
   *
   * @throws \CRM_Core_Exception
   */
  public function buildQuickForm(&$form) {
    $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');

    $config = CRM_Core_Config::singleton();

    $uploadFileSize = CRM_Utils_Number::formatUnitSize($config->maxFileSize . 'm', TRUE);
    // Fall back to php.ini's upload_max_filesize when $config->maxFileSize is
    // set to "no limit" (which yields an empty size here).
    if (empty($uploadFileSize)) {
      $uploadFileSize = CRM_Utils_Number::formatUnitSize(ini_get('upload_max_filesize'), TRUE);
    }

    // Same limit expressed in megabytes, for display in the template and in
    // the validation message.
    $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
    $form->assign('uploadSize', $uploadSize);

    $form->add('File', 'uploadFile', ts('Import Data File'), NULL, TRUE);
    $form->setMaxFileSize($uploadFileSize);
    $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', [
      1 => $uploadSize,
      2 => $uploadFileSize,
    ]), 'maxfilesize', $uploadFileSize);
    $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
    $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');

    $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
  }

  /**
   * Process the form submission.
   *
   * Loads the uploaded file into an import table, then records the table
   * name, original column headers and column count on the form and in the
   * user job metadata.
   *
   * @param array $params
   *   Submitted values. Reads 'uploadFile' (required), and optionally
   *   'skipColumnHeader', 'import_table_name' and 'fieldSeparator'.
   * @param string $db
   *   Not used by this data source.
   * @param \CRM_Core_Form $form
   *
   * @throws \API_Exception
   * @throws \CRM_Core_Exception
   */
  public function postProcess(&$params, &$db, &$form) {
    $file = $params['uploadFile']['name'];
    $firstRowIsColumnHeader = $params['skipColumnHeader'] ?? FALSE;
    // Null-coalescing keeps the defaults even when a key is present but NULL,
    // so _CsvToTable() never receives a NULL separator.
    $result = self::_CsvToTable(
      $file,
      $firstRowIsColumnHeader,
      $params['import_table_name'] ?? NULL,
      $params['fieldSeparator'] ?? ','
    );

    $form->set('originalColHeader', $result['column_headers'] ?? NULL);
    $form->set('importTableName', $result['import_table_name']);
    $this->updateUserJobMetadata('DataSource', [
      'table_name' => $result['import_table_name'],
      'column_headers' => $firstRowIsColumnHeader ? $result['column_headers'] : [],
      'number_of_columns' => $result['number_of_columns'],
    ]);
  }

  /**
   * Create a table that matches the CSV file and populate it with the file's contents
   *
   * @param string $file
   *   File name to load.
   * @param bool $headers
   *   Whether the first row contains headers.
   * @param string $tableName
   *   Name of a previously created import table; when given it is dropped
   *   before the new table is built. The new table always gets a freshly
   *   generated name.
   * @param string $fieldSeparator
   *   Character that separates the various columns in the file. The literal
   *   strings 'tab' and '\t' (any case) are accepted as aliases for a tab.
   *
   * @return array
   *   Keys:
   *   - import_table_name: name of the created table.
   *   - number_of_columns: number of columns in that table.
   *   - column_headers: the unmodified first row (only when $headers is set).
   * @throws \CRM_Core_Exception
   *   When the file cannot be opened or read, or is empty.
   */
  private static function _CsvToTable(
    $file,
    $headers = FALSE,
    $tableName = NULL,
    $fieldSeparator = ','
  ) {
    $result = [];
    $fd = fopen($file, 'r');
    if (!$fd) {
      throw new CRM_Core_Exception("Could not read $file");
    }

    // Everything after a successful fopen() runs inside try/finally so the
    // handle is released even when validation or a query throws.
    try {
      if (filesize($file) == 0) {
        throw new CRM_Core_Exception("$file is empty. Please upload a valid file.");
      }

      // support tab separated
      if (in_array(strtolower($fieldSeparator), ['tab', '\t'], TRUE)) {
        $fieldSeparator = "\t";
      }

      $firstrow = fgetcsv($fd, 0, $fieldSeparator);
      if (!is_array($firstrow)) {
        // fgetcsv() signals a read failure with a non-array value.
        throw new CRM_Core_Exception("Could not read $file");
      }

      // create the column names from the CSV header or as col_0, col_1, etc.
      if ($headers) {
        // Keep the original headers for the caller.
        $result['column_headers'] = $firstrow;
        $columns = self::columnNamesFromHeaderRow($firstrow);
      }
      else {
        $columns = [];
        foreach (array_keys($firstrow) as $i) {
          $columns[] = "col_$i";
        }
      }

      // NOTE(review): $tableName comes from the caller and is interpolated
      // into the statement unescaped - confirm callers only pass trusted,
      // system-generated table names.
      if ($tableName) {
        CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
      }
      $table = CRM_Utils_SQL_TempTable::build()->setDurable();
      $tableName = $table->getName();
      CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
      $table->createWithColumns(implode(' text, ', $columns) . ' text');

      $numColumns = count($columns);

      // LOAD DATA LOCAL INFILE would be the proper approach, but some MySQL
      // installs do not have it enabled, so the file is parsed line by line
      // and inserted in batches of escaped value tuples instead.
      if (!$headers) {
        // The first row is data, not headers: read it again as a data row.
        rewind($fd);
      }

      $valueTuples = [];
      while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
        // Skip rows that don't match the column count (else we get a sql
        // error) and blank lines, which fgetcsv() reports as [NULL].
        if (count($row) != $numColumns || $row === [NULL]) {
          continue;
        }

        // CRM-17859 Trim non-breaking spaces from columns.
        $row = array_map([self::class, 'trimNonBreakingSpaces'], $row);
        $row = array_map(['CRM_Core_DAO', 'escapeString'], $row);
        $valueTuples[] = "('" . implode("', '", $row) . "')";

        if (count($valueTuples) >= self::NUM_ROWS_TO_INSERT) {
          self::insertValueTuples($tableName, $valueTuples);
          $valueTuples = [];
        }
      }

      // Flush whatever is left over from the last, partial batch.
      self::insertValueTuples($tableName, $valueTuples);
    }
    finally {
      fclose($fd);
    }

    $result['import_table_name'] = $tableName;
    $result['number_of_columns'] = $numColumns;
    return $result;
  }

  /**
   * Turn a CSV header row into quoted, unique MySQL column names.
   *
   * Names are lower-cased, spaces become underscores and every character
   * other than a-z and underscore is removed. Empty names become col_<index>;
   * if any two names collide, every non-empty name gets an _<index> suffix.
   *
   * @param array $headerRow
   *   First row of the CSV file.
   *
   * @return string[]
   *   Backtick-quoted column names, keyed like $headerRow.
   */
  private static function columnNamesFromHeaderRow(array $headerRow): array {
    $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
    $columns = array_map($strtolower, $headerRow);
    $columns = str_replace(' ', '_', $columns);
    $columns = preg_replace('/[^a-z_]/', '', $columns);

    // Truncate per mysql field name length limits: mysql allows 64, but we
    // need to leave room for appending the column key. CRM-9079
    foreach ($columns as $colKey => $colName) {
      if (strlen($colName) > 58) {
        $columns[$colKey] = substr($colName, 0, 58);
      }
    }

    // Look for duplicates only after truncating: two long headers that share
    // their first 58 characters collide as column names as well.
    $hasDuplicates = count($columns) != count(array_unique($columns));

    foreach ($columns as $colKey => $colName) {
      if (!$colName) {
        $colName = "col_$colKey";
      }
      elseif ($hasDuplicates) {
        $colName .= "_$colKey";
      }
      // CRM-4881: we need to quote column names, as they may be MySQL reserved words
      $columns[$colKey] = "`$colName`";
    }

    return $columns;
  }

  /**
   * Insert a batch of pre-escaped value tuples into the import table.
   *
   * @param string $tableName
   *   Table to insert into.
   * @param string[] $valueTuples
   *   Strings of the form ('a', 'b', ...), already SQL-escaped. An empty
   *   list results in no query.
   */
  private static function insertValueTuples($tableName, array $valueTuples) {
    if (!$valueTuples) {
      return;
    }
    $sql = implode(', ', $valueTuples);
    CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
  }

  /**
   * Trim non-breaking spaces in a multibyte-safe way.
   * See also dev/core#2127 - avoid breaking strings ending in à or any other
   * unicode character sharing the same 0xA0 byte as a non-breaking space.
   *
   * Strings detected as a non-UTF-8 encoding are converted to UTF-8 before
   * trimming, and the converted string is what gets returned.
   *
   * @param string $string
   * @return string The trimmed string
   */
  public static function trimNonBreakingSpaces(string $string): string {
    $encoding = mb_detect_encoding($string, NULL, TRUE);
    if ($encoding === FALSE) {
      // This could mean a couple things. One is that the string is
      // ASCII-encoded but contains a non-breaking space, which causes
      // php to fail to detect the encoding. So let's just do what we
      // did before which works in that situation and is at least no
      // worse in other situations.
      return trim($string, chr(0xC2) . chr(0xA0));
    }
    if ($encoding !== 'UTF-8') {
      $string = mb_convert_encoding($string, 'UTF-8', [$encoding]);
    }
    // "\u{a0}" expands to the UTF-8 byte pair for U+00A0, so only complete
    // non-breaking spaces at either end are removed.
    return preg_replace("/^(\u{a0})+|(\u{a0})+$/", '', $string);
  }

}