Make tablename a property of the datasource
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 */
17 class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
18 const
19 NUM_ROWS_TO_INSERT = 100;
20
21 /**
22 * Form fields declared for this datasource.
23 *
24 * @var string[]
25 */
26 protected $submittableFields = ['skipColumnHeader', 'uploadField'];
27
28 /**
29 * Provides information about the data source.
30 *
31 * @return array
32 * collection of info about this data source
33 */
34 public function getInfo(): array {
35 return ['title' => ts('Comma-Separated Values (CSV)')];
36 }
37
38 /**
39 * This is function is called by the form object to get the DataSource's form snippet.
40 *
41 * It should add all fields necessary to get the data
42 * uploaded to the temporary table in the DB.
43 *
44 * @param CRM_Core_Form $form
45 *
46 * @throws \CRM_Core_Exception
47 */
48 public function buildQuickForm(&$form) {
49 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
50
51 $uploadFileSize = CRM_Utils_Number::formatUnitSize(Civi::settings()->get('maxFileSize') . 'm', TRUE);
52 //Fetch uploadFileSize from php_ini when $config->maxFileSize is set to "no limit".
53 if (empty($uploadFileSize)) {
54 $uploadFileSize = CRM_Utils_Number::formatUnitSize(ini_get('upload_max_filesize'), TRUE);
55 }
56 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
57 $form->assign('uploadSize', $uploadSize);
58 $form->add('File', 'uploadFile', ts('Import Data File'), NULL, TRUE);
59 $form->setMaxFileSize($uploadFileSize);
60 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', [
61 1 => $uploadSize,
62 2 => $uploadFileSize,
63 ]), 'maxfilesize', $uploadFileSize);
64 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
65 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
66
67 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
68 }
69
70 /**
71 * Process the form submission.
72 *
73 * @param array $params
74 * @param string $db
75 * @param \CRM_Core_Form $form
76 *
77 * @throws \API_Exception
78 * @throws \CRM_Core_Exception
79 * @throws \API_Exception
80 */
81 public function postProcess(&$params, &$db, &$form) {
82 $result = self::_CsvToTable(
83 $this->getSubmittedValue('uploadFile')['name'],
84 $this->getSubmittedValue('skipColumnHeader'),
85 $this->getSubmittedValue('fieldSeparator') ?? ','
86 );
87 $this->addTrackingFieldsToTable($result['import_table_name']);
88
89 $this->updateUserJobMetadata('DataSource', [
90 'table_name' => $result['import_table_name'],
91 'column_headers' => $this->getSubmittedValue('skipColumnHeader') ? $result['column_headers'] : [],
92 'number_of_columns' => $result['number_of_columns'],
93 ]);
94 }
95
96 /**
97 * Create a table that matches the CSV file and populate it with the file's contents
98 *
99 * @param string $file
100 * File name to load.
101 * @param bool $headers
102 * Whether the first row contains headers.
103 * @param string $fieldSeparator
104 * Character that separates the various columns in the file.
105 *
106 * @return array
107 * name of the created table
108 * @throws \CRM_Core_Exception
109 */
110 private static function _CsvToTable(
111 $file,
112 $headers = FALSE,
113 $fieldSeparator = ','
114 ) {
115 $result = [];
116 $fd = fopen($file, 'r');
117 if (!$fd) {
118 throw new CRM_Core_Exception("Could not read $file");
119 }
120 if (filesize($file) == 0) {
121 throw new CRM_Core_Exception("$file is empty. Please upload a valid file.");
122 }
123
124 // support tab separated
125 if (strtolower($fieldSeparator) === 'tab' ||
126 strtolower($fieldSeparator) === '\t'
127 ) {
128 $fieldSeparator = "\t";
129 }
130
131 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
132
133 // create the column names from the CSV header or as col_0, col_1, etc.
134 if ($headers) {
135 //need to get original headers.
136 $result['column_headers'] = $firstrow;
137
138 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
139 $columns = array_map($strtolower, $firstrow);
140 $columns = str_replace(' ', '_', $columns);
141 $columns = preg_replace('/[^a-z_]/', '', $columns);
142
143 // need to take care of null as well as duplicate col names.
144 $duplicateColName = FALSE;
145 if (count($columns) != count(array_unique($columns))) {
146 $duplicateColName = TRUE;
147 }
148
149 // need to truncate values per mysql field name length limits
150 // mysql allows 64, but we need to account for appending colKey
151 // CRM-9079
152 foreach ($columns as $colKey => & $colName) {
153 if (strlen($colName) > 58) {
154 $colName = substr($colName, 0, 58);
155 }
156 }
157
158 if (in_array('', $columns) || $duplicateColName) {
159 foreach ($columns as $colKey => & $colName) {
160 if (!$colName) {
161 $colName = "col_$colKey";
162 }
163 elseif ($duplicateColName) {
164 $colName .= "_$colKey";
165 }
166 }
167 }
168
169 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
170 foreach ($columns as & $column) {
171 $column = "`$column`";
172 }
173 }
174 else {
175 $columns = [];
176 foreach ($firstrow as $i => $_) {
177 $columns[] = "col_$i";
178 }
179 }
180
181 $table = CRM_Utils_SQL_TempTable::build()->setDurable();
182 $tableName = $table->getName();
183 CRM_Core_DAO::executeQuery("DROP TABLE IF EXISTS $tableName");
184 $table->createWithColumns(implode(' text, ', $columns) . ' text');
185
186 $numColumns = count($columns);
187
188 // the proper approach, but some MySQL installs do not have this enabled
189 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
190 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
191 // $db->query($load);
192
193 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
194 if (!$headers) {
195 rewind($fd);
196 }
197
198 $sql = NULL;
199 $first = TRUE;
200 $count = 0;
201 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
202 // skip rows that dont match column count, else we get a sql error
203 if (count($row) != $numColumns) {
204 continue;
205 }
206 // A blank line will be array(0 => NULL)
207 if ($row === [NULL]) {
208 continue;
209 }
210
211 if (!$first) {
212 $sql .= ', ';
213 }
214
215 $first = FALSE;
216
217 // CRM-17859 Trim non-breaking spaces from columns.
218 $row = array_map(['CRM_Import_DataSource_CSV', 'trimNonBreakingSpaces'], $row);
219 $row = array_map(['CRM_Core_DAO', 'escapeString'], $row);
220 $sql .= "('" . implode("', '", $row) . "')";
221 $count++;
222
223 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
224 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
225
226 $sql = NULL;
227 $first = TRUE;
228 $count = 0;
229 }
230 }
231
232 if (!empty($sql)) {
233 CRM_Core_DAO::executeQuery("INSERT IGNORE INTO $tableName VALUES $sql");
234 }
235
236 fclose($fd);
237
238 //get the import tmp table name.
239 $result['import_table_name'] = $tableName;
240 $result['number_of_columns'] = $numColumns;
241 return $result;
242 }
243
244 /**
245 * Trim non-breaking spaces in a multibyte-safe way.
246 * See also dev/core#2127 - avoid breaking strings ending in à or any other
247 * unicode character sharing the same 0xA0 byte as a non-breaking space.
248 *
249 * @param string $string
250 * @return string The trimmed string
251 */
252 public static function trimNonBreakingSpaces(string $string): string {
253 $encoding = mb_detect_encoding($string, NULL, TRUE);
254 if ($encoding === FALSE) {
255 // This could mean a couple things. One is that the string is
256 // ASCII-encoded but contains a non-breaking space, which causes
257 // php to fail to detect the encoding. So let's just do what we
258 // did before which works in that situation and is at least no
259 // worse in other situations.
260 return trim($string, chr(0xC2) . chr(0xA0));
261 }
262 elseif ($encoding !== 'UTF-8') {
263 $string = mb_convert_encoding($string, 'UTF-8', [$encoding]);
264 }
265 return preg_replace("/^(\u{a0})+|(\u{a0})+$/", '', $string);
266 }
267
268 }