Merge pull request #10019 from eileenmcnaughton/cacheid
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
7e9e8871 4 | CiviCRM version 4.7 |
6a488035 5 +--------------------------------------------------------------------+
0f03f337 6 | Copyright CiviCRM LLC (c) 2004-2017 |
6a488035
TO
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
c73475ea 12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
6a488035
TO
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
d25dd0ee 26 */
6a488035
TO
27
28/**
29 *
30 * @package CRM
0f03f337 31 * @copyright CiviCRM LLC (c) 2004-2017
6a488035
TO
32 */
33class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
7da04cde 34 const
b0b2638a
DL
35 NUM_ROWS_TO_INSERT = 100;
36
e0ef6999 37 /**
fe482240 38 * Provides information about the data source.
e0ef6999 39 *
a6c01b45
CW
40 * @return array
41 * collection of info about this data source
e0ef6999 42 */
00be9182 43 public function getInfo() {
6a488035
TO
44 return array('title' => ts('Comma-Separated Values (CSV)'));
45 }
46
e0ef6999 47 /**
fe482240 48 * Set variables up before form is built.
3bdf1f3a 49 *
50 * @param CRM_Core_Form $form
e0ef6999 51 */
3a05d67e
TO
52 public function preProcess(&$form) {
53 }
6a488035 54
e0ef6999 55 /**
b8c71ffa 56 * This is function is called by the form object to get the DataSource's form snippet.
57 *
58 * It should add all fields necessary to get the data
e0ef6999
EM
59 * uploaded to the temporary table in the DB.
60 *
c490a46a 61 * @param CRM_Core_Form $form
e0ef6999 62 */
00be9182 63 public function buildQuickForm(&$form) {
6a488035
TO
64 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
65
66 $config = CRM_Core_Config::singleton();
67
2e966dd5 68 $uploadFileSize = CRM_Utils_Number::formatUnitSize($config->maxFileSize . 'm', TRUE);
6a488035
TO
69 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
70 $form->assign('uploadSize', $uploadSize);
66dc6009 71 $form->add('File', 'uploadFile', ts('Import Data File'), 'size=30 maxlength=255', TRUE);
6a488035 72 $form->setMaxFileSize($uploadFileSize);
353ffa53
TO
73 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', array(
74 1 => $uploadSize,
bed98343 75 2 => $uploadFileSize,
353ffa53 76 )), 'maxfilesize', $uploadFileSize);
6a488035
TO
77 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
78 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
79
80 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
81 }
82
e0ef6999 83 /**
fe482240 84 * Process the form submission.
54957108 85 *
86 * @param array $params
87 * @param string $db
88 * @param \CRM_Core_Form $form
e0ef6999 89 */
00be9182 90 public function postProcess(&$params, &$db, &$form) {
6a488035 91 $file = $params['uploadFile']['name'];
6a488035
TO
92 $result = self::_CsvToTable($db,
93 $file,
94 CRM_Utils_Array::value('skipColumnHeader', $params, FALSE),
95 CRM_Utils_Array::value('import_table_name', $params),
96 CRM_Utils_Array::value('fieldSeparator', $params, ',')
97 );
98
99 $form->set('originalColHeader', CRM_Utils_Array::value('original_col_header', $result));
100
101 $table = $result['import_table_name'];
719a6fec 102 $importJob = new CRM_Contact_Import_ImportJob($table);
6a488035
TO
103 $form->set('importTableName', $importJob->getTableName());
104 }
105
106 /**
107 * Create a table that matches the CSV file and populate it with the file's contents
108 *
6f69cc11
TO
109 * @param object $db
110 * Handle to the database connection.
111 * @param string $file
112 * File name to load.
113 * @param bool $headers
114 * Whether the first row contains headers.
115 * @param string $table
116 * Name of table from which data imported.
117 * @param string $fieldSeparator
b44e3f84 118 * Character that separates the various columns in the file.
6a488035 119 *
a6c01b45
CW
120 * @return string
121 * name of the created table
6a488035 122 */
bd5d7c2b
TO
123 private static function _CsvToTable(
124 &$db,
6a488035 125 $file,
3a05d67e
TO
126 $headers = FALSE,
127 $table = NULL,
6a488035
TO
128 $fieldSeparator = ','
129 ) {
130 $result = array();
131 $fd = fopen($file, 'r');
132 if (!$fd) {
133 CRM_Core_Error::fatal("Could not read $file");
134 }
3bf4c8a0
SB
135 if (filesize($file) == 0) {
136 CRM_Core_Error::fatal("$file is empty. Please upload a valid file.");
137 }
6a488035
TO
138
139 $config = CRM_Core_Config::singleton();
140 // support tab separated
141 if (strtolower($fieldSeparator) == 'tab' ||
142 strtolower($fieldSeparator) == '\t'
143 ) {
144 $fieldSeparator = "\t";
145 }
146
147 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
148
149 // create the column names from the CSV header or as col_0, col_1, etc.
150 if ($headers) {
151 //need to get original headers.
152 $result['original_col_header'] = $firstrow;
153
154 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
353ffa53
TO
155 $columns = array_map($strtolower, $firstrow);
156 $columns = str_replace(' ', '_', $columns);
157 $columns = preg_replace('/[^a-z_]/', '', $columns);
6a488035
TO
158
159 // need to take care of null as well as duplicate col names.
160 $duplicateColName = FALSE;
161 if (count($columns) != count(array_unique($columns))) {
162 $duplicateColName = TRUE;
163 }
164
6a488035
TO
165 // need to truncate values per mysql field name length limits
166 // mysql allows 64, but we need to account for appending colKey
167 // CRM-9079
168 foreach ($columns as $colKey => & $colName) {
169 if (strlen($colName) > 58) {
170 $colName = substr($colName, 0, 58);
171 }
172 }
173
174 if (in_array('', $columns) || $duplicateColName) {
175 foreach ($columns as $colKey => & $colName) {
176 if (!$colName) {
177 $colName = "col_$colKey";
178 }
179 elseif ($duplicateColName) {
180 $colName .= "_$colKey";
181 }
182 }
183 }
184
185 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
bd5d7c2b
TO
186 foreach ($columns as & $column) {
187 $column = "`$column`";
3a05d67e 188 }
6a488035
TO
189 }
190 else {
191 $columns = array();
bd5d7c2b
TO
192 foreach ($firstrow as $i => $_) {
193 $columns[] = "col_$i";
3a05d67e 194 }
6a488035
TO
195 }
196
197 // FIXME: we should regen this table's name if it exists rather than drop it
198 if (!$table) {
199 $table = 'civicrm_import_job_' . md5(uniqid(rand(), TRUE));
200 }
201
202 $db->query("DROP TABLE IF EXISTS $table");
203
204 $numColumns = count($columns);
205 $create = "CREATE TABLE $table (" . implode(' text, ', $columns) . " text) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci";
206 $db->query($create);
207
208 // the proper approach, but some MySQL installs do not have this enabled
209 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
210 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
211 // $db->query($load);
212
213 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
214 if (!$headers) {
215 rewind($fd);
216 }
217
353ffa53 218 $sql = NULL;
6a488035
TO
219 $first = TRUE;
220 $count = 0;
221 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
222 // skip rows that dont match column count, else we get a sql error
223 if (count($row) != $numColumns) {
224 continue;
225 }
226
227 if (!$first) {
228 $sql .= ', ';
229 }
230
231 $first = FALSE;
5dcdc4d6
SB
232
233 // CRM-17859 Trim non-breaking spaces from columns.
234 $row = array_map(
235 function($string) {
236 return trim($string, chr(0xC2) . chr(0xA0));
237 }, $row);
8f56d1f5 238 $row = array_map(array('CRM_Core_DAO', 'escapeString'), $row);
6a488035
TO
239 $sql .= "('" . implode("', '", $row) . "')";
240 $count++;
241
242 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
243 $sql = "INSERT IGNORE INTO $table VALUES $sql";
244 $db->query($sql);
245
353ffa53 246 $sql = NULL;
6a488035
TO
247 $first = TRUE;
248 $count = 0;
249 }
250 }
251
252 if (!empty($sql)) {
253 $sql = "INSERT IGNORE INTO $table VALUES $sql";
254 $db->query($sql);
255 }
256
257 fclose($fd);
258
259 //get the import tmp table name.
260 $result['import_table_name'] = $table;
261
262 return $result;
263 }
96025800 264
6a488035 265}