Merge pull request #4696 from colemanw/CRM-15669
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2009. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35 class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
36 const
37 NUM_ROWS_TO_INSERT = 100;
38
39 /**
40 * Provides information about the data source
41 *
42 * @return array collection of info about this data source
43 *
44 *
45 */
46 public function getInfo() {
47 return array('title' => ts('Comma-Separated Values (CSV)'));
48 }
49
50 /**
51 * Set variables up before form is built
52 *
53 */
54 public function preProcess(&$form) {}
55
56 /**
57 * This is function is called by the form object to get the DataSource's
58 * form snippet. It should add all fields necesarry to get the data
59 * uploaded to the temporary table in the DB.
60 *
61 * @param CRM_Core_Form $form
62 *
63 * @return void (operates directly on form argument)
64 */
65 public function buildQuickForm(&$form) {
66 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
67
68 $config = CRM_Core_Config::singleton();
69
70 $uploadFileSize = CRM_Core_Config_Defaults::formatUnitSize($config->maxFileSize.'m', TRUE);
71 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
72 $form->assign('uploadSize', $uploadSize);
73 $form->add('File', 'uploadFile', ts('Import Data File'), 'size=30 maxlength=255', TRUE);
74 $form->setMaxFileSize($uploadFileSize);
75 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', array(1 => $uploadSize, 2 => $uploadFileSize)), 'maxfilesize', $uploadFileSize);
76 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
77 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
78
79 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
80 }
81
82 /**
83 * Process the form submission
84 *
85 */
86 public function postProcess(&$params, &$db, &$form) {
87 $file = $params['uploadFile']['name'];
88 $result = self::_CsvToTable($db,
89 $file,
90 CRM_Utils_Array::value('skipColumnHeader', $params, FALSE),
91 CRM_Utils_Array::value('import_table_name', $params),
92 CRM_Utils_Array::value('fieldSeparator', $params, ',')
93 );
94
95 $form->set('originalColHeader', CRM_Utils_Array::value('original_col_header', $result));
96
97 $table = $result['import_table_name'];
98 $importJob = new CRM_Contact_Import_ImportJob($table);
99 $form->set('importTableName', $importJob->getTableName());
100 }
101
102 /**
103 * Create a table that matches the CSV file and populate it with the file's contents
104 *
105 * @param object $db handle to the database connection
106 * @param string $file file name to load
107 * @param bool $headers whether the first row contains headers
108 * @param string $table Name of table from which data imported.
109 * @param string $fieldSeparator Character that seperates the various columns in the file
110 *
111 * @return string name of the created table
112 */
113 private static function _CsvToTable(&$db,
114 $file,
115 $headers = FALSE,
116 $table = NULL,
117 $fieldSeparator = ','
118 ) {
119 $result = array();
120 $fd = fopen($file, 'r');
121 if (!$fd) {
122 CRM_Core_Error::fatal("Could not read $file");
123 }
124
125 $config = CRM_Core_Config::singleton();
126 // support tab separated
127 if (strtolower($fieldSeparator) == 'tab' ||
128 strtolower($fieldSeparator) == '\t'
129 ) {
130 $fieldSeparator = "\t";
131 }
132
133 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
134
135 // create the column names from the CSV header or as col_0, col_1, etc.
136 if ($headers) {
137 //need to get original headers.
138 $result['original_col_header'] = $firstrow;
139
140 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
141 $columns = array_map($strtolower, $firstrow);
142 $columns = str_replace(' ', '_', $columns);
143 $columns = preg_replace('/[^a-z_]/', '', $columns);
144
145 // need to take care of null as well as duplicate col names.
146 $duplicateColName = FALSE;
147 if (count($columns) != count(array_unique($columns))) {
148 $duplicateColName = TRUE;
149 }
150
151
152 // need to truncate values per mysql field name length limits
153 // mysql allows 64, but we need to account for appending colKey
154 // CRM-9079
155 foreach ($columns as $colKey => & $colName) {
156 if (strlen($colName) > 58) {
157 $colName = substr($colName, 0, 58);
158 }
159 }
160
161 if (in_array('', $columns) || $duplicateColName) {
162 foreach ($columns as $colKey => & $colName) {
163 if (!$colName) {
164 $colName = "col_$colKey";
165 }
166 elseif ($duplicateColName) {
167 $colName .= "_$colKey";
168 }
169 }
170 }
171
172 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
173 foreach ($columns as & $column) $column = "`$column`";
174 }
175 else {
176 $columns = array();
177 foreach ($firstrow as $i => $_) $columns[] = "col_$i";
178 }
179
180 // FIXME: we should regen this table's name if it exists rather than drop it
181 if (!$table) {
182 $table = 'civicrm_import_job_' . md5(uniqid(rand(), TRUE));
183 }
184
185 $db->query("DROP TABLE IF EXISTS $table");
186
187 $numColumns = count($columns);
188 $create = "CREATE TABLE $table (" . implode(' text, ', $columns) . " text) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci";
189 $db->query($create);
190
191 // the proper approach, but some MySQL installs do not have this enabled
192 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
193 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
194 // $db->query($load);
195
196 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
197 if (!$headers) {
198 rewind($fd);
199 }
200
201 $sql = NULL;
202 $first = TRUE;
203 $count = 0;
204 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
205 // skip rows that dont match column count, else we get a sql error
206 if (count($row) != $numColumns) {
207 continue;
208 }
209
210 if (!$first) {
211 $sql .= ', ';
212 }
213
214 $first = FALSE;
215 $row = array_map('civicrm_mysql_real_escape_string', $row);
216 $sql .= "('" . implode("', '", $row) . "')";
217 $count++;
218
219 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
220 $sql = "INSERT IGNORE INTO $table VALUES $sql";
221 $db->query($sql);
222
223 $sql = NULL;
224 $first = TRUE;
225 $count = 0;
226 }
227 }
228
229 if (!empty($sql)) {
230 $sql = "INSERT IGNORE INTO $table VALUES $sql";
231 $db->query($sql);
232 }
233
234 fclose($fd);
235
236 //get the import tmp table name.
237 $result['import_table_name'] = $table;
238
239 return $result;
240 }
241 }
242
243 /**
244 * @param $string
245 *
246 * @return string
247 */
248 function civicrm_mysql_real_escape_string($string) {
249 static $dao = NULL;
250 if (!$dao) {
251 $dao = new CRM_Core_DAO();
252 }
253 return $dao->escape($string);
254 }