CRM-16373 - Move formatUnitSize() from CRM_Core_Config to CRM_Utils_Number
[civicrm-core.git] / CRM / Import / DataSource / CSV.php
CommitLineData
6a488035
TO
1<?php
2/*
3 +--------------------------------------------------------------------+
7e9e8871 4 | CiviCRM version 4.7 |
6a488035 5 +--------------------------------------------------------------------+
e7112fa7 6 | Copyright CiviCRM LLC (c) 2004-2015 |
6a488035
TO
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
c73475ea 12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
6a488035
TO
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
d25dd0ee 26 */
6a488035
TO
27
28/**
29 *
30 * @package CRM
e7112fa7 31 * @copyright CiviCRM LLC (c) 2004-2015
6a488035
TO
32 * $Id$
33 *
34 */
35class CRM_Import_DataSource_CSV extends CRM_Import_DataSource {
7da04cde 36 const
b0b2638a
DL
37 NUM_ROWS_TO_INSERT = 100;
38
e0ef6999 39 /**
fe482240 40 * Provides information about the data source.
e0ef6999 41 *
a6c01b45
CW
42 * @return array
43 * collection of info about this data source
e0ef6999 44 */
00be9182 45 public function getInfo() {
6a488035
TO
46 return array('title' => ts('Comma-Separated Values (CSV)'));
47 }
48
e0ef6999 49 /**
fe482240 50 * Set variables up before form is built.
e0ef6999 51 */
3a05d67e
TO
52 public function preProcess(&$form) {
53 }
6a488035 54
e0ef6999
EM
55 /**
56 * This is function is called by the form object to get the DataSource's
57 * form snippet. It should add all fields necesarry to get the data
58 * uploaded to the temporary table in the DB.
59 *
c490a46a 60 * @param CRM_Core_Form $form
e0ef6999 61 *
a6c01b45
CW
62 * @return void
63 * (operates directly on form argument)
e0ef6999 64 */
00be9182 65 public function buildQuickForm(&$form) {
6a488035
TO
66 $form->add('hidden', 'hidden_dataSource', 'CRM_Import_DataSource_CSV');
67
68 $config = CRM_Core_Config::singleton();
69
2e966dd5 70 $uploadFileSize = CRM_Utils_Number::formatUnitSize($config->maxFileSize . 'm', TRUE);
6a488035
TO
71 $uploadSize = round(($uploadFileSize / (1024 * 1024)), 2);
72 $form->assign('uploadSize', $uploadSize);
66dc6009 73 $form->add('File', 'uploadFile', ts('Import Data File'), 'size=30 maxlength=255', TRUE);
6a488035 74 $form->setMaxFileSize($uploadFileSize);
353ffa53
TO
75 $form->addRule('uploadFile', ts('File size should be less than %1 MBytes (%2 bytes)', array(
76 1 => $uploadSize,
bed98343 77 2 => $uploadFileSize,
353ffa53 78 )), 'maxfilesize', $uploadFileSize);
6a488035
TO
79 $form->addRule('uploadFile', ts('Input file must be in CSV format'), 'utf8File');
80 $form->addRule('uploadFile', ts('A valid file must be uploaded.'), 'uploadedfile');
81
82 $form->addElement('checkbox', 'skipColumnHeader', ts('First row contains column headers'));
83 }
84
e0ef6999 85 /**
fe482240 86 * Process the form submission.
e0ef6999 87 */
00be9182 88 public function postProcess(&$params, &$db, &$form) {
6a488035 89 $file = $params['uploadFile']['name'];
6a488035
TO
90 $result = self::_CsvToTable($db,
91 $file,
92 CRM_Utils_Array::value('skipColumnHeader', $params, FALSE),
93 CRM_Utils_Array::value('import_table_name', $params),
94 CRM_Utils_Array::value('fieldSeparator', $params, ',')
95 );
96
97 $form->set('originalColHeader', CRM_Utils_Array::value('original_col_header', $result));
98
99 $table = $result['import_table_name'];
719a6fec 100 $importJob = new CRM_Contact_Import_ImportJob($table);
6a488035
TO
101 $form->set('importTableName', $importJob->getTableName());
102 }
103
104 /**
105 * Create a table that matches the CSV file and populate it with the file's contents
106 *
6f69cc11
TO
107 * @param object $db
108 * Handle to the database connection.
109 * @param string $file
110 * File name to load.
111 * @param bool $headers
112 * Whether the first row contains headers.
113 * @param string $table
114 * Name of table from which data imported.
115 * @param string $fieldSeparator
b44e3f84 116 * Character that separates the various columns in the file.
6a488035 117 *
a6c01b45
CW
118 * @return string
119 * name of the created table
6a488035 120 */
bd5d7c2b
TO
121 private static function _CsvToTable(
122 &$db,
6a488035 123 $file,
3a05d67e
TO
124 $headers = FALSE,
125 $table = NULL,
6a488035
TO
126 $fieldSeparator = ','
127 ) {
128 $result = array();
129 $fd = fopen($file, 'r');
130 if (!$fd) {
131 CRM_Core_Error::fatal("Could not read $file");
132 }
133
134 $config = CRM_Core_Config::singleton();
135 // support tab separated
136 if (strtolower($fieldSeparator) == 'tab' ||
137 strtolower($fieldSeparator) == '\t'
138 ) {
139 $fieldSeparator = "\t";
140 }
141
142 $firstrow = fgetcsv($fd, 0, $fieldSeparator);
143
144 // create the column names from the CSV header or as col_0, col_1, etc.
145 if ($headers) {
146 //need to get original headers.
147 $result['original_col_header'] = $firstrow;
148
149 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
353ffa53
TO
150 $columns = array_map($strtolower, $firstrow);
151 $columns = str_replace(' ', '_', $columns);
152 $columns = preg_replace('/[^a-z_]/', '', $columns);
6a488035
TO
153
154 // need to take care of null as well as duplicate col names.
155 $duplicateColName = FALSE;
156 if (count($columns) != count(array_unique($columns))) {
157 $duplicateColName = TRUE;
158 }
159
6a488035
TO
160 // need to truncate values per mysql field name length limits
161 // mysql allows 64, but we need to account for appending colKey
162 // CRM-9079
163 foreach ($columns as $colKey => & $colName) {
164 if (strlen($colName) > 58) {
165 $colName = substr($colName, 0, 58);
166 }
167 }
168
169 if (in_array('', $columns) || $duplicateColName) {
170 foreach ($columns as $colKey => & $colName) {
171 if (!$colName) {
172 $colName = "col_$colKey";
173 }
174 elseif ($duplicateColName) {
175 $colName .= "_$colKey";
176 }
177 }
178 }
179
180 // CRM-4881: we need to quote column names, as they may be MySQL reserved words
bd5d7c2b
TO
181 foreach ($columns as & $column) {
182 $column = "`$column`";
3a05d67e 183 }
6a488035
TO
184 }
185 else {
186 $columns = array();
bd5d7c2b
TO
187 foreach ($firstrow as $i => $_) {
188 $columns[] = "col_$i";
3a05d67e 189 }
6a488035
TO
190 }
191
192 // FIXME: we should regen this table's name if it exists rather than drop it
193 if (!$table) {
194 $table = 'civicrm_import_job_' . md5(uniqid(rand(), TRUE));
195 }
196
197 $db->query("DROP TABLE IF EXISTS $table");
198
199 $numColumns = count($columns);
200 $create = "CREATE TABLE $table (" . implode(' text, ', $columns) . " text) ENGINE=InnoDB DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci";
201 $db->query($create);
202
203 // the proper approach, but some MySQL installs do not have this enabled
204 // $load = "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table FIELDS TERMINATED BY '$fieldSeparator' OPTIONALLY ENCLOSED BY '\"'";
205 // if ($headers) { $load .= ' IGNORE 1 LINES'; }
206 // $db->query($load);
207
208 // parse the CSV line by line and build one big INSERT (while MySQL-escaping the CSV contents)
209 if (!$headers) {
210 rewind($fd);
211 }
212
353ffa53 213 $sql = NULL;
6a488035
TO
214 $first = TRUE;
215 $count = 0;
216 while ($row = fgetcsv($fd, 0, $fieldSeparator)) {
217 // skip rows that dont match column count, else we get a sql error
218 if (count($row) != $numColumns) {
219 continue;
220 }
221
222 if (!$first) {
223 $sql .= ', ';
224 }
225
226 $first = FALSE;
227 $row = array_map('civicrm_mysql_real_escape_string', $row);
228 $sql .= "('" . implode("', '", $row) . "')";
229 $count++;
230
231 if ($count >= self::NUM_ROWS_TO_INSERT && !empty($sql)) {
232 $sql = "INSERT IGNORE INTO $table VALUES $sql";
233 $db->query($sql);
234
353ffa53 235 $sql = NULL;
6a488035
TO
236 $first = TRUE;
237 $count = 0;
238 }
239 }
240
241 if (!empty($sql)) {
242 $sql = "INSERT IGNORE INTO $table VALUES $sql";
243 $db->query($sql);
244 }
245
246 fclose($fd);
247
248 //get the import tmp table name.
249 $result['import_table_name'] = $table;
250
251 return $result;
252 }
96025800 253
6a488035
TO
254}
255
e0ef6999
EM
256/**
257 * @param $string
258 *
259 * @return string
260 */
6a488035
TO
261function civicrm_mysql_real_escape_string($string) {
262 static $dao = NULL;
263 if (!$dao) {
264 $dao = new CRM_Core_DAO();
265 }
266 return $dao->escape($string);
267}