Merge pull request #10998 from michaelmcandrew/CRM-21197
[civicrm-core.git] / CRM / Import / Parser.php
CommitLineData
ec3811b1
CW
1<?php
2/*
3 +--------------------------------------------------------------------+
7e9e8871 4 | CiviCRM version 4.7 |
ec3811b1 5 +--------------------------------------------------------------------+
0f03f337 6 | Copyright CiviCRM LLC (c) 2004-2017 |
ec3811b1
CW
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
d25dd0ee 26 */
ec3811b1
CW
27
28/**
29 *
30 * @package CRM
0f03f337 31 * @copyright CiviCRM LLC (c) 2004-2017
ec3811b1 32 */
ec3811b1
CW
/**
 * Abstract base class for import parsers.
 *
 * Holds the counters, field lists and output-file names shared by the
 * concrete parsers, plus helpers for loading a row into the active fields,
 * reporting progress, and naming the error/conflict/duplicate output files.
 * Concrete parsers supply the per-row behaviour through the abstract
 * init/fini/mapField/preview/summary/import methods.
 */
abstract class CRM_Import_Parser {
  /**
   * Settings
   */
  const MAX_WARNINGS = 25, DEFAULT_TIMEOUT = 30;

  /**
   * Return codes
   */
  const VALID = 1, WARNING = 2, ERROR = 4, CONFLICT = 8, STOP = 16, DUPLICATE = 32, MULTIPLE_DUPE = 64, NO_MATCH = 128, UNPARSED_ADDRESS_WARNING = 256;

  /**
   * Parser modes
   */
  const MODE_MAPFIELD = 1, MODE_PREVIEW = 2, MODE_SUMMARY = 4, MODE_IMPORT = 8;

  /**
   * Codes for duplicate record handling
   */
  const DUPLICATE_SKIP = 1, DUPLICATE_REPLACE = 2, DUPLICATE_UPDATE = 4, DUPLICATE_FILL = 8, DUPLICATE_NOCHECK = 16;

  /**
   * Contact types
   */
  const CONTACT_INDIVIDUAL = 1, CONTACT_HOUSEHOLD = 2, CONTACT_ORGANIZATION = 4;

  /**
   * Total number of non empty lines
   *
   * @var int
   */
  protected $_totalCount;

  /**
   * Running total number of valid lines
   *
   * @var int
   */
  protected $_validCount;

  /**
   * Running total number of invalid rows
   *
   * @var int
   */
  protected $_invalidRowCount;

  /**
   * Maximum number of non-empty/comment lines to process
   *
   * Initialised to 0 by the constructor; changed via setMaxLinesToProcess().
   *
   * @var int
   */
  protected $_maxLinesToProcess;

  /**
   * Array of error lines
   *
   * @var array
   */
  protected $_errors;

  /**
   * Total number of conflict lines
   *
   * @var int
   */
  protected $_conflictCount;

  /**
   * Array of conflict lines
   *
   * @var array
   */
  protected $_conflicts;

  /**
   * Total number of duplicate (from database) lines
   *
   * @var int
   */
  protected $_duplicateCount;

  /**
   * Array of duplicate lines
   *
   * @var array
   */
  protected $_duplicates;

  /**
   * Running total number of warnings
   *
   * @var int
   */
  protected $_warningCount;

  /**
   * Maximum number of warnings to store
   *
   * @var int
   */
  protected $_maxWarningCount = self::MAX_WARNINGS;

  /**
   * Array of warning lines
   *
   * Intended to be bounded by $_maxWarningCount; the bound is not enforced
   * anywhere in this base class.
   *
   * @var array
   */
  protected $_warnings;

  /**
   * Array of all the fields that could potentially be part
   * of this import process
   *
   * @var array
   */
  protected $_fields;

  /**
   * Array of the fields that are actually part of the import process
   * the position in the array also dictates their position in the import
   * file
   *
   * @var array
   */
  protected $_activeFields;

  /**
   * Cache the count of active fields
   *
   * @var int
   */
  protected $_activeFieldCount;

  /**
   * Cache of preview rows
   *
   * @var array
   */
  protected $_rows;

  /**
   * Filename of error data
   *
   * @var string
   */
  protected $_errorFileName;

  /**
   * Filename of conflict data
   *
   * @var string
   */
  protected $_conflictFileName;

  /**
   * Filename of duplicate data
   *
   * @var string
   */
  protected $_duplicateFileName;

  /**
   * Contact type
   *
   * Copied verbatim into the 'contact_type' key by checkContactDuplicate().
   *
   * @var int
   */
  public $_contactType;

  /**
   * Contact sub-type
   *
   * @var int
   */
  public $_contactSubType;

  /**
   * Class constructor.
   */
  public function __construct() {
    $this->_maxLinesToProcess = 0;
  }

  /**
   * Abstract function definitions.
   */
  abstract protected function init();

  /**
   * @return mixed
   */
  abstract protected function fini();

  /**
   * Map field.
   *
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function mapField(&$values);

  /**
   * Preview.
   *
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function preview(&$values);

  /**
   * Summary.
   *
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function summary(&$values);

  /**
   * Import.
   *
   * @param int $onDuplicate
   *   Presumably one of the DUPLICATE_* codes - confirm against subclasses.
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function import($onDuplicate, &$values);

  /**
   * Set and validate field values.
   *
   * Loads the row into the active fields by position, resets any active
   * field the row has no element for, then validates the fields in order and
   * stops at the first one that fails.
   *
   * @param array $elements
   *   Row values, indexed from 0 in the same order as the active fields.
   * @param int $erroneousField
   *   (reference) Set to the index of the first field that failed
   *   validation; left untouched when every field validates.
   *
   * @return int
   *   self::VALID when every active field validates, self::ERROR otherwise.
   */
  public function setActiveFieldValues($elements, &$erroneousField) {
    $maxCount = count($elements) < $this->_activeFieldCount ? count($elements) : $this->_activeFieldCount;
    for ($i = 0; $i < $maxCount; $i++) {
      $this->_activeFields[$i]->setValue($elements[$i]);
    }

    // reset all the values that we did not have an equivalent import element
    for (; $i < $this->_activeFieldCount; $i++) {
      $this->_activeFields[$i]->resetValue();
    }

    // now validate the fields and report the first failure
    $valid = self::VALID;
    for ($i = 0; $i < $this->_activeFieldCount; $i++) {
      if (!$this->_activeFields[$i]->validate()) {
        // no need to do any more validation
        $erroneousField = $i;
        $valid = self::ERROR;
        break;
      }
    }
    return $valid;
  }

  /**
   * Format the field values for input to the api.
   *
   * A field contributes only when it has a value, is not flagged as
   * `_related`, and no earlier field already supplied the same name (the
   * first occurrence wins).
   *
   * @return array
   *   (reference) associative array of name/value pairs
   */
  public function &getActiveFieldParams() {
    $params = array();
    for ($i = 0; $i < $this->_activeFieldCount; $i++) {
      if (isset($this->_activeFields[$i]->_value)
        && !isset($params[$this->_activeFields[$i]->_name])
        && !isset($this->_activeFields[$i]->_related)
      ) {
        $params[$this->_activeFields[$i]->_name] = $this->_activeFields[$i]->_value;
      }
    }
    return $params;
  }

  /**
   * Add progress bar to the import process. Calculates time remaining, status etc.
   *
   * Writes a JSON pair (percent complete, HTML status snippet) to
   * `status_<statusID>.txt` in the configured upload directory.
   *
   * @param string $statusID
   *   Status id of the import process; used to name the status file saved in
   *   $config->uploadDir.
   * @param bool $startImport
   *   True when progress bar is to be initiated.
   * @param int $startTimestamp
   *   Initial timestamp when the import was started.
   * @param int $prevTimestamp
   *   Previous timestamp when this function was last called.
   * @param int $totalRowCount
   *   Total number of rows in the import file.
   *
   * @return int|null
   *   The current timestamp when reporting progress, NULL when initialising.
   */
  public function progressImport($statusID, $startImport = TRUE, $startTimestamp = NULL, $prevTimestamp = NULL, $totalRowCount = NULL) {
    $config = CRM_Core_Config::singleton();
    $statusFile = "{$config->uploadDir}status_{$statusID}.txt";

    if ($startImport) {
      $status = "<div class='description'>&nbsp; " . ts('No processing status reported yet.') . "</div>";
      //do not force the browser to display the save dialog, CRM-7640
      $contents = json_encode(array(0, $status));
      file_put_contents($statusFile, $contents);
      return NULL;
    }

    // Neither _rowCount nor _lineCount is declared in this class; they are
    // presumably maintained by the concrete parser - confirm against subclasses.
    $rowCount = isset($this->_rowCount) ? $this->_rowCount : $this->_lineCount;
    $currTimestamp = time();
    $totalTime = ($currTimestamp - $startTimestamp);
    $time = ($currTimestamp - $prevTimestamp);
    $recordsLeft = $totalRowCount - $rowCount;
    if ($recordsLeft < 0) {
      $recordsLeft = 0;
    }
    // The estimate treats $time as the cost of one batch of 50 records.
    $estimatedTime = ($recordsLeft / 50) * $time;
    $estMinutes = floor($estimatedTime / 60);
    $timeFormatted = '';
    if ($estMinutes > 1) {
      $timeFormatted = $estMinutes . ' ' . ts('minutes') . ' ';
      $estimatedTime = $estimatedTime - ($estMinutes * 60);
    }
    $timeFormatted .= round($estimatedTime) . ' ' . ts('seconds');
    // $totalRowCount defaults to NULL and may legitimately be 0 (empty file);
    // report 0% instead of dividing by zero.
    $processedPercent = $totalRowCount > 0 ? (int) (($rowCount * 100) / $totalRowCount) : 0;
    $statusMsg = ts('%1 of %2 records - %3 remaining',
      array(1 => $rowCount, 2 => $totalRowCount, 3 => $timeFormatted)
    );
    $status = "<div class=\"description\">&nbsp; <strong>{$statusMsg}</strong></div>";
    $contents = json_encode(array($processedPercent, $status));

    file_put_contents($statusFile, $contents);
    return $currTimestamp;
  }

  /**
   * Get the title of every available field.
   *
   * @return array
   *   Field titles keyed by field name.
   */
  public function getSelectValues() {
    $values = array();
    foreach ($this->_fields as $name => $field) {
      $values[$name] = $field->_title;
    }
    return $values;
  }

  /**
   * Get the location-type flag of every field that defines one.
   *
   * @return array
   *   `_hasLocationType` values keyed by field name; fields where it is not
   *   set are omitted.
   */
  public function getSelectTypes() {
    $values = array();
    foreach ($this->_fields as $name => $field) {
      if (isset($field->_hasLocationType)) {
        $values[$name] = $field->_hasLocationType;
      }
    }
    return $values;
  }

  /**
   * Get the header pattern of every field that defines one.
   *
   * @return array
   *   `_headerPattern` values keyed by field name; fields where it is not set
   *   are omitted.
   */
  public function getHeaderPatterns() {
    $values = array();
    foreach ($this->_fields as $name => $field) {
      if (isset($field->_headerPattern)) {
        $values[$name] = $field->_headerPattern;
      }
    }
    return $values;
  }

  /**
   * Get the data pattern of every field.
   *
   * Unlike getHeaderPatterns(), every field gets an entry, even when its
   * pattern is not set.
   *
   * @return array
   *   `_dataPattern` values keyed by field name.
   */
  public function getDataPatterns() {
    $values = array();
    foreach ($this->_fields as $name => $field) {
      $values[$name] = $field->_dataPattern;
    }
    return $values;
  }

  /**
   * Remove enclosure characters (single quotes by default) from a value array (row).
   *
   * A value is changed only when it both starts and ends with the enclosure.
   *
   * @param array $values
   *   (reference) Row values; modified in place.
   * @param string $enclosure
   *   Literal enclosure string. It is escaped before being used in the
   *   pattern, so regex metacharacters such as "|" or "/" are safe.
   *
   * @return void
   */
  public static function encloseScrub(&$values, $enclosure = "'") {
    if (empty($values)) {
      return;
    }

    // Escape once, outside the loop: the enclosure is data, not a pattern.
    $quoted = preg_quote($enclosure, '/');
    $pattern = "/^{$quoted}(.*){$quoted}$/";
    foreach ($values as $k => $v) {
      $values[$k] = preg_replace($pattern, '$1', $v);
    }
  }

  /**
   * Setter function.
   *
   * @param int $max
   *
   * @return void
   */
  public function setMaxLinesToProcess($max) {
    $this->_maxLinesToProcess = $max;
  }

  /**
   * Determines the path of the output file for a result code.
   *
   * The path is `<uploadDir>sqlImport` plus a suffix chosen by the code. A
   * code without a dedicated suffix yields the bare `<uploadDir>sqlImport`.
   *
   * @param int $type
   *   Error code constant (self::ERROR, self::CONFLICT, ...).
   *
   * @return string|null
   *   The file path, or NULL when $type is empty.
   */
  public static function errorFileName($type) {
    $fileName = NULL;
    if (empty($type)) {
      return $fileName;
    }

    $config = CRM_Core_Config::singleton();
    $fileName = $config->uploadDir . "sqlImport";
    switch ($type) {
      case self::ERROR:
        $fileName .= '.errors';
        break;

      case self::CONFLICT:
        $fileName .= '.conflicts';
        break;

      case self::DUPLICATE:
        $fileName .= '.duplicates';
        break;

      case self::NO_MATCH:
        $fileName .= '.mismatch';
        break;

      case self::UNPARSED_ADDRESS_WARNING:
        $fileName .= '.unparsedAddress';
        break;
    }

    return $fileName;
  }

  /**
   * Determines the download file name based on error code.
   *
   * @param int $type
   *   Error code constant (self::ERROR, self::CONFLICT, ...).
   *
   * @return string|null
   *   The CSV file name, or NULL when $type is empty or has no file name
   *   assigned.
   */
  public static function saveFileName($type) {
    $fileName = NULL;
    if (empty($type)) {
      return $fileName;
    }
    switch ($type) {
      case self::ERROR:
        $fileName = 'Import_Errors.csv';
        break;

      case self::CONFLICT:
        $fileName = 'Import_Conflicts.csv';
        break;

      case self::DUPLICATE:
        $fileName = 'Import_Duplicates.csv';
        break;

      case self::NO_MATCH:
        $fileName = 'Import_Mismatch.csv';
        break;

      case self::UNPARSED_ADDRESS_WARNING:
        $fileName = 'Import_Unparsed_Address.csv';
        break;
    }

    return $fileName;
  }

  /**
   * Check if contact is a duplicate.
   *
   * Adds 'contact_type' (from $this->_contactType) and 'version' => 3 to the
   * passed array before handing it to the deprecated dedupe helper.
   *
   * @param array $formatValues
   *   (reference) Contact values; modified as described above.
   *
   * @return array
   *   Whatever _civicrm_api3_deprecated_check_contact_dedupe() returns.
   */
  protected function checkContactDuplicate(&$formatValues) {
    //retrieve contact id using contact dedupe rule
    $formatValues['contact_type'] = $this->_contactType;
    $formatValues['version'] = 3;
    require_once 'CRM/Utils/DeprecatedUtils.php';
    $error = _civicrm_api3_deprecated_check_contact_dedupe($formatValues);
    return $error;
  }

}