Merge pull request #15745 from seamuslee001/master
[civicrm-core.git] / CRM / Import / Parser.php
CommitLineData
ec3811b1
CW
<?php
/*
 +--------------------------------------------------------------------+
 | CiviCRM version 5                                                  |
 +--------------------------------------------------------------------+
 | Copyright CiviCRM LLC (c) 2004-2019                                |
 +--------------------------------------------------------------------+
 | This file is a part of CiviCRM.                                    |
 |                                                                    |
 | CiviCRM is free software; you can copy, modify, and distribute it  |
 | under the terms of the GNU Affero General Public License           |
 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
 |                                                                    |
 | CiviCRM is distributed in the hope that it will be useful, but     |
 | WITHOUT ANY WARRANTY; without even the implied warranty of         |
 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
 | See the GNU Affero General Public License for more details.        |
 |                                                                    |
 | You should have received a copy of the GNU Affero General Public   |
 | License and the CiviCRM Licensing Exception along                  |
 | with this program; if not, contact CiviCRM LLC                     |
 | at info[AT]civicrm[DOT]org. If you have questions about the        |
 | GNU Affero General Public License or the licensing of CiviCRM,     |
 | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
 +--------------------------------------------------------------------+
 */

/**
 *
 * @package CRM
 * @copyright CiviCRM LLC (c) 2004-2019
 */
abstract class CRM_Import_Parser {
  /**
   * Settings
   */
  const MAX_WARNINGS = 25, DEFAULT_TIMEOUT = 30;

  /**
   * Return codes
   *
   * Each value is a distinct power of two.
   */
  const VALID = 1, WARNING = 2, ERROR = 4, CONFLICT = 8, STOP = 16, DUPLICATE = 32, MULTIPLE_DUPE = 64, NO_MATCH = 128, UNPARSED_ADDRESS_WARNING = 256;

  /**
   * Parser modes
   */
  const MODE_MAPFIELD = 1, MODE_PREVIEW = 2, MODE_SUMMARY = 4, MODE_IMPORT = 8;

  /**
   * Codes for duplicate record handling
   */
  const DUPLICATE_SKIP = 1, DUPLICATE_REPLACE = 2, DUPLICATE_UPDATE = 4, DUPLICATE_FILL = 8, DUPLICATE_NOCHECK = 16;

  /**
   * Contact types
   */
  const CONTACT_INDIVIDUAL = 1, CONTACT_HOUSEHOLD = 2, CONTACT_ORGANIZATION = 4;

  /**
   * Total number of non empty lines
   * @var int
   */
  protected $_totalCount;

  /**
   * Running total number of valid lines
   * @var int
   */
  protected $_validCount;

  /**
   * Running total number of invalid rows
   * @var int
   */
  protected $_invalidRowCount;

  /**
   * Maximum number of non-empty/comment lines to process
   *
   * @var int
   */
  protected $_maxLinesToProcess;

  /**
   * Array of error lines
   * @var array
   */
  protected $_errors;

  /**
   * Total number of conflict lines
   * @var int
   */
  protected $_conflictCount;

  /**
   * Array of conflict lines
   * @var array
   */
  protected $_conflicts;

  /**
   * Total number of duplicate (from database) lines
   * @var int
   */
  protected $_duplicateCount;

  /**
   * Array of duplicate lines
   * @var array
   */
  protected $_duplicates;

  /**
   * Running total number of warnings
   * @var int
   */
  protected $_warningCount;

  /**
   * Maximum number of warnings to store (defaults to self::MAX_WARNINGS)
   * @var int
   */
  protected $_maxWarningCount = self::MAX_WARNINGS;

  /**
   * Array of warning lines, bounded by $_maxWarningCount
   * @var array
   */
  protected $_warnings;

  /**
   * Array of all the fields that could potentially be part
   * of this import process
   * @var array
   */
  protected $_fields;

  /**
   * Metadata for all available fields, keyed by unique name.
   *
   * This is intended to supersede $_fields, which uses a special sauce format,
   * whereas importableFieldsMetadata uses the standard getfields type format.
   *
   * @var array
   */
  protected $importableFieldsMetadata = [];

  /**
   * Get metadata for all importable fields in std getfields style format.
   *
   * @return array
   */
  public function getImportableFieldsMetadata(): array {
    return $this->importableFieldsMetadata;
  }

  /**
   * Set metadata for all importable fields in std getfields style format.
   *
   * @param array $importableFieldsMetadata
   */
  public function setImportableFieldsMetadata(array $importableFieldsMetadata) {
    $this->importableFieldsMetadata = $importableFieldsMetadata;
  }

  /**
   * Array of the fields that are actually part of the import process
   * the position in the array also dictates their position in the import
   * file
   * @var array
   */
  protected $_activeFields;

  /**
   * Cache the count of active fields
   *
   * @var int
   */
  protected $_activeFieldCount;

  /**
   * Cache of preview rows
   *
   * @var array
   */
  protected $_rows;

  /**
   * Filename of error data
   *
   * @var string
   */
  protected $_errorFileName;

  /**
   * Filename of conflict data
   *
   * @var string
   */
  protected $_conflictFileName;

  /**
   * Filename of duplicate data
   *
   * @var string
   */
  protected $_duplicateFileName;

  /**
   * Contact type
   *
   * @var int
   */
  public $_contactType;

  /**
   * Contact sub-type
   *
   * @var int
   */
  public $_contactSubType;

  /**
   * Class constructor.
   *
   * Initialises the line limit to 0.
   */
  public function __construct() {
    $this->_maxLinesToProcess = 0;
  }

  /**
   * Abstract function definitions.
   */
  abstract protected function init();

  /**
   * @return mixed
   */
  abstract protected function fini();

  /**
   * Map field.
   *
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function mapField(&$values);

  /**
   * Preview.
   *
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function preview(&$values);

  /**
   * Summary.
   *
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function summary(&$values);

  /**
   * Import.
   *
   * @param int $onDuplicate
   *   Presumably one of the DUPLICATE_* codes - confirm against implementations.
   * @param array $values
   *
   * @return mixed
   */
  abstract protected function import($onDuplicate, &$values);

  /**
   * Set and validate field values.
   *
   * Copies $elements positionally onto the active fields, resets any active
   * field that has no corresponding element, then validates every active
   * field in order, stopping at the first failure.
   *
   * @param array $elements
   *   Row values, indexed from 0 in import-file column order.
   * @param int $erroneousField
   *   (reference) Set to the index of the first field that fails validation;
   *   left untouched when every field validates.
   *
   * @return int
   *   self::VALID when all fields validate, otherwise self::ERROR.
   */
  public function setActiveFieldValues($elements, &$erroneousField) {
    $maxCount = min(count($elements), $this->_activeFieldCount);
    for ($i = 0; $i < $maxCount; $i++) {
      $this->_activeFields[$i]->setValue($elements[$i]);
    }

    // reset all the values that we did not have an equivalent import element
    for (; $i < $this->_activeFieldCount; $i++) {
      $this->_activeFields[$i]->resetValue();
    }

    // now validate the fields and report the first one that fails
    for ($i = 0; $i < $this->_activeFieldCount; $i++) {
      if (!$this->_activeFields[$i]->validate()) {
        // no need to do any more validation
        $erroneousField = $i;
        return self::ERROR;
      }
    }
    return self::VALID;
  }

  /**
   * Format the field values for input to the api.
   *
   * Only fields that have a value and no `_related` marker are included.
   * When several active fields share a name, the first one wins.
   *
   * @return array
   *   (reference) associative array of name/value pairs
   */
  public function &getActiveFieldParams() {
    $params = [];
    for ($i = 0; $i < $this->_activeFieldCount; $i++) {
      $field = $this->_activeFields[$i];
      if (isset($field->_value)
        && !isset($params[$field->_name])
        && !isset($field->_related)
      ) {
        $params[$field->_name] = $field->_value;
      }
    }
    return $params;
  }

  /**
   * Add progress bar to the import process. Calculates time remaining, status etc.
   *
   * The status is written as a JSON array [percent, html] to
   * "{uploadDir}status_{statusID}.txt".
   *
   * @param mixed $statusID
   *   status id of the import process saved in $config->uploadDir.
   * @param bool $startImport
   *   True when progress bar is to be initiated.
   * @param int|null $startTimestamp
   *   Initial timestamp when the import was started.
   * @param int|null $prevTimestamp
   *   Previous timestamp when this function was last called.
   * @param int|null $totalRowCount
   *   Total number of rows in the import file.
   *
   * @return int|null
   *   The current timestamp when reporting progress, NULL when initiating.
   */
  public function progressImport($statusID, $startImport = TRUE, $startTimestamp = NULL, $prevTimestamp = NULL, $totalRowCount = NULL) {
    $config = CRM_Core_Config::singleton();
    $statusFile = "{$config->uploadDir}status_{$statusID}.txt";

    if ($startImport) {
      $status = "<div class='description'>&nbsp; " . ts('No processing status reported yet.') . "</div>";
      //do not force the browser to display the save dialog, CRM-7640
      $contents = json_encode([0, $status]);
      file_put_contents($statusFile, $contents);
      return NULL;
    }

    // NOTE(review): neither $_rowCount nor $_lineCount is declared in this
    // class; presumably they are provided by the concrete parser - confirm.
    $rowCount = isset($this->_rowCount) ? $this->_rowCount : $this->_lineCount;
    $currTimestamp = time();
    // Time elapsed since the previous call.
    $time = ($currTimestamp - $prevTimestamp);
    $recordsLeft = max(0, $totalRowCount - $rowCount);
    // NOTE(review): the divisor 50 presumably reflects the number of rows
    // processed between calls - confirm against the callers.
    $estimatedTime = ($recordsLeft / 50) * $time;
    $estMinutes = floor($estimatedTime / 60);
    $timeFormatted = '';
    if ($estMinutes > 1) {
      $timeFormatted = $estMinutes . ' ' . ts('minutes') . ' ';
      $estimatedTime = $estimatedTime - ($estMinutes * 60);
    }
    $timeFormatted .= round($estimatedTime) . ' ' . ts('seconds');
    // Guard against an empty or unknown total: dividing by zero raises
    // DivisionByZeroError on PHP 8 (and a warning on PHP 7).
    $processedPercent = $totalRowCount > 0 ? (int) (($rowCount * 100) / $totalRowCount) : 0;
    $statusMsg = ts('%1 of %2 records - %3 remaining',
      [1 => $rowCount, 2 => $totalRowCount, 3 => $timeFormatted]
    );
    $status = "<div class=\"description\">&nbsp; <strong>{$statusMsg}</strong></div>";
    $contents = json_encode([$processedPercent, $status]);

    file_put_contents($statusFile, $contents);
    return $currTimestamp;
  }

  /**
   * Get the title of every available field.
   *
   * @return array
   *   Field title keyed by field name.
   */
  public function getSelectValues() {
    $values = [];
    foreach ($this->_fields as $name => $field) {
      $values[$name] = $field->_title;
    }
    return $values;
  }

  /**
   * Get the `_hasLocationType` value of every field that defines one.
   *
   * @return array
   *   Keyed by field name; fields without the property are omitted.
   */
  public function getSelectTypes() {
    $values = [];
    foreach ($this->_fields as $name => $field) {
      if (isset($field->_hasLocationType)) {
        $values[$name] = $field->_hasLocationType;
      }
    }
    return $values;
  }

  /**
   * Get the header pattern of every field that defines one.
   *
   * @return array
   *   Keyed by field name; fields without a header pattern are omitted.
   */
  public function getHeaderPatterns() {
    $values = [];
    foreach ($this->_fields as $name => $field) {
      if (isset($field->_headerPattern)) {
        $values[$name] = $field->_headerPattern;
      }
    }
    return $values;
  }

  /**
   * Get the data pattern of every field.
   *
   * Unlike getHeaderPatterns() there is no isset() guard here, so every field
   * name appears in the result.
   *
   * @return array
   *   Keyed by field name.
   */
  public function getDataPatterns() {
    $values = [];
    foreach ($this->_fields as $name => $field) {
      $values[$name] = $field->_dataPattern;
    }
    return $values;
  }

  /**
   * Remove enclosures (single quotes by default) from a value array (row).
   *
   * A value is only altered when it both starts and ends with the enclosure.
   *
   * @param array $values
   *   (reference) Row values, modified in place.
   * @param string $enclosure
   *   Enclosure string; it is matched literally.
   *
   * @return void
   */
  public static function encloseScrub(&$values, $enclosure = "'") {
    if (empty($values)) {
      return;
    }

    // Quote the enclosure so that characters which are special in a regular
    // expression (or the "/" delimiter itself) cannot corrupt the pattern.
    $quoted = preg_quote($enclosure, '/');
    $pattern = '/^' . $quoted . '(.*)' . $quoted . '$/';
    foreach ($values as $k => $v) {
      $values[$k] = preg_replace($pattern, '$1', $v);
    }
  }

  /**
   * Setter function.
   *
   * @param int $max
   *
   * @return void
   */
  public function setMaxLinesToProcess($max) {
    $this->_maxLinesToProcess = $max;
  }

  /**
   * Determines the file extension based on error code.
   *
   * @param int $type
   *   Error code constant.
   *
   * @return string|null
   *   "{uploadDir}sqlImport" plus a suffix for the recognised codes (no suffix
   *   for any other non-empty code), or NULL when $type is empty.
   */
  public static function errorFileName($type) {
    $fileName = NULL;
    if (empty($type)) {
      return $fileName;
    }

    $config = CRM_Core_Config::singleton();
    $fileName = $config->uploadDir . "sqlImport";
    switch ($type) {
      case self::ERROR:
        $fileName .= '.errors';
        break;

      case self::CONFLICT:
        $fileName .= '.conflicts';
        break;

      case self::DUPLICATE:
        $fileName .= '.duplicates';
        break;

      case self::NO_MATCH:
        $fileName .= '.mismatch';
        break;

      case self::UNPARSED_ADDRESS_WARNING:
        $fileName .= '.unparsedAddress';
        break;
    }

    return $fileName;
  }

  /**
   * Determines the file name based on error code.
   *
   * @param int $type
   *   Error code constant.
   *
   * @return string|null
   *   The download file name, or NULL when $type is empty or not recognised.
   */
  public static function saveFileName($type) {
    $fileName = NULL;
    if (empty($type)) {
      return $fileName;
    }
    switch ($type) {
      case self::ERROR:
        $fileName = 'Import_Errors.csv';
        break;

      case self::CONFLICT:
        $fileName = 'Import_Conflicts.csv';
        break;

      case self::DUPLICATE:
        $fileName = 'Import_Duplicates.csv';
        break;

      case self::NO_MATCH:
        $fileName = 'Import_Mismatch.csv';
        break;

      case self::UNPARSED_ADDRESS_WARNING:
        $fileName = 'Import_Unparsed_Address.csv';
        break;
    }

    return $fileName;
  }

  /**
   * Check if contact is a duplicate.
   *
   * Adds 'contact_type' and 'version' to $formatValues before delegating to
   * _civicrm_api3_deprecated_check_contact_dedupe().
   *
   * @param array $formatValues
   *   (reference) Values describing the contact.
   *
   * @return array
   *   Whatever the dedupe helper returns.
   */
  protected function checkContactDuplicate(&$formatValues) {
    //retrieve contact id using contact dedupe rule
    $formatValues['contact_type'] = $this->_contactType;
    $formatValues['version'] = 3;
    require_once 'CRM/Utils/DeprecatedUtils.php';
    return _civicrm_api3_deprecated_check_contact_dedupe($formatValues);
  }

  /**
   * Parse a field which could be represented by a label or name value rather than the DB value.
   *
   * We will try to match an id first (per https://lab.civicrm.org/dev/core/issues/1285),
   * then a name, and if neither is available see if we have a label that can
   * be converted to a name.
   *
   * @param string|int|null $submittedValue
   * @param array $fieldSpec
   *   Metadata for the field
   *
   * @return mixed
   *   The matching option key, '' when nothing matches, or $submittedValue
   *   unchanged when the field spec names no BAO.
   */
  protected function parsePseudoConstantField($submittedValue, $fieldSpec) {
    // dev/core#1289 Somehow we have wound up here but the BAO has not been specified in the fieldspec so we need to check this but future us problem, for now lets just return the submittedValue
    if (!isset($fieldSpec['bao'])) {
      return $submittedValue;
    }
    /* @var \CRM_Core_DAO $bao */
    $bao = $fieldSpec['bao'];
    // For historical reasons use validate as context - ie disabled name matches ARE permitted.
    $nameOptions = $bao::buildOptions($fieldSpec['name'], 'validate');
    if (isset($nameOptions[$submittedValue])) {
      return $submittedValue;
    }

    // Compare names as strings. A loose membership test followed by a strict
    // key lookup could disagree (e.g. '1e1' loosely equals '10'), which made
    // this function return FALSE instead of a key or ''.
    $names = array_map('strval', $nameOptions);
    $key = array_search((string) $submittedValue, $names, TRUE);
    if ($key !== FALSE) {
      return $key;
    }

    $labelOptions = array_flip($bao::buildOptions($fieldSpec['name'], 'match'));
    if (isset($labelOptions[$submittedValue])) {
      // array_flip() turns numeric names into integers, so cast back before
      // looking the name up.
      $key = array_search((string) $labelOptions[$submittedValue], $names, TRUE);
      if ($key !== FALSE) {
        return $key;
      }
    }
    return '';
  }

}