Merge pull request #15843 from totten/master-simplehead
[civicrm-core.git] / CRM / Import / Parser.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 *
14 * @package CRM
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
16 */
17 abstract class CRM_Import_Parser {
/**
 * Settings.
 *
 * MAX_WARNINGS is the default value of $_maxWarningCount.
 * NOTE(review): DEFAULT_TIMEOUT is not used inside this class; its unit is
 * presumably seconds - confirm against the code that reads it.
 */
const MAX_WARNINGS = 25, DEFAULT_TIMEOUT = 30;

/**
 * Return codes.
 *
 * Each code is a distinct power of two. Within this class, VALID and ERROR
 * are returned by setActiveFieldValues(), and ERROR, CONFLICT, DUPLICATE,
 * NO_MATCH and UNPARSED_ADDRESS_WARNING select a file name in
 * errorFileName() / saveFileName().
 */
const VALID = 1, WARNING = 2, ERROR = 4, CONFLICT = 8, STOP = 16, DUPLICATE = 32, MULTIPLE_DUPE = 64, NO_MATCH = 128, UNPARSED_ADDRESS_WARNING = 256;

/**
 * Parser modes.
 *
 * NOTE(review): these presumably select which of the abstract mapField(),
 * preview(), summary() and import() callbacks a subclass runs - confirm in
 * the concrete parsers.
 */
const MODE_MAPFIELD = 1, MODE_PREVIEW = 2, MODE_SUMMARY = 4, MODE_IMPORT = 8;

/**
 * Codes for duplicate record handling.
 */
const DUPLICATE_SKIP = 1, DUPLICATE_REPLACE = 2, DUPLICATE_UPDATE = 4, DUPLICATE_FILL = 8, DUPLICATE_NOCHECK = 16;

/**
 * Contact types.
 */
const CONTACT_INDIVIDUAL = 1, CONTACT_HOUSEHOLD = 2, CONTACT_ORGANIZATION = 4;
42
43
/**
 * Total number of non-empty lines.
 *
 * @var int
 */
protected $_totalCount;

/**
 * Running total number of valid lines.
 *
 * @var int
 */
protected $_validCount;

/**
 * Running total number of invalid rows.
 *
 * @var int
 */
protected $_invalidRowCount;

/**
 * Maximum number of non-empty/comment lines to process.
 *
 * Initialised to 0 by the constructor and changed via setMaxLinesToProcess().
 * NOTE(review): 0 presumably means "no limit" - confirm in the subclasses
 * that read this value.
 *
 * @var int
 */
protected $_maxLinesToProcess;

/**
 * Array of error lines.
 *
 * NOTE(review): the original comment said "bounded by MAX_ERROR", but no
 * such constant is declared in this class.
 *
 * @var array
 */
protected $_errors;

/**
 * Total number of conflict lines.
 *
 * @var int
 */
protected $_conflictCount;

/**
 * Array of conflict lines.
 *
 * @var array
 */
protected $_conflicts;

/**
 * Total number of duplicate (from database) lines.
 *
 * @var int
 */
protected $_duplicateCount;

/**
 * Array of duplicate lines.
 *
 * @var array
 */
protected $_duplicates;

/**
 * Running total number of warnings.
 *
 * @var int
 */
protected $_warningCount;

/**
 * Maximum number of warnings to store; defaults to self::MAX_WARNINGS.
 *
 * @var int
 */
protected $_maxWarningCount = self::MAX_WARNINGS;

/**
 * Array of warning lines.
 *
 * Intended to be bounded by $_maxWarningCount; the code enforcing that
 * bound is not part of this class.
 *
 * @var array
 */
protected $_warnings;

/**
 * Array of all the fields that could potentially be part
 * of this import process.
 *
 * The getSelect*() / get*Patterns() methods read the _title,
 * _hasLocationType, _headerPattern and _dataPattern properties of each
 * entry, keyed by field name.
 *
 * @var array
 */
protected $_fields;

/**
 * Metadata for all available fields, keyed by unique name.
 *
 * This is intended to supersede $_fields. $_fields uses its own bespoke
 * format, whereas importableFieldsMetadata uses the standard getfields-style
 * format.
 *
 * @var array
 */
protected $importableFieldsMetadata = [];
133
/**
 * Get metadata for all importable fields in standard getfields-style format.
 *
 * @return array
 *   The current value of $importableFieldsMetadata (an empty array until
 *   setImportableFieldsMetadata() has been called).
 */
public function getImportableFieldsMetadata(): array {
  return $this->importableFieldsMetadata;
}
142
/**
 * Set metadata for all importable fields in standard getfields-style format.
 *
 * Replaces any previously stored metadata wholesale.
 *
 * @param array $importableFieldsMetadata
 *   Field metadata to store.
 */
public function setImportableFieldsMetadata(array $importableFieldsMetadata) {
  $this->importableFieldsMetadata = $importableFieldsMetadata;
}
150
/**
 * Array of the fields that are actually part of the import process.
 *
 * The position in the array also dictates their position in the import
 * file. Entries are objects: this class calls setValue(), resetValue() and
 * validate() on them and reads their _name, _value and _related properties.
 *
 * @var array
 */
protected $_activeFields;

/**
 * Cache of the count of active fields.
 *
 * @var int
 */
protected $_activeFieldCount;

/**
 * Cache of preview rows.
 *
 * @var array
 */
protected $_rows;

/**
 * Filename of error data.
 *
 * @var string
 */
protected $_errorFileName;

/**
 * Filename of conflict data.
 *
 * @var string
 */
protected $_conflictFileName;

/**
 * Filename of duplicate data.
 *
 * @var string
 */
protected $_duplicateFileName;

/**
 * Contact type.
 *
 * Copied into the 'contact_type' parameter by checkContactDuplicate().
 *
 * @var int
 */
public $_contactType;
/**
 * Contact sub-type.
 *
 * @var int
 */
public $_contactSubType;
206
/**
 * Class constructor.
 *
 * Initialises the line limit to 0; use setMaxLinesToProcess() to change it.
 */
public function __construct() {
  $this->_maxLinesToProcess = 0;
}
213
/**
 * Initialisation hook to be implemented by each concrete parser.
 *
 * NOTE(review): presumably called before any rows are processed - confirm
 * in the subclass run loop.
 */
abstract protected function init();

/**
 * Finalisation hook to be implemented by each concrete parser.
 *
 * NOTE(review): presumably called once processing has finished - confirm
 * in the subclass run loop.
 *
 * @return mixed
 */
abstract protected function fini();

/**
 * Map field.
 *
 * NOTE(review): presumably the handler for self::MODE_MAPFIELD - confirm.
 *
 * @param array $values
 *   Row values, passed by reference.
 *
 * @return mixed
 */
abstract protected function mapField(&$values);

/**
 * Preview.
 *
 * NOTE(review): presumably the handler for self::MODE_PREVIEW - confirm.
 *
 * @param array $values
 *   Row values, passed by reference.
 *
 * @return mixed
 */
abstract protected function preview(&$values);

/**
 * Summary.
 *
 * NOTE(review): presumably the handler for self::MODE_SUMMARY - confirm.
 *
 * @param array $values
 *   Row values, passed by reference.
 *
 * @return mixed
 */
abstract protected function summary(&$values);

/**
 * Import.
 *
 * NOTE(review): presumably the handler for self::MODE_IMPORT, with
 * $onDuplicate being one of the self::DUPLICATE_* codes - confirm.
 *
 * @param int $onDuplicate
 *   Duplicate handling code.
 * @param array $values
 *   Row values, passed by reference.
 *
 * @return mixed
 */
abstract protected function import($onDuplicate, &$values);
256
/**
 * Load one row of values into the active fields and validate them.
 *
 * Active fields that have a corresponding element receive it via setValue();
 * any remaining active fields are cleared via resetValue(). Surplus elements
 * are ignored. Validation stops at the first field that fails.
 *
 * @param array $elements
 *   Row values, indexed by position (0-based).
 * @param int $erroneousField
 *   (reference) Set to the index of the first field that fails validation;
 *   left untouched when every field validates.
 *
 * @return int
 *   self::VALID when all fields validate, otherwise self::ERROR.
 */
public function setActiveFieldValues($elements, &$erroneousField) {
  $elementCount = count($elements);
  $populated = $elementCount < $this->_activeFieldCount ? $elementCount : $this->_activeFieldCount;

  // Fill the fields we have data for, and blank out the rest.
  for ($index = 0; $index < $this->_activeFieldCount; $index++) {
    if ($index < $populated) {
      $this->_activeFields[$index]->setValue($elements[$index]);
    }
    else {
      $this->_activeFields[$index]->resetValue();
    }
  }

  // Validate in order, bailing out on the first failure.
  for ($index = 0; $index < $this->_activeFieldCount; $index++) {
    if ($this->_activeFields[$index]->validate()) {
      continue;
    }
    $erroneousField = $index;
    return self::ERROR;
  }
  return self::VALID;
}
290
/**
 * Collect the active field values as name/value pairs for input to the api.
 *
 * A field is included only when it has a value, no earlier field has
 * already supplied a value under the same name, and it has no _related
 * property set.
 *
 * @return array
 *   (reference) associative array of name/value pairs
 */
public function &getActiveFieldParams() {
  $params = [];
  for ($index = 0; $index < $this->_activeFieldCount; $index++) {
    if (!isset($this->_activeFields[$index]->_value)) {
      continue;
    }
    $field = $this->_activeFields[$index];
    // First occurrence of a name wins.
    if (isset($params[$field->_name])) {
      continue;
    }
    if (isset($field->_related)) {
      continue;
    }
    $params[$field->_name] = $field->_value;
  }
  return $params;
}
310
/**
 * Write the current import progress to the status file.
 *
 * The status file is "status_{$statusID}.txt" inside $config->uploadDir and
 * holds a JSON array of [percentage processed, HTML status message].
 *
 * When $startImport is TRUE the file is initialised with 0% and a
 * "nothing reported yet" message. Otherwise the percentage processed and an
 * estimate of the time remaining are calculated and written.
 *
 * @param int|string $statusID
 *   Status id of the import process; used to build the status file name.
 * @param bool $startImport
 *   True when the progress bar is to be initiated.
 * @param int|null $startTimestamp
 *   Initial timestamp when the import was started. Currently not used in
 *   the calculation.
 * @param int|null $prevTimestamp
 *   Previous timestamp when this function was last called.
 * @param int|null $totalRowCount
 *   Total number of rows in the import file.
 *
 * @return int|null
 *   The current timestamp when progress was updated; NULL when the progress
 *   bar was only initialised.
 */
public function progressImport($statusID, $startImport = TRUE, $startTimestamp = NULL, $prevTimestamp = NULL, $totalRowCount = NULL) {
  $config = CRM_Core_Config::singleton();
  $statusFile = "{$config->uploadDir}status_{$statusID}.txt";

  if ($startImport) {
    $status = "<div class='description'>&nbsp; " . ts('No processing status reported yet.') . "</div>";
    //do not force the browser to display the save dialog, CRM-7640
    $contents = json_encode([0, $status]);
    file_put_contents($statusFile, $contents);
    return NULL;
  }

  // Subclasses track progress in either _rowCount or _lineCount.
  $rowCount = isset($this->_rowCount) ? $this->_rowCount : $this->_lineCount;
  $currTimestamp = time();
  $time = ($currTimestamp - $prevTimestamp);
  $recordsLeft = $totalRowCount - $rowCount;
  if ($recordsLeft < 0) {
    $recordsLeft = 0;
  }
  // NOTE(review): the divisor assumes this method is called roughly once
  // every 50 records - confirm against the callers.
  $estimatedTime = ($recordsLeft / 50) * $time;
  $estMinutes = floor($estimatedTime / 60);
  $timeFormatted = '';
  // Minutes are only shown when more than one whole minute remains;
  // otherwise the whole estimate is reported in seconds.
  if ($estMinutes > 1) {
    $timeFormatted = $estMinutes . ' ' . ts('minutes') . ' ';
    $estimatedTime = $estimatedTime - ($estMinutes * 60);
  }
  $timeFormatted .= round($estimatedTime) . ' ' . ts('seconds');
  // $totalRowCount defaults to NULL and may legitimately be 0; avoid a
  // division by zero (a DivisionByZeroError on PHP 8) and report 0% instead.
  $processedPercent = $totalRowCount > 0 ? (int) (($rowCount * 100) / $totalRowCount) : 0;
  $statusMsg = ts('%1 of %2 records - %3 remaining',
    [1 => $rowCount, 2 => $totalRowCount, 3 => $timeFormatted]
  );
  $status = "<div class=\"description\">&nbsp; <strong>{$statusMsg}</strong></div>";
  $contents = json_encode([$processedPercent, $status]);

  file_put_contents($statusFile, $contents);
  return $currTimestamp;
}
365
/**
 * Get the title of every available field.
 *
 * @return array
 *   Field titles (each field's _title), keyed by field name.
 */
public function getSelectValues() {
  $titles = [];
  foreach ($this->_fields as $fieldName => $fieldObject) {
    $titles[$fieldName] = $fieldObject->_title;
  }
  return $titles;
}
376
/**
 * Get the location-type flag of every field that has one set.
 *
 * @return array
 *   Each field's _hasLocationType, keyed by field name. Fields where the
 *   property is unset or NULL are omitted.
 */
public function getSelectTypes() {
  $locationTypes = [];
  foreach ($this->_fields as $fieldName => $fieldObject) {
    if (!isset($fieldObject->_hasLocationType)) {
      continue;
    }
    $locationTypes[$fieldName] = $fieldObject->_hasLocationType;
  }
  return $locationTypes;
}
389
/**
 * Get the header pattern of every field that has one set.
 *
 * @return array
 *   Each field's _headerPattern, keyed by field name. Fields where the
 *   property is unset or NULL are omitted.
 */
public function getHeaderPatterns() {
  $patterns = [];
  foreach ($this->_fields as $fieldName => $fieldObject) {
    if (!isset($fieldObject->_headerPattern)) {
      continue;
    }
    $patterns[$fieldName] = $fieldObject->_headerPattern;
  }
  return $patterns;
}
402
/**
 * Get the data pattern of every available field.
 *
 * Unlike getHeaderPatterns(), no field is skipped: every field name gets an
 * entry, whatever its _dataPattern holds.
 *
 * @return array
 *   Each field's _dataPattern, keyed by field name.
 */
public function getDataPatterns() {
  $patterns = [];
  foreach ($this->_fields as $fieldName => $fieldObject) {
    $patterns[$fieldName] = $fieldObject->_dataPattern;
  }
  return $patterns;
}
413
/**
 * Remove enclosure characters from a value array (row).
 *
 * A value is only changed when it both starts and ends with $enclosure, in
 * which case one enclosure is stripped from each end. The enclosure is
 * matched literally.
 *
 * @param array $values
 *   (reference) Row of values; modified in place.
 * @param string $enclosure
 *   Enclosure string to strip; defaults to a single quote.
 *
 * @return void
 */
public static function encloseScrub(&$values, $enclosure = "'") {
  if (empty($values)) {
    return;
  }

  // Escape the enclosure so that regex metacharacters (e.g. "." or "|") are
  // matched literally and a "/" cannot terminate the pattern early.
  $quotedEnclosure = preg_quote($enclosure, '/');
  $pattern = "/^{$quotedEnclosure}(.*){$quotedEnclosure}$/";

  foreach ($values as $k => $v) {
    $values[$k] = preg_replace($pattern, '$1', $v);
  }
}
431
/**
 * Set the maximum number of lines to process.
 *
 * The value is stored as-is in $_maxLinesToProcess (the constructor
 * initialises it to 0).
 *
 * @param int $max
 *   New line limit.
 *
 * @return void
 */
public function setMaxLinesToProcess($max) {
  $this->_maxLinesToProcess = $max;
}
442
/**
 * Build the path of the error file for a given error code.
 *
 * The path is $config->uploadDir followed by "sqlImport" and an extension
 * chosen by the error code. Codes without a dedicated extension yield the
 * bare "sqlImport" path.
 *
 * @param int $type
 *   Error code constant (self::ERROR, self::CONFLICT, self::DUPLICATE,
 *   self::NO_MATCH or self::UNPARSED_ADDRESS_WARNING).
 *
 * @return string|null
 *   The file path, or NULL when $type is empty.
 */
public static function errorFileName($type) {
  if (empty($type)) {
    return NULL;
  }

  // Loose comparison on purpose: it mirrors switch semantics, so numeric
  // strings are accepted as well.
  $extension = '';
  if ($type == self::ERROR) {
    $extension = '.errors';
  }
  elseif ($type == self::CONFLICT) {
    $extension = '.conflicts';
  }
  elseif ($type == self::DUPLICATE) {
    $extension = '.duplicates';
  }
  elseif ($type == self::NO_MATCH) {
    $extension = '.mismatch';
  }
  elseif ($type == self::UNPARSED_ADDRESS_WARNING) {
    $extension = '.unparsedAddress';
  }

  $config = CRM_Core_Config::singleton();
  return $config->uploadDir . "sqlImport" . $extension;
}
481
/**
 * Determine the download file name for a given error code.
 *
 * @param int $type
 *   Error code constant (self::ERROR, self::CONFLICT, self::DUPLICATE,
 *   self::NO_MATCH or self::UNPARSED_ADDRESS_WARNING).
 *
 * @return string|null
 *   The CSV file name, or NULL when $type is empty or has no file name
 *   associated with it.
 */
public static function saveFileName($type) {
  if (empty($type)) {
    return NULL;
  }

  // Loose comparison on purpose: it mirrors switch semantics, so numeric
  // strings are accepted as well.
  if ($type == self::ERROR) {
    return 'Import_Errors.csv';
  }
  if ($type == self::CONFLICT) {
    return 'Import_Conflicts.csv';
  }
  if ($type == self::DUPLICATE) {
    return 'Import_Duplicates.csv';
  }
  if ($type == self::NO_MATCH) {
    return 'Import_Mismatch.csv';
  }
  if ($type == self::UNPARSED_ADDRESS_WARNING) {
    return 'Import_Unparsed_Address.csv';
  }
  return NULL;
}
517
/**
 * Check if contact is a duplicate.
 *
 * Adds 'contact_type' (from $this->_contactType) and 'version' (3) to the
 * supplied values, then hands them to the deprecated dedupe helper.
 *
 * @param array $formatValues
 *   (reference) Contact values; the two keys above are written into it.
 *
 * @return array
 *   Whatever _civicrm_api3_deprecated_check_contact_dedupe() returns.
 */
protected function checkContactDuplicate(&$formatValues) {
  // Retrieve the contact id using the contact dedupe rule.
  $formatValues['contact_type'] = $this->_contactType;
  $formatValues['version'] = 3;

  require_once 'CRM/Utils/DeprecatedUtils.php';
  return _civicrm_api3_deprecated_check_contact_dedupe($formatValues);
}
533
/**
 * Parse a field which could be represented by a label or name value rather than the DB value.
 *
 * Resolution order:
 *  1. the submitted value is already an option key (per
 *     https://lab.civicrm.org/dev/core/issues/1285) - return it unchanged;
 *  2. the submitted value is identical to an option name - return that
 *     option's key;
 *  3. the submitted value is a label that can be converted to a name -
 *     return the key of that name.
 *
 * @param string|int|null $submittedValue
 * @param array $fieldSpec
 *   Metadata for the field. Reads 'bao' (class providing buildOptions())
 *   and 'name'.
 *
 * @return mixed
 *   The option key, or an empty string when nothing matches. When the field
 *   spec has no 'bao' the submitted value is returned unchanged.
 */
protected function parsePseudoConstantField($submittedValue, $fieldSpec) {
  // dev/core#1289 Somehow we have wound up here but the BAO has not been specified in the fieldspec so we need to check this but future us problem, for now lets just return the submittedValue
  if (!isset($fieldSpec['bao'])) {
    return $submittedValue;
  }
  /* @var \CRM_Core_DAO $bao */
  $bao = $fieldSpec['bao'];
  // For historical reasons use validate as context - ie disabled name matches ARE permitted.
  $nameOptions = $bao::buildOptions($fieldSpec['name'], 'validate');
  if (isset($nameOptions[$submittedValue])) {
    return $submittedValue;
  }

  // A single strict search replaces the former loose in_array() check
  // followed by a strict array_search(). That pair disagreed for values that
  // are only loosely equal (e.g. '1e3' vs '1000'), returning boolean FALSE.
  $matchedKey = array_search($submittedValue, $nameOptions, TRUE);
  if ($matchedKey !== FALSE) {
    return $matchedKey;
  }

  $labelOptions = array_flip($bao::buildOptions($fieldSpec['name'], 'match'));
  if (isset($labelOptions[$submittedValue])) {
    $matchedKey = array_search($labelOptions[$submittedValue], $nameOptions, TRUE);
    // Keep the "no match" result consistent: never leak boolean FALSE.
    return $matchedKey === FALSE ? '' : $matchedKey;
  }
  return '';
}
569
570 }