From c5458931a8246924b1a2b149319a0ea2ae23b7c6 Mon Sep 17 00:00:00 2001 From: Tim Otten Date: Wed, 29 Jul 2015 21:21:55 -0700 Subject: [PATCH] CRM_Utils_SQL_* --- CRM/Utils/SQL/Insert.php | 14 +- CRM/Utils/SQL/Select.php | 366 ++++++++++++++++++--- tests/phpunit/CRM/Utils/SQL/SelectTest.php | 120 ++++++- 3 files changed, 444 insertions(+), 56 deletions(-) diff --git a/CRM/Utils/SQL/Insert.php b/CRM/Utils/SQL/Insert.php index 0f94442729..5e13384e32 100644 --- a/CRM/Utils/SQL/Insert.php +++ b/CRM/Utils/SQL/Insert.php @@ -61,6 +61,14 @@ class CRM_Utils_SQL_Insert { $this->rows = array(); } + public function columns($columns) { + if ($this->columns !== NULL) { + throw new CRM_Core_Exception("Column order already specified."); + } + $this->columns = $columns; + return $this; + } + /** * @param array $rows * @return CRM_Utils_SQL_Insert @@ -79,17 +87,17 @@ class CRM_Utils_SQL_Insert { */ public function row($row) { $columns = array_keys($row); - sort($columns); if ($this->columns === NULL) { + sort($columns); $this->columns = $columns; } - elseif ($this->columns != $columns) { + elseif (array_diff($this->columns, $columns) !== array()) { throw new CRM_Core_Exception("Inconsistent column names"); } $escapedRow = array(); - foreach ($columns as $column) { + foreach ($this->columns as $column) { $escapedRow[$column] = $this->escapeString($row[$column]); } $this->rows[] = $escapedRow; diff --git a/CRM/Utils/SQL/Select.php b/CRM/Utils/SQL/Select.php index 79560acb6c..cdc50c9e80 100644 --- a/CRM/Utils/SQL/Select.php +++ b/CRM/Utils/SQL/Select.php @@ -4,18 +4,19 @@ * Dear God Why Do I Have To Write This (Dumb SQL Builder) * * Usage: - * $select = new CRM_Utils_SQL_Select('civicrm_activity act'); - * $select + * @code + * $select = CRM_Utils_SQL_Select::from('civicrm_activity act') * ->join('absence', 'inner join civicrm_activity absence on absence.id = act.source_record_id') - * ->where('activity_type_id = #type', array('#type' => 234)) - * ->where('status_id IN (#statuses)', array('#statuses' => array(1,2,3)) - * ->where('subject like @subj', array('@subj' => '%hello%')) - * ->where('!dynamicColumn = 1', array('!dynamicColumn' => 'coalesce(is_active,0)')) + * ->where('activity_type_id = #type', array('type' => 234)) + * ->where('status_id IN (#statuses)', array('statuses' => array(1,2,3)) + * ->where('subject like @subj', array('subj' => '%hello%')) + * ->where('!dynamicColumn = 1', array('dynamicColumn' => 'coalesce(is_active,0)')) * ->where('!column = @value', array( - * '!column' => $customField->column_name, - * '@value' => $form['foo'] + * 'column' => $customField->column_name, + * 'value' => $form['foo'] * )) * echo $select->toSQL(); + * @endcode * * Design principles: * - Portable @@ -29,11 +30,63 @@ * - use "!varname" to insert raw (unescaped) values * - use "#varname" to insert a numerical value (these are validated but not escaped) * - to disable any preprocessing, simply omit the variable list + * - control characters (@!#) are mandatory in expressions but optional in arg-keys * - Variables may be individual values or arrays; arrays are imploded with commas * - Conditionals are AND'd; if you need OR's, do it yourself * - Use classes/functions with documentation (rather than undocumented array-trees) + * - For any given string, interpolation is only performed once. After an interpolation, + * a string may never again be subjected to interpolation. + * + * The "interpolate-once" principle can be enforced by either interpolating on input + * xor output. The notations for input and output interpolation are a bit different, + * and they may not be mixed. + * + * @code + * // Interpolate on input. Set params when using them. + * $select->where('activity_type_id = #type', array( + * 'type' => 234, + * )); + * + * // Interpolate on output. Set params independently. + * $select + * ->where('activity_type_id = #type') + * ->param('type', 234), + * @endcode */ -class CRM_Utils_SQL_Select { +class CRM_Utils_SQL_Select implements ArrayAccess { + + /** + * Interpolate values as soon as they are passed in (where(), join(), etc). + * + * Default. + * + * Pro: Every clause has its own unique namespace for parameters. + * Con: Probably slower. + * Advice: Use this when aggregating SQL fragments from agents who + * maintained by different parties. + */ + const INTERPOLATE_INPUT = 'in'; + + /** + * Interpolate values when rendering SQL output (toSQL()). + * + * Pro: Probably faster. + * Con: Must maintain an aggregated list of all parameters. + * Advice: Use this when you have control over the entire query. + */ + const INTERPOLATE_OUTPUT = 'out'; + + /** + * Determine mode automatically. When the first attempt is made + * to use input-interpolation (eg `where(..., array(...))`) or + * output-interpolation (eg `param(...)`), the mode will be + * set. Subsequent calls will be validated using the same mode. + */ + const INTERPOLATE_AUTO = 'auto'; + + private $mode = NULL; + private $insertInto = NULL; + private $insertIntoFields = array(); private $selects = array(); private $from; private $joins = array(); @@ -43,16 +96,31 @@ class CRM_Utils_SQL_Select { private $orderBys = array(); private $limit = NULL; private $offset = NULL; + private $params = array(); + + // Public to work-around PHP 5.3 limit. + public $strict = NULL; /** * Create a new SELECT query. * * @param string $from * Table-name and optional alias. + * @param array $options + * @return CRM_Utils_SQL_Select + */ + public static function from($from, $options = array()) { + return new self($from, $options); + } + + /** + * Create a partial SELECT query. + * + * @param array $options * @return CRM_Utils_SQL_Select */ - public static function from($from) { - return new self($from); + public static function fragment($options = array()) { + return new self(NULL, $options); } /** @@ -60,23 +128,86 @@ class CRM_Utils_SQL_Select { * * @param string $from * Table-name and optional alias. + * @param array $options */ - public function __construct($from) { + public function __construct($from, $options = array()) { $this->from = $from; + $this->mode = isset($options['mode']) ? $options['mode'] : self::INTERPOLATE_AUTO; + } + + /** + * Make a new copy of this query. + * + * @return CRM_Utils_SQL_Select + */ + public function copy() { + return clone $this; + } + + /** + * @param CRM_Utils_SQL_Select $other + * @param array|NULL $parts + * ex: 'joins', 'wheres' + * @return $this + */ + public function merge($other, $parts = NULL) { + if ($other === NULL) { + return $this; + } + + if ($this->mode === self::INTERPOLATE_AUTO) { + $this->mode = $other->mode; + } + elseif ($other->mode === self::INTERPOLATE_AUTO) { + // Noop. + } + elseif ($this->mode !== $other->mode) { + // Mixing modes will lead to someone getting an expected substitution. + throw new RuntimeException("Cannot merge queries that use different interpolation modes ({$this->mode} vs {$other->mode})."); + } + + $arrayFields = array('insertIntoFields', 'selects', 'joins', 'wheres', 'groupBys', 'havings', 'orderBys', 'params'); + foreach ($arrayFields as $f) { + if ($parts === NULL || in_array($f, $parts)) { + $this->{$f} = array_merge($this->{$f}, $other->{$f}); + } + } + + $flatFields = array('insertInto', 'from', 'limit', 'offset'); + foreach ($flatFields as $f) { + if ($parts === NULL || in_array($f, $parts)) { + if ($other->{$f} !== NULL) { + $this->{$f} = $other->{$f}; + } + } + } + + return $this; } /** * Add a new JOIN clause. * - * @param string $name + * Note: To add multiple JOINs at once, use $name===NULL and + * pass an array of $exprs. + * + * @param string|NULL $name * The effective alias of the joined table. - * @param string $expr + * @param string|array $exprs * The complete join expression (eg "INNER JOIN mytable myalias ON mytable.id = maintable.foo_id"). * @param array|null $args * @return CRM_Utils_SQL_Select */ - public function join($name, $expr, $args = NULL) { - $this->joins[$name] = $this->interpolate($expr, $args); + public function join($name, $exprs, $args = NULL) { + if ($name !== NULL) { + $this->joins[$name] = $this->interpolate($exprs, $args); + } + else { + foreach ($exprs as $name => $expr) { + $this->joins[$name] = $this->interpolate($expr, $args); + } + return $this; + } return $this; } @@ -155,6 +286,33 @@ class CRM_Utils_SQL_Select { return $this; } + /** + * Set one (or multiple) parameters to interpolate into the query. + * + * @param array|string $keys + * Key name, or an array of key-value pairs. + * @param null|mixed $value + * @return $this + */ + public function param($keys, $value = NULL) { + if ($this->mode === self::INTERPOLATE_AUTO) { + $this->mode = self::INTERPOLATE_OUTPUT; + } + elseif ($this->mode !== self::INTERPOLATE_OUTPUT) { + throw new RuntimeException("Select::param() only makes sense when interpolating on output."); + } + + if (is_array($keys)) { + foreach ($keys as $k => $v) { + $this->params[$k] = $v; + } + } + else { + $this->params[$keys] = $value; + } + return $this; + } + /** * Set a limit on the number of records to return. * @@ -175,6 +333,81 @@ class CRM_Utils_SQL_Select { return $this; } + /** + * Insert the results of the SELECT query into another + * table. + * + * @param string $table + * The name of the other table (which receives new data). + * @param array $fields + * The fields to fill in the other table (in order). + * @return $this + * @see insertIntoField + */ + public function insertInto($table, $fields = array()) { + $this->insertInto = $table; + $this->insertIntoField($fields); + return $this; + } + + /** + * @param array $fields + * The fields to fill in the other table (in order). + * @return $this + */ + public function insertIntoField($fields) { + $fields = (array) $fields; + foreach ($fields as $field) { + $this->insertIntoFields[] = $field; + } + return $this; + } + + /** + * @param array|NULL $parts + * List of fields to check (e.g. 'selects', 'joins'). + * Defaults to all. + * @return bool + */ + public function isEmpty($parts = NULL) { + $empty = TRUE; + $fields = array( + 'insertInto', + 'insertIntoFields', + 'selects', + 'from', + 'joins', + 'wheres', + 'groupBys', + 'havings', + 'orderBys', + 'limit', + 'offset', + ); + if ($parts !== NULL) { + $fields = array_intersect($fields, $parts); + } + foreach ($fields as $field) { + if (!empty($this->{$field})) { + $empty = FALSE; + } + } + return $empty; + } + + /** + * Enable (or disable) strict mode. + * + * In strict mode, unknown variables will generate exceptions. + * + * @param bool $strict + * @return $this + */ + public function strict($strict = TRUE) { + $this->strict = $strict; + return $this; + } + /** * Given a string like "field_name = @value", replace "@value" with an escaped SQL string * @@ -191,37 +424,59 @@ class CRM_Utils_SQL_Select { * @return string * SQL expression */ - public function interpolate($expr, $args) { + public function interpolate($expr, $args, $activeMode = self::INTERPOLATE_INPUT) { if ($args === NULL) { return $expr; } else { - foreach (array_keys($args) as $key) { - $values = is_array($args[$key]) ? $args[$key] : array($args[$key]); - if ($key{0} == '@') { - $parts = array_map(array($this, 'escapeString'), $values); - $args[$key] = implode(', ', $parts); + if ($this->mode === self::INTERPOLATE_AUTO) { + $this->mode = $activeMode; + } + elseif ($activeMode !== $this->mode) { + throw new RuntimeException("Cannot mix interpolation modes."); + } + + $select = $this; + return preg_replace_callback('/([#!@])([a-zA-Z0-9_]+)/', function($m) use ($select, $args) { + if (isset($args[$m[2]])) { + $values = $args[$m[2]]; } - elseif ($key{0} == '!') { - $args[$key] = implode(', ', $values); + elseif (isset($args[$m[1] . $m[2]])) { + // Backward compat. Keys in $args look like "#myNumber" or "@myString". + $values = $args[$m[1] . $m[2]]; } - elseif ($key{0} == '#') { - foreach ($values as $valueKey => $value) { - if ($value === NULL) { - $values[$valueKey] = 'NULL'; - } - elseif (!is_numeric($value)) { - //throw new API_Exception("Failed encoding non-numeric value" . var_export(array($key => $args[$key]), TRUE)); - throw new CRM_Core_Exception("Failed encoding non-numeric value"); - } - } - $args[$key] = implode(', ', $values); + elseif ($select->strict) { + throw new CRM_Core_Exception('Cannot build query. Variable "' . $m[1] . $m[2] . '" is unknown.'); } else { - throw new CRM_Core_Exception("Bad SQL parameter key: $key"); + // Unrecognized variables are ignored. Mitigate risk of accidents. + return $m[0]; } - } - return strtr($expr, $args); + $values = is_array($values) ? $values : array($values); + switch ($m[1]) { + case '@': + $parts = array_map(array($select, 'escapeString'), $values); + return implode(', ', $parts); + + case '!': + return implode(', ', $values); + + case '#': + foreach ($values as $valueKey => $value) { + if ($value === NULL) { + $values[$valueKey] = 'NULL'; + } + elseif (!is_numeric($value)) { + //throw new API_Exception("Failed encoding non-numeric value" . var_export(array($key => $args[$key]), TRUE)); + throw new CRM_Core_Exception("Failed encoding non-numeric value"); + } + } + return implode(', ', $values); + + default: + throw new CRM_Core_Exception("Unrecognized prefix"); + } + }, $expr); } } @@ -230,7 +485,7 @@ class CRM_Utils_SQL_Select { * @return string * SQL expression, e.g. "it\'s great" (with-quotes) or NULL (without-quotes) */ - protected function escapeString($value) { + public function escapeString($value) { return $value === NULL ? 'NULL' : '"' . CRM_Core_DAO::escapeString($value) . '"'; } @@ -239,13 +494,21 @@ class CRM_Utils_SQL_Select { * SQL statement */ public function toSQL() { + $sql = ''; + if ($this->insertInto) { + $sql .= 'INSERT INTO ' . $this->insertInto . ' ('; + $sql .= implode(', ', $this->insertIntoFields); + $sql .= ")\n"; + } if ($this->selects) { - $sql = 'SELECT ' . implode(', ', $this->selects) . "\n"; + $sql .= 'SELECT ' . implode(', ', $this->selects) . "\n"; } else { - $sql = 'SELECT *' . "\n"; + $sql .= 'SELECT *' . "\n"; + } + if ($this->from !== NULL) { + $sql .= 'FROM ' . $this->from . "\n"; } - $sql .= 'FROM ' . $this->from . "\n"; foreach ($this->joins as $join) { $sql .= $join . "\n"; } @@ -267,7 +530,26 @@ class CRM_Utils_SQL_Select { $sql .= 'OFFSET ' . $this->offset . "\n"; } } + if ($this->mode === self::INTERPOLATE_OUTPUT) { + $sql = $this->interpolate($sql, $this->params, self::INTERPOLATE_OUTPUT); + } return $sql; } + public function offsetExists($offset) { + return isset($this->params[$offset]); + } + + public function offsetGet($offset) { + return $this->params[$offset]; + } + + public function offsetSet($offset, $value) { + $this->param($offset, $value); + } + + public function offsetUnset($offset) { + unset($this->params[$offset]); + } + } diff --git a/tests/phpunit/CRM/Utils/SQL/SelectTest.php b/tests/phpunit/CRM/Utils/SQL/SelectTest.php index a85b86f339..d241508e68 100644 --- a/tests/phpunit/CRM/Utils/SQL/SelectTest.php +++ b/tests/phpunit/CRM/Utils/SQL/SelectTest.php @@ -80,12 +80,47 @@ class CRM_Utils_SQL_SelectTest extends CiviUnitTestCase { $this->assertLike('SELECT * FROM foo bar', $select->toSQL()); } + public function testModeOutput() { + $select = CRM_Utils_SQL_Select::from('foo', array('mode' => 'out')) + ->where('foo = @value') + ->where(array( + 'whiz > @base', + 'frob != @base', + )) + ->param('@value', 'not"valid') + ->param(array( + '@base' => 'in"valid', + )); + $this->assertLike('SELECT * FROM foo WHERE (foo = "not\\"valid") AND (whiz > "in\\"valid") AND (frob != "in\\"valid")', $select->toSQL()); + + try { + CRM_Utils_SQL_Select::from('foo', array('mode' => 'out')) + ->where('foo = @value', array('@value' => 'not"valid')); + $this->fail('In output mode, we should reject requests to interpolate inputs.'); + } + catch (Exception $e) { + $this->assertRegExp("/Cannot mix interpolation modes/", $e->getMessage()); + } + + $outputModeFragment = CRM_Utils_SQL_Select::fragment() + ->param('value', 'whatever'); + $inputModeFragment = CRM_Utils_SQL_Select::fragment() + ->where('foo = @value', array('@value' => 'not"valid')); + try { + $outputModeFragment->merge($inputModeFragment); + $this->fail('In output-mode, we should reject requests to merge from input-mode.'); + } + catch (Exception $e) { + $this->assertRegExp("/Cannot merge queries that use different interpolation modes/", $e->getMessage()); + } + } + public function testBig() { $select = CRM_Utils_SQL_Select::from('foo') ->select('foo.id') ->join('rel1', 'INNER JOIN rel1_table rel1 ON foo.id = rel1.foo_id') ->join('rel2', 'LEFT JOIN rel2_table rel2 ON foo.id = rel2.foo_id') - ->where('foo.type = @type', array('@type' => 'mytype')) + ->where('foo.type = @theType', array('@theType' => 'mytype')) ->groupBy("foo.id") ->having('sum(rel1.stat) > 10') ->orderBy('rel2.whiz') @@ -103,6 +138,20 @@ class CRM_Utils_SQL_SelectTest extends CiviUnitTestCase { ); } + /** + * Parameter-values could include control characters like + * '"@" or "!", but they should never be evaluated. + */ + public function testNoIterativeInterpolation() { + $select = CRM_Utils_SQL_Select::from('foo') + ->where('a = @a and b = @b and c = @c', array( + 'a' => '@b', + 'b' => '@c', + 'c' => '@a', + )); + $this->assertLike('SELECT * FROM foo WHERE (a = "@b" and b = "@c" and c = "@a")', $select->toSQL()); + } + public function testInterpolate() { $actual = CRM_Utils_SQL_Select::from('ignore')->interpolate( '@escaped !unescaped #validated', @@ -115,6 +164,44 @@ class CRM_Utils_SQL_SelectTest extends CiviUnitTestCase { $this->assertLike('"foo\"bar" concat(foo,bar) 15.2', $actual); } + public function testInterpolateWildcard() { + $actual = CRM_Utils_SQL_Select::from('ignore')->interpolate( + 'escaped @escaped unescaped !unescaped validated #validated', + array( + 'escaped' => 'foo"bar', + 'unescaped' => 'concat(foo,bar)', + 'validated' => 15.2, + ) + ); + $this->assertLike('escaped "foo\"bar" unescaped concat(foo,bar) validated 15.2', $actual); + } + + public function testInterpolateUnknown() { + $actual = CRM_Utils_SQL_Select::from('ignore')->interpolate( + 'escaped @escaped unescaped !unescaped validated #validated', + array( + 'irrelevant' => 'foo', + ) + ); + $this->assertLike('escaped @escaped unescaped !unescaped validated #validated', $actual); + } + + public function testInterpolateUnknownStrict() { + try { + CRM_Utils_SQL_Select::from('ignore') + ->strict() + ->interpolate('@johnMcClane', + array( + 'irrelevant' => 'foo', + ) + ); + $this->fail('Unknown variables should throw errors in strict mode.'); + } + catch (Exception $e) { + $this->assertRegExp('/Cannot build query. Variable "@johnMcClane" is unknown./', $e->getMessage()); + } + } + public function testInterpolateArray() { $actual = CRM_Utils_SQL_Select::from('ignore')->interpolate( '(@escaped) (!unescaped) (#validated)', @@ -149,16 +236,27 @@ class CRM_Utils_SQL_SelectTest extends CiviUnitTestCase { } } - public function testInterpolateBadKey() { - try { - $result = CRM_Utils_SQL_Select::from('ignore')->interpolate('this is a {var}', array( - '{var}' => 'not a well-formed variable name', - )); - $this->fail('Expected exception; got: ' . var_export($result, TRUE)); - } - catch (CRM_Core_Exception $e) { - $this->assertTrue(TRUE, "Caught expected exception"); - } + public function testMerge() { + $fragmentOutMode = CRM_Utils_SQL_Select::fragment() + ->select(array('a', 'b')) + ->where('a = #two') + ->param('two', 2); + $fragmentAutoMode = CRM_Utils_SQL_Select::fragment() + ->select('e') + ->where('whipit()'); + $query = CRM_Utils_SQL_Select::from('foo') + ->select(array('c', 'd')) + ->where('c = @four') + ->param('four', 4) + ->merge($fragmentOutMode) + ->merge($fragmentAutoMode); + $this->assertLike('SELECT c, d, a, b, e FROM foo WHERE (c = "4") AND (a = 2) AND (whipit())', $query->toSQL()); + } + + public function testArrayGet() { + $select = CRM_Utils_SQL_Select::from("foo") + ->param('hello', 'world'); + $this->assertEquals('world', $select['hello']); } /** -- 2.25.1