| 1 | <?php |
| 2 | namespace Civi\Angular; |
| 3 | |
/**
 * Round-trips HTML snippets through phpQuery and normalizes the markup.
 *
 * The class parses a snippet, re-encodes it, and applies a small set of
 * attribute-level normalizations so that the original and the re-encoded
 * markup can be compared.
 */
class Coder {

  /**
   * Determine whether an HTML snippet remains consistent through a
   * decode/encode loop.
   *
   * Note: Variations in whitespace are permitted. Forward slashes are
   * ignored as well (presumably so that "<br>" and "<br/>" compare equal;
   * verify against callers).
   *
   * @param string $html
   * @return bool
   *   TRUE if the normalized original and the normalized re-encoded markup
   *   have the same signature; FALSE otherwise, or if recoding throws.
   */
  public function checkConsistentHtml($html) {
    try {
      $recodedHtml = $this->recode($html);
    }
    catch (\Exception $e) {
      // A snippet that cannot be recoded at all is treated as inconsistent.
      return FALSE;
    }

    // The "signature" is the markup with all whitespace and slashes removed.
    // The original goes through cleanup() here; the recoded markup already
    // went through cleanup() inside encode().
    $htmlSig = preg_replace('/[ \t\r\n\/]+/', '', $this->cleanup($html));
    $docSig = preg_replace('/[ \t\r\n\/]+/', '', $recodedHtml);

    // Besides matching signatures, the emptiness of the input must agree with
    // the emptiness of its signature: input consisting only of ignorable
    // characters is reported as inconsistent.
    return $htmlSig === $docSig && empty($html) == empty($htmlSig);
  }

  /**
   * Parse an HTML snippet and re-encode it as HTML.
   *
   * This is useful for detecting cases where the parser or encoder
   * have quirks/bugs.
   *
   * @param string $html
   * @return string
   */
  public function recode($html) {
    // The interpolation casts $html to a string before handing it to phpQuery.
    $doc = \phpQuery::newDocument("$html", 'text/html');
    return $this->encode($doc);
  }

  /**
   * Encode a phpQueryObject as HTML.
   *
   * Note: this sets formatOutput on the underlying document as a side effect.
   *
   * @param \phpQueryObject $doc
   * @return string
   *   HTML
   */
  public function encode($doc) {
    $doc->document->formatOutput = TRUE;
    return $this->cleanup($doc->markupOuter());
  }

  /**
   * Normalize every quoted attribute (name='value' or name="value") found
   * in a piece of markup.
   *
   * Single-quoted attributes are processed first, then double-quoted ones;
   * each match is rewritten by cleanupAttribute().
   *
   * @param string $html
   * @return string
   */
  protected function cleanup($html) {
    $html = preg_replace_callback("/([\\-a-zA-Z0-9]+)=(')([^']*)(')/", [$this, 'cleanupAttribute'], $html);
    $html = preg_replace_callback('/([\-a-zA-Z0-9]+)=(")([^"]*)(")/', [$this, 'cleanupAttribute'], $html);
    return $html;
  }

  /**
   * Normalize a single attribute matched by cleanup().
   *
   * - "href": if the value contains both "%7B%7B" and "%7D%7D" (URL-encoded
   *   "{{" and "}}"), the whole value is URL-decoded; otherwise it is left
   *   untouched.
   * - Any other attribute: HTML entities in the value are decoded.
   *
   * @param array $matches
   *   Regex match: [full match, attribute name, left quote, value, right quote].
   * @return string
   *   The rewritten attribute, using the original quote characters.
   */
  protected function cleanupAttribute($matches) {
    list (, $attr, $lquote, $value, $rquote) = $matches;

    switch ($attr) {
      case 'href':
        if (strpos($value, '%7B%7B') !== FALSE && strpos($value, '%7D%7D') !== FALSE) {
          $value = urldecode($value);
        }
        break;

      default:
        // Pin the flags explicitly. The default of html_entity_decode()
        // changed in PHP 8.1 (ENT_COMPAT -> ENT_QUOTES | ENT_SUBSTITUTE), which
        // would make the result depend on the PHP version; in particular
        // "&#039;" would start being decoded. ENT_COMPAT | ENT_HTML401 is the
        // pre-8.1 default, so the output is now identical on every version.
        $value = html_entity_decode($value, ENT_COMPAT | ENT_HTML401);
        break;
    }

    return "$attr=$lquote$value$rquote";
  }

}