Commit | Line | Data |
---|---|---|
7f254ad8 AE |
1 | <?php\r |
2 | \r | |
3 | /**\r | |
4 | * Class that handles operations involving percent-encoding in URIs.\r | |
5 | *\r | |
6 | * @warning\r | |
7 | * Be careful when reusing instances of PercentEncoder. The object\r | |
8 | * you use for normalize() SHOULD NOT be used for encode(), or\r | |
9 | * vice-versa.\r | |
10 | */\r | |
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table keyed by byte value. A key being set means that byte
     * is copied through literally by encode() and is decoded back to a
     * literal character by normalize().
     */
    protected $preserve = array();

    /**
     * Populates the lookup table with the unreserved characters
     * (digits, ASCII letters, dash, period, underscore and tilde) and
     * then with any caller-supplied extras.
     * @param $preserve String whose bytes should additionally be left
     *        unescaped, or false (the default) to add nothing.
     */
    public function __construct($preserve = false) {
        // Unreserved set, listed in the same order the table has always
        // been filled: digits, upper-case, lower-case, then - . _ ~
        $unreserved = array_merge(
            range(48, 57),
            range(65, 90),
            range(97, 122),
            array(45, 46, 95, 126)
        );
        foreach ($unreserved as $code) {
            $this->preserve[$code] = true;
        }

        // No extra characters requested.
        if ($preserve === false) {
            return;
        }

        // Register each byte of the caller's string as preserved.
        $extraCount = strlen($preserve);
        for ($pos = 0; $pos < $extraCount; $pos++) {
            $this->preserve[ord($preserve[$pos])] = true;
        }
    }

    /**
     * Replacement for urlencode: every byte is percent-encoded (with
     * upper-case hex digits) unless it is in the preserve table or is a
     * percent sign.
     * @note
     *      Percent signs are always copied through untouched, whether or
     *      not they appear in $preserve, so the input is expected to have
     *      been normalized already (i.e. every escape sequence is valid).
     * @param $string String to be encoded
     * @return Encoded string.
     */
    public function encode($string) {
        $encoded = '';
        $total = strlen($string);
        for ($pos = 0; $pos < $total; $pos++) {
            $char = $string[$pos];
            // Existing escapes and preserved bytes pass through as-is.
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                $encoded .= $char;
                continue;
            }
            // '%%' yields the literal percent sign, '%02X' the hex pair.
            $encoded .= sprintf('%%%02X', ord($char));
        }
        return $encoded;
    }

    /**
     * Repairs and canonicalizes the percent-encoding of a string:
     * malformed percent signs are escaped as %25, escapes of preserved
     * bytes are decoded, and the remaining escapes get upper-case hex.
     * @warning The preserve table (including any extras passed to the
     *          constructor) decides which escapes get decoded, which is
     *          usually not wanted for those extras. Be careful when
     *          reusing instances of PercentEncoder!
     * @param $string String to normalize
     * @return Normalized string.
     */
    public function normalize($string) {
        if ($string == '') return '';

        // Everything before the first '%' is literal text; each later
        // segment begins right after a percent sign.
        $segments = explode('%', $string);
        $result = array_shift($segments);

        foreach ($segments as $segment) {
            $hex = substr($segment, 0, 2);

            // Not followed by two hex digits: the percent sign was a
            // literal, so escape it and keep the segment untouched.
            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
                $result .= '%25' . $segment;
                continue;
            }

            $tail = substr($segment, 2);
            $code = hexdec($hex);

            // Preserved bytes are decoded; all others stay escaped with
            // their hex digits forced to upper-case.
            $result .= isset($this->preserve[$code])
                ? chr($code)
                : ('%' . strtoupper($hex));
            $result .= $tail;
        }
        return $result;
    }

}
97 | \r | |
98 | // vim: et sw=4 sts=4\r |