WordPress 3.5
[autoinstalls/wordpress.git] / wp-includes / SimplePie / Decode / HTML / Entities.php
1 <?php
2 /**
3  * SimplePie
4  *
5  * A PHP-Based RSS and Atom Feed Framework.
6  * Takes the hard work out of managing a complete RSS/Atom solution.
7  *
8  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without modification, are
12  * permitted provided that the following conditions are met:
13  *
14  *      * Redistributions of source code must retain the above copyright notice, this list of
15  *        conditions and the following disclaimer.
16  *
17  *      * Redistributions in binary form must reproduce the above copyright notice, this list
18  *        of conditions and the following disclaimer in the documentation and/or other materials
19  *        provided with the distribution.
20  *
21  *      * Neither the name of the SimplePie Team nor the names of its contributors may be used
22  *        to endorse or promote products derived from this software without specific prior
23  *        written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  *
35  * @package SimplePie
36  * @version 1.3.1
37  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
38  * @author Ryan Parman
39  * @author Geoffrey Sneddon
40  * @author Ryan McCue
41  * @link http://simplepie.org/ SimplePie
42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43  */
44
45
/**
 * Decode HTML Entities
 *
 * This implements HTML5 as of revision 967 (2007-06-28)
 *
 * The decoder works on raw bytes: construct it with the input string and call
 * parse(), which replaces every recognised character reference (named,
 * decimal or hexadecimal) in place with its UTF-8 byte sequence and returns
 * the resulting string. Text that merely looks like the start of a reference
 * (for example the "&T" in "AT&T") is left untouched.
 *
 * The scanner methods (consume(), consume_range(), unconsume(), entity()) and
 * the three properties are documented as private but are declared public for
 * backwards compatibility; treat them as internal.
 *
 * @deprecated Use DOMDocument instead!
 * @package SimplePie
 */
class SimplePie_Decode_HTML_Entities
{
	/**
	 * Data to be parsed
	 *
	 * Modified in place while parsing: each decoded reference is spliced
	 * into this string.
	 *
	 * @access private
	 * @var string
	 */
	public $data = '';

	/**
	 * Currently consumed bytes
	 *
	 * Holds the bytes consumed for the reference currently being examined,
	 * starting with the '&'. Reset by parse() after every ampersand.
	 *
	 * @access private
	 * @var string
	 */
	public $consumed = '';

	/**
	 * Position of the current byte being parsed
	 *
	 * @access private
	 * @var int
	 */
	public $position = 0;

	/**
	 * Create an instance of the class with the input data
	 *
	 * @access public
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
	}

	/**
	 * Parse the input data
	 *
	 * Jumps from ampersand to ampersand; for each one the '&' is consumed and
	 * entity() decides whether a character reference follows.
	 *
	 * @access public
	 * @return string Output data
	 */
	public function parse()
	{
		while (($this->position = strpos($this->data, '&', $this->position)) !== false)
		{
			$this->consume();
			$this->entity();
			$this->consumed = '';
		}
		return $this->data;
	}

	/**
	 * Consume the next byte
	 *
	 * On success the byte is appended to $consumed and $position advances by
	 * one; at the end of the data nothing changes.
	 *
	 * @access private
	 * @return mixed The next byte, or false, if there is no more data
	 */
	public function consume()
	{
		if (isset($this->data[$this->position]))
		{
			$this->consumed .= $this->data[$this->position];
			return $this->data[$this->position++];
		}
		else
		{
			return false;
		}
	}

	/**
	 * Consume a range of characters
	 *
	 * Consumes the longest run of bytes at the current position that all
	 * occur in $chars.
	 *
	 * @access private
	 * @param string $chars Characters to consume
	 * @return mixed A series of characters that match the range, or false
	 */
	public function consume_range($chars)
	{
		if ($len = strspn($this->data, $chars, $this->position))
		{
			$data = substr($this->data, $this->position, $len);
			$this->consumed .= $data;
			$this->position += $len;
			return $data;
		}
		else
		{
			return false;
		}
	}

	/**
	 * Unconsume one byte
	 *
	 * Must only be called after a consume() that actually returned a byte.
	 *
	 * @access private
	 */
	public function unconsume()
	{
		$this->consumed = substr($this->consumed, 0, -1);
		$this->position--;
	}

	/**
	 * Decode an entity
	 *
	 * Expects the '&' to have been consumed already. Looks at the next byte
	 * and dispatches to the numeric or the named decoder. When no reference
	 * is recognised the data is left unchanged and the position is placed so
	 * that the next scan in parse() cannot skip over a following '&'.
	 *
	 * @access private
	 */
	public function entity()
	{
		// Bytes that can never start a character reference.
		static $not_a_reference = array("\x09", "\x0A", "\x0B", "\x0C", "\x20", "\x3C", "\x26");

		$byte = $this->consume();

		// Strict comparison: a loose "case false" would also match the byte "0".
		if ($byte === false)
		{
			return;
		}

		if (in_array($byte, $not_a_reference, true))
		{
			// Give the byte back. This matters when it is another '&':
			// otherwise the reference it starts ("&&amp;") would be skipped.
			$this->unconsume();
		}
		elseif ($byte === "\x23")
		{
			$this->numeric_entity();
		}
		else
		{
			$this->named_entity();
		}
	}

	/**
	 * Decode a numeric character reference
	 *
	 * Called with "&#" consumed. Accepts decimal ("&#8212;") and hexadecimal
	 * ("&#x2014;") forms; the trailing semicolon is optional. Code points in
	 * the C1 range (and 0x0D) are remapped through a Windows-1252 table.
	 * Zero and values above U+10FFFF decode to U+FFFD.
	 * If no digits follow, nothing is replaced.
	 *
	 * @access private
	 */
	private function numeric_entity()
	{
		static $windows_1252_specials = array(
			0x0D => "\x0A",
			0x80 => "\xE2\x82\xAC",
			0x81 => "\xEF\xBF\xBD",
			0x82 => "\xE2\x80\x9A",
			0x83 => "\xC6\x92",
			0x84 => "\xE2\x80\x9E",
			0x85 => "\xE2\x80\xA6",
			0x86 => "\xE2\x80\xA0",
			0x87 => "\xE2\x80\xA1",
			0x88 => "\xCB\x86",
			0x89 => "\xE2\x80\xB0",
			0x8A => "\xC5\xA0",
			0x8B => "\xE2\x80\xB9",
			0x8C => "\xC5\x92",
			0x8D => "\xEF\xBF\xBD",
			0x8E => "\xC5\xBD",
			0x8F => "\xEF\xBF\xBD",
			0x90 => "\xEF\xBF\xBD",
			0x91 => "\xE2\x80\x98",
			0x92 => "\xE2\x80\x99",
			0x93 => "\xE2\x80\x9C",
			0x94 => "\xE2\x80\x9D",
			0x95 => "\xE2\x80\xA2",
			0x96 => "\xE2\x80\x93",
			0x97 => "\xE2\x80\x94",
			0x98 => "\xCB\x9C",
			0x99 => "\xE2\x84\xA2",
			0x9A => "\xC5\xA1",
			0x9B => "\xE2\x80\xBA",
			0x9C => "\xC5\x93",
			0x9D => "\xEF\xBF\xBD",
			0x9E => "\xC5\xBE",
			0x9F => "\xC5\xB8"
		);

		// U+FFFD REPLACEMENT CHARACTER, used for references to invalid code points.
		$replacement_character = "\xEF\xBF\xBD";

		$marker = $this->consume();
		if ($marker === "\x78" || $marker === "\x58")
		{
			$range = '0123456789ABCDEFabcdef';
			$hex = true;
		}
		else
		{
			$range = '0123456789';
			$hex = false;
			// Only give back a byte that was really consumed; at the end of
			// the data there is nothing to unconsume.
			if ($marker !== false)
			{
				$this->unconsume();
			}
		}

		$digits = $this->consume_range($range);

		// Compare against false explicitly: the digit string "0" is falsy.
		if ($digits === false)
		{
			return;
		}

		// hexdec() returns a float once the value no longer fits an integer,
		// so the range check has to come before the value is used as an index.
		$codepoint = $hex ? hexdec($digits) : intval($digits);

		if ($codepoint < 1 || $codepoint > 0x10FFFF)
		{
			$replacement = $replacement_character;
		}
		elseif (isset($windows_1252_specials[$codepoint]))
		{
			$replacement = $windows_1252_specials[$codepoint];
		}
		else
		{
			$replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
			if ($replacement === false)
			{
				$replacement = $replacement_character;
			}
		}

		// The terminating semicolon is optional; anything else is not part
		// of the reference and is handed back.
		$terminator = $this->consume();
		if ($terminator !== ';' && $terminator !== false)
		{
			$this->unconsume();
		}

		$consumed_length = strlen($this->consumed);
		$this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
		$this->position += strlen($replacement) - $consumed_length;
	}

	/**
	 * Decode a named character reference
	 *
	 * Called with '&' and the first byte of the name consumed. Consumes up to
	 * nine further bytes and remembers the longest prefix that is a known
	 * entity name (some names are valid without the trailing semicolon). On
	 * a match the reference is replaced and scanning resumes right after the
	 * replacement; otherwise the position is rewound to just after the '&'
	 * so that a reference inside the look-ahead window is not skipped.
	 *
	 * @access private
	 */
	private function named_entity()
	{
		static $entities = array(
			'Aacute' => "\xC3\x81",
			'aacute' => "\xC3\xA1",
			'Aacute;' => "\xC3\x81",
			'aacute;' => "\xC3\xA1",
			'Acirc' => "\xC3\x82",
			'acirc' => "\xC3\xA2",
			'Acirc;' => "\xC3\x82",
			'acirc;' => "\xC3\xA2",
			'acute' => "\xC2\xB4",
			'acute;' => "\xC2\xB4",
			'AElig' => "\xC3\x86",
			'aelig' => "\xC3\xA6",
			'AElig;' => "\xC3\x86",
			'aelig;' => "\xC3\xA6",
			'Agrave' => "\xC3\x80",
			'agrave' => "\xC3\xA0",
			'Agrave;' => "\xC3\x80",
			'agrave;' => "\xC3\xA0",
			'alefsym;' => "\xE2\x84\xB5",
			'Alpha;' => "\xCE\x91",
			'alpha;' => "\xCE\xB1",
			'AMP' => "\x26",
			'amp' => "\x26",
			'AMP;' => "\x26",
			'amp;' => "\x26",
			'and;' => "\xE2\x88\xA7",
			'ang;' => "\xE2\x88\xA0",
			'apos;' => "\x27",
			'Aring' => "\xC3\x85",
			'aring' => "\xC3\xA5",
			'Aring;' => "\xC3\x85",
			'aring;' => "\xC3\xA5",
			'asymp;' => "\xE2\x89\x88",
			'Atilde' => "\xC3\x83",
			'atilde' => "\xC3\xA3",
			'Atilde;' => "\xC3\x83",
			'atilde;' => "\xC3\xA3",
			'Auml' => "\xC3\x84",
			'auml' => "\xC3\xA4",
			'Auml;' => "\xC3\x84",
			'auml;' => "\xC3\xA4",
			'bdquo;' => "\xE2\x80\x9E",
			'Beta;' => "\xCE\x92",
			'beta;' => "\xCE\xB2",
			'brvbar' => "\xC2\xA6",
			'brvbar;' => "\xC2\xA6",
			'bull;' => "\xE2\x80\xA2",
			'cap;' => "\xE2\x88\xA9",
			'Ccedil' => "\xC3\x87",
			'ccedil' => "\xC3\xA7",
			'Ccedil;' => "\xC3\x87",
			'ccedil;' => "\xC3\xA7",
			'cedil' => "\xC2\xB8",
			'cedil;' => "\xC2\xB8",
			'cent' => "\xC2\xA2",
			'cent;' => "\xC2\xA2",
			'Chi;' => "\xCE\xA7",
			'chi;' => "\xCF\x87",
			'circ;' => "\xCB\x86",
			'clubs;' => "\xE2\x99\xA3",
			'cong;' => "\xE2\x89\x85",
			'COPY' => "\xC2\xA9",
			'copy' => "\xC2\xA9",
			'COPY;' => "\xC2\xA9",
			'copy;' => "\xC2\xA9",
			'crarr;' => "\xE2\x86\xB5",
			'cup;' => "\xE2\x88\xAA",
			'curren' => "\xC2\xA4",
			'curren;' => "\xC2\xA4",
			'Dagger;' => "\xE2\x80\xA1",
			'dagger;' => "\xE2\x80\xA0",
			'dArr;' => "\xE2\x87\x93",
			'darr;' => "\xE2\x86\x93",
			'deg' => "\xC2\xB0",
			'deg;' => "\xC2\xB0",
			'Delta;' => "\xCE\x94",
			'delta;' => "\xCE\xB4",
			'diams;' => "\xE2\x99\xA6",
			'divide' => "\xC3\xB7",
			'divide;' => "\xC3\xB7",
			'Eacute' => "\xC3\x89",
			'eacute' => "\xC3\xA9",
			'Eacute;' => "\xC3\x89",
			'eacute;' => "\xC3\xA9",
			'Ecirc' => "\xC3\x8A",
			'ecirc' => "\xC3\xAA",
			'Ecirc;' => "\xC3\x8A",
			'ecirc;' => "\xC3\xAA",
			'Egrave' => "\xC3\x88",
			'egrave' => "\xC3\xA8",
			'Egrave;' => "\xC3\x88",
			'egrave;' => "\xC3\xA8",
			'empty;' => "\xE2\x88\x85",
			'emsp;' => "\xE2\x80\x83",
			'ensp;' => "\xE2\x80\x82",
			'Epsilon;' => "\xCE\x95",
			'epsilon;' => "\xCE\xB5",
			'equiv;' => "\xE2\x89\xA1",
			'Eta;' => "\xCE\x97",
			'eta;' => "\xCE\xB7",
			'ETH' => "\xC3\x90",
			'eth' => "\xC3\xB0",
			'ETH;' => "\xC3\x90",
			'eth;' => "\xC3\xB0",
			'Euml' => "\xC3\x8B",
			'euml' => "\xC3\xAB",
			'Euml;' => "\xC3\x8B",
			'euml;' => "\xC3\xAB",
			'euro;' => "\xE2\x82\xAC",
			'exist;' => "\xE2\x88\x83",
			'fnof;' => "\xC6\x92",
			'forall;' => "\xE2\x88\x80",
			'frac12' => "\xC2\xBD",
			'frac12;' => "\xC2\xBD",
			'frac14' => "\xC2\xBC",
			'frac14;' => "\xC2\xBC",
			'frac34' => "\xC2\xBE",
			'frac34;' => "\xC2\xBE",
			'frasl;' => "\xE2\x81\x84",
			'Gamma;' => "\xCE\x93",
			'gamma;' => "\xCE\xB3",
			'ge;' => "\xE2\x89\xA5",
			'GT' => "\x3E",
			'gt' => "\x3E",
			'GT;' => "\x3E",
			'gt;' => "\x3E",
			'hArr;' => "\xE2\x87\x94",
			'harr;' => "\xE2\x86\x94",
			'hearts;' => "\xE2\x99\xA5",
			'hellip;' => "\xE2\x80\xA6",
			'Iacute' => "\xC3\x8D",
			'iacute' => "\xC3\xAD",
			'Iacute;' => "\xC3\x8D",
			'iacute;' => "\xC3\xAD",
			'Icirc' => "\xC3\x8E",
			'icirc' => "\xC3\xAE",
			'Icirc;' => "\xC3\x8E",
			'icirc;' => "\xC3\xAE",
			'iexcl' => "\xC2\xA1",
			'iexcl;' => "\xC2\xA1",
			'Igrave' => "\xC3\x8C",
			'igrave' => "\xC3\xAC",
			'Igrave;' => "\xC3\x8C",
			'igrave;' => "\xC3\xAC",
			'image;' => "\xE2\x84\x91",
			'infin;' => "\xE2\x88\x9E",
			'int;' => "\xE2\x88\xAB",
			'Iota;' => "\xCE\x99",
			'iota;' => "\xCE\xB9",
			'iquest' => "\xC2\xBF",
			'iquest;' => "\xC2\xBF",
			'isin;' => "\xE2\x88\x88",
			'Iuml' => "\xC3\x8F",
			'iuml' => "\xC3\xAF",
			'Iuml;' => "\xC3\x8F",
			'iuml;' => "\xC3\xAF",
			'Kappa;' => "\xCE\x9A",
			'kappa;' => "\xCE\xBA",
			'Lambda;' => "\xCE\x9B",
			'lambda;' => "\xCE\xBB",
			'lang;' => "\xE3\x80\x88",
			'laquo' => "\xC2\xAB",
			'laquo;' => "\xC2\xAB",
			'lArr;' => "\xE2\x87\x90",
			'larr;' => "\xE2\x86\x90",
			'lceil;' => "\xE2\x8C\x88",
			'ldquo;' => "\xE2\x80\x9C",
			'le;' => "\xE2\x89\xA4",
			'lfloor;' => "\xE2\x8C\x8A",
			'lowast;' => "\xE2\x88\x97",
			'loz;' => "\xE2\x97\x8A",
			'lrm;' => "\xE2\x80\x8E",
			'lsaquo;' => "\xE2\x80\xB9",
			'lsquo;' => "\xE2\x80\x98",
			'LT' => "\x3C",
			'lt' => "\x3C",
			'LT;' => "\x3C",
			'lt;' => "\x3C",
			'macr' => "\xC2\xAF",
			'macr;' => "\xC2\xAF",
			'mdash;' => "\xE2\x80\x94",
			'micro' => "\xC2\xB5",
			'micro;' => "\xC2\xB5",
			'middot' => "\xC2\xB7",
			'middot;' => "\xC2\xB7",
			'minus;' => "\xE2\x88\x92",
			'Mu;' => "\xCE\x9C",
			'mu;' => "\xCE\xBC",
			'nabla;' => "\xE2\x88\x87",
			'nbsp' => "\xC2\xA0",
			'nbsp;' => "\xC2\xA0",
			'ndash;' => "\xE2\x80\x93",
			'ne;' => "\xE2\x89\xA0",
			'ni;' => "\xE2\x88\x8B",
			'not' => "\xC2\xAC",
			'not;' => "\xC2\xAC",
			'notin;' => "\xE2\x88\x89",
			'nsub;' => "\xE2\x8A\x84",
			'Ntilde' => "\xC3\x91",
			'ntilde' => "\xC3\xB1",
			'Ntilde;' => "\xC3\x91",
			'ntilde;' => "\xC3\xB1",
			'Nu;' => "\xCE\x9D",
			'nu;' => "\xCE\xBD",
			'Oacute' => "\xC3\x93",
			'oacute' => "\xC3\xB3",
			'Oacute;' => "\xC3\x93",
			'oacute;' => "\xC3\xB3",
			'Ocirc' => "\xC3\x94",
			'ocirc' => "\xC3\xB4",
			'Ocirc;' => "\xC3\x94",
			'ocirc;' => "\xC3\xB4",
			'OElig;' => "\xC5\x92",
			'oelig;' => "\xC5\x93",
			'Ograve' => "\xC3\x92",
			'ograve' => "\xC3\xB2",
			'Ograve;' => "\xC3\x92",
			'ograve;' => "\xC3\xB2",
			'oline;' => "\xE2\x80\xBE",
			'Omega;' => "\xCE\xA9",
			'omega;' => "\xCF\x89",
			'Omicron;' => "\xCE\x9F",
			'omicron;' => "\xCE\xBF",
			'oplus;' => "\xE2\x8A\x95",
			'or;' => "\xE2\x88\xA8",
			'ordf' => "\xC2\xAA",
			'ordf;' => "\xC2\xAA",
			'ordm' => "\xC2\xBA",
			'ordm;' => "\xC2\xBA",
			'Oslash' => "\xC3\x98",
			'oslash' => "\xC3\xB8",
			'Oslash;' => "\xC3\x98",
			'oslash;' => "\xC3\xB8",
			'Otilde' => "\xC3\x95",
			'otilde' => "\xC3\xB5",
			'Otilde;' => "\xC3\x95",
			'otilde;' => "\xC3\xB5",
			'otimes;' => "\xE2\x8A\x97",
			'Ouml' => "\xC3\x96",
			'ouml' => "\xC3\xB6",
			'Ouml;' => "\xC3\x96",
			'ouml;' => "\xC3\xB6",
			'para' => "\xC2\xB6",
			'para;' => "\xC2\xB6",
			'part;' => "\xE2\x88\x82",
			'permil;' => "\xE2\x80\xB0",
			'perp;' => "\xE2\x8A\xA5",
			'Phi;' => "\xCE\xA6",
			'phi;' => "\xCF\x86",
			'Pi;' => "\xCE\xA0",
			'pi;' => "\xCF\x80",
			'piv;' => "\xCF\x96",
			'plusmn' => "\xC2\xB1",
			'plusmn;' => "\xC2\xB1",
			'pound' => "\xC2\xA3",
			'pound;' => "\xC2\xA3",
			'Prime;' => "\xE2\x80\xB3",
			'prime;' => "\xE2\x80\xB2",
			'prod;' => "\xE2\x88\x8F",
			'prop;' => "\xE2\x88\x9D",
			'Psi;' => "\xCE\xA8",
			'psi;' => "\xCF\x88",
			'QUOT' => "\x22",
			'quot' => "\x22",
			'QUOT;' => "\x22",
			'quot;' => "\x22",
			'radic;' => "\xE2\x88\x9A",
			'rang;' => "\xE3\x80\x89",
			'raquo' => "\xC2\xBB",
			'raquo;' => "\xC2\xBB",
			'rArr;' => "\xE2\x87\x92",
			'rarr;' => "\xE2\x86\x92",
			'rceil;' => "\xE2\x8C\x89",
			'rdquo;' => "\xE2\x80\x9D",
			'real;' => "\xE2\x84\x9C",
			'REG' => "\xC2\xAE",
			'reg' => "\xC2\xAE",
			'REG;' => "\xC2\xAE",
			'reg;' => "\xC2\xAE",
			'rfloor;' => "\xE2\x8C\x8B",
			'Rho;' => "\xCE\xA1",
			'rho;' => "\xCF\x81",
			'rlm;' => "\xE2\x80\x8F",
			'rsaquo;' => "\xE2\x80\xBA",
			'rsquo;' => "\xE2\x80\x99",
			'sbquo;' => "\xE2\x80\x9A",
			'Scaron;' => "\xC5\xA0",
			'scaron;' => "\xC5\xA1",
			'sdot;' => "\xE2\x8B\x85",
			'sect' => "\xC2\xA7",
			'sect;' => "\xC2\xA7",
			'shy' => "\xC2\xAD",
			'shy;' => "\xC2\xAD",
			'Sigma;' => "\xCE\xA3",
			'sigma;' => "\xCF\x83",
			'sigmaf;' => "\xCF\x82",
			'sim;' => "\xE2\x88\xBC",
			'spades;' => "\xE2\x99\xA0",
			'sub;' => "\xE2\x8A\x82",
			'sube;' => "\xE2\x8A\x86",
			'sum;' => "\xE2\x88\x91",
			'sup;' => "\xE2\x8A\x83",
			'sup1' => "\xC2\xB9",
			'sup1;' => "\xC2\xB9",
			'sup2' => "\xC2\xB2",
			'sup2;' => "\xC2\xB2",
			'sup3' => "\xC2\xB3",
			'sup3;' => "\xC2\xB3",
			'supe;' => "\xE2\x8A\x87",
			'szlig' => "\xC3\x9F",
			'szlig;' => "\xC3\x9F",
			'Tau;' => "\xCE\xA4",
			'tau;' => "\xCF\x84",
			'there4;' => "\xE2\x88\xB4",
			'Theta;' => "\xCE\x98",
			'theta;' => "\xCE\xB8",
			'thetasym;' => "\xCF\x91",
			'thinsp;' => "\xE2\x80\x89",
			'THORN' => "\xC3\x9E",
			'thorn' => "\xC3\xBE",
			'THORN;' => "\xC3\x9E",
			'thorn;' => "\xC3\xBE",
			'tilde;' => "\xCB\x9C",
			'times' => "\xC3\x97",
			'times;' => "\xC3\x97",
			'TRADE;' => "\xE2\x84\xA2",
			'trade;' => "\xE2\x84\xA2",
			'Uacute' => "\xC3\x9A",
			'uacute' => "\xC3\xBA",
			'Uacute;' => "\xC3\x9A",
			'uacute;' => "\xC3\xBA",
			'uArr;' => "\xE2\x87\x91",
			'uarr;' => "\xE2\x86\x91",
			'Ucirc' => "\xC3\x9B",
			'ucirc' => "\xC3\xBB",
			'Ucirc;' => "\xC3\x9B",
			'ucirc;' => "\xC3\xBB",
			'Ugrave' => "\xC3\x99",
			'ugrave' => "\xC3\xB9",
			'Ugrave;' => "\xC3\x99",
			'ugrave;' => "\xC3\xB9",
			'uml' => "\xC2\xA8",
			'uml;' => "\xC2\xA8",
			'upsih;' => "\xCF\x92",
			'Upsilon;' => "\xCE\xA5",
			'upsilon;' => "\xCF\x85",
			'Uuml' => "\xC3\x9C",
			'uuml' => "\xC3\xBC",
			'Uuml;' => "\xC3\x9C",
			'uuml;' => "\xC3\xBC",
			'weierp;' => "\xE2\x84\x98",
			'Xi;' => "\xCE\x9E",
			'xi;' => "\xCE\xBE",
			'Yacute' => "\xC3\x9D",
			'yacute' => "\xC3\xBD",
			'Yacute;' => "\xC3\x9D",
			'yacute;' => "\xC3\xBD",
			'yen' => "\xC2\xA5",
			'yen;' => "\xC2\xA5",
			'yuml' => "\xC3\xBF",
			'Yuml;' => "\xC5\xB8",
			'yuml;' => "\xC3\xBF",
			'Zeta;' => "\xCE\x96",
			'zeta;' => "\xCE\xB6",
			'zwj;' => "\xE2\x80\x8D",
			'zwnj;' => "\xE2\x80\x8C"
		);

		// One byte of the name is already consumed; the longest key
		// ("thetasym;") needs nine bytes, so nine more bytes of look-ahead
		// are more than enough. Keep the longest candidate that is a known name.
		$match = null;
		for ($i = 0; $i < 9 && $this->consume() !== false; $i++)
		{
			$candidate = substr($this->consumed, 1);
			if (isset($entities[$candidate]))
			{
				$match = $candidate;
			}
		}

		// Offset of the '&' that started this reference.
		$start = $this->position - strlen($this->consumed);

		if ($match === null)
		{
			// No such entity: resume scanning right after the '&' so that a
			// reference inside the look-ahead window is still decoded.
			$this->position = $start + 1;
			return;
		}

		// Replace '&' plus the matched name; bytes consumed beyond the match
		// stay in the data and are scanned again.
		$this->data = substr_replace($this->data, $entities[$match], $start, strlen($match) + 1);
		$this->position = $start + strlen($entities[$match]);
	}
}
617