scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - vendor/wikimedia/css-sanitizer/src/Parser/StringDataSource.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / vendor / wikimedia / css-sanitizer / src / Parser / StringDataSource.php
1 <?php
2 /**
3  * @file
4  * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
5  */
6
7 namespace Wikimedia\CSS\Parser;
8
9 /**
10  * Read data for the CSS parser
11  */
class StringDataSource implements DataSource {

	/** @var string Input bytes; checked for UTF-8 validity by the constructor */
	protected $string;

	/** @var int Length of $string in bytes */
	protected $len = 0;

	/** @var int Byte offset in $string of the next character to read */
	protected $pos = 0;

	/** @var string[] Characters handed back by putBackCharacter(), most recent last */
	protected $putBack = [];

	/**
	 * Store the input and reject anything that is not valid UTF-8.
	 *
	 * @param string $string Input string. Must be valid UTF-8 with no BOM.
	 * @throws \InvalidArgumentException If $string is not valid UTF-8
	 */
	public function __construct( $string ) {
		// Cached across instances: true when mb_check_encoding() itself
		// already rejects code points above U+10FFFF.
		static $newPHP;

		$this->string = (string)$string;
		$this->len = strlen( $this->string );

		// HHVM 3.4 and older ship an outdated libmbfl that wrongly accepts
		// values above U+10FFFF, so those need a separate check. (PHP 5.3
		// and older have the same issue but are no longer supported.)
		// Probe once with the encoding of U+110000.
		// @codeCoverageIgnoreStart
		if ( $newPHP === null ) {
			$newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
		}
		// @codeCoverageIgnoreEnd

		$isValid = mb_check_encoding( $this->string, 'UTF-8' );
		if ( $isValid && !$newPHP ) {
			// Only a definite "no match" passes; both a match and a PCRE
			// failure count as invalid input.
			$isValid = preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $this->string ) === 0;
		}

		if ( !$isValid ) {
			throw new \InvalidArgumentException( '$string is not valid UTF-8' );
		}
	}

	/**
	 * Return the next character, preferring characters that were put back.
	 *
	 * @return string The most recently put-back character if there is one,
	 *  otherwise the next UTF-8 character of the input, or self::EOF once
	 *  the input is exhausted
	 * @throws \UnexpectedValueException If the byte at the current position
	 *  cannot start a UTF-8 sequence
	 */
	public function readCharacter() {
		if ( $this->putBack ) {
			return array_pop( $this->putBack );
		}

		if ( $this->pos >= $this->len ) {
			return self::EOF;
		}

		// The constructor already validated the string as UTF-8, so the
		// lead byte alone tells us how many bytes the character occupies.
		$start = $this->pos;
		$lead = ord( $this->string[$start] );

		if ( $lead <= 0x7f ) {
			$width = 1;
		} elseif ( ( $lead & 0xe0 ) === 0xc0 ) {
			$width = 2;
		} elseif ( ( $lead & 0xf0 ) === 0xe0 ) {
			$width = 3;
		} elseif ( ( $lead & 0xf8 ) === 0xf0 ) {
			$width = 4;
		} else {
			// Should be unreachable: such a byte would have failed
			// validation in the constructor.
			// @codeCoverageIgnoreStart
			throw new \UnexpectedValueException(
				sprintf( 'Unexpected byte %02X in string at position %d.', $lead, $start )
			);
			// @codeCoverageIgnoreEnd
		}

		$this->pos = $start + $width;
		return substr( $this->string, $start, $width );
	}

	/**
	 * Hand a character back so the next readCharacter() call returns it.
	 *
	 * Characters come back in last-in, first-out order. Putting back
	 * self::EOF is a no-op.
	 *
	 * @param string $char Character to return on a later read
	 */
	public function putBackCharacter( $char ) {
		if ( $char === self::EOF ) {
			return;
		}

		$this->putBack[] = $char;
	}
}