]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blobdiff - vendor/wikimedia/css-sanitizer/src/Parser/StringDataSource.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / vendor / wikimedia / css-sanitizer / src / Parser / StringDataSource.php
diff --git a/vendor/wikimedia/css-sanitizer/src/Parser/StringDataSource.php b/vendor/wikimedia/css-sanitizer/src/Parser/StringDataSource.php
new file mode 100644 (file)
index 0000000..378c0cf
--- /dev/null
@@ -0,0 +1,91 @@
+<?php
+/**
+ * @file
+ * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
+ */
+
+namespace Wikimedia\CSS\Parser;
+
+/**
+ * DataSource backed by an in-memory string, handing out one UTF-8
+ * character per read.
+ */
+class StringDataSource implements DataSource {
+
+       /** @var string Input being read, after the constructor's string cast */
+       protected $string;
+
+       /** @var int Length of $string in bytes */
+       protected $len = 0;
+
+       /** @var int Byte offset in $string of the next character to read */
+       protected $pos = 0;
+
+       /** @var string[] Characters given to putBackCharacter(); the most recent one is read first */
+       protected $putBack = [];
+
+       /**
+        * @param string $string Input string. Must be valid UTF-8 with no BOM.
+        * @throws \InvalidArgumentException If $string fails the UTF-8 validity checks
+        */
+       public function __construct( $string ) {
+               // Result of the one-time probe below, shared by every instance.
+               static $rejectsOutOfRange = null;
+
+               $input = (string)$string;
+               $this->string = $input;
+               $this->len = strlen( $input );
+
+               // The libmbfl bundled with HHVM 3.4 and older is outdated and
+               // wrongly accepts code points above U+10FFFF (PHP 5.3 and older,
+               // which are no longer supported, behave the same way). Probe once
+               // with the encoding of U+110000 to learn whether mb_check_encoding()
+               // can be trusted on its own.
+               // @codeCoverageIgnoreStart
+               if ( $rejectsOutOfRange === null ) {
+                       $rejectsOutOfRange = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
+               }
+               // @codeCoverageIgnoreEnd
+
+               $valid = mb_check_encoding( $input, 'UTF-8' );
+               if ( $valid && !$rejectsOutOfRange ) {
+                       // Fallback scan for byte patterns that can only occur in
+                       // sequences beyond U+10FFFF. Anything other than "no match"
+                       // is treated as invalid input.
+                       $valid = preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $input ) === 0;
+               }
+
+               if ( !$valid ) {
+                       throw new \InvalidArgumentException( '$string is not valid UTF-8' );
+               }
+       }
+
+       /**
+        * Return the next character: the most recently put-back one if any
+        * are pending, otherwise the next one from the string.
+        *
+        * @return string A single UTF-8 character, or self::EOF once the string
+        *  is exhausted and nothing is pending
+        * @throws \UnexpectedValueException If the byte at the current position
+        *  cannot start a UTF-8 sequence
+        */
+       public function readCharacter() {
+               if ( $this->putBack ) {
+                       return array_pop( $this->putBack );
+               }
+               if ( $this->pos >= $this->len ) {
+                       return self::EOF;
+               }
+
+               // The constructor already validated the whole string, so the lead
+               // byte alone tells us how many bytes this character occupies.
+               $start = $this->pos;
+               $lead = ord( $this->string[$start] );
+
+               if ( $lead <= 0x7f ) {
+                       $width = 1;
+               } elseif ( ( $lead & 0xe0 ) === 0xc0 ) {
+                       $width = 2;
+               } elseif ( ( $lead & 0xf0 ) === 0xe0 ) {
+                       $width = 3;
+               } elseif ( ( $lead & 0xf8 ) === 0xf0 ) {
+                       $width = 4;
+               } else {
+                       // Not expected to be reachable: such a byte should already
+                       // have been rejected by the constructor's validation.
+                       // @codeCoverageIgnoreStart
+                       throw new \UnexpectedValueException(
+                               sprintf( 'Unexpected byte %02X in string at position %d.', $lead, $start )
+                       );
+                       // @codeCoverageIgnoreEnd
+               }
+
+               $this->pos = $start + $width;
+               return substr( $this->string, $start, $width );
+       }
+
+       /**
+        * Queue a character so that the next readCharacter() call returns it.
+        * Passing self::EOF is a no-op.
+        *
+        * @param string $char Character to hand back
+        */
+       public function putBackCharacter( $char ) {
+               if ( $char === self::EOF ) {
+                       return;
+               }
+
+               $this->putBack[] = $char;
+       }
+}