4 * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
7 namespace Wikimedia\CSS\Parser;
10 * Read data for the CSS parser
12 class StringDataSource implements DataSource {
18 protected $len = 0, $pos = 0;
21 protected $putBack = [];
24 * @param string $string Input string. Must be valid UTF-8 with no BOM.
 * @throws \InvalidArgumentException If $string is not valid UTF-8.
26 public function __construct( $string ) {
// Stores the input and rejects it with \InvalidArgumentException when it is
// not valid UTF-8.
// NOTE(review): this listing omits some original lines (see the gaps in the
// embedded line numbers); the declaration of $newPHP is not visible here --
// presumably a cached flag that survives between calls; confirm.

// Coerce the argument to a string and cache its length. strlen() counts
// bytes, not characters, so $len is a byte length.
29 $this->string = (string)$string;
30 $this->len = strlen( $this->string );
32 // HHVM 3.4 and older come with an outdated version of libmbfl that
33 // incorrectly allows values above U+10FFFF, so we have to check
34 // for them separately. (This issue also exists in PHP 5.3 and
35 // older, which are no longer supported.)
36 // @codeCoverageIgnoreStart
// Run the probe only while $newPHP is still null. The probe bytes
// F4 90 80 80 would encode U+110000, one past the Unicode maximum, so a
// correct mb_check_encoding() rejects them and $newPHP becomes true.
37 if ( $newPHP === null ) {
38 $newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
40 // @codeCoverageIgnoreEnd
// Reject the input when mb_check_encoding() fails. Because && binds tighter
// than ||, the regex fallback runs only when the probe showed the library is
// too lenient: it looks for F4 followed by 90-BF (a code point above
// U+10FFFF) or any byte F5-FF (never valid in UTF-8).
42 if ( !mb_check_encoding( $this->string, 'UTF-8' ) ||
43 !$newPHP && preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $this->string ) !== 0
45 throw new \InvalidArgumentException( '$string is not valid UTF-8' );
49 public function readCharacter() {
// Returns the next character: a previously put-back one if any is pending,
// otherwise the UTF-8 character found at the current byte position.
// NOTE(review): this listing omits some original lines (see the gaps in the
// embedded line numbers). The assignment of $p, the statements that advance
// $this->pos, the single-byte branch and the end-of-input return value are
// not visible here.

// Pending put-back characters take priority; array_pop() hands back the most
// recently stored one first.
50 if ( $this->putBack ) {
51 return array_pop( $this->putBack );
// Position at or beyond the byte length means no input is left.
// NOTE(review): the value returned here is not shown -- presumably
// self::EOF; confirm.
54 if ( $this->pos >= $this->len ) {
58 // We already checked that the string is valid UTF-8 in the
59 // constructor, so we can do a quick binary "get next character" here.
// $c is the single byte at offset $p and $cc is its numeric value, used
// below to classify the lead byte.
// NOTE(review): $p is presumably a copy of $this->pos; confirm.
61 $c = $this->string[$p];
62 $cc = ord( $this->string[$p] );
// Lead byte 110xxxxx: the character occupies two bytes.
66 } elseif ( ( $cc & 0xe0 ) === 0xc0 ) {
68 return substr( $this->string, $p, 2 );
// Lead byte 1110xxxx: the character occupies three bytes.
69 } elseif ( ( $cc & 0xf0 ) === 0xe0 ) {
71 return substr( $this->string, $p, 3 );
// Lead byte 11110xxx: the character occupies four bytes.
72 } elseif ( ( $cc & 0xf8 ) === 0xf0 ) {
74 return substr( $this->string, $p, 4 );
76 // WTF? Should never get here because it should have failed
77 // validation in the constructor.
78 // @codeCoverageIgnoreStart
// The message reports the offending byte as two hex digits together with
// the current value of $this->pos.
79 throw new \UnexpectedValueException(
80 sprintf( 'Unexpected byte %02X in string at position %d.', $cc, $this->pos )
82 // @codeCoverageIgnoreEnd
86 public function putBackCharacter( $char ) {
// Appends $char to the end of the $putBack array. self::EOF is skipped, so
// the end-of-input marker is never stored.
// NOTE(review): the closing braces of this method are on lines not included
// in this listing.
87 if ( $char !== self::EOF ) {
88 $this->putBack[] = $char;