4 * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
7 namespace Wikimedia\CSS\Grammar;
9 use Wikimedia\CSS\Objects\ComponentValueList;
10 use Wikimedia\CSS\Objects\Token;
11 use Wikimedia\CSS\Objects\SimpleBlock;
12 use Wikimedia\CSS\Objects\CSSFunction;
15 * Base class for grammar matchers.
17 * The [CSS Syntax Level 3][SYN3] and [Values Level 3][VAL3] specifications use
18 * a mostly context-free grammar to define what things like selectors and
19 * property values look like. The Matcher classes allow for constructing an
20 * object that will determine whether a ComponentValueList actually matches that grammar.
23 * [SYN3]: https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
24 * [VAL3]: https://www.w3.org/TR/2016/CR-css-values-3-20160929/
26 abstract class Matcher {
/**
 * @var string|null Name to set on the Match objects built by this matcher.
 *  Defaults to null; capture() returns a copy with this set.
 */
protected $captureName = null;
/**
 * @var array Default options for self::match()
 *  - skip-whitespace: (bool) Allow whitespace in between any two tokens
 *  - nonterminal: (bool) Don't require that the whole of $values be matched
 *  - mark-significance: (bool) On a successful match, replace T_WHITESPACE
 *    tokens as necessary to indicate significant whitespace.
 */
protected $defaultOptions = [
	'skip-whitespace' => true,
	'nonterminal' => false,
	'mark-significance' => false,
];
/**
 * Create an instance of the class this method is called on.
 *
 * Every argument is forwarded, in order, to the constructor.
 *
 * @param mixed ... See static::__construct()
 * @return static
 */
public static function create() {
	// @todo Once we drop support for PHP 5.5, just do this:
	//  public static function create( ...$args ) {
	//      return new static( ...$args );
	//  }

	$ctorArgs = func_get_args();
	$argCount = count( $ctorArgs );

	// Up to four arguments are passed with a plain `new`, avoiding reflection.
	if ( $argCount === 0 ) {
		return new static();
	}
	if ( $argCount === 1 ) {
		return new static( $ctorArgs[0] );
	}
	if ( $argCount === 2 ) {
		return new static( $ctorArgs[0], $ctorArgs[1] );
	}
	if ( $argCount === 3 ) {
		return new static( $ctorArgs[0], $ctorArgs[1], $ctorArgs[2] );
	}
	if ( $argCount === 4 ) {
		return new static( $ctorArgs[0], $ctorArgs[1], $ctorArgs[2], $ctorArgs[3] );
	}

	// Slow, but all the existing Matchers have a max of 4 args.
	$reflector = new \ReflectionClass( static::class );
	return $reflector->newInstanceArgs( $ctorArgs );
}
/**
 * Return a copy of this matcher that will capture its matches
 *
 * A "capturing" Matcher produces Matches whose Match::getName() method
 * returns a value. Those Matches may be retrieved from the top-level Match
 * with Match::getCapturedMatches().
 *
 * The concept is similar to capturing groups in PCRE and other regex
 * languages.
 *
 * @param string|null $captureName Name to apply to captured Match objects
 * @return static A clone of this matcher; $this itself is left unchanged
 */
public function capture( $captureName ) {
	$capturing = clone $this;
	$capturing->captureName = $captureName;

	return $capturing;
}
/**
 * Match against a list of ComponentValues
 *
 * Candidates from generateMatches() are examined in order, and the first
 * one that either consumes all of $values or is allowed not to (the
 * 'nonterminal' option) is returned.
 *
 * @param ComponentValueList $values
 * @param array $options Matching options, see self::$defaultOptions.
 *  Keys not supplied here are filled in from getDefaultOptions().
 * @return Match|null The accepted Match, or null if no candidate qualified
 */
public function match( ComponentValueList $values, array $options = [] ) {
	$options += $this->getDefaultOptions();

	$first = $this->next( $values, -1, $options );
	$total = count( $values );

	foreach ( $this->generateMatches( $values, $first, $options ) as $candidate ) {
		// Reject a partial match unless the caller allowed nonterminal matching.
		if ( $candidate->getNext() !== $total && !$options['nonterminal'] ) {
			continue;
		}

		if ( $options['mark-significance'] ) {
			$significantTokens = self::collectSignificantWhitespace( $candidate );
			self::markSignificantWhitespace(
				$values, $candidate, $significantTokens, $candidate->getNext()
			);
		}

		return $candidate;
	}

	return null;
}
/**
 * Collect any 'significantWhitespace' matches
 *
 * Walks $match and, recursively, all of its captured matches, gathering
 * the values of every Match named 'significantWhitespace'.
 *
 * @param Match $match
 * @param Token[]|null &$ret Accumulator shared across the recursion
 * @return Token[]
 */
private static function collectSignificantWhitespace( Match $match, &$ret = [] ) {
	$isSignificant = $match->getName() === 'significantWhitespace';
	if ( $isSignificant ) {
		$ret = array_merge( $ret, $match->getValues() );
	}

	$children = $match->getCapturedMatches();
	foreach ( $children as $child ) {
		self::collectSignificantWhitespace( $child, $ret );
	}

	return $ret;
}
/**
 * Mark whitespace as significant or not
 *
 * Every T_WHITESPACE token in positions 0 to $end - 1 of $list whose
 * significance flag disagrees with its presence in $significantWS is
 * replaced in $list by a copy with the right flag, and $match is told
 * about the replacement. Functions and blocks are processed recursively
 * over their whole contents.
 *
 * @param ComponentValueList $list
 * @param Match $match
 * @param Token[] $significantWS
 * @param int $end Position in $list at which to stop (exclusive)
 */
private static function markSignificantWhitespace( $list, $match, $significantWS, $end ) {
	for ( $pos = 0; $pos < $end; $pos++ ) {
		$value = $list[$pos];

		if ( $value instanceof Token && $value->type() === Token::T_WHITESPACE ) {
			// Identity comparison: only these exact token objects count.
			$wanted = in_array( $value, $significantWS, true );
			if ( $wanted === $value->significant() ) {
				continue;
			}

			$list[$pos] = $value->copyWithSignificance( $wanted );
			$match->fixWhitespace( $value, $list[$pos] );
			continue;
		}

		if ( $value instanceof CSSFunction || $value instanceof SimpleBlock ) {
			$inner = $value->getValue();
			self::markSignificantWhitespace( $inner, $match, $significantWS, count( $inner ) );
		}
	}
}
/**
 * Fetch the default options for this Matcher
 *
 * @return array The current defaults; see self::$defaultOptions for the keys
 */
public function getDefaultOptions() {
	return $this->defaultOptions;
}
/**
 * Set the default options for this Matcher
 *
 * Only the keys present in $options are overridden; every other default
 * keeps its current value.
 *
 * @param array $options See self::$defaultOptions
 * @return static $this
 */
public function setDefaultOptions( array $options ) {
	// Array union: on a key collision the left-hand operand ($options) wins.
	$merged = $options + $this->defaultOptions;
	$this->defaultOptions = $merged;

	return $this;
}
/**
 * Find the next ComponentValue in the input, possibly skipping whitespace
 *
 * @param ComponentValueList $values Input values
 * @param int $start Current position in the input. May be -1, in which
 *  case the first position in the input should be returned.
 * @param array $options See self::$defaultOptions
 * @return int Next token index. Equals count( $values ) or more when
 *  there is nothing further to consume.
 */
protected function next( ComponentValueList $values, $start, array $options ) {
	$skipWhitespace = $options['skip-whitespace'];
	$total = count( $values );

	// Always advance by one; then optionally step over whitespace tokens.
	$pos = $start + 1;
	while ( $skipWhitespace && $pos < $total &&
		$values[$pos] instanceof Token && $values[$pos]->type() === Token::T_WHITESPACE
	) {
		$pos++;
	}

	return $pos;
}
/**
 * Create a Match
 *
 * The new Match covers $end - $start values of $list beginning at $start
 * and is given this matcher's capture name.
 *
 * @param ComponentValueList $list
 * @param int $start First position of the match
 * @param int $end First position after the match
 * @param Match|null $submatch Submatch, for capturing. If $submatch itself
 *  is named it will be kept as a capture in the returned Match, otherwise
 *  its captured matches (if any) as returned by getCapturedMatches() will
 *  be kept as captures in the returned Match.
 * @param array $stack Stack from which to fetch more submatches for
 *  capturing (see $submatch). The stack is expected to be an array of
 *  arrays, with the first element of each subarray being a Match.
 * @return Match
 */
protected function makeMatch(
	ComponentValueList $list, $start, $end, Match $submatch = null, array $stack = []
) {
	// Work queue: the stack's matches first, then the explicit submatch.
	$pending = array_column( $stack, 0 );
	$pending[] = $submatch;

	$captures = [];
	while ( $pending ) {
		$candidate = array_shift( $pending );

		if ( !$candidate instanceof Match ) {
			// skip it, probably null
			continue;
		}

		if ( $candidate->getName() !== null ) {
			$captures[] = $candidate;
			continue;
		}

		// Unnamed match: its own captures take its place at the front of the queue.
		$nested = $candidate->getCapturedMatches();
		if ( $nested ) {
			$pending = array_merge( $nested, $pending );
		}
	}

	return new Match( $list, $start, $end - $start, $this->captureName, $captures );
}
/**
 * Match against a list of ComponentValues
 *
 * A Matcher's job is to work out every way in which its grammar fragment
 * can consume ComponentValues beginning at a given position in the
 * ComponentValueList, and to report each of those ways as a Match object.
 * For example, a matcher implementing `IDENT*`, started at a position
 * followed by three IDENT tokens in a row, can match 0, 1, 2, or all 3 of
 * those tokens, and so should return an iterator over that set of Match
 * objects.
 *
 * Some matchers take other matchers as input: `IDENT*` is probably going
 * to be implemented as a matcher for `*` that repeatedly applies a matcher
 * for `IDENT`. The `*` matcher would then call the `IDENT` matcher's
 * generateMatches() method directly.
 *
 * Most Matchers implement this method as a generator, so that the full
 * set of results is not built up when the caller is reasonably likely to
 * terminate early.
 *
 * @param ComponentValueList $values
 * @param int $start Starting position in $values
 * @param array $options See self::$defaultOptions.
 *  Always use the options passed in, don't use $this->defaultOptions yourself.
 * @return \Iterator<Match> Iterates over the set of Match objects
 *  defining all the ways this matcher can match.
 */
abstract protected function generateMatches( ComponentValueList $values, $start, array $options );