3 namespace RemexHtml\Tokenizer;
6 * This is the interface for handlers receiving events from the Tokenizer.
7 * All events which consume characters give a source offset and length,
8 * allowing for input stream patching. The offset and length are relative to
9 * the preprocessed input, see Tokenizer::getPreprocessedText().
11 interface TokenHandler {
13 * Called once at the start of the document (STATE_START).
15 * @param Tokenizer $tokenizer The Tokenizer which generated the event
16 * @param string|null $fragmentNamespace The fragment namespace, or null
17 * to run in document mode.
18 * @param string|null $fragmentName The fragment tag name, or null to run in document mode.
21 function startDocument( Tokenizer $tokenizer, $fragmentNamespace, $fragmentName );
24 * Called when the end of the input string is consumed.
25 * @param int $pos The input position (past the end)
27 function endDocument( $pos );
30 * This is called for "parse errors" (as defined by the spec). The spec
31 * does not define names for error messages, so we just use some English
32 * text for now. The imagined audience is a developer reading validator output.
35 * @param string $text The error message
36 * @param int $pos The input position
38 function error( $text, $pos );
41 * A merged sequence of character tokens. We use the SAX-like convention of
42 * requiring the handler to do the substring operation, i.e. the actual
43 * text is substr( $text, $start, $length ), since this allows us to avoid
44 * some copying, at least if ignoreCharRefs and ignoreNulls are enabled.
46 * @param string $text The string which contains the emitted characters
47 * @param int $start The start of the range within $text to use
48 * @param int $length The length of the range within $text to use
49 * @param int $sourceStart The offset of the characters in the preprocessed input
50 * @param int $sourceLength The length of the characters in the preprocessed input
52 function characters( $text, $start, $length, $sourceStart, $sourceLength );
55 * A start tag event. We call it a tag rather than an element since the
56 * start/end events are not balanced, so the relationship between tags
57 * and elements is complex. Errors emitted by attribute parsing will
58 * not be received until $attrs is accessed by the handler.
60 * @param string $name The tag name
61 * @param Attributes $attrs The tag attributes
62 * @param bool $selfClose Whether there is a self-closing slash
63 * @param int $sourceStart The offset of the tag in the preprocessed input
64 * @param int $sourceLength The length of the tag in the preprocessed input
66 function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength );
71 * @param string $name The tag name
72 * @param int $sourceStart The offset of the tag in the preprocessed input
73 * @param int $sourceLength The length of the tag in the preprocessed input
75 function endTag( $name, $sourceStart, $sourceLength );
78 * A DOCTYPE declaration.
80 * @param string|null $name The DOCTYPE name, or null if none was found
81 * @param string|null $public The public identifier, or null if none was found
82 * @param string|null $system The system identifier, or null if none was found
83 * @param bool $quirks What the spec calls the "force-quirks flag"
84 * @param int $sourceStart The offset of the declaration in the preprocessed input
85 * @param int $sourceLength The length of the declaration in the preprocessed input
87 function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength );
92 * @param string $text The inner text of the comment
93 * @param int $sourceStart The offset of the comment in the preprocessed input
94 * @param int $sourceLength The length of the comment in the preprocessed input
96 function comment( $text, $sourceStart, $sourceLength );