3 namespace Wikimedia\Purtle;
6 use InvalidArgumentException;
10 * Base class for RdfWriter implementations.
12 * Subclasses have to implement at least the writeXXX() methods to generate the desired output
13 * for the respective RDF constructs. Subclasses may override the startXXX() and finishXXX()
14 * methods to generate structural output, and override expandXXX() to transform identifiers.
17 * @author Daniel Kinzler
19 abstract class RdfWriterBase implements RdfWriter {
22 * @var array An array of strings, RdfWriters, or closures.
27 * @var RdfWriter[] sub-writers.
// Writer states. state() moves between them according to $transitionTable.
// Initial state of a newly constructed writer.
31 const STATE_START = 0;
// Document level: entered by start(), and re-entered by finish() and drain()
// to close whatever is still open. Sub-writers created by sub() begin here.
32 const STATE_DOCUMENT = 5;
// Entered by about().
33 const STATE_SUBJECT = 10;
// Entered by say().
34 const STATE_PREDICATE = 11;
// Entered by is(), text() and value().
35 const STATE_OBJECT = 12;
// Entered by finish(); drain() leaves this state untouched.
36 const STATE_FINISH = 666;
39 * @var int the current state, one of the STATE_XXX constants
41 private $state = self::STATE_START;
44 * Shorthands that can be used in place of IRIs (e.g. "a" to mean rdf:type).
46 * @var array[] a map of shorthand names to [ $prefix, $local ] pairs.
47 * @todo Handle "a" as a special case directly. Use for custom "variables" like %currentValue
50 private $shorthands = [];
53 * @var string[] a map of prefixes to base IRIs
55 private $prefixes = [];
58 * @var array pair to store the current subject.
59 * Holds the $base and $local parameters passed to about().
61 protected $currentSubject = [ null, null ];
64 * @var array pair to store the current predicate.
65 * Holds the $base and $local parameters passed to say().
67 protected $currentPredicate = [ null, null ];
75 * Role ID for writers that will generate a full RDF document.
77 const DOCUMENT_ROLE = 'document';
// Role ID passed to newSubWriter() for the writers created by sub().
78 const SUBDOCUMENT_ROLE = 'sub';
81 * Role ID for writers that will generate a single inline blank node.
83 const BNODE_ROLE = 'bnode';
86 * Role ID for writers that will generate a single inline RDR statement.
88 const STATEMENT_ROLE = 'statement';
91 * @var string The writer's role, see the XXX_ROLE constants.
96 * Are prefixes locked against modification?
99 private $prefixesLocked = false;
/**
 * Initialises the writer: stores the role and the blank node labeler, registers the
 * "a" shorthand for rdf:type, and declares the "rdf" and "xsd" prefixes.
 *
 * @param string $role The writer's role, use the XXX_ROLE constants.
 * @param BNodeLabeler|null $labeler Labeler used by blank(); a new BNodeLabeler is
 *        created when none is given.
 *
 * @throws InvalidArgumentException if $role is not a string
 */
public function __construct( $role, BNodeLabeler $labeler = null ) {
	if ( !is_string( $role ) ) {
		throw new InvalidArgumentException( '$role must be a string' );
	}

	if ( $labeler === null ) {
		$labeler = new BNodeLabeler();
	}

	$this->role = $role;
	$this->labeler = $labeler;

	$this->registerShorthand( 'a', 'rdf', 'type' );

	// Prefixes every writer knows about out of the box.
	$this->prefix( 'rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' );
	$this->prefix( 'xsd', 'http://www.w3.org/2001/XMLSchema#' );
}
/**
 * Factory hook used by sub() to create the writer backing a sub-document.
 *
 * @param string $role The role of the new writer; sub() passes SUBDOCUMENT_ROLE.
 * @param BNodeLabeler $labeler The labeler for the new writer; sub() passes this
 *        writer's own labeler.
 *
 * @return RdfWriterBase
 */
abstract protected function newSubWriter( $role, BNodeLabeler $labeler );
/**
 * Registers a shorthand that can be used instead of a qname,
 * like 'a' can be used instead of 'rdf:type'.
 * Registering the same shorthand again replaces the earlier mapping.
 *
 * @param string $shorthand The shorthand name
 * @param string $prefix The prefix part of the qname the shorthand stands for
 * @param string $local The local part of the qname the shorthand stands for
 */
protected function registerShorthand( $shorthand, $prefix, $local ) {
	$qname = [ $prefix, $local ];
	$this->shorthands[$shorthand] = $qname;
}
/**
 * Declares a prefix for use in qnames. Declaring a prefix again replaces its base IRI.
 *
 * @param string $prefix
 * @param string $iri The base IRI
 *
 * @throws \LogicException if prefixes are locked, which happens once start() was called
 */
public function prefix( $prefix, $iri ) {
	if ( $this->prefixesLocked ) {
		// Fully qualified so that resolution does not depend on a "use" statement:
		// an unqualified name would be looked up inside the Wikimedia\Purtle namespace.
		throw new \LogicException( 'Prefixes can not be added after start()' );
	}

	$this->prefixes[$prefix] = $iri;
}
/**
 * Determines whether $shorthand can be used as a shorthand, that is, whether it
 * was registered via registerShorthand().
 *
 * @param string $shorthand
 *
 * @return bool
 */
protected function isShorthand( $shorthand ) {
	$registered = isset( $this->shorthands[$shorthand] );
	return $registered;
}
/**
 * Determines whether $prefix can legally be used as a prefix, that is, whether a
 * base IRI has been declared for it.
 *
 * @param string $prefix
 *
 * @return bool
 */
protected function isPrefix( $prefix ) {
	$declared = isset( $this->prefixes[$prefix] );
	return $declared;
}
/**
 * Returns the prefix map as currently declared.
 *
 * @return string[] An associative array mapping prefixes to base IRIs.
 */
public function getPrefixes() {
	$prefixMap = $this->prefixes;
	return $prefixMap;
}
/**
 * Checks whether $languageCode looks like a language code: at least two characters,
 * consisting only of ASCII letters, digits and hyphens.
 *
 * @param string|null $languageCode
 *
 * @return bool false for null and for anything that does not match
 */
protected function isValidLanguageCode( $languageCode ) {
	if ( $languageCode === null ) {
		return false;
	}

	// preg_match is somewhat (12%) slower than strspn but more readable.
	// The D modifier anchors "$" at the very end of the subject; without it a
	// code followed by a newline ("en\n") would be accepted.
	return preg_match( '/^[\da-z-]{2,}$/iD', $languageCode ) === 1;
}
/**
 * Creates a sub-writer via newSubWriter() and keeps track of it in $this->subs.
 * The sub-writer starts out in the document state and uses the same labeler and
 * the same prefix map as this writer.
 *
 * @return RdfWriterBase the new sub-writer
 */
final public function sub() {
	$subWriter = $this->newSubWriter( self::SUBDOCUMENT_ROLE, $this->labeler );

	// Bound by reference, so both writers see the same registered prefixes.
	$subWriter->prefixes =& $this->prefixes;
	$subWriter->state = self::STATE_DOCUMENT;

	$this->subs[] = $subWriter;
	return $subWriter;
}
/**
 * Returns the writer's role. The role determines the behavior of the writer with respect
 * to which states and transitions are possible: a BNODE_ROLE writer would for instance
 * not accept a call to about(), since it can only process triples about a single subject
 * (the blank node it represents).
 *
 * @return string A string corresponding to one of the XXX_ROLE constants.
 */
final public function getRole() {
	$role = $this->role;
	return $role;
}
/**
 * Appends an item to the end of the output buffer.
 *
 * @param mixed $w The item to append, stored as given. NOTE(review): the buffer is
 *        documented as holding strings, RdfWriters or closures; confirm callers
 *        pass nothing else.
 */
final protected function write( $w ) {
	$item = $w;
	$this->buffer[] = $item;
}
/**
 * If $base is a shorthand, $base and $local are updated to hold whatever qname
 * the shorthand was associated with. This only applies when $local is null.
 *
 * Otherwise, $base and $local remain unchanged.
 *
 * @param string &$base
 * @param string|null &$local
 */
protected function expandShorthand( &$base, &$local ) {
	if ( $local !== null || !isset( $this->shorthands[$base] ) ) {
		return;
	}

	// Fetch the pair first: $base is the lookup key and is overwritten below.
	$qname = $this->shorthands[$base];
	$base = $qname[0];
	$local = $qname[1];
}
/**
 * If $base is a registered prefix, $base will be replaced by the base IRI associated with
 * that prefix, with $local appended. $local will be set to null.
 *
 * Nothing happens when $local is null, or when $base is '_' (presumably the blank
 * node pseudo-prefix; verify against callers).
 *
 * @param string &$base
 * @param string|null &$local
 *
 * @throws \LogicException if $local is given but $base is not a registered prefix
 */
protected function expandQName( &$base, &$local ) {
	if ( $local === null || $base === '_' ) {
		return;
	}

	if ( !isset( $this->prefixes[$base] ) ) {
		// Fully qualified so that resolution does not depend on a "use" statement:
		// an unqualified name would be looked up inside the Wikimedia\Purtle namespace.
		throw new \LogicException( 'Unknown prefix: ' . $base );
	}

	$base = $this->prefixes[$base] . $local; // XXX: can we avoid this concat?
	$local = null;
}
/**
 * @see RdfWriter::blank()
 *
 * Delegates to the labeler's getLabel().
 *
 * @param string|null $label node label, will be generated if not given.
 *
 * @return string whatever the labeler returns for $label
 */
final public function blank( $label = null ) {
	$nodeLabel = $this->labeler->getLabel( $label );
	return $nodeLabel;
}
/**
 * @see RdfWriter::start()
 *
 * Moves the writer into the document state and locks the prefix map, so that
 * later calls to prefix() fail.
 */
final public function start() {
	// Transition first: if it is rejected, the prefixes stay unlocked.
	$this->state( self::STATE_DOCUMENT );

	$this->prefixesLocked = true;
}
/**
 * @see RdfWriter::finish()
 *
 * Closes anything that is still open, pulls in the output of all sub-writers,
 * moves to the finish state, and then forgets the sub-writers.
 */
final public function finish() {
	// Return to document level; this closes all unclosed states.
	$this->state( self::STATE_DOCUMENT );

	// Sub-writer output must land in the buffer before the transition to the
	// finish state, so that it gets placed before the footer.
	$this->drainSubs();

	$this->state( self::STATE_FINISH );

	// Detaches all subs.
	$this->subs = [];
}
/**
 * @see RdfWriter::drain()
 *
 * Returns the output collected so far as a single string.
 *
 * NOTE(review): the drainSubs() call, the buffer reset and the return statement are
 * missing from the extracted source and were reconstructed; confirm against the
 * full file.
 *
 * @return string
 */
final public function drain() {
	// Draining after finish() is allowed, but the finish state is sticky:
	// only an unfinished writer is brought back to document level.
	$finished = $this->state === self::STATE_FINISH;
	if ( !$finished ) {
		$this->state( self::STATE_DOCUMENT );
	}

	$this->drainSubs();
	$this->flattenBuffer();

	$rdf = implode( '', $this->buffer );
	$this->buffer = [];

	return $rdf;
}
/**
 * Calls drain() on any RdfWriter instances in $this->buffer, and replaces them
 * in $this->buffer with the string returned by the drain() call. Any closures
 * present in the $this->buffer will be called, and replaced by their return value.
 */
private function flattenBuffer() {
	foreach ( $this->buffer as &$entry ) {
		// Fully qualified: an unqualified "Closure" would be resolved inside the
		// Wikimedia\Purtle namespace, where instanceof would silently be false.
		if ( $entry instanceof \Closure ) {
			$entry = $entry();
		}

		// Checked separately rather than with elseif, so that a writer returned
		// by a closure is drained as well.
		if ( $entry instanceof RdfWriter ) {
			$entry = $entry->drain();
		}
	}

	// Break the reference to the last buffer element.
	unset( $entry );
}
/**
 * Drains all subwriters, and appends their output to this writer's buffer,
 * in the order in which the subwriters were created.
 * Subwriters remain usable.
 */
private function drainSubs() {
	foreach ( $this->subs as $subWriter ) {
		$this->write( $subWriter->drain() );
	}
}
/**
 * @see RdfWriter::about()
 *
 * Starts statements about a subject. Calling it again for the current subject
 * right after an object was written is a no-op.
 *
 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
 * @param string|null $local A QName suffix, or null if $base is an IRI.
 *
 * @return RdfWriter $this
 */
final public function about( $base, $local = null ) {
	$this->expandSubject( $base, $local );

	// Compare after expansion, since the stored subject is the expanded one.
	$sameSubject = $base === $this->currentSubject[0]
		&& $local === $this->currentSubject[1];

	if ( $sameSubject && $this->state === self::STATE_OBJECT ) {
		return $this; // redundant about() call
	}

	$this->state( self::STATE_SUBJECT );

	// A new subject invalidates the remembered predicate.
	$this->currentPredicate[0] = null;
	$this->currentPredicate[1] = null;
	$this->currentSubject[0] = $base;
	$this->currentSubject[1] = $local;

	$this->writeSubject( $base, $local );
	return $this;
}
/**
 * @see RdfWriter::a()
 * Shorthand for say( 'a' )->is( $type ).
 *
 * @param string $typeBase The data type's QName prefix if $typeLocal is given,
 *        or an IRI or shorthand if $typeLocal is null.
 * @param string|null $typeLocal The data type's QName suffix,
 *        or null if $typeBase is an IRI or shorthand.
 *
 * @return RdfWriter $this
 */
final public function a( $typeBase, $typeLocal = null ) {
	$writer = $this->say( 'a' );
	return $writer->is( $typeBase, $typeLocal );
}
/**
 * @see RdfWriter::say()
 *
 * Starts statements using a predicate. Calling it again for the current predicate
 * right after an object was written is a no-op.
 *
 * @param string $base A QName prefix.
 * @param string|null $local A QName suffix.
 *
 * @return RdfWriter $this
 */
final public function say( $base, $local = null ) {
	$this->expandPredicate( $base, $local );

	// Compare after expansion, since the stored predicate is the expanded one.
	$samePredicate = $base === $this->currentPredicate[0]
		&& $local === $this->currentPredicate[1];

	if ( $samePredicate && $this->state === self::STATE_OBJECT ) {
		return $this; // redundant say() call
	}

	$this->state( self::STATE_PREDICATE );

	$this->currentPredicate[0] = $base;
	$this->currentPredicate[1] = $local;

	$this->writePredicate( $base, $local );
	return $this;
}
/**
 * @see RdfWriter::is()
 *
 * Writes a resource as the object of the current subject and predicate.
 *
 * @param string $base A QName prefix if $local is given, or an IRI if $local is null.
 * @param string|null $local A QName suffix, or null if $base is an IRI.
 *
 * @return RdfWriter $this
 */
final public function is( $base, $local = null ) {
	// The state change comes first: an illegal call is rejected before
	// anything is expanded or written.
	$this->state( self::STATE_OBJECT );

	$this->expandResource( $base, $local );

	$this->writeResource( $base, $local );
	return $this;
}
/**
 * @see RdfWriter::text()
 *
 * Writes a text literal as the object of the current subject and predicate.
 * The text and language are handed to writeText() unchanged.
 *
 * @param string $text the text to be placed in the output
 * @param string|null $language the language the text is in
 *
 * @return RdfWriter $this
 */
final public function text( $text, $language = null ) {
	// Rejects the call unless an object is allowed in the current state.
	$this->state( self::STATE_OBJECT );

	$this->writeText( $text, $language );
	return $this;
}
/**
 * @see RdfWriter::value()
 *
 * Writes a (typed) literal as the object of the current subject and predicate.
 * When no type is given and $value is a PHP integer, float or boolean, the
 * matching xsd type is used and the value is converted to a string.
 *
 * NOTE(review): the type-inference branches are only partly present in the
 * extracted source (the 'integer', 'double' and 'boolean' type names and the
 * boolean conversion are visible). The 'xsd' prefix and the string casts were
 * reconstructed; confirm against the full file.
 *
 * @param string $value the value encoded as a string
 * @param string|null $typeBase The data type's QName prefix if $typeLocal is given,
 *        or an IRI or shorthand if $typeLocal is null.
 * @param string|null $typeLocal The data type's QName suffix,
 *        or null if $typeBase is an IRI or shorthand.
 *
 * @return RdfWriter $this
 */
final public function value( $value, $typeBase = null, $typeLocal = null ) {
	$this->state( self::STATE_OBJECT );

	if ( $typeBase === null && !is_string( $value ) ) {
		// Infer the datatype from the native PHP type of the value.
		if ( is_int( $value ) ) {
			$typeBase = 'xsd';
			$typeLocal = 'integer';
			$value = "$value";
		} elseif ( is_float( $value ) ) {
			$typeBase = 'xsd';
			$typeLocal = 'double';
			$value = "$value";
		} elseif ( is_bool( $value ) ) {
			$typeBase = 'xsd';
			$typeLocal = 'boolean';
			$value = $value ? 'true' : 'false';
		}
	}

	$this->expandType( $typeBase, $typeLocal );

	$this->writeValue( $value, $typeBase, $typeLocal );
	return $this;
}
499 * State transition table
500 * First state is "from", second is "to"
// Maps each "from" state to the set of "to" states that may follow it. state()
// throws for any pair that is not listed. A value of true means the transition
// produces no output; a string value is written to the buffer by state().
503 protected $transitionTable = [
// A new writer can only be moved to document level.
504 self::STATE_START => [
505 self::STATE_DOCUMENT => true,
// At document level: stay there, start a subject, or finish.
507 self::STATE_DOCUMENT => [
508 self::STATE_DOCUMENT => true,
509 self::STATE_SUBJECT => true,
510 self::STATE_FINISH => true,
// A subject must be followed by a predicate.
512 self::STATE_SUBJECT => [
513 self::STATE_PREDICATE => true,
// A predicate must be followed by an object.
515 self::STATE_PREDICATE => [
516 self::STATE_OBJECT => true,
// After an object: return to document level, start a new subject or
// predicate, or write another object.
518 self::STATE_OBJECT => [
519 self::STATE_DOCUMENT => true,
520 self::STATE_SUBJECT => true,
521 self::STATE_PREDICATE => true,
522 self::STATE_OBJECT => true,
/**
 * Perform a state transition. Writer states roughly correspond to states in a naive
 * regular parser for the respective syntax. State transitions may generate output,
 * particularly of structural elements of that syntax.
 *
 * The transition is looked up in $transitionTable. An entry of true produces no
 * output, and a string entry is written to the buffer.
 *
 * NOTE(review): the handling of entries that are neither true nor a string is
 * missing from the extracted source; invoking them as callbacks is a
 * reconstruction, to be confirmed against the full file.
 *
 * @param int $newState one of the self::STATE_... constants
 *
 * @throws \LogicException if the transition is not listed in $transitionTable
 */
final protected function state( $newState ) {
	if ( !isset( $this->transitionTable[$this->state][$newState] ) ) {
		// Fully qualified so that resolution does not depend on a "use" statement:
		// an unqualified name would be looked up inside the Wikimedia\Purtle namespace.
		throw new \LogicException( 'Bad transition: ' . $this->state . ' -> ' . $newState );
	}

	$action = $this->transitionTable[$this->state][$newState];

	if ( is_string( $action ) ) {
		$this->write( $action );
	} elseif ( $action !== true ) {
		$action();
	}

	// Only reached if the transition was allowed and its action did not throw.
	$this->state = $newState;
}
/**
 * Must be implemented to generate output that starts a statement (or set of statements)
 * about a subject. What has to be emitted depends on the requirements of the
 * output format.
 *
 * Called by about() after the transition to the subject state.
 *
 * @note: $base and $local are given as passed to about() and processed by expandSubject().
 *
 * @param string $base
 * @param string|null $local
 */
abstract protected function writeSubject( $base, $local = null );
/**
 * Must be implemented to generate output that represents the association of a predicate
 * with a subject that was previously defined by a call to writeSubject().
 *
 * Called by say() after the transition to the predicate state.
 *
 * @note: $base and $local are given as passed to say() and processed by expandPredicate().
 *
 * @param string $base
 * @param string|null $local
 */
abstract protected function writePredicate( $base, $local = null );
/**
 * Must be implemented to generate output that represents a resource used as the object
 * of a statement.
 *
 * Called by is() after the transition to the object state.
 *
 * @note: $base and $local are given as passed to is() and processed by expandResource().
 *
 * @param string $base
 * @param string|null $local
 */
abstract protected function writeResource( $base, $local = null );
/**
 * Must be implemented to generate output that represents a text used as the object
 * of a statement.
 *
 * Called by text() after the transition to the object state; both arguments
 * arrive exactly as they were passed to text().
 *
 * @param string $text the text to be placed in the output
 * @param string|null $language the language the text is in
 */
abstract protected function writeText( $text, $language );
/**
 * Must be implemented to generate output that represents a (typed) literal used as the object
 * of a statement.
 *
 * Called by value() after the transition to the object state.
 *
 * @note: $typeBase and $typeLocal are given as passed to value() and processed by expandType().
 *
 * @param string $value the value encoded as a string
 * @param string $typeBase
 * @param string|null $typeLocal
 */
abstract protected function writeValue( $value, $typeBase, $typeLocal = null );
/**
 * Perform any expansion (shorthand to qname, qname to IRI) desired
 * for subject identifiers. Called by about() before the subject is written.
 *
 * The base implementation leaves both parameters untouched.
 *
 * @param string &$base
 * @param string|null &$local
 */
protected function expandSubject( &$base, &$local ) {
	// Intentionally empty: no expansion by default.
}
/**
 * Perform any expansion (shorthand to qname, qname to IRI) desired
 * for predicate identifiers. Called by say() before the predicate is written.
 *
 * The base implementation leaves both parameters untouched.
 *
 * @param string &$base
 * @param string|null &$local
 */
protected function expandPredicate( &$base, &$local ) {
	// Intentionally empty: no expansion by default.
}
/**
 * Perform any expansion (shorthand to qname, qname to IRI) desired
 * for resource identifiers. Called by is() before the resource is written.
 *
 * The base implementation leaves both parameters untouched.
 *
 * @param string &$base
 * @param string|null &$local
 */
protected function expandResource( &$base, &$local ) {
	// Intentionally empty: no expansion by default.
}
/**
 * Perform any expansion (shorthand to qname, qname to IRI) desired
 * for type identifiers. Called by value() before the literal is written.
 *
 * The base implementation leaves both parameters untouched.
 *
 * @param string &$base
 * @param string|null &$local
 */
protected function expandType( &$base, &$local ) {
	// Intentionally empty: no expansion by default.
}