]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/CommentStore.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / includes / CommentStore.php
1 <?php
2 /**
3  * Manage storage of comments in the database
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  * http://www.gnu.org/copyleft/gpl.html
19  *
20  * @file
21  */
22
23 use Wikimedia\Rdbms\IDatabase;
24
25 /**
26  * CommentStore handles storage of comments (edit summaries, log reasons, etc)
27  * in the database.
28  * @since 1.30
29  */
30 class CommentStore {
31
32         /**
33          * Maximum length of a comment in UTF-8 characters. Longer comments will be truncated.
34          * @note This must be at least 255 and not greater than floor( MAX_COMMENT_LENGTH / 4 ).
35          */
36         const COMMENT_CHARACTER_LIMIT = 1000;
37
38         /**
39          * Maximum length of a comment in bytes. Longer comments will be truncated.
40          * @note This value is determined by the size of the underlying database field,
41          *  currently BLOB in MySQL/MariaDB.
42          */
43         const MAX_COMMENT_LENGTH = 65535;
44
45         /**
46          * Maximum length of serialized data in bytes. Longer data will result in an exception.
47          * @note This value is determined by the size of the underlying database field,
48          *  currently BLOB in MySQL/MariaDB.
49          */
50         const MAX_DATA_LENGTH = 65535;
51
52         /**
53          * Define fields that use temporary tables for transitional purposes
54          * @var array Keys are '$key', values are arrays with four fields:
55          *  - table: Temporary table name
56          *  - pk: Temporary table column referring to the main table's primary key
57          *  - field: Temporary table column referring to comment.comment_id
58          *  - joinPK: Main table's primary key
59          */
60         protected static $tempTables = [
61                 'rev_comment' => [
62                         'table' => 'revision_comment_temp',
63                         'pk' => 'revcomment_rev',
64                         'field' => 'revcomment_comment_id',
65                         'joinPK' => 'rev_id',
66                 ],
67                 'img_description' => [
68                         'table' => 'image_comment_temp',
69                         'pk' => 'imgcomment_name',
70                         'field' => 'imgcomment_description_id',
71                         'joinPK' => 'img_name',
72                 ],
73         ];
74
75         /**
76          * Fields that formerly used $tempTables
77          * @var array Key is '$key', value is the MediaWiki version in which it was
78          *  removed from $tempTables.
79          */
80         protected static $formerTempTables = [];
81
82         /** @var string */
83         protected $key;
84
85         /** @var int One of the MIGRATION_* constants */
86         protected $stage;
87
88         /** @var array|null Cache for `self::getJoin()` */
89         protected $joinCache = null;
90
91         /** @var Language Language to use for comment truncation */
92         protected $lang;
93
94         /**
95          * @param string $key A key such as "rev_comment" identifying the comment
96          *  field being fetched.
97          * @param Language $lang Language to use for comment truncation. Defaults
98          *  to $wgContLang.
99          */
100         public function __construct( $key, Language $lang = null ) {
101                 global $wgCommentTableSchemaMigrationStage, $wgContLang;
102
103                 $this->key = $key;
104                 $this->stage = $wgCommentTableSchemaMigrationStage;
105                 $this->lang = $lang ?: $wgContLang;
106         }
107
108         /**
109          * Static constructor for easier chaining
110          * @param string $key A key such as "rev_comment" identifying the comment
111          *  field being fetched.
112          * @return CommentStore
113          */
114         public static function newKey( $key ) {
115                 return new CommentStore( $key );
116         }
117
118         /**
119          * Get SELECT fields for the comment key
120          *
121          * Each resulting row should be passed to `self::getCommentLegacy()` to get the
122          * actual comment.
123          *
124          * @note Use of this method may require a subsequent database query to
125          *  actually fetch the comment. If possible, use `self::getJoin()` instead.
126          * @return string[] to include in the `$vars` to `IDatabase->select()`. All
127          *  fields are aliased, so `+` is safe to use.
128          */
129         public function getFields() {
130                 $fields = [];
131                 if ( $this->stage === MIGRATION_OLD ) {
132                         $fields["{$this->key}_text"] = $this->key;
133                         $fields["{$this->key}_data"] = 'NULL';
134                         $fields["{$this->key}_cid"] = 'NULL';
135                 } else {
136                         if ( $this->stage < MIGRATION_NEW ) {
137                                 $fields["{$this->key}_old"] = $this->key;
138                         }
139                         if ( isset( self::$tempTables[$this->key] ) ) {
140                                 $fields["{$this->key}_pk"] = self::$tempTables[$this->key]['joinPK'];
141                         } else {
142                                 $fields["{$this->key}_id"] = "{$this->key}_id";
143                         }
144                 }
145                 return $fields;
146         }
147
148         /**
149          * Get SELECT fields and joins for the comment key
150          *
151          * Each resulting row should be passed to `self::getComment()` to get the
152          * actual comment.
153          *
154          * @return array With three keys:
155          *   - tables: (string[]) to include in the `$table` to `IDatabase->select()`
156          *   - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
157          *   - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
158          *  All tables, fields, and joins are aliased, so `+` is safe to use.
159          */
160         public function getJoin() {
161                 if ( $this->joinCache === null ) {
162                         $tables = [];
163                         $fields = [];
164                         $joins = [];
165
166                         if ( $this->stage === MIGRATION_OLD ) {
167                                 $fields["{$this->key}_text"] = $this->key;
168                                 $fields["{$this->key}_data"] = 'NULL';
169                                 $fields["{$this->key}_cid"] = 'NULL';
170                         } else {
171                                 $join = $this->stage === MIGRATION_NEW ? 'JOIN' : 'LEFT JOIN';
172
173                                 if ( isset( self::$tempTables[$this->key] ) ) {
174                                         $t = self::$tempTables[$this->key];
175                                         $alias = "temp_$this->key";
176                                         $tables[$alias] = $t['table'];
177                                         $joins[$alias] = [ $join, "{$alias}.{$t['pk']} = {$t['joinPK']}" ];
178                                         $joinField = "{$alias}.{$t['field']}";
179                                 } else {
180                                         $joinField = "{$this->key}_id";
181                                 }
182
183                                 $alias = "comment_$this->key";
184                                 $tables[$alias] = 'comment';
185                                 $joins[$alias] = [ $join, "{$alias}.comment_id = {$joinField}" ];
186
187                                 if ( $this->stage === MIGRATION_NEW ) {
188                                         $fields["{$this->key}_text"] = "{$alias}.comment_text";
189                                 } else {
190                                         $fields["{$this->key}_text"] = "COALESCE( {$alias}.comment_text, $this->key )";
191                                 }
192                                 $fields["{$this->key}_data"] = "{$alias}.comment_data";
193                                 $fields["{$this->key}_cid"] = "{$alias}.comment_id";
194                         }
195
196                         $this->joinCache = [
197                                 'tables' => $tables,
198                                 'fields' => $fields,
199                                 'joins' => $joins,
200                         ];
201                 }
202
203                 return $this->joinCache;
204         }
205
206         /**
207          * Extract the comment from a row
208          *
209          * Shared implementation for getComment() and getCommentLegacy()
210          *
211          * @param IDatabase|null $db Database handle for getCommentLegacy(), or null for getComment()
212          * @param object|array $row
213          * @param bool $fallback
214          * @return CommentStoreComment
215          */
216         private function getCommentInternal( IDatabase $db = null, $row, $fallback = false ) {
217                 $key = $this->key;
218                 $row = (array)$row;
219                 if ( array_key_exists( "{$key}_text", $row ) && array_key_exists( "{$key}_data", $row ) ) {
220                         $cid = isset( $row["{$key}_cid"] ) ? $row["{$key}_cid"] : null;
221                         $text = $row["{$key}_text"];
222                         $data = $row["{$key}_data"];
223                 } elseif ( $this->stage === MIGRATION_OLD ) {
224                         $cid = null;
225                         if ( $fallback && isset( $row[$key] ) ) {
226                                 wfLogWarning( "Using deprecated fallback handling for comment $key" );
227                                 $text = $row[$key];
228                         } else {
229                                 wfLogWarning( "Missing {$key}_text and {$key}_data fields in row with MIGRATION_OLD" );
230                                 $text = '';
231                         }
232                         $data = null;
233                 } else {
234                         if ( isset( self::$tempTables[$key] ) ) {
235                                 if ( array_key_exists( "{$key}_pk", $row ) ) {
236                                         if ( !$db ) {
237                                                 throw new InvalidArgumentException(
238                                                         "\$row does not contain fields needed for comment $key and getComment(), but "
239                                                         . "does have fields for getCommentLegacy()"
240                                                 );
241                                         }
242                                         $t = self::$tempTables[$key];
243                                         $id = $row["{$key}_pk"];
244                                         $row2 = $db->selectRow(
245                                                 [ $t['table'], 'comment' ],
246                                                 [ 'comment_id', 'comment_text', 'comment_data' ],
247                                                 [ $t['pk'] => $id ],
248                                                 __METHOD__,
249                                                 [],
250                                                 [ 'comment' => [ 'JOIN', [ "comment_id = {$t['field']}" ] ] ]
251                                         );
252                                 } elseif ( $fallback && isset( $row[$key] ) ) {
253                                         wfLogWarning( "Using deprecated fallback handling for comment $key" );
254                                         $row2 = (object)[ 'comment_text' => $row[$key], 'comment_data' => null ];
255                                 } else {
256                                         throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
257                                 }
258                         } else {
259                                 if ( array_key_exists( "{$key}_id", $row ) ) {
260                                         if ( !$db ) {
261                                                 throw new InvalidArgumentException(
262                                                         "\$row does not contain fields needed for comment $key and getComment(), but "
263                                                         . "does have fields for getCommentLegacy()"
264                                                 );
265                                         }
266                                         $id = $row["{$key}_id"];
267                                         $row2 = $db->selectRow(
268                                                 'comment',
269                                                 [ 'comment_id', 'comment_text', 'comment_data' ],
270                                                 [ 'comment_id' => $id ],
271                                                 __METHOD__
272                                         );
273                                 } elseif ( $fallback && isset( $row[$key] ) ) {
274                                         wfLogWarning( "Using deprecated fallback handling for comment $key" );
275                                         $row2 = (object)[ 'comment_text' => $row[$key], 'comment_data' => null ];
276                                 } else {
277                                         throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
278                                 }
279                         }
280
281                         if ( $row2 ) {
282                                 $cid = $row2->comment_id;
283                                 $text = $row2->comment_text;
284                                 $data = $row2->comment_data;
285                         } elseif ( $this->stage < MIGRATION_NEW && array_key_exists( "{$key}_old", $row ) ) {
286                                 $cid = null;
287                                 $text = $row["{$key}_old"];
288                                 $data = null;
289                         } else {
290                                 // @codeCoverageIgnoreStart
291                                 wfLogWarning( "Missing comment row for $key, id=$id" );
292                                 $cid = null;
293                                 $text = '';
294                                 $data = null;
295                                 // @codeCoverageIgnoreEnd
296                         }
297                 }
298
299                 $msg = null;
300                 if ( $data !== null ) {
301                         $data = FormatJson::decode( $data );
302                         if ( !is_object( $data ) ) {
303                                 // @codeCoverageIgnoreStart
304                                 wfLogWarning( "Invalid JSON object in comment: $data" );
305                                 $data = null;
306                                 // @codeCoverageIgnoreEnd
307                         } else {
308                                 $data = (array)$data;
309                                 if ( isset( $data['_message'] ) ) {
310                                         $msg = self::decodeMessage( $data['_message'] )
311                                                 ->setInterfaceMessageFlag( true );
312                                 }
313                                 if ( !empty( $data['_null'] ) ) {
314                                         $data = null;
315                                 } else {
316                                         foreach ( $data as $k => $v ) {
317                                                 if ( substr( $k, 0, 1 ) === '_' ) {
318                                                         unset( $data[$k] );
319                                                 }
320                                         }
321                                 }
322                         }
323                 }
324
325                 return new CommentStoreComment( $cid, $text, $msg, $data );
326         }
327
328         /**
329          * Extract the comment from a row
330          *
331          * Use `self::getJoin()` to ensure the row contains the needed data.
332          *
333          * If you need to fake a comment in a row for some reason, set fields
334          * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
335          *
336          * @param object|array $row Result row.
337          * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
338          * @return CommentStoreComment
339          */
340         public function getComment( $row, $fallback = false ) {
341                 return $this->getCommentInternal( null, $row, $fallback );
342         }
343
344         /**
345          * Extract the comment from a row, with legacy lookups.
346          *
347          * If `$row` might have been generated using `self::getFields()` rather
348          * than `self::getJoin()`, use this. Prefer `self::getComment()` if you
349          * know callers used `self::getJoin()` for the row fetch.
350          *
351          * If you need to fake a comment in a row for some reason, set fields
352          * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
353          *
354          * @param IDatabase $db Database handle to use for lookup
355          * @param object|array $row Result row.
356          * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
357          * @return CommentStoreComment
358          */
359         public function getCommentLegacy( IDatabase $db, $row, $fallback = false ) {
360                 return $this->getCommentInternal( $db, $row, $fallback );
361         }
362
363         /**
364          * Create a new CommentStoreComment, inserting it into the database if necessary
365          *
366          * If a comment is going to be passed to `self::insert()` or the like
367          * multiple times, it will be more efficient to pass a CommentStoreComment
368          * once rather than making `self::insert()` do it every time through.
369          *
370          * @note When passing a CommentStoreComment, this may set `$comment->id` if
371          *  it's not already set. If `$comment->id` is already set, it will not be
372          *  verified that the specified comment actually exists or that it
373          *  corresponds to the comment text, message, and/or data in the
374          *  CommentStoreComment.
375          * @param IDatabase $dbw Database handle to insert on. Unused if `$comment`
376          *  is a CommentStoreComment and `$comment->id` is set.
377          * @param string|Message|CommentStoreComment $comment Comment text or Message object, or
378          *  a CommentStoreComment.
379          * @param array|null $data Structured data to store. Keys beginning with '_' are reserved.
380          *  Ignored if $comment is a CommentStoreComment.
381          * @return CommentStoreComment
382          */
383         public function createComment( IDatabase $dbw, $comment, array $data = null ) {
384                 $comment = CommentStoreComment::newUnsavedComment( $comment, $data );
385
386                 # Truncate comment in a Unicode-sensitive manner
387                 $comment->text = $this->lang->truncate( $comment->text, self::MAX_COMMENT_LENGTH );
388                 if ( mb_strlen( $comment->text, 'UTF-8' ) > self::COMMENT_CHARACTER_LIMIT ) {
389                         $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this->lang )->escaped();
390                         if ( mb_strlen( $ellipsis ) >= self::COMMENT_CHARACTER_LIMIT ) {
391                                 // WTF?
392                                 $ellipsis = '...';
393                         }
394                         $maxLength = self::COMMENT_CHARACTER_LIMIT - mb_strlen( $ellipsis, 'UTF-8' );
395                         $comment->text = mb_substr( $comment->text, 0, $maxLength, 'UTF-8' ) . $ellipsis;
396                 }
397
398                 if ( $this->stage > MIGRATION_OLD && !$comment->id ) {
399                         $dbData = $comment->data;
400                         if ( !$comment->message instanceof RawMessage ) {
401                                 if ( $dbData === null ) {
402                                         $dbData = [ '_null' => true ];
403                                 }
404                                 $dbData['_message'] = self::encodeMessage( $comment->message );
405                         }
406                         if ( $dbData !== null ) {
407                                 $dbData = FormatJson::encode( (object)$dbData, false, FormatJson::ALL_OK );
408                                 $len = strlen( $dbData );
409                                 if ( $len > self::MAX_DATA_LENGTH ) {
410                                         $max = self::MAX_DATA_LENGTH;
411                                         throw new OverflowException( "Comment data is too long ($len bytes, maximum is $max)" );
412                                 }
413                         }
414
415                         $hash = self::hash( $comment->text, $dbData );
416                         $comment->id = $dbw->selectField(
417                                 'comment',
418                                 'comment_id',
419                                 [
420                                         'comment_hash' => $hash,
421                                         'comment_text' => $comment->text,
422                                         'comment_data' => $dbData,
423                                 ],
424                                 __METHOD__
425                         );
426                         if ( !$comment->id ) {
427                                 $dbw->insert(
428                                         'comment',
429                                         [
430                                                 'comment_hash' => $hash,
431                                                 'comment_text' => $comment->text,
432                                                 'comment_data' => $dbData,
433                                         ],
434                                         __METHOD__
435                                 );
436                                 $comment->id = $dbw->insertId();
437                         }
438                 }
439
440                 return $comment;
441         }
442
443         /**
444          * Implementation for `self::insert()` and `self::insertWithTempTable()`
445          * @param IDatabase $dbw
446          * @param string|Message|CommentStoreComment $comment
447          * @param array|null $data
448          * @return array [ array $fields, callable $callback ]
449          */
450         private function insertInternal( IDatabase $dbw, $comment, $data ) {
451                 $fields = [];
452                 $callback = null;
453
454                 $comment = $this->createComment( $dbw, $comment, $data );
455
456                 if ( $this->stage <= MIGRATION_WRITE_BOTH ) {
457                         $fields[$this->key] = $this->lang->truncate( $comment->text, 255 );
458                 }
459
460                 if ( $this->stage >= MIGRATION_WRITE_BOTH ) {
461                         if ( isset( self::$tempTables[$this->key] ) ) {
462                                 $t = self::$tempTables[$this->key];
463                                 $func = __METHOD__;
464                                 $commentId = $comment->id;
465                                 $callback = function ( $id ) use ( $dbw, $commentId, $t, $func ) {
466                                         $dbw->insert(
467                                                 $t['table'],
468                                                 [
469                                                         $t['pk'] => $id,
470                                                         $t['field'] => $commentId,
471                                                 ],
472                                                 $func
473                                         );
474                                 };
475                         } else {
476                                 $fields["{$this->key}_id"] = $comment->id;
477                         }
478                 }
479
480                 return [ $fields, $callback ];
481         }
482
483         /**
484          * Insert a comment in preparation for a row that references it
485          *
486          * @note It's recommended to include both the call to this method and the
487          *  row insert in the same transaction.
488          * @param IDatabase $dbw Database handle to insert on
489          * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
490          * @param array|null $data As for `self::createComment()`
491          * @return array Fields for the insert or update
492          */
493         public function insert( IDatabase $dbw, $comment, $data = null ) {
494                 if ( isset( self::$tempTables[$this->key] ) ) {
495                         throw new InvalidArgumentException( "Must use insertWithTempTable() for $this->key" );
496                 }
497
498                 list( $fields ) = $this->insertInternal( $dbw, $comment, $data );
499                 return $fields;
500         }
501
502         /**
503          * Insert a comment in a temporary table in preparation for a row that references it
504          *
505          * This is currently needed for "rev_comment" and "img_description". In the
506          * future that requirement will be removed.
507          *
508          * @note It's recommended to include both the call to this method and the
509          *  row insert in the same transaction.
510          * @param IDatabase $dbw Database handle to insert on
511          * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
512          * @param array|null $data As for `self::createComment()`
513          * @return array Two values:
514          *  - array Fields for the insert or update
515          *  - callable Function to call when the primary key of the row being
516          *    inserted/updated is known. Pass it that primary key.
517          */
518         public function insertWithTempTable( IDatabase $dbw, $comment, $data = null ) {
519                 if ( isset( self::$formerTempTables[$this->key] ) ) {
520                         wfDeprecated( __METHOD__ . " for $this->key", self::$formerTempTables[$this->key] );
521                 } elseif ( !isset( self::$tempTables[$this->key] ) ) {
522                         throw new InvalidArgumentException( "Must use insert() for $this->key" );
523                 }
524
525                 list( $fields, $callback ) = $this->insertInternal( $dbw, $comment, $data );
526                 if ( !$callback ) {
527                         $callback = function () {
528                                 // Do nothing.
529                         };
530                 }
531                 return [ $fields, $callback ];
532         }
533
534         /**
535          * Encode a Message as a PHP data structure
536          * @param Message $msg
537          * @return array
538          */
539         protected static function encodeMessage( Message $msg ) {
540                 $key = count( $msg->getKeysToTry() ) > 1 ? $msg->getKeysToTry() : $msg->getKey();
541                 $params = $msg->getParams();
542                 foreach ( $params as &$param ) {
543                         if ( $param instanceof Message ) {
544                                 $param = [
545                                         'message' => self::encodeMessage( $param )
546                                 ];
547                         }
548                 }
549                 array_unshift( $params, $key );
550                 return $params;
551         }
552
553         /**
554          * Decode a message that was encoded by self::encodeMessage()
555          * @param array $data
556          * @return Message
557          */
558         protected static function decodeMessage( $data ) {
559                 $key = array_shift( $data );
560                 foreach ( $data as &$param ) {
561                         if ( is_object( $param ) ) {
562                                 $param = (array)$param;
563                         }
564                         if ( is_array( $param ) && count( $param ) === 1 && isset( $param['message'] ) ) {
565                                 $param = self::decodeMessage( $param['message'] );
566                         }
567                 }
568                 return new Message( $key, $data );
569         }
570
571         /**
572          * Hashing function for comment storage
573          * @param string $text Comment text
574          * @param string|null $data Comment data
575          * @return int 32-bit signed integer
576          */
577         public static function hash( $text, $data ) {
578                 $hash = crc32( $text ) ^ crc32( (string)$data );
579
580                 // 64-bit PHP returns an unsigned CRC, change it to signed for
581                 // insertion into the database.
582                 if ( $hash >= 0x80000000 ) {
583                         $hash |= -1 << 32;
584                 }
585
586                 return $hash;
587         }
588
589 }