]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - includes/HistoryBlob.php
MediaWiki 1.15.5
[autoinstallsdev/mediawiki.git] / includes / HistoryBlob.php
1 <?php
2
3 /**
4  * Base class for general text storage via the "object" flag in old_flags, or 
5  * two-part external storage URLs. Used to represent efficient concatenated
6  * storage, and migration-related pointer objects.
7  */
interface HistoryBlob
{
	/**
	 * Adds an item of text to the blob.
	 *
	 * If a stub object (such as HistoryBlobStub) is created to point at the
	 * item, setLocation() must be called on that stub before it is stored to
	 * the database.
	 *
	 * @param string $text The text to store
	 * @return mixed The key under which the text can be fetched with getItem()
	 */
	public function addItem( $text );

	/**
	 * Get item by key.
	 *
	 * @param mixed $key A key previously returned by addItem()
	 * @return string|false The stored text, or false if the key is not present
	 */
	public function getItem( $key );

	/**
	 * Set the "default text".
	 *
	 * This concept is an odd property of the current DB schema, whereby each
	 * text item has a revision associated with it. The default text is the
	 * text of the associated revision. There may, however, be other revisions
	 * in the same object.
	 *
	 * Default text is not required for two-part external storage URLs.
	 *
	 * @param string $text The default text
	 */
	public function setText( $text );

	/**
	 * Get default text. This is called from Revision::getRevisionText()
	 *
	 * @return string|false
	 */
	public function getText();
}
38
39 /**
40  * Concatenated gzip (CGZ) storage
41  * Improves compression ratio by concatenating like objects before gzipping
42  */
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
	// $mItems holds either an array of md5 hash => text (uncompressed state)
	// or the gzdeflate()d serialization of that array (compressed state);
	// $mCompressed says which. $mDefaultHash is the key used by getText().
	public $mVersion = 0, $mCompressed = false, $mItems = array(), $mDefaultHash = '';
	// Running total of strlen() of all stored items. Not serialized.
	public $mSize = 0;
	// Limits consulted by isHappy(). Not serialized.
	public $mMaxSize = 10000000;
	public $mMaxCount = 100;

	/**
	 * Constructor
	 *
	 * Uses __construct() rather than a class-named method so that the zlib
	 * check still runs on PHP versions that no longer treat class-named
	 * methods as constructors.
	 *
	 * @throws MWException if zlib support (gzdeflate) is unavailable
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
		}
	}

	/**
	 * Store a text item, keyed by its md5 hash. Adding the same text twice
	 * stores it once and does not count it twice towards mSize.
	 *
	 * @param string $text
	 * @return string The md5 hash of $text, usable with getItem()
	 */
	public function addItem( $text ) {
		$this->uncompress();
		$hash = md5( $text );
		if ( !isset( $this->mItems[$hash] ) ) {
			$this->mItems[$hash] = $text;
			$this->mSize += strlen( $text );
		}
		return $hash;
	}

	/**
	 * @param string $hash Key returned by addItem()
	 * @return string|false The stored text, or false if the hash is unknown
	 */
	public function getItem( $hash ) {
		$this->uncompress();
		if ( array_key_exists( $hash, $this->mItems ) ) {
			return $this->mItems[$hash];
		}
		return false;
	}

	/**
	 * Add $text as an item and remember it as the default text.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->uncompress();
		$this->mDefaultHash = $this->addItem( $text );
	}

	/**
	 * @return string|false The default text, or false if none is stored
	 */
	public function getText() {
		$this->uncompress();
		return $this->getItem( $this->mDefaultHash );
	}

	/**
	 * Remove an item. Unknown hashes are ignored.
	 *
	 * @param string $hash Key returned by addItem()
	 */
	public function removeItem( $hash ) {
		// mItems is a deflated string while compressed; it must be an array
		// before it can be indexed.
		$this->uncompress();
		if ( !array_key_exists( $hash, $this->mItems ) ) {
			return;
		}
		$this->mSize -= strlen( $this->mItems[$hash] );
		unset( $this->mItems[$hash] );
	}

	/**
	 * Compress the bulk data in the object
	 */
	public function compress() {
		if ( !$this->mCompressed ) {
			$this->mItems = gzdeflate( serialize( $this->mItems ) );
			$this->mCompressed = true;
		}
	}

	/**
	 * Uncompress bulk data
	 */
	public function uncompress() {
		if ( $this->mCompressed ) {
			$this->mItems = unserialize( gzinflate( $this->mItems ) );
			$this->mCompressed = false;
		}
	}

	/**
	 * Compress before serialization. Only the listed properties are stored;
	 * mSize, mMaxSize and mMaxCount are left out.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
	}

	/**
	 * Restore the item array after unserialization.
	 */
	public function __wakeup() {
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs
	 * Returns true until the object is "full" and ready to be committed
	 *
	 * @return bool
	 */
	public function isHappy() {
		// Make sure mItems is the item array, not the deflated string,
		// before counting it.
		$this->uncompress();
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}
}
133
134
135 /**
136  * One-step cache variable to hold base blobs; operations that
137  * pull multiple revisions may often pull multiple times from
138  * the same blob. By keeping the last-used one open, we avoid
139  * redundant unserialization and decompression overhead.
140  */
// Start with an empty blob cache. Written through $GLOBALS so the global is
// initialised even if this file is included from inside a function.
$GLOBALS['wgBlobCache'] = array();
143
144
145 /**
146  * Pointer object for an item within a CGZ blob stored in the text table.
147  */
class HistoryBlobStub {
	// mOldId: old_id of the text row holding the blob object
	// mHash:  key of this stub's item within that blob
	// mRef:   old_id of the referring object
	public $mOldId, $mHash, $mRef;

	/**
	 * @param string $hash The content hash of the text
	 * @param integer $oldid The old_id for the CGZ object. When omitted (or
	 *   zero) the location is left unset and must be supplied later with
	 *   setLocation().
	 */
	public function __construct( $hash = '', $oldid = 0 ) {
		$this->mHash = $hash;
		if ( $oldid ) {
			$this->mOldId = $oldid;
		}
	}

	/**
	 * Sets the location (old_id) of the main object to which this object
	 * points
	 */
	public function setLocation( $id ) {
		$this->mOldId = $id;
	}

	/**
	 * Sets the location (old_id) of the referring object
	 */
	public function setReferrer( $id ) {
		$this->mRef = $id;
	}

	/**
	 * Gets the location of the referring object
	 */
	public function getReferrer() {
		return $this->mRef;
	}

	/**
	 * Fetch the text this stub points to.
	 *
	 * The blob object is taken from $wgBlobCache when the cached entry is for
	 * the same old_id; otherwise it is loaded from the text table (following
	 * an external storage URL if the row is flagged 'external'), unserialized
	 * and placed in the cache, replacing whatever was there.
	 *
	 * @return string|false The item text, or false if the row is missing, is
	 *   not flagged 'object', has an unusable external URL, or does not
	 *   unserialize to an object
	 */
	public function getText() {
		global $wgBlobCache;
		if ( isset( $wgBlobCache[$this->mOldId] ) ) {
			$obj = $wgBlobCache[$this->mOldId];
		} else {
			$dbr = wfGetDB( DB_SLAVE );
			$row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
			if ( !$row ) {
				return false;
			}
			$flags = explode( ',', $row->old_flags );
			if ( in_array( 'external', $flags ) ) {
				$url = $row->old_text;
				// A usable URL has a non-empty part after "://"
				$parts = explode( '://', $url, 2 );
				if ( !isset( $parts[1] ) || $parts[1] === '' ) {
					return false;
				}
				$row->old_text = ExternalStore::fetchFromUrl( $url );
			}
			if ( !in_array( 'object', $flags ) ) {
				return false;
			}

			if ( in_array( 'gzip', $flags ) ) {
				// This shouldn't happen, but a bug in the compress script
				// may at times gzip-compress a HistoryBlob object row.
				$obj = unserialize( gzinflate( $row->old_text ) );
			} else {
				$obj = unserialize( $row->old_text );
			}

			if ( is_string( $obj ) ) {
				// Correct for old double-serialization bug.
				$obj = unserialize( $obj );
			}

			if ( !is_object( $obj ) ) {
				// Corrupt row or failed external fetch: report "not found"
				// instead of calling a method on a non-object.
				return false;
			}

			// Save this item for reference; if pulling many
			// items in a row we'll likely use it again.
			$obj->uncompress();
			$wgBlobCache = array( $this->mOldId => $obj );
		}
		return $obj->getItem( $this->mHash );
	}

	/**
	 * Get the content hash
	 */
	public function getHash() {
		return $this->mHash;
	}
}
235
236
237 /**
238  * To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the
239  * leftover cur table as the backend. This avoids expensively copying hundreds
240  * of megabytes of data during the conversion downtime.
241  *
242  * Serialized HistoryBlobCurStub objects will be inserted into the text table
243  * on conversion if $wgFastSchemaUpgrades is set to true.
244  */
class HistoryBlobCurStub {
	// cur_id of the row in the cur table that holds the text
	public $mCurId;

	/**
	 * Uses __construct() rather than a class-named method so that $curid is
	 * still applied on PHP versions that no longer treat class-named methods
	 * as constructors.
	 *
	 * @param integer $curid The cur_id pointed to
	 */
	public function __construct( $curid = 0 ) {
		$this->mCurId = $curid;
	}

	/**
	 * Sets the location (cur_id) of the main object to which this object
	 * points
	 */
	public function setLocation( $id ) {
		$this->mCurId = $id;
	}

	/**
	 * Load the text from the cur table.
	 *
	 * @return string|false The cur_text of the row, or false if no row has
	 *   this cur_id
	 */
	public function getText() {
		$dbr = wfGetDB( DB_SLAVE );
		$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
		if ( !$row ) {
			return false;
		}
		return $row->cur_text;
	}
}
272
273 /**
274  * Diff-based history compression
275  * Requires xdiff 1.5+ and zlib
276  */
class DiffHistoryBlob implements HistoryBlob {
	/** Uncompressed item cache */
	public $mItems = array();

	/** Total uncompressed size */
	public $mSize = 0;

	/**
	 * Array of diffs. If a diff D from A to B is notated D = B - A, and Z is
	 * an empty string:
	 *
	 *              { item[map[i]] - item[map[i-1]]   where i > 0
	 *    diff[i] = {
	 *              { item[map[i]] - Z                where i = 0
	 */
	public $mDiffs;

	/** The diff map, see above */
	public $mDiffMap;

	/**
	 * The key for getText()
	 */
	public $mDefaultKey;

	/**
	 * Compressed storage
	 */
	public $mCompressed;

	/**
	 * True if the object is locked against further writes
	 */
	public $mFrozen = false;

	/**
	 * The maximum uncompressed size before the object becomes sad
	 * Should be less than max_allowed_packet
	 */
	public $mMaxSize = 10000000;

	/**
	 * The maximum number of text items before the object becomes sad
	 */
	public $mMaxCount = 100;

	/** Constants from xdiff.h */
	const XDL_BDOP_INS = 1;
	const XDL_BDOP_CPY = 2;
	const XDL_BDOP_INSB = 3;

	/**
	 * @throws MWException if zlib support (gzdeflate) is unavailable
	 */
	public function __construct() {
		if ( !function_exists( 'gzdeflate' ) ) {
			throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
		}
	}

	/**
	 * Append a text item. Any previously computed diffs are discarded and
	 * will be rebuilt by the next compress().
	 *
	 * @param string $text
	 * @return int The zero-based index of the new item, usable with getItem()
	 * @throws MWException if the object has been through __wakeup()
	 */
	public function addItem( $text ) {
		if ( $this->mFrozen ) {
			throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
		}

		$this->mItems[] = $text;
		$this->mSize += strlen( $text );
		$this->mDiffs = null; // later
		return count( $this->mItems ) - 1;
	}

	/**
	 * @param int $key Index returned by addItem()
	 * @return string|false The stored text, or false if the key is not
	 *   present (as the HistoryBlob interface requires)
	 */
	public function getItem( $key ) {
		if ( $key === null || !isset( $this->mItems[$key] ) ) {
			return false;
		}
		return $this->mItems[$key];
	}

	/**
	 * Add $text as an item and remember it as the default text.
	 *
	 * @param string $text
	 */
	public function setText( $text ) {
		$this->mDefaultKey = $this->addItem( $text );
	}

	/**
	 * @return string|false The default text, or false if none has been set
	 */
	public function getText() {
		return $this->getItem( $this->mDefaultKey );
	}

	/**
	 * Build mDiffs and mDiffMap from mItems.
	 *
	 * Items are split into two diff chains, 'main' and 'small': an item goes
	 * to 'small' when it is shorter than half the length of the last 'main'
	 * item. The chains are then joined into a single sequence, with the
	 * first diff of the second chain recomputed against the tail of the
	 * first.
	 *
	 * Does nothing if diffs already exist or there are no items.
	 *
	 * @throws MWException if xdiff_string_rabdiff() is unavailable
	 */
	public function compress() {
		if ( !function_exists( 'xdiff_string_rabdiff' ) ){
			throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
		}
		if ( isset( $this->mDiffs ) ) {
			// Already compressed
			return;
		}
		if ( !count( $this->mItems ) ) {
			// Empty
			return;
		}

		// Create two diff sequences: one for main text and one for small text
		$sequences = array(
			'small' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
			'main' => array(
				'tail' => '',
				'diffs' => array(),
				'map' => array(),
			),
		);
		$smallFactor = 0.5;

		for ( $i = 0; $i < count( $this->mItems ); $i++ ) {
			$text = $this->mItems[$i];
			if ( $i == 0 ) {
				$seqName = 'main';
			} else {
				$mainTail = $sequences['main']['tail'];
				if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
					$seqName = 'small';
				} else {
					$seqName = 'main';
				}
			}
			$seq =& $sequences[$seqName];
			$tail = $seq['tail'];
			$diff = $this->diff( $tail, $text );
			$seq['diffs'][] = $diff;
			$seq['map'][] = $i;
			$seq['tail'] = $text;
		}
		unset( $seq ); // unlink dangerous alias

		// Knit the sequences together
		$tail = '';
		$this->mDiffs = array();
		$this->mDiffMap = array();
		foreach ( $sequences as $seq ) {
			if ( !count( $seq['diffs'] ) ) {
				continue;
			}
			if ( $tail === '' ) {
				$this->mDiffs[] = $seq['diffs'][0];
			} else {
				// The chain's first diff was taken against the empty string;
				// rebuild its text and re-diff against the previous tail.
				$head = $this->patch( '', $seq['diffs'][0] );
				$this->mDiffs[] = $this->diff( $tail, $head );
			}
			$this->mDiffMap[] = $seq['map'][0];
			for ( $i = 1; $i < count( $seq['diffs'] ); $i++ ) {
				$this->mDiffs[] = $seq['diffs'][$i];
				$this->mDiffMap[] = $seq['map'][$i];
			}
			$tail = $seq['tail'];
		}
	}

	/**
	 * Compute a binary diff from $t1 to $t2 with xdiff_string_rabdiff().
	 *
	 * @param string $t1 Base text
	 * @param string $t2 Target text
	 * @return string
	 */
	public function diff( $t1, $t2 ) {
		# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
		# "String is not zero-terminated"
		wfSuppressWarnings();
		$diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
		wfRestoreWarnings();
		return $diff;
	}

	/**
	 * Apply a binary diff to $base.
	 *
	 * Uses xdiff_string_bpatch() when available, otherwise a pure PHP
	 * implementation of the same patch format.
	 *
	 * @param string $base Text the diff was computed against
	 * @param string $diff Binary diff
	 * @return string|false The patched text; the pure PHP path returns false
	 *   when the checksum or base length in the header does not match, or an
	 *   unknown opcode is found
	 */
	public function patch( $base, $diff ) {
		if ( function_exists( 'xdiff_string_bpatch' ) ) {
			wfSuppressWarnings();
			$text = xdiff_string_bpatch( $base, $diff ) . '';
			wfRestoreWarnings();
			return $text;
		}

		# Pure PHP implementation

		# Header: 4-byte little-endian base checksum, 4-byte little-endian base length
		$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );

		# Check the checksum if the hash extension is available
		$ofp = $this->xdiffAdler32( $base );
		if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
			wfDebug( __METHOD__. ": incorrect base checksum\n" );
			return false;
		}
		if ( $header['csize'] != strlen( $base ) ) {
			wfDebug( __METHOD__. ": incorrect base length\n" );
			return false;
		}

		$p = 8;
		$out = '';
		while ( $p < strlen( $diff ) ) {
			$x = unpack( 'Cop', substr( $diff, $p, 1 ) );
			$op = $x['op'];
			++$p;
			switch ( $op ) {
			case self::XDL_BDOP_INS:
				# Insert literal data, 1-byte length
				$x = unpack( 'Csize', substr( $diff, $p, 1 ) );
				$p++;
				$out .= substr( $diff, $p, $x['size'] );
				$p += $x['size'];
				break;
			case self::XDL_BDOP_INSB:
				# Insert literal data, 4-byte length
				$x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
				$p += 4;
				$out .= substr( $diff, $p, $x['csize'] );
				$p += $x['csize'];
				break;
			case self::XDL_BDOP_CPY:
				# Copy a range from the base text
				$x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
				$p += 8;
				$out .= substr( $base, $x['off'], $x['csize'] );
				break;
			default:
				wfDebug( __METHOD__.": invalid op\n" );
				return false;
			}
		}
		return $out;
	}

	/**
	 * Compute the base checksum in the form stored in a diff header.
	 *
	 * The header checksum is not a standard Adler-32: __wakeup() writes a
	 * checksum of 0 for an empty base, whereas standard Adler-32 of the empty
	 * string is 1. It is therefore computed here with the Adler-32 state
	 * seeded to zero, and returned little-endian to match the 'V' header
	 * field. NOTE(review): zero seeding is inferred from that old-format
	 * header and from LibXDiff's behaviour -- confirm against xdiff output.
	 *
	 * @param string $s
	 * @return string|false 4 raw bytes, or false if hash() is unavailable
	 */
	private function xdiffAdler32( $s ) {
		if ( !function_exists( 'hash' ) ) {
			return false;
		}
		static $init;
		if ( $init === null ) {
			// The standard Adler-32 of this prefix is zero, so hashing it
			// first leaves the running state at zero before $s is consumed.
			$init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
		}
		// hash() emits the checksum big-endian; the header is little-endian.
		return strrev( hash( 'adler32', $init . $s, true ) );
	}

	/**
	 * Rebuild mItems by applying each diff in turn to the previous result,
	 * storing each text under the key given by mDiffMap.
	 */
	public function uncompress() {
		if ( !$this->mDiffs ) {
			return;
		}
		$tail = '';
		for ( $diffKey = 0; $diffKey < count( $this->mDiffs ); $diffKey++ ) {
			$textKey = $this->mDiffMap[$diffKey];
			$text = $this->patch( $tail, $this->mDiffs[$diffKey] );
			$this->mItems[$textKey] = $text;
			$tail = $text;
		}
	}

	/**
	 * Compress and pack the object for serialization. Only mCompressed is
	 * serialized; it holds the deflated diffs, the delta-encoded diff map
	 * and, if set, the default key.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		$this->compress();
		if ( !count( $this->mItems ) ) {
			// Empty object
			$info = false;
		} else {
			// Take forward differences to improve the compression ratio for sequences
			$map = '';
			$prev = 0;
			foreach ( $this->mDiffMap as $i ) {
				if ( $map !== '' ) {
					$map .= ',';
				}
				$map .= $i - $prev;
				$prev = $i;
			}
			$info = array(
				'diffs' => $this->mDiffs,
				'map' => $map
			);
		}
		if ( isset( $this->mDefaultKey ) ) {
			$info['default'] = $this->mDefaultKey;
		}
		$this->mCompressed = gzdeflate( serialize( $info ) );
		return array( 'mCompressed' );
	}

	/**
	 * Unpack mCompressed after unserialization, rebuild the items and freeze
	 * the object against further addItem() calls.
	 */
	public function __wakeup() {
		// addItem() doesn't work if mItems is partially filled from mDiffs
		$this->mFrozen = true;
		$info = unserialize( gzinflate( $this->mCompressed ) );
		unset( $this->mCompressed );

		if ( !$info ) {
			// Empty object
			return;
		}

		if ( isset( $info['default'] ) ) {
			$this->mDefaultKey = $info['default'];
		}
		$this->mDiffs = $info['diffs'];
		if ( isset( $info['base'] ) ) {
			// Old format: a literal base text followed by sequential diffs.
			// Turn the base into a diff against the empty string (checksum 0,
			// length 0, one INSB op) so uncompress() can treat it uniformly.
			$this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
			array_unshift( $this->mDiffs,
				pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
				$info['base'] );
		} else {
			// New format: undo the forward differences applied in __sleep()
			$map = explode( ',', $info['map'] );
			$cur = 0;
			$this->mDiffMap = array();
			foreach ( $map as $i ) {
				$cur += $i;
				$this->mDiffMap[] = $cur;
			}
		}
		$this->uncompress();
	}

	/**
	 * Helper function for compression jobs
	 * Returns true until the object is "full" and ready to be committed
	 *
	 * @return bool
	 */
	public function isHappy() {
		return $this->mSize < $this->mMaxSize
			&& count( $this->mItems ) < $this->mMaxCount;
	}

}
578
579 }