]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/diff/WordLevelDiff.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / includes / diff / WordLevelDiff.php
1 <?php
2 /**
3  * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
4  * You may copy this code freely under the conditions of the GPL.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19  * http://www.gnu.org/copyleft/gpl.html
20  *
21  * @file
22  * @ingroup DifferenceEngine
23  * @defgroup DifferenceEngine DifferenceEngine
24  */
25
26 use MediaWiki\Diff\ComplexityException;
27 use MediaWiki\Diff\WordAccumulator;
28
29 /**
30  * Performs a word-level diff on several lines
31  *
32  * @ingroup DifferenceEngine
33  */
class WordLevelDiff extends \Diff {
	/**
	 * @inheritDoc
	 */
	protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

	/**
	 * Tokenizes both sets of lines into words, diffs the whitespace-stripped
	 * words via the parent constructor, then rewrites every resulting edit so
	 * that it carries the original (unstripped) words.
	 *
	 * @param string[] $linesBefore
	 * @param string[] $linesAfter
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
		list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The fallback edit has to be built from the word lists rather than
			// from the line lists: the loop below sizes each slice of
			// $wordsBefore / $wordsAfter by count() of the edit's arrays, so an
			// edit holding N lines would be truncated to its first N words.
			$this->edits = [ new DiffOpChange( $wordsBeforeStripped, $wordsAfterStripped ) ];
		}

		// Walk the edits in order, swapping each run of stripped words for the
		// equally long run of unstripped words at the same running offset.
		$xi = $yi = 0;
		$editCount = count( $this->edits );
		for ( $i = 0; $i < $editCount; $i++ ) {
			$orig = &$this->edits[$i]->orig;
			if ( is_array( $orig ) ) {
				$orig = array_slice( $wordsBefore, $xi, count( $orig ) );
				$xi += count( $orig );
			}

			$closing = &$this->edits[$i]->closing;
			if ( is_array( $closing ) ) {
				$closing = array_slice( $wordsAfter, $yi, count( $closing ) );
				$yi += count( $closing );
			}
		}
	}

	/**
	 * Splits lines into words. Two parallel lists of equal length are built:
	 * the full matches (a word plus at most one trailing non-newline
	 * whitespace character) and the bare words (first capture group only).
	 * A "\n" entry is inserted into both lists between consecutive lines.
	 *
	 * @param string[] $lines
	 *
	 * @return array[] Two-element array: [ string[] $words, string[] $stripped ]
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( $first ) {
				$first = false;
			} else {
				// Line separator, present in both lists to keep them aligned
				$words[] = "\n";
				$stripped[] = "\n";
			}
			$m = [];
			// A word is a run of non-newline whitespace, a run of
			// [0-9_A-Za-z] / high bytes, or any single character.
			// NOTE(review): under the x flag "(?!< \n)" is a negative lookahead
			// for "<\n"; the character that follows must be whitespace, so it
			// can never fail. Possibly a lookbehind "(?<! \n)" was intended.
			// Left unchanged so that tokenization stays exactly as before.
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $stripped_word ) {
					$stripped[] = $stripped_word;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Feeds the "before" side of every edit into a WordAccumulator: words of
	 * copy edits are added untagged, words of any other edit with a non-empty
	 * "orig" side are added with the 'del' tag.
	 *
	 * @return string[]
	 */
	public function orig() {
		$orig = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$orig->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$orig->addWords( $edit->orig, 'del' );
			}
		}

		return $orig->getLines();
	}

	/**
	 * Feeds the "after" side of every edit into a WordAccumulator: words of
	 * copy edits are added untagged, words of any other edit with a non-empty
	 * "closing" side are added with the 'ins' tag.
	 *
	 * @return string[]
	 */
	public function closing() {
		$closing = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$closing->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$closing->addWords( $edit->closing, 'ins' );
			}
		}

		return $closing->getLines();
	}

}