]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - tests/phpunit/includes/LinkFilterTest.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / tests / phpunit / includes / LinkFilterTest.php
1 <?php
2
3 use Wikimedia\Rdbms\LikeMatch;
4
/**
 * Tests for LinkFilter::makeLikeArray().
 *
 * A search pattern is turned into a LIKE array, the LIKE array is translated
 * into an equivalent regular expression, and that expression is matched
 * against the indexes wfMakeUrlIndexes() produces for a URL. This lets the
 * LIKE matching be checked without running a query.
 *
 * @group Database
 */
class LinkFilterTest extends MediaWikiLangTestCase {

	/**
	 * Sets $wgUrlProtocols to a fixed list before each test, so the data sets
	 * below are evaluated against a known set of protocols.
	 */
	protected function setUp() {
		parent::setUp();

		$this->setMwGlobals( 'wgUrlProtocols', [
			'http://',
			'https://',
			'ftp://',
			'irc://',
			'ircs://',
			'gopher://',
			'telnet://',
			'nntp://',
			'worldwind://',
			'mailto:',
			'news:',
			'svn://',
			'git://',
			'mms://',
			'//',
		] );
	}

	/**
	 * Builds an anchored regular expression equivalent to a LIKE array.
	 *
	 * LikeMatch wildcards are translated ('%' becomes '.*', '_' becomes '.');
	 * every other element is treated as literal text and regex-quoted. The
	 * expression uses '!' as its delimiter and is anchored at both ends.
	 *
	 * @param array $like Array as created by LinkFilter::makeLikeArray()
	 * @return string Regex
	 */
	public function createRegexFromLIKE( $like ) {
		$regex = '!^';

		foreach ( $like as $item ) {
			if ( $item instanceof LikeMatch ) {
				$wildcard = $item->toString();

				if ( $wildcard === '%' ) {
					$regex .= '.*';
				} elseif ( $wildcard === '_' ) {
					$regex .= '.';
				} else {
					// Dropping an unknown wildcard would silently produce a regex
					// that does not represent the LIKE array, so fail loudly.
					$this->fail( "Unsupported LIKE wildcard '$wildcard' in LIKE array" );
				}
			} else {
				$regex .= preg_quote( $item, '!' );
			}
		}

		return $regex . '$!';
	}

	/**
	 * Data sets for testMakeLikeArrayWithValidPatterns().
	 *
	 * Each row is [ protocol, search pattern, URL, shouldBeFound ]. The fourth
	 * element is optional and defaults to true. In the rows where the protocol
	 * is null or '', the search pattern carries the protocol itself.
	 *
	 * @return array
	 */
	public static function provideValidPatterns() {
		return [
			// Protocol, Search pattern, URL which matches the pattern
			[ 'http://', '*.test.com', 'http://www.test.com' ],
			[ 'http://', 'test.com:8080/dir/file', 'http://name:pass@test.com:8080/dir/file' ],
			[ 'https://', '*.com', 'https://s.s.test..com:88/dir/file?a=1&b=2' ],
			[ 'https://', '*.com', 'https://name:pass@secure.com/index.html' ],
			[ 'http://', 'name:pass@test.com', 'http://test.com' ],
			[ 'http://', 'test.com', 'http://name:pass@test.com' ],
			[ 'http://', '*.test.com', 'http://a.b.c.test.com/dir/dir/file?a=6' ],
			[ null, 'http://*.test.com', 'http://www.test.com' ],
			[ 'mailto:', 'name@mail.test123.com', 'mailto:name@mail.test123.com' ],
			[ '',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			],
			[ '', 'http://name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ '', 'http://name:wrongpass@*.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ 'http://', 'name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ '', 'http://name:pass@www.test.com:12345',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ 'ftp://', 'user:pass@ftp.test.com:1233/home/user/file;type=efw',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://otheruser:otherpass@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://ftp.test.com/',
				'ftp://user:pass@ftp.test.com/home/user/file;type=efw' ],
			[ null, 'ftp://*.test.com:222/',
				'ftp://user:pass@ftp.test.com:222/home' ],
			[ 'irc://', '*.myserver:6667/', 'irc://test.myserver:6667/' ],
			[ 'irc://', 'name:pass@*.myserver/', 'irc://test.myserver:6667/' ],
			[ 'irc://', 'name:pass@*.myserver/', 'irc://other:@test.myserver:6667/' ],
			[ '', 'irc://test/name,string,abc?msg=t', 'irc://test/name,string,abc?msg=test' ],
			[ '', 'https://gerrit.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ],
			[ '', 'https://gerrit.wikimedia.org',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ],
			[ 'mailto:', '*.test.com', 'mailto:name@pop3.test.com' ],
			[ 'mailto:', 'test.com', 'mailto:name@test.com' ],
			[ 'news:', 'test.1234afc@news.test.com', 'news:test.1234afc@news.test.com' ],
			[ 'news:', '*.test.com', 'news:test.1234afc@news.test.com' ],
			[ '', 'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ],
			[ '', 'news:*.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ],
			[ '', 'git://github.com/prwef/abc-def.git', 'git://github.com/prwef/abc-def.git' ],
			[ 'git://', 'github.com/', 'git://github.com/prwef/abc-def.git' ],
			[ 'git://', '*.github.com/', 'git://a.b.c.d.e.f.github.com/prwef/abc-def.git' ],
			[ '', 'gopher://*.test.com/', 'gopher://gopher.test.com/0/v2/vstat' ],
			[ 'telnet://', '*.test.com', 'telnet://shell.test.com/~home/' ],
			[ '', 'http://test.com', 'http://test.com/index?arg=1' ],
			[ 'http://', '*.test.com', 'http://www.test.com/index?arg=1' ],
			[ '',
				'http://xx23124:__ffdfdef__@www.test.com:12345/dir',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			],

			// Tests for false positives
			[ 'http://', 'test.com', 'http://www.test.com', false ],
			[ 'http://', 'www1.test.com', 'http://www.test.com', false ],
			[ 'http://', '*.test.com', 'http://www.test.t.com', false ],
			[ '', 'http://test.com:8080', 'http://www.test.com:8080', false ],
			[ '', 'https://test.com', 'http://test.com', false ],
			[ '', 'http://test.com', 'https://test.com', false ],
			[ 'http://', 'http://test.com', 'http://test.com', false ],
			[ null, 'http://www.test.com', 'http://www.test.com:80', false ],
			[ null, 'http://www.test.com:80', 'http://www.test.com', false ],
			[ null, 'http://*.test.com:80', 'http://www.test.com', false ],
			[ '', 'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z', false ],
			[ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z', false ],
			[ 'mailto:', '@test.com', '@abc.test.com', false ],
			[ 'mailto:', 'mail@test.com', 'mail2@test.com', false ],
			[ '', 'mailto:mail@test.com', 'mail2@test.com', false ],
			[ '', 'mailto:@test.com', '@abc.test.com', false ],
			[ 'ftp://', '*.co', 'ftp://www.co.uk', false ],
			[ 'ftp://', '*.co', 'ftp://www.co.m', false ],
			[ 'ftp://', '*.co/dir/', 'ftp://www.co/dir2/', false ],
			[ 'ftp://', 'www.co/dir/', 'ftp://www.co/dir2/', false ],
			[ 'ftp://', 'test.com/dir/', 'ftp://test.com/', false ],
			[ '', 'http://test.com:8080/dir/', 'http://test.com:808/dir/', false ],
			[ '', 'http://test.com/dir/index.html', 'http://test.com/dir/index.php', false ],

			// These are false positives too and ideally shouldn't match, but that
			// would require using regexes and RLIKE instead of LIKE
			// [ null, 'http://*.test.com', 'http://www.test.com:80', false ],
			// [ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
			//      'https://gerrit.wikimedia.org/XXX/r/#/q/status:open,n,z', false ],
		];
	}

	/**
	 * Tests whether the LIKE clause produced by LinkFilter::makeLikeArray($pattern, $protocol)
	 * will find one of the URL indexes produced by wfMakeUrlIndexes($url)
	 *
	 * @dataProvider provideValidPatterns
	 *
	 * @param string|null $protocol Protocol, e.g. 'http://' or 'mailto:'; the data
	 *  provider passes null or '' when the pattern carries the protocol itself
	 * @param string $pattern Search pattern to feed to LinkFilter::makeLikeArray
	 * @param string $url URL to feed to wfMakeUrlIndexes
	 * @param bool $shouldBeFound Should the URL be found? (defaults true)
	 */
	public function testMakeLikeArrayWithValidPatterns(
		$protocol, $pattern, $url, $shouldBeFound = true
	) {
		$indexes = wfMakeUrlIndexes( $url );
		$likeArray = LinkFilter::makeLikeArray( $pattern, $protocol );

		$this->assertNotFalse(
			$likeArray,
			"LinkFilter::makeLikeArray('$pattern', '$protocol') returned false on a valid pattern"
		);

		$regex = $this->createRegexFromLIKE( $likeArray );
		$debugmsg = "Regex: '" . $regex . "'\n";
		$debugmsg .= count( $indexes ) . " index(es) created by wfMakeUrlIndexes():\n";

		$matches = 0;

		foreach ( $indexes as $index ) {
			$result = preg_match( $regex, $index );

			// preg_match() returns false on error. Without this check an error
			// would be counted as "no match" and could let a negative case pass.
			$this->assertNotFalse(
				$result,
				"preg_match() failed for regex '$regex' on index '$index'"
			);

			$matches += $result;
			$debugmsg .= "\t'$index'\n";
		}

		if ( $shouldBeFound ) {
			$this->assertGreaterThan(
				0,
				$matches,
				"Search pattern '$protocol$pattern' does not find url '$url' \n$debugmsg"
			);
		} else {
			$this->assertSame(
				0,
				$matches,
				"Search pattern '$protocol$pattern' should not find url '$url' \n$debugmsg"
			);
		}
	}

	/**
	 * Data sets for testMakeLikeArrayWithInvalidPatterns().
	 *
	 * Each row holds a single search pattern that is expected to be rejected.
	 *
	 * @return array
	 */
	public static function provideInvalidPatterns() {
		return [
			[ '' ],
			[ '*' ],
			[ 'http://*' ],
			[ 'http://*/' ],
			[ 'http://*/dir/file' ],
			[ 'test.*.com' ],
			[ 'http://test.*.com' ],
			[ 'http://*.test.*' ],
			[ 'http://*test.com' ],
			[ 'https://*' ],
			[ '*://test.com' ],
			[ 'mailto:name:pass@t*est.com' ],
			[ 'http://*:888/' ],
			[ '*http://' ],
			[ 'test.com/*/index' ],
			[ 'test.com/dir/index?arg=*' ],
		];
	}

	/**
	 * Tests whether LinkFilter::makeLikeArray($pattern) will reject invalid search patterns
	 *
	 * @dataProvider provideInvalidPatterns
	 *
	 * @param string $pattern Invalid search pattern
	 */
	public function testMakeLikeArrayWithInvalidPatterns( $pattern ) {
		$this->assertFalse(
			LinkFilter::makeLikeArray( $pattern ),
			"'$pattern' is not a valid pattern and should be rejected"
		);
	}

}