]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - maintenance/benchmarks/bench_utf8_title_check.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / maintenance / benchmarks / bench_utf8_title_check.php
1 <?php
2 /**
3  * Benchmark for using a regexp vs. mb_check_encoding to check for UTF-8 encoding.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  * http://www.gnu.org/copyleft/gpl.html
19  *
20  * @file
21  * @ingroup Benchmark
22  */
23
24 require_once __DIR__ . '/Benchmarker.php';
25
26 /**
27  * This little benchmark executes the regexp formerly used in Language->checkTitleEncoding()
28  * and compares its execution time against that of mb_check_encoding.
29  *
30  * @ingroup Benchmark
31  */
class BenchUtf8TitleCheck extends Benchmarker {
	/**
	 * Percent-encoded sample titles; execute() passes each one through
	 * rawurldecode() before handing it to the checkers.
	 *
	 * @var string[]
	 */
	private $data;

	/**
	 * Result of the most recent check. The regexp variants store the
	 * preg_match() return value, the mbstring variant stores a boolean.
	 *
	 * @var int|bool
	 */
	private $isutf8;

	/**
	 * Prepare the sample titles and register the benchmark description.
	 */
	public function __construct() {
		parent::__construct();

		// @codingStandardsIgnoreStart Ignore long line warnings.
		// This comes from T38839
		$longTitle = implode( '', [
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C",
			"Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C",
			"Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C",
			"Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C",
			"Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C",
			"Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C",
			"Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C",
			"Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C",
			"Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C",
			"Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C",
			"Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C",
			"Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C",
			"Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C",
			"Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis",
		] );

		$this->data = [
			// Empty input
			"",
			// 7bit ASCII
			"United States of America",
			// Short title with multi-byte sequences
			"S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
			// Medium-length, pipe-separated list
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
			$longTitle,
		];
		// @codingStandardsIgnoreEnd

		$description = "Benchmark for using a regexp vs. mb_check_encoding " .
			"to check for UTF-8 encoding.";
		$this->addDescription( $description );
	}

	/**
	 * Queue every checker against every decoded sample title, then run
	 * the whole batch through bench().
	 */
	public function execute() {
		// Order matters: it determines the order of the queued benchmarks.
		$checkers = [
			'use_regexp',
			'use_regexp_non_capturing',
			'use_regexp_once_only',
			'use_mb_check_encoding',
		];

		$benchmarks = [];
		foreach ( $this->data as $encodedTitle ) {
			$title = rawurldecode( $encodedTitle );
			foreach ( $checkers as $method ) {
				$benchmarks[] = [
					'function' => [ $this, $method ],
					'args' => [ $title ],
				];
			}
		}

		$this->bench( $benchmarks );
	}

	/**
	 * Check $s with the regexp using a plain capturing group.
	 *
	 * Because of the "+" quantifier, an empty string does not match.
	 *
	 * @param string $s Decoded title to check
	 */
	protected function use_regexp( $s ) {
		$pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Check $s with the same regexp, using a non-capturing subgroup "(?:".
	 *
	 * @param string $s Decoded title to check
	 */
	protected function use_regexp_non_capturing( $s ) {
		$pattern = '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Check $s with the same regexp, using a once-only subgroup "(?>".
	 *
	 * @param string $s Decoded title to check
	 */
	protected function use_regexp_once_only( $s ) {
		$pattern = '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Check $s with mb_check_encoding() against 'UTF-8'.
	 *
	 * @param string $s Decoded title to check
	 */
	protected function use_mb_check_encoding( $s ) {
		$encoding = 'UTF-8';
		$this->isutf8 = mb_check_encoding( $s, $encoding );
	}
}
112
// Name this benchmark class in $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (a constant defined outside this file; presumably
// it runs the class when the script is invoked directly).
$maintClass = BenchUtf8TitleCheck::class;
require_once RUN_MAINTENANCE_IF_MAIN;