3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
19 use MediaWiki\Shell\Shell;
20 use Symfony\Component\Process\ProcessBuilder;
// @codingStandardsIgnoreStart
/**
 * Syntax highlighting for MediaWiki, backed by the Pygments `pygmentize`
 * command-line tool.
 *
 * Provides the <source>/<syntaxhighlight> parser tags, highlighting for
 * configured content models and for pretty-printed API output, plus a few
 * backward-compatibility shims for code written against the old GeSHi API.
 */
class SyntaxHighlight {
// @codingStandardsIgnoreEnd

	/** @var int The maximum number of lines that may be selected for highlighting. **/
	const HIGHLIGHT_MAX_LINES = 1000;

	/** @var int Maximum input size for the highlighter (100 kB). **/
	const HIGHLIGHT_MAX_BYTES = 102400;

	/** @var string CSS class for syntax-highlighted code. **/
	const HIGHLIGHT_CSS_CLASS = 'mw-highlight';

	/** @var int Cache version. Increment whenever the HTML changes. */
	const CACHE_VERSION = 2;

	/** @var array Mapping of MIME-types to lexer names. **/
	private static $mimeLexers = [
		'text/javascript' => 'javascript',
		'application/json' => 'javascript',
	];

	/**
	 * Get the Pygments lexer name for a particular language.
	 *
	 * The name is lower-cased and looked up in the lexer list shipped in
	 * SyntaxHighlight.lexers.php; if it is not found there, the GeSHi
	 * compatibility map is consulted for an equivalent Pygments name.
	 *
	 * @param string $lang Language name.
	 * @return string|null Lexer name, or null if no matching lexer.
	 */
	private static function getLexer( $lang ) {
		// Loaded at most once per request.
		static $lexers = null;

		if ( $lang === null ) {
			return null;
		}

		if ( !$lexers ) {
			$lexers = require __DIR__ . '/SyntaxHighlight.lexers.php';
		}

		$lexer = strtolower( $lang );

		if ( in_array( $lexer, $lexers, true ) ) {
			return $lexer;
		}

		$geshi2pygments = SyntaxHighlightGeSHiCompat::getGeSHiToPygmentsMap();

		// Check if this is a GeSHi lexer name for which there exists
		// a compatible Pygments lexer with a different name.
		if ( isset( $geshi2pygments[$lexer] ) ) {
			$lexer = $geshi2pygments[$lexer];
			if ( in_array( $lexer, $lexers, true ) ) {
				return $lexer;
			}
		}

		return null;
	}

	/**
	 * Register parser hook
	 *
	 * Binds both the <source> and <syntaxhighlight> tags to parserHook().
	 *
	 * @param Parser &$parser
	 */
	public static function onParserFirstCallInit( Parser &$parser ) {
		foreach ( [ 'source', 'syntaxhighlight' ] as $tag ) {
			$parser->setHook( $tag, [ 'SyntaxHighlight_GeSHi', 'parserHook' ] );
		}
	}

	/**
	 * Parser hook for the <source> and <syntaxhighlight> tags.
	 *
	 * $args carries no default value: a default on a parameter that precedes
	 * the required $parser could never be used and is deprecated in PHP 8.
	 *
	 * @param string $text Tag contents.
	 * @param array $args Tag attributes.
	 * @param Parser $parser
	 * @return string HTML to insert into the page.
	 * @throws MWException If the block-level highlighter output is not wrapped as expected.
	 */
	public static function parserHook( $text, $args, $parser ) {
		// Replace strip markers (For e.g. {{#tag:syntaxhighlight|<nowiki>...}})
		$out = $parser->mStripState->unstripNoWiki( $text );

		// Don't trim leading spaces away, just the linefeeds
		$out = preg_replace( '/^\n+/', '', rtrim( $out ) );

		// Convert deprecated attributes
		if ( isset( $args['enclose'] ) ) {
			if ( $args['enclose'] === 'none' ) {
				$args['inline'] = true;
			}
			unset( $args['enclose'] );
		}

		$lexer = isset( $args['lang'] ) ? $args['lang'] : '';

		$result = self::highlight( $out, $lexer, $args );
		if ( !$result->isGood() ) {
			$parser->addTrackingCategory( 'syntaxhighlight-error-category' );
		}
		$out = $result->getValue();

		// HTML Tidy will convert tabs to spaces incorrectly (bug 30930).
		// But the conversion from tab to space occurs while reading the input,
		// before the conversion from &#9; to tab, so we can armor it that way.
		$out = str_replace( "\t", '&#9;', $out );

		// Allow certain HTML attributes
		$htmlAttribs = Sanitizer::validateAttributes( $args, [ 'style', 'class', 'id', 'dir' ] );
		if ( !isset( $htmlAttribs['class'] ) ) {
			$htmlAttribs['class'] = self::HIGHLIGHT_CSS_CLASS;
		} else {
			$htmlAttribs['class'] .= ' ' . self::HIGHLIGHT_CSS_CLASS;
		}
		// Anything other than an explicit dir="rtl" is forced to left-to-right.
		if ( !( isset( $htmlAttribs['dir'] ) && $htmlAttribs['dir'] === 'rtl' ) ) {
			$htmlAttribs['dir'] = 'ltr';
		}

		if ( isset( $args['inline'] ) ) {
			// Enforce inlineness. Stray newlines may result in unexpected list and paragraph processing
			// (also known as doBlockLevels()).
			$out = str_replace( "\n", ' ', $out );
			$out = Html::rawElement( 'code', $htmlAttribs, $out );
		} else {
			// Not entirely sure what benefit this provides, but it was here already
			$htmlAttribs['class'] .= ' ' . 'mw-content-' . $htmlAttribs['dir'];

			// Unwrap Pygments output to provide our own wrapper. We can't just always use the 'nowrap'
			// option (pass 'inline'), since it disables other useful things like line highlighting.
			// Tolerate absence of quotes for Html::element() and wgWellFormedXml=false.
			$m = [];
			if ( preg_match( '/^<div class="?mw-highlight"?>(.*)<\/div>$/s', trim( $out ), $m ) ) {
				$out = trim( $m[1] );
			} else {
				throw new MWException( 'Unexpected output from Pygments encountered' );
			}

			// Use 'nowiki' strip marker to prevent list processing (also known as doBlockLevels()).
			// However, leave the wrapping <div/> outside to prevent <p/>-wrapping.
			$marker = $parser::MARKER_PREFIX . '-syntaxhighlightinner-' .
				sprintf( '%08X', $parser->mMarkerIndex++ ) . $parser::MARKER_SUFFIX;
			$parser->mStripState->addNoWiki( $marker, $out );

			$out = Html::openElement( 'div', $htmlAttribs ) .
				$marker .
				Html::closeElement( 'div' );
		}

		// Register CSS
		$parser->getOutput()->addModuleStyles( 'ext.pygments' );

		return $out;
	}

	/**
	 * Get the path of the pygmentize executable.
	 *
	 * When $wgPygmentizePath is false, the global itself is overwritten with
	 * the path of the bundled copy, so later reads see the resolved value.
	 *
	 * @return string
	 */
	public static function getPygmentizePath() {
		global $wgPygmentizePath;

		// If $wgPygmentizePath is unset, use the bundled copy.
		if ( $wgPygmentizePath === false ) {
			$wgPygmentizePath = __DIR__ . '/pygments/pygmentize';
		}

		return $wgPygmentizePath;
	}

	/**
	 * Highlight a code-block using a particular lexer.
	 *
	 * Falls back to plain, HTML-escaped output (with a warning on the returned
	 * Status where applicable) when the language is unknown, the input is
	 * larger than HIGHLIGHT_MAX_BYTES, shell execution is disabled, or the
	 * pygmentize process fails.
	 *
	 * @param string $code Code to highlight.
	 * @param string|null $lang Language name, or null to use plain markup.
	 * @param array $args Associative array of additional arguments.
	 *  If it contains a 'line' key, the output will include line numbers.
	 *  If it includes a 'highlight' key, the value will be parsed as a
	 *   comma-separated list of lines and line-ranges to highlight.
	 *  If it contains a 'start' key, the value will be used as the line at which to
	 *   start highlighting.
	 *  If it contains a 'inline' key, the output will not be wrapped in `<div><pre/></div>`.
	 * @return Status Status object, with HTML representing the highlighted
	 *  code as its value.
	 */
	public static function highlight( $code, $lang = null, $args = [] ) {
		$status = new Status;

		$lexer = self::getLexer( $lang );
		if ( $lexer === null && $lang !== null ) {
			$status->warning( 'syntaxhighlight-error-unknown-language', $lang );
		}

		$length = strlen( $code );
		if ( $length > self::HIGHLIGHT_MAX_BYTES ) {
			$status->warning( 'syntaxhighlight-error-exceeds-size-limit',
				$length, self::HIGHLIGHT_MAX_BYTES );
			$lexer = null;
		}

		if ( Shell::isDisabled() ) {
			$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
			wfWarn(
				'MediaWiki determined that it cannot invoke Pygments. ' .
				'As a result, SyntaxHighlight_GeSHi will not perform any syntax highlighting. ' .
				'See the debug log for details: ' .
				'https://www.mediawiki.org/wiki/Manual:$wgDebugLogFile'
			);
			$lexer = null;
		}

		$inline = isset( $args['inline'] );

		if ( $lexer === null ) {
			// No usable lexer: emit the code escaped but unhighlighted.
			if ( $inline ) {
				$status->value = htmlspecialchars( trim( $code ), ENT_NOQUOTES );
			} else {
				$pre = Html::element( 'pre', [], $code );
				$status->value = Html::rawElement(
					'div',
					[ 'class' => self::HIGHLIGHT_CSS_CLASS ],
					$pre
				);
			}
			return $status;
		}

		$options = [
			'cssclass' => self::HIGHLIGHT_CSS_CLASS,
			'encoding' => 'utf-8',
		];

		// Line numbers
		if ( isset( $args['line'] ) ) {
			$options['linenos'] = 'inline';
		}

		// PHP snippets without an opening tag are lexed as if already inside one.
		if ( $lexer === 'php' && strpos( $code, '<?php' ) === false ) {
			$options['startinline'] = 1;
		}

		// Highlight specific lines
		if ( isset( $args['highlight'] ) ) {
			$lines = self::parseHighlightLines( $args['highlight'] );
			if ( count( $lines ) ) {
				$options['hl_lines'] = implode( ' ', $lines );
			}
		}

		// Starting line number. Integers are normalised to strings first,
		// because ctype_digit() interprets small integers as ASCII code points.
		if ( isset( $args['start'] ) ) {
			$start = is_int( $args['start'] ) ? (string)$args['start'] : $args['start'];
			if ( is_string( $start ) && ctype_digit( $start ) ) {
				$options['linenostart'] = (int)$start;
			}
		}

		if ( $inline ) {
			$options['nowrap'] = 1;
		}

		$cache = ObjectCache::getMainWANInstance();
		$cacheKey = self::makeCacheKey( $code, $lexer, $options );
		$output = $cache->get( $cacheKey );

		if ( $output === false ) {
			$optionPairs = [];
			foreach ( $options as $k => $v ) {
				$optionPairs[] = "{$k}={$v}";
			}
			$builder = new ProcessBuilder();
			$builder->setPrefix( self::getPygmentizePath() );
			$process = $builder
				->add( '-l' )->add( $lexer )
				->add( '-f' )->add( 'html' )
				->add( '-O' )->add( implode( ',', $optionPairs ) )
				->getProcess();

			$process->setInput( $code );

			/* Workaround for T151523 (buggy $process->getOutput()).
				If/when this issue is fixed in HHVM or Symfony,
				replace this with "$process->run(); $output = $process->getOutput();"
			*/
			$output = '';
			$process->run( function ( $type, $capturedOutput ) use ( &$output ) {
				$output .= $capturedOutput;
			} );

			if ( !$process->isSuccessful() ) {
				$status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
				wfWarn( 'Failed to invoke Pygments: ' . $process->getErrorOutput() );
				// Degrade to the plain-markup rendering; failures are not cached.
				$status->value = self::highlight( $code, null, $args )->getValue();
				return $status;
			}

			$cache->set( $cacheKey, $output );
		}

		if ( $inline ) {
			$output = trim( $output );
		}

		$status->value = $output;
		return $status;
	}

	/**
	 * Construct a cache key for the results of a Pygments invocation.
	 *
	 * The key hashes the code, the lexer, the JSON-encoded options and
	 * CACHE_VERSION, so a change to any of them yields a different key.
	 *
	 * @param string $code Code to be highlighted.
	 * @param string $lexer Lexer name.
	 * @param array $options Options array.
	 * @return string Cache key.
	 */
	private static function makeCacheKey( $code, $lexer, $options ) {
		$optionString = FormatJson::encode( $options, false, FormatJson::ALL_OK );
		$hash = md5( "{$code}|{$lexer}|{$optionString}|" . self::CACHE_VERSION );
		if ( function_exists( 'wfGlobalCacheKey' ) ) {
			return wfGlobalCacheKey( 'highlight', $hash );
		} else {
			return 'highlight:' . $hash;
		}
	}

	/**
	 * Take an input specifying a list of lines to highlight, returning
	 * a raw list of matching line numbers.
	 *
	 * Input is comma-separated list of lines or line ranges. Entries that are
	 * neither a digit-only number nor a valid range are ignored, and the
	 * result is capped at HIGHLIGHT_MAX_LINES entries.
	 *
	 * @param string $lineSpec
	 * @return int[] Line numbers.
	 */
	protected static function parseHighlightLines( $lineSpec ) {
		$lines = [];
		$values = array_map( 'trim', explode( ',', $lineSpec ) );
		foreach ( $values as $value ) {
			if ( ctype_digit( $value ) ) {
				$lines[] = (int)$value;
			} elseif ( strpos( $value, '-' ) !== false ) {
				list( $start, $end ) = array_map( 'trim', explode( '-', $value ) );
				if ( self::validHighlightRange( $start, $end ) ) {
					$last = intval( $end );
					for ( $i = intval( $start ); $i <= $last; $i++ ) {
						$lines[] = $i;
					}
				}
			}
			// Stop as soon as the cap is exceeded; later entries are dropped.
			if ( count( $lines ) > self::HIGHLIGHT_MAX_LINES ) {
				$lines = array_slice( $lines, 0, self::HIGHLIGHT_MAX_LINES );
				break;
			}
		}
		return $lines;
	}

	/**
	 * Validate a provided input range
	 *
	 * A range is accepted when both ends are digit-only strings, the start is
	 * positive and below the end, and the span is under HIGHLIGHT_MAX_LINES.
	 *
	 * @param string $start
	 * @param string $end
	 * @return bool
	 */
	protected static function validHighlightRange( $start, $end ) {
		// Since we're taking this tiny range and producing an
		// array of every integer between them, it would be trivial
		// to DoS the system by asking for a huge range.
		// Impose an arbitrary limit on the number of lines in a
		// given range to reduce the impact.
		return
			ctype_digit( $start ) &&
			ctype_digit( $end ) &&
			$start > 0 &&
			$start < $end &&
			$end - $start < self::HIGHLIGHT_MAX_LINES;
	}

	/**
	 * Hook into Content::getParserOutput to provide syntax highlighting for
	 * content models listed in the 'SyntaxHighlightModels' extension attribute.
	 *
	 * @param Content $content
	 * @param Title $title
	 * @param int $revId
	 * @param ParserOptions $options
	 * @param bool $generateHtml
	 * @param ParserOutput &$output
	 * @return bool False when this handler produced the HTML, true to let
	 *  MediaWiki handle the content itself.
	 */
	public static function onContentGetParserOutput( Content $content, Title $title,
		$revId, ParserOptions $options, $generateHtml, ParserOutput &$output
	) {
		global $wgParser, $wgTextModelsToParse;

		if ( !$generateHtml ) {
			// Nothing special for us to do, let MediaWiki handle this.
			return true;
		}

		// Determine the language
		$extension = ExtensionRegistry::getInstance();
		$models = $extension->getAttribute( 'SyntaxHighlightModels' );
		$model = $content->getModel();
		if ( !isset( $models[$model] ) ) {
			// We don't care about this model, carry on.
			return true;
		}
		$lexer = $models[$model];

		// Hope that $wgSyntaxHighlightModels does not contain silly types.
		$text = ContentHandler::getContentText( $content );
		// Explicit checks rather than a truthiness test, so that the literal
		// content "0" is still highlighted.
		if ( $text === null || $text === false || $text === '' ) {
			// Oops! Non-text content? Let MediaWiki handle this.
			return true;
		}

		// Parse using the standard parser to get links etc. into the database, HTML is replaced below.
		// We could do this using $content->fillParserOutput(), but alas it is 'protected'.
		if ( $content instanceof TextContent && in_array( $model, $wgTextModelsToParse, true ) ) {
			$output = $wgParser->parse( $text, $title, $options, true, true, $revId );
		}

		$status = self::highlight( $text, $lexer );
		if ( !$status->isOK() ) {
			return true;
		}
		$out = $status->getValue();

		$output->addModuleStyles( 'ext.pygments' );
		$output->setText( '<div dir="ltr">' . $out . '</div>' );

		// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
		return false;
	}

	/**
	 * Hook to provide syntax highlighting for API pretty-printed output
	 *
	 * @param IContextSource $context
	 * @param string $text
	 * @param string $mime
	 * @param string $format
	 * @return bool False when the highlighted output was added to the page,
	 *  true when the MIME type is not handled or highlighting failed.
	 */
	public static function onApiFormatHighlight( IContextSource $context, $text, $mime, $format ) {
		if ( !isset( self::$mimeLexers[$mime] ) ) {
			return true;
		}

		$lexer = self::$mimeLexers[$mime];
		$status = self::highlight( $text, $lexer );
		if ( !$status->isOK() ) {
			return true;
		}

		$out = $status->getValue();
		if ( preg_match( '/^<pre([^>]*)>/i', $out, $m ) ) {
			$attrs = Sanitizer::decodeTagAttributes( $m[1] );
			// The opening tag may carry no class attribute at all.
			$attrs['class'] = isset( $attrs['class'] )
				? $attrs['class'] . ' api-pretty-content'
				: 'api-pretty-content';
			$encodedAttrs = Sanitizer::safeEncodeTagAttributes( $attrs );
			$out = '<pre' . $encodedAttrs . '>' . substr( $out, strlen( $m[0] ) );
		}
		$output = $context->getOutput();
		$output->addModuleStyles( 'ext.pygments' );
		$output->addHTML( '<div dir="ltr">' . $out . '</div>' );

		// Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
		return false;
	}

	/**
	 * Conditionally register resource loader modules that depends on the
	 * VisualEditor MediaWiki extension.
	 *
	 * @param ResourceLoader &$resourceLoader
	 */
	public static function onResourceLoaderRegisterModules( &$resourceLoader ) {
		if ( !ExtensionRegistry::getInstance()->isLoaded( 'VisualEditor' ) ) {
			return;
		}

		$resourceLoader->register( 'ext.geshi.visualEditor', [
			'class' => 'ResourceLoaderSyntaxHighlightVisualEditorModule',
			'localBasePath' => __DIR__ . DIRECTORY_SEPARATOR . 'modules',
			'remoteExtPath' => 'SyntaxHighlight_GeSHi/modules',
			'scripts' => [
				've-syntaxhighlight/ve.dm.MWSyntaxHighlightNode.js',
				've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.js',
				've-syntaxhighlight/ve.ui.MWSyntaxHighlightWindow.js',
				've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.js',
				've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialogTool.js',
				've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.js',
				've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspectorTool.js',
			],
			'styles' => [
				've-syntaxhighlight/ve.ce.MWSyntaxHighlightNode.css',
				've-syntaxhighlight/ve.ui.MWSyntaxHighlightDialog.css',
				've-syntaxhighlight/ve.ui.MWSyntaxHighlightInspector.css',
			],
			'dependencies' => [
				'ext.visualEditor.mwcore',
			],
			'messages' => [
				'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-code',
				'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-language',
				'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-none',
				'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-showlines',
				'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-startingline',
				'syntaxhighlight-visualeditor-mwsyntaxhighlightinspector-title',
			],
			'targets' => [ 'desktop', 'mobile' ],
		] );
	}

	/**
	 * Backward-compatibility shim for extensions.
	 *
	 * Highlights $text and wraps the resulting HTML in a GeSHi object.
	 *
	 * @deprecated since MW 1.25
	 * @param string $text Code to highlight.
	 * @param string $lang Language name.
	 * @return GeSHi
	 */
	public static function prepare( $text, $lang ) {
		wfDeprecated( __METHOD__ );
		return new GeSHi( self::highlight( $text, $lang )->getValue() );
	}

	/**
	 * Backward-compatibility shim for extensions.
	 *
	 * Calls parse_code() on the given object and returns an empty string.
	 *
	 * @deprecated since MW 1.25
	 * @param GeSHi $geshi
	 * @return string Always the empty string.
	 */
	public static function buildHeadItem( $geshi ) {
		wfDeprecated( __METHOD__ );
		$geshi->parse_code();
		return '';
	}
}

// Keep the legacy class name working; the parser hook callback registered in
// onParserFirstCallInit() refers to the class by this name.
class_alias( SyntaxHighlight::class, 'SyntaxHighlight_GeSHi' );