3 * Copyright © 2007 Martin Seidel (Xarax) <jodeldi@gmx.de>
5 * Inspired by djvuhandler from Tim Starling
6 * Modified and written by Xarax
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
// Media handler for PDF files, built on top of ImageHandler.
24 class PdfHandler extends ImageHandler {
// Message keys for the PDF file-page warning, indexed by role
// (main / header / info / footer). getWarningConfig() hands this map out
// as-is and registerWarningModule() registers its values.
25 public static $messages = [
26 'main' => 'pdf-file-page-warning',
27 'header' => 'pdf-file-page-warning-header',
28 'info' => 'pdf-file-page-warning-info',
29 'footer' => 'pdf-file-page-warning-footer',
35 function isEnabled() {
// The handler depends on three configuration globals being defined:
// $wgPdfProcessor, $wgPdfPostProcessor and $wgPdfInfo.
36 global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfInfo;
// If any one of them is unset, write a debug-log hint naming all three
// variables so the administrator knows what to add to LocalSettings.php.
38 if ( !isset( $wgPdfProcessor ) || !isset( $wgPdfPostProcessor ) || !isset( $wgPdfInfo ) ) {
39 wfDebug( "PdfHandler is disabled, please set the following\n" );
40 wfDebug( "variables in LocalSettings.php:\n" );
41 wfDebug( "\$wgPdfProcessor, \$wgPdfPostProcessor, \$wgPdfInfo\n" );
51 function mustRender( $file ) {
// NOTE(review): going by the name, this tells the caller whether $file has to
// be rendered before it can be displayed; confirm against the MediaHandler
// contract.
59 function isMultiPage( $file ) {
// NOTE(review): going by the name, this reports whether $file is handled as a
// multi-page document (this class does expose pageCount() and a 'page'
// parameter); confirm against the MediaHandler contract.
68 function validateParam( $name, $value ) {
// Checks a single transform parameter ($name => $value).
// A 'page' value is singled out when its trimmed text is not exactly the
// string form of its own integer value (e.g. "3 of the document").
69 if ( $name === 'page' && trim( $value ) !== (string)intval( $value ) ) {
70 // Extra junk on the end of page, probably actually a caption
71 // e.g. [[File:Foo.pdf|thumb|Page 3 of the document shows foo]]
// width, height and page are accepted only when greater than zero.
// NOTE(review): in_array() is called without the strict flag and "> 0" is a
// loose comparison, so numeric strings are compared by numeric value.
74 if ( in_array( $name, [ 'width', 'height', 'page' ] ) ) {
75 return ( $value > 0 );
81 * @param $params array
85 function makeParamString( $params ) {
// Builds the parameter string "page<page>-<width>px" from $params.
// The page defaults to 1 when $params has no 'page' entry.
85 $page = isset( $params['page'] ) ? $params['page'] : 1;
// A width is required to build the string; the no-width case is handled
// separately inside this branch.
86 if ( !isset( $params['width'] ) ) {
// Same shape that parseParamString() matches with its regular expression.
89 return "page{$page}-{$params['width']}px";
96 function parseParamString( $str ) {
// Parses a string of the form "page<digits>-<digits>px" (the format produced
// by makeParamString()) back into its parts.
// Group 1 is the page number and group 2 the width; both are returned as the
// matched digit strings, without an integer cast.
99 if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) {
100 return [ 'width' => $m[2], 'page' => $m[1] ];
107 * @param $params array
110 function getScriptParams( $params ) {
// Only the 'width' and 'page' entries of $params are read here, and they are
// passed on under the same key names.
112 'width' => $params['width'],
113 'page' => $params['page'],
120 function getParamMap() {
// Maps the 'img_width' and 'img_page' keys to this handler's parameter names
// 'width' and 'page' (the names validateParam() checks).
122 'img_width' => 'width',
123 'img_page' => 'page',
131 * @return MediaTransformError
133 protected function doThumbError( $width, $height, $msg ) {
// Wraps the message key $msg in a MediaTransformError of type
// 'thumbnail_error'. The message text is resolved in the wiki's content
// language (inContentLanguage()).
134 return new MediaTransformError( 'thumbnail_error',
135 $width, $height, wfMessage( $msg )->inContentLanguage()->text() );
140 * @param $dstPath string
141 * @param $dstUrl string
142 * @param $params array
144 * @return MediaTransformError|MediaTransformOutput|ThumbnailImage|TransformParameterError
146 function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
// Renders one page of the PDF $image into a thumbnail at $dstPath and returns
// either a ThumbnailImage or an error object describing why that failed.
147 global $wgPdfProcessor, $wgPdfPostProcessor, $wgPdfHandlerDpi, $wgPdfHandlerJpegQuality;
// Let normaliseParams() check $params first; if it rejects them the caller
// gets a TransformParameterError.
149 if ( !$this->normaliseParams( $image, $params ) ) {
150 return new TransformParameterError( $params );
// From here on work with integer width, height and page.
153 $width = (int)$params['width'];
154 $height = (int)$params['height'];
155 $page = (int)$params['page'];
// A page beyond the document's page count is reported as 'pdf_page_error'.
// NOTE(review): pageCount() returns false when no dimension info is available;
// with PHP's loose comparison any non-zero $page is then "> false", so this
// branch is taken in that case as well.
157 if ( $page > $this->pageCount( $image ) ) {
158 return $this->doThumbError( $width, $height, 'pdf_page_error' );
// With TRANSFORM_LATER set, nothing is rendered: the ThumbnailImage is built
// with false in the position where the final return below passes $dstPath.
161 if ( $flags & self::TRANSFORM_LATER ) {
162 return new ThumbnailImage( $image, $dstUrl, $width, $height, false, $page );
// The destination directory must exist (or be creatable) before rendering.
165 if ( !wfMkdirParents( dirname( $dstPath ), null, __METHOD__ ) ) {
166 return $this->doThumbError( $width, $height, 'thumbnail_dest_directory' );
169 // Thumbnail extraction is very inefficient for large files.
170 // Provide a way to pool count limit the number of downloaders.
171 if ( $image->getSize() >= 1e7 ) { // 10MB
// Pool 'GetLocalFileCopy', keyed by the sha1 of the file name; the pooled work
// is simply fetching the local reference path of the original.
172 $work = new PoolCounterWorkViaCallback( 'GetLocalFileCopy', sha1( $image->getName() ),
174 'doWork' => function () use ( $image ) {
175 return $image->getLocalRefPath();
179 $srcPath = $work->execute();
// Files below the size threshold fetch the path directly, without pooling.
181 $srcPath = $image->getLocalRefPath();
184 if ( $srcPath === false ) { // could not download original
185 return $this->doThumbError( $width, $height, 'filemissing' );
// Build the shell command as two escaped stages joined by a pipe. The first
// stage is limited to the single requested page (-dFirstPage / -dLastPage) and
// uses $wgPdfHandlerDpi as its resolution (-r); the second stage receives
// $wgPdfHandlerJpegQuality.
// NOTE(review): presumably the stages run $wgPdfProcessor and
// $wgPdfPostProcessor respectively - confirm against the full argument lists.
188 $cmd = '(' . wfEscapeShellArg(
192 "-dFirstPage={$page}",
193 "-dLastPage={$page}",
195 "-r{$wgPdfHandlerDpi}",
201 $cmd .= " | " . wfEscapeShellArg(
206 $wgPdfHandlerJpegQuality,
// Record the exact command in the debug log before running it.
214 wfDebug( __METHOD__ . ": $cmd\n" );
// $retval receives the exit status (checked against 0 below); the value
// returned into $err is what gets reported as the error text on failure.
216 $err = wfShellExecWithStderr( $cmd, $retval );
// removeBadFile() is given the destination path and the exit status; a truthy
// result is treated as a failed render even when the exit status was 0.
218 $removed = $this->removeBadFile( $dstPath, $retval );
220 if ( $retval != 0 || $removed ) {
// Log host name, exit status, trimmed error output and the command to the
// 'thumbnail' log channel, then hand the raw error output back to the caller.
221 wfDebugLog( 'thumbnail',
222 sprintf( 'thumbnail failed on %s: error %d "%s" from "%s"',
223 wfHostname(), $retval, trim( $err ), $cmd ) );
224 return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
// Success: the thumbnail is described by its URL, size, path and page.
226 return new ThumbnailImage( $image, $dstUrl, $width, $height, $dstPath, $page );
232 * @param $path string
235 function getPdfImage( $image, $path ) {
// Obtains the PdfImage wrapper for the file at $path.
// First branch: a fresh PdfImage that is not stored anywhere.
237 $pdfimg = new PdfImage( $path );
// Otherwise the instance is memoised on $image->pdfImage: created on first
// use, reused afterwards.
238 } elseif ( !isset( $image->pdfImage ) ) {
239 $pdfimg = $image->pdfImage = new PdfImage( $path );
241 $pdfimg = $image->pdfImage;
251 function getMetaArray( $image ) {
// Returns the unserialized metadata array for $image, memoised on
// $image->pdfMetaArray so that unserialize() runs at most once per object.
252 if ( isset( $image->pdfMetaArray ) ) {
253 return $image->pdfMetaArray;
// The stored metadata is a serialized string (see getMetadata()).
256 $metadata = $image->getMetadata();
// NOTE(review): isMetadataValid() returns METADATA_* constants, so this
// negation relies on METADATA_BAD being falsy and the others truthy - confirm.
258 if ( !$this->isMetadataValid( $image, $metadata ) ) {
259 wfDebug( "Pdf metadata is invalid or missing, should have been fixed in upgradeRow\n" );
// Unserializing runs as pooled work under the
// 'PdfHandler-unserialize-metadata' pool type.
263 $work = new PoolCounterWorkViaCallback(
264 'PdfHandler-unserialize-metadata',
267 'doWork' => function () use ( $image, $metadata ) {
// Warnings from unserialize() on malformed input are suppressed; the result
// is stored on the image object whatever it turns out to be.
268 wfSuppressWarnings();
269 $image->pdfMetaArray = unserialize( $metadata );
276 return $image->pdfMetaArray;
281 * @param $path string
284 function getImageSize( $image, $path ) {
// Delegates to the PdfImage wrapper obtained through getPdfImage() and returns
// whatever its getImageSize() reports.
285 return $this->getPdfImage( $image, $path )->getImageSize();
290 * @param $mime string
291 * @param $params null
294 function getThumbType( $ext, $mime, $params = null ) {
// Returns the [ extension, MIME type ] pair used for thumbnails. The extension
// is always the configured $wgPdfOutputExtension.
295 global $wgPdfOutputExtension;
// When no $mime was supplied, look it up from the output extension.
// NOTE(review): the lookup is named guessTypesForExtension (plural); confirm
// the shape of its return value, which is passed through unchanged.
298 if ( !isset( $mime ) ) {
299 $magic = MimeMagic::singleton();
300 $mime = $magic->guessTypesForExtension( $wgPdfOutputExtension );
302 return [ $wgPdfOutputExtension, $mime ];
307 * @param $path string
310 function getMetadata( $image, $path ) {
// Extracts the metadata through the PdfImage wrapper and returns it as a PHP
// serialized string; getMetaArray() is the counterpart that unserializes it.
311 return serialize( $this->getPdfImage( $image, $path )->retrieveMetaData() );
316 * @param $metadata string
319 function isMetadataValid( $image, $metadata ) {
// Classifies the serialized metadata string into one of three states.
// Empty metadata, or a serialized empty array, is METADATA_BAD.
320 if ( !$metadata || $metadata === serialize( [] ) ) {
321 return self::METADATA_BAD;
// Metadata that does not contain the substring 'mergedMetadata' is only
// METADATA_COMPATIBLE. This is a plain substring test on the serialized
// text; nothing is unserialized here.
322 } elseif ( strpos( $metadata, 'mergedMetadata' ) === false ) {
323 return self::METADATA_COMPATIBLE;
325 return self::METADATA_GOOD;
330 * @param bool|IContextSource $context Context to use (optional)
333 function formatMetadata( $image, $context = false ) {
// Formats the file's merged metadata for display, using the inherited
// formatMetadataHelper().
334 $meta = $image->getMetadata();
// The stored string is unserialized with warnings suppressed, so malformed
// data does not emit PHP warnings.
339 wfSuppressWarnings();
340 $meta = unserialize( $meta );
// Only a non-empty 'mergedMetadata' array can be formatted; a missing,
// non-array or empty entry is treated as a separate case.
343 if ( !isset( $meta['mergedMetadata'] )
344 || !is_array( $meta['mergedMetadata'] )
345 || count( $meta['mergedMetadata'] ) < 1
350 // Inherited from MediaHandler.
351 return $this->formatMetadataHelper( $meta['mergedMetadata'], $context );
358 function pageCount( File $image ) {
// Number of pages according to the cached dimension info.
359 $info = $this->getDimensionInfo( $image );
// Returns false, not 0, when no dimension info is available.
361 return $info ? $info['pageCount'] : false;
369 function getPageDimensions( File $image, $page ) {
// Looks up the dimensions of one page in the cached dimension info. The page
// number is used as the array key unchanged.
370 $index = $page; // MW starts pages at 1, as they are stored here
372 $info = $this->getDimensionInfo( $image );
// Only a page present in 'dimensionsByPage' yields its stored entry.
373 if ( $info && isset( $info['dimensionsByPage'][$index] ) ) {
374 return $info['dimensionsByPage'][$index];
380 protected function getDimensionInfo( File $file ) {
// Returns [ 'pageCount' => int, 'dimensionsByPage' => array ] for $file, read
// through the main WAN cache.
381 $cache = ObjectCache::getMainWANInstance();
// The cache key is derived from the file's SHA-1 and the entry is stored with
// TTL_INDEFINITE.
382 return $cache->getWithSetCallback(
383 $cache->makeKey( 'file-pdf', 'dimensions', $file->getSha1() ),
384 $cache::TTL_INDEFINITE,
// The callback computes the value from the file's metadata array.
385 function () use ( $file ) {
386 $data = $this->getMetaArray( $file );
// Without metadata, or without a 'Pages' entry, there is nothing to compute.
387 if ( !$data || !isset( $data['Pages'] ) ) {
390 unset( $data['text'] ); // lower peak RAM
// Pages are keyed 1..$count, matching the 1-based lookup in
// getPageDimensions().
393 $count = intval( $data['Pages'] );
394 for ( $i = 1; $i <= $count; $i++ ) {
395 $dimsByPage[$i] = PdfImage::getPageSize( $data, $i );
398 return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
// A 'pcTTL' option is passed with the same indefinite TTL.
// NOTE(review): presumably WANObjectCache's in-process cache TTL - confirm.
400 [ 'pcTTL' => $cache::TTL_INDEFINITE ]
409 function getPageText( File $image, $page ) {
// Returns the stored text of one page from the metadata array.
410 $data = $this->getMetaArray( $image );
// $page is 1-based while the 'text' array is indexed from 0, hence $page - 1.
// Missing metadata, a missing 'text' entry or a missing page is handled in
// this branch.
411 if ( !$data || !isset( $data['text'] ) || !isset( $data['text'][$page - 1] ) ) {
414 return $data['text'][$page - 1];
418 * Adds a warning about PDFs being potentially dangerous to the file
419 * page. Multiple messages with this base will be used.
423 function getWarningConfig( $file ) {
// Configuration entries for the file-page warning: the message keys from
// self::$messages, a protocol-relative link to the help page, and the name of
// the module registered by registerWarningModule().
425 'messages' => self::$messages,
426 'link' => '//www.mediawiki.org/wiki/Special:MyLanguage/Help:Security/PDF_files',
427 'module' => 'pdfhandler.messages',
432 * Register a module with the warning messages in it.
433 * @param &$resourceLoader ResourceLoader
435 static function registerWarningModule( &$resourceLoader ) {
// Registers the 'pdfhandler.messages' module, the same name that
// getWarningConfig() reports under 'module'.
436 $resourceLoader->register( 'pdfhandler.messages', [
// Only the message keys are registered; the role names used as array keys in
// self::$messages are dropped by array_values().
437 'messages' => array_values( self::$messages ),