/**
 * Extraction of basic metadata (frame count, duration, looping, XMP and
 * comments) from GIF files.
 *
 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
 * redistribution.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Media
 */

/**
 * Walks the block structure of a GIF file and collects the frame count,
 * total animation duration, loop flag, embedded XMP packet and comments.
 *
 * @ingroup Media
 */
class GIFMetadataExtractor {
	/** @var string Image separator byte (0x2C); introduces a frame */
	private static $gifFrameSep = ',';

	/** @var string Extension introducer byte (0x21) */
	private static $gifExtensionSep = '!';

	/** @var string Trailer byte (0x3B); ends the GIF data stream */
	private static $gifTerm = ';';

	const VERSION = 1;

	// Each sub-block is less than or equal to 255 bytes.
	// Most of the time it's 255 bytes, except for in XMP
	// blocks, where it's usually between 32-127 bytes each.
	const MAX_SUBBLOCKS = 262144; // 5mb divided by 20.

	/**
	 * Parse a GIF file and return its metadata.
	 *
	 * Truncated image data is tolerated on a best-effort basis: the unchecked
	 * skip reads and the feof() driven loops simply stop at end of file, so
	 * whatever was gathered up to that point is still returned.
	 *
	 * @throws Exception If the file is missing, unreadable, not a GIF, or
	 *  structurally malformed
	 * @param string $filename
	 * @return array With keys 'frameCount' (int), 'looped' (bool),
	 *  'duration' (float, seconds), 'xmp' (string) and 'comment' (string[])
	 */
	public static function getMetadata( $filename ) {
		if ( !$filename ) {
			throw new Exception( "No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( "File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( "Unable to open file $filename" );
		}

		$frameCount = 0;
		$duration = 0.0;
		$isLooped = false;
		$xmp = "";
		$comment = [];

		// The finally clause releases the handle on every exit path. Without it
		// the handle stays open until garbage collection, and an exception
		// thrown by a helper can keep it alive through its stack trace.
		try {
			// Check for the GIF header
			$buf = fread( $fh, 6 );
			if ( $buf !== 'GIF87a' && $buf !== 'GIF89a' ) {
				throw new Exception( "Not a valid GIF file; header: $buf" );
			}

			// Skip over width and height (two little-endian 16 bit values);
			// neither is part of the returned metadata.
			fread( $fh, 4 );

			// Read BPP
			$bpp = self::decodeBPP( fread( $fh, 1 ) );

			// Skip over background and aspect ratio
			fread( $fh, 2 );

			// Skip over the GCT
			self::readGCT( $fh, $bpp );

			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 1 );

				if ( $buf === self::$gifFrameSep ) {
					// Found a frame
					$frameCount++;

					// Skip bounding box
					fread( $fh, 8 );

					// Read BPP of the local colour table
					$bpp = self::decodeBPP( fread( $fh, 1 ) );

					// Skip the local colour table
					self::readGCT( $fh, $bpp );

					// Skip the single byte preceding the image data
					// sub-blocks, then the sub-blocks themselves.
					fread( $fh, 1 );
					self::skipBlock( $fh );
				} elseif ( $buf === self::$gifExtensionSep ) {
					$extensionCode = self::readByte( $fh );

					if ( $extensionCode === 0xF9 ) {
						// Graphics Control Extension.
						fread( $fh, 1 ); // Block size

						fread( $fh, 1 ); // Transparency, disposal method, user input

						// Delay, in hundredths of seconds.
						$duration += self::readUint16( $fh ) * 0.01;

						fread( $fh, 1 ); // Transparent colour index

						// Should be a terminator
						if ( self::readByte( $fh ) !== 0 ) {
							throw new Exception( "Malformed Graphics Control Extension block" );
						}
					} elseif ( $extensionCode === 0xFE ) {
						// Comment block(s).
						$data = self::readBlock( $fh );
						if ( $data === "" ) {
							throw new Exception( 'Read error, zero-length comment block' );
						}

						// The standard says this should be ASCII, however it's unclear
						// if that's true in practice. Check whether it is valid UTF-8;
						// if so keep it, otherwise assume it is windows-1252.
						$dataCopy = $data;
						// quickIsNFCVerify has the side effect of replacing any invalid characters
						UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

						if ( $dataCopy !== $data ) {
							MediaWiki\suppressWarnings();
							$data = iconv( 'windows-1252', 'UTF-8', $data );
							MediaWiki\restoreWarnings();
						}

						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$commentCount = count( $comment );
						if ( $commentCount === 0 || $comment[$commentCount - 1] !== $data ) {
							$comment[] = $data;
						}
					} elseif ( $extensionCode === 0xFF ) {
						// Application extension (Netscape info about the animated gif)
						// or XMP (or theoretically any other type of extension block)
						$blockLength = self::readByte( $fh );

						if ( $blockLength !== 11 ) {
							wfDebug( __METHOD__ . " GIF application block with wrong length\n" );
							// Validate the length before reading anything: a zero
							// length must never reach fread(). Step back onto the
							// length byte and skip the block as generic sub-blocks.
							fseek( $fh, -1, SEEK_CUR );
							self::skipBlock( $fh );
							continue;
						}

						$data = fread( $fh, 11 );

						if ( $data === 'NETSCAPE2.0' ) {
							// NETSCAPE2.0 (application name for animated gif)
							$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

							if ( $data !== "\x03\x01" ) {
								throw new Exception( "Expected \x03\x01, got $data" );
							}

							// Unsigned little-endian integer, loop count or zero for "forever"
							if ( self::readUint16( $fh ) !== 1 ) {
								$isLooped = true;
							}

							// Read out terminator byte
							fread( $fh, 1 );
						} elseif ( $data === 'XMP DataXMP' ) {
							// application name for XMP data.
							// see pg 18 of XMP spec part 3.

							// The length bytes are kept because the XMP packet is not
							// stored as real sub-blocks; only the trailer is.
							$xmp = self::readBlock( $fh, true );

							if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
								|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
							) {
								// this is just a sanity check.
								throw new Exception( "XMP does not have magic trailer!" );
							}

							// strip out trailer.
							$xmp = substr( $xmp, 0, -257 );
						} else {
							// Unrecognized extension block. Rewind by what was actually
							// read (plus the length byte) so a short read at end of file
							// cannot move the position in front of this block.
							fseek( $fh, -( strlen( $data ) + 1 ), SEEK_CUR );
							self::skipBlock( $fh );
							continue;
						}
					} else {
						self::skipBlock( $fh );
					}
				} elseif ( $buf === self::$gifTerm ) {
					break;
				} else {
					if ( strlen( $buf ) < 1 ) {
						throw new Exception( "Ran out of input" );
					}
					$byte = ord( $buf );
					throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
				}
			}
		} finally {
			fclose( $fh );
		}

		return [
			'frameCount' => $frameCount,
			'looped' => $isLooped,
			'duration' => $duration,
			'xmp' => $xmp,
			'comment' => $comment,
		];
	}

	/**
	 * Skip over a colour table of 2^$bpp entries, three bytes each.
	 *
	 * @param resource $fh
	 * @param int $bpp Bits per pixel as returned by decodeBPP(); 0 means
	 *  there is no table and nothing is read
	 * @return void
	 */
	public static function readGCT( $fh, $bpp ) {
		if ( $bpp > 0 ) {
			$max = pow( 2, $bpp );
			for ( $i = 1; $i <= $max; ++$i ) {
				fread( $fh, 3 );
			}
		}
	}

	/**
	 * Decode a packed-fields byte into the size of the colour table it announces.
	 *
	 * @param string $data The packed byte (only the first byte is looked at)
	 * @throws Exception If $data is empty
	 * @return int Bits per pixel (1-8) when the colour table flag (top bit)
	 *  is set, otherwise 0
	 */
	public static function decodeBPP( $data ) {
		if ( strlen( $data ) < 1 ) {
			throw new Exception( "Ran out of input" );
		}
		$packed = ord( $data );

		// Top bit: colour table present. Low three bits: table size minus one.
		$haveMap = ( $packed & 0x80 ) !== 0;

		return $haveMap ? ( $packed & 7 ) + 1 : 0;
	}

	/**
	 * Skip a chain of sub-blocks up to and including the zero-length terminator.
	 *
	 * Returns silently when end of file is reached between sub-blocks, so
	 * truncated image data does not abort metadata extraction.
	 *
	 * @param resource $fh
	 * @throws Exception If a length byte cannot be read
	 */
	public static function skipBlock( $fh ) {
		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );
			if ( strlen( $buf ) < 1 ) {
				throw new Exception( "Ran out of input" );
			}
			$blockLen = ord( $buf );
			if ( $blockLen === 0 ) {
				return;
			}
			fread( $fh, $blockLen );
		}
	}

	/**
	 * Read a block. In the GIF format, a block is made up of
	 * several sub-blocks. Each sub block starts with one byte
	 * saying how long the sub-block is, followed by the sub-block.
	 * The entire block is terminated by a sub-block of length
	 * 0.
	 * @param resource $fh File handle
	 * @param bool $includeLengths Include the length bytes of the
	 *  sub-blocks in the returned value. Normally this is false,
	 *  except XMP is weird and does a hack where you need to keep
	 *  these length bytes.
	 * @throws Exception On premature end of input, or when more than
	 *  MAX_SUBBLOCKS sub-blocks are encountered
	 * @return string The data.
	 */
	public static function readBlock( $fh, $includeLengths = false ) {
		$data = '';
		$subLength = fread( $fh, 1 );
		$blocks = 0;

		while ( $subLength !== "\0" ) {
			$blocks++;
			if ( $blocks > self::MAX_SUBBLOCKS ) {
				throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
			}
			// A missing length byte means the terminator was never found.
			if ( strlen( $subLength ) < 1 ) {
				throw new Exception( "Read error: Unexpected EOF." );
			}
			if ( $includeLengths ) {
				$data .= $subLength;
			}

			$expected = ord( $subLength );
			$chunk = fread( $fh, $expected );
			if ( strlen( $chunk ) < $expected ) {
				// Short sub-block: the input ended in the middle of the block.
				throw new Exception( "Read error: Unexpected EOF." );
			}

			$data .= $chunk;
			$subLength = fread( $fh, 1 );
		}

		return $data;
	}

	/**
	 * Read one byte and return it as an unsigned integer.
	 *
	 * @param resource $fh
	 * @throws Exception If no byte is available
	 * @return int 0-255
	 */
	private static function readByte( $fh ) {
		$buf = fread( $fh, 1 );
		if ( strlen( $buf ) < 1 ) {
			throw new Exception( "Ran out of input" );
		}

		return ord( $buf );
	}

	/**
	 * Read an unsigned little-endian 16 bit integer.
	 *
	 * @param resource $fh
	 * @throws Exception If fewer than two bytes are available
	 * @return int 0-65535
	 */
	private static function readUint16( $fh ) {
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new Exception( "Ran out of input" );
		}

		return unpack( 'v', $buf )[1];
	}
}