]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - includes/media/PNGMetadataExtractor.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / includes / media / PNGMetadataExtractor.php
1 <?php
2 /**
3  * PNG frame counter and metadata extractor.
4  *
5  * Slightly derived from GIFMetadataExtractor.php
6  * Deliberately not using MWExceptions to avoid external dependencies, encouraging
7  * redistribution.
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write to the Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22  * http://www.gnu.org/copyleft/gpl.html
23  *
24  * @file
25  * @ingroup Media
26  */
27
/**
 * PNG frame counter and metadata extractor.
 *
 * Walks the chunks of a PNG (or APNG) file and collects the animation frame
 * and loop counts, the total animation duration, the bit depth, the colour
 * type and a fixed list of textual metadata fields.
 *
 * @ingroup Media
 */
class PNGMetadataExtractor {
	/** @var string The eight-byte signature every PNG file starts with */
	private static $pngSig = "\x89PNG\r\n\x1a\n";

	/** @var int Number of CRC bytes that follow the data of every chunk */
	private static $crcSize = 4;

	/**
	 * Map of lower-cased PNG text keywords to the field names used in the
	 * 'text' part of the result.
	 *
	 * Based on the list at
	 * http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
	 * and https://www.w3.org/TR/PNG/#11keywords
	 *
	 * @var array
	 */
	private static $textChunks = [
		'xml:com.adobe.xmp' => 'xmp',
		# Artist is unofficial. Author is the recommended
		# keyword in the PNG spec. However some people output
		# Artist so support both.
		'artist' => 'Artist',
		'model' => 'Model',
		'make' => 'Make',
		'author' => 'Artist',
		'comment' => 'PNGFileComment',
		'description' => 'ImageDescription',
		'title' => 'ObjectName',
		'copyright' => 'Copyright',
		# Source as in original device used to make image
		# not as in who gave you the image
		'source' => 'Model',
		'software' => 'Software',
		'disclaimer' => 'Disclaimer',
		'warning' => 'ContentWarning',
		'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
		'label' => 'Label',
		'creation time' => 'DateTimeDigitized',
		/* Other potentially useful things - Document */
	];

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Extract frame, colour and textual metadata from a PNG file.
	 *
	 * @param string $filename Path of the file to inspect
	 * @throws Exception If the file is missing, cannot be opened, does not
	 *  start with the PNG signature, or contains a malformed or oversized chunk
	 * @return array Array with the keys 'frameCount', 'loopCount', 'duration',
	 *  'text', 'bitDepth' and 'colorType'
	 */
	public static function getMetadata( $filename ) {
		$frameCount = 0;
		$loopCount = 1;
		$text = [];
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Almost every failure below is reported by throwing, so the handle is
		// released in a finally block rather than only on the success path.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$pngSig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte length, 4-byte type, data, CRC.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf )[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Bytes 0-7 hold width and height, which are not needed here.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// https://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Two unsigned 32-bit integers (frames, plays) are unpacked
					// below, so anything shorter than 8 bytes is malformed.
					if ( $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// The delay numerator/denominator start at byte offset 20.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as hundredths of a second.
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = [];
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk keyword, $items[2] = compression flag,
						 * $items[3] = language tag (or ""), $items[5] = content.
						 * The compression-method byte is matched by the literal \x00
						 * that follows the flag, so only method 0 gets this far. For a
						 * well-formed chunk $items[4] is the NUL separator after the
						 * language tag.
						 */

						// Theoretically should be case-sensitive, but in practice...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$textChunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								MediaWiki\suppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								MediaWiki\restoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
									fseek( $fh, self::$crcSize, SEEK_CUR );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method\n" );
								fseek( $fh, self::$crcSize, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$textChunks[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}
				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practice...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}
					MediaWiki\suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					MediaWiki\restoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practice...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$textChunks[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}
						// First byte after the keyword separator is the compression
						// method; the compressed text follows it.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						MediaWiki\suppressWarnings();
						$content = gzuncompress( $content );
						MediaWiki\restoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						MediaWiki\suppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						MediaWiki\restoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$textChunks[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not care about: skip its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that trails the chunk data.
				fseek( $fh, self::$crcSize, SEEK_CUR );
			}
		} finally {
			fclose( $fh );
		}

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so later writes to $value cannot alter $text.
			unset( $value );
		}

		return [
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		];
	}

	/**
	 * Read a chunk, checking to make sure its not too big.
	 *
	 * A zero-length request returns an empty string without touching the
	 * file, because fread() rejects a length of zero (with a warning on
	 * older PHP versions and a ValueError on PHP 8).
	 *
	 * @param resource $fh The file handle
	 * @param int $size Size in bytes.
	 * @throws Exception If too big, or if the underlying read fails
	 * @return string The chunk. May be shorter than $size if the file ends early.
	 */
	private static function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}

		if ( $size === 0 ) {
			return '';
		}

		$data = fread( $fh, $size );
		if ( $data === false ) {
			throw new Exception( __METHOD__ . ': Read error' );
		}

		return $data;
	}
}