]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - includes/media/Exif.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / includes / media / Exif.php
1 <?php
2 /**
3  * Extraction and validation of image metadata.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  * http://www.gnu.org/copyleft/gpl.html
19  *
20  * @ingroup Media
21  * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
22  * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber
23  * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
24  * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
25  * @file
26  */
27
28 /**
29  * Class to extract and validate Exif data from jpeg (and possibly tiff) files.
30  * @ingroup Media
31  */
32 class Exif {
        /** An 8-bit (1-byte) unsigned integer. */
        const BYTE = 1;

        /** An 8-bit byte containing one 7-bit ASCII code.
         *  The final byte is terminated with NULL.
         */
        const ASCII = 2;

        /** A 16-bit (2-byte) unsigned integer. */
        const SHORT = 3;

        /** A 32-bit (4-byte) unsigned integer. */
        const LONG = 4;

        /** Two LONGs. The first LONG is the numerator and the second LONG
         *  is the denominator.
         */
        const RATIONAL = 5;

        /** A 16-bit (2-byte) or 32-bit (4-byte) unsigned integer. */
        const SHORT_OR_LONG = 6;

        /** An 8-bit byte that can take any value depending on the field definition. */
        const UNDEFINED = 7;

        /** A 32-bit (4-byte) signed integer (2's complement notation). */
        const SLONG = 9;

        /** Two SLONGs. The first SLONG is the numerator and the second SLONG is
         *  the denominator.
         */
        const SRATIONAL = 10;

        /** A fake value for things we don't want or don't support. */
        const IGNORE = -1;

        /** @var array Exif tags grouped by section (IFD0, EXIF, GPS). Within a
         *    section the tag name is the key and the value is either one of the
         *    type constants above or a [ type, count ] pair.
         */
        private $mExifTags;

        /** @var array The raw Exif data returned by exif_read_data() */
        private $mRawExifData;

        /** @var array A filtered version of $mRawExifData that has been pruned
         *    of invalid tags and tags that contain content they shouldn't contain
         *    according to the Exif specification
         */
        private $mFilteredExifData;

        /** @var string The file being processed */
        private $file;

        /** @var string The basename of the file being processed */
        private $basename;

        /** @var string|bool The private log to log to, e.g. 'exif'.
         *    Defaults to false.
         */
        private $log = false;

        /** @var string The byte order of the file ('BE' or 'LE'). Needed because
         *    php's extension doesn't fully process some obscure props.
         */
        private $byteOrder;
97
98         /**
99          * @param string $file Filename.
100          * @param string $byteOrder Type of byte ordering either 'BE' (Big Endian)
101          *   or 'LE' (Little Endian). Default ''.
102          * @throws MWException
103          * @todo FIXME: The following are broke:
104          *   SubjectArea. Need to test the more obscure tags.
105          *   DigitalZoomRatio = 0/0 is rejected. need to determine if that's valid.
106          *   Possibly should treat 0/0 = 0. need to read exif spec on that.
107          */
108         function __construct( $file, $byteOrder = '' ) {
109                 /**
110                  * Page numbers here refer to pages in the Exif 2.2 standard
111                  *
112                  * Note, Exif::UNDEFINED is treated as a string, not as an array of bytes
113                  * so don't put a count parameter for any UNDEFINED values.
114                  *
115                  * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
116                  */
117                 $this->mExifTags = [
118                         # TIFF Rev. 6.0 Attribute Information (p22)
119                         'IFD0' => [
120                                 # Tags relating to image structure
121                                 'ImageWidth' => self::SHORT_OR_LONG, # Image width
122                                 'ImageLength' => self::SHORT_OR_LONG, # Image height
123                                 'BitsPerSample' => [ self::SHORT, 3 ], # Number of bits per component
124                                 # "When a primary image is JPEG compressed, this designation is not"
125                                 # "necessary and is omitted." (p23)
126                                 'Compression' => self::SHORT, # Compression scheme #p23
127                                 'PhotometricInterpretation' => self::SHORT, # Pixel composition #p23
128                                 'Orientation' => self::SHORT, # Orientation of image #p24
129                                 'SamplesPerPixel' => self::SHORT, # Number of components
130                                 'PlanarConfiguration' => self::SHORT, # Image data arrangement #p24
131                                 'YCbCrSubSampling' => [ self::SHORT, 2 ], # Subsampling ratio of Y to C #p24
132                                 'YCbCrPositioning' => self::SHORT, # Y and C positioning #p24-25
133                                 'XResolution' => self::RATIONAL, # Image resolution in width direction
134                                 'YResolution' => self::RATIONAL, # Image resolution in height direction
135                                 'ResolutionUnit' => self::SHORT, # Unit of X and Y resolution #(p26)
136
137                                 # Tags relating to recording offset
138                                 'StripOffsets' => self::SHORT_OR_LONG, # Image data location
139                                 'RowsPerStrip' => self::SHORT_OR_LONG, # Number of rows per strip
140                                 'StripByteCounts' => self::SHORT_OR_LONG, # Bytes per compressed strip
141                                 'JPEGInterchangeFormat' => self::SHORT_OR_LONG, # Offset to JPEG SOI
142                                 'JPEGInterchangeFormatLength' => self::SHORT_OR_LONG, # Bytes of JPEG data
143
144                                 # Tags relating to image data characteristics
145                                 'TransferFunction' => self::IGNORE, # Transfer function
146                                 'WhitePoint' => [ self::RATIONAL, 2 ], # White point chromaticity
147                                 'PrimaryChromaticities' => [ self::RATIONAL, 6 ], # Chromaticities of primarities
148                                 # Color space transformation matrix coefficients #p27
149                                 'YCbCrCoefficients' => [ self::RATIONAL, 3 ],
150                                 'ReferenceBlackWhite' => [ self::RATIONAL, 6 ], # Pair of black and white reference values
151
152                                 # Other tags
153                                 'DateTime' => self::ASCII, # File change date and time
154                                 'ImageDescription' => self::ASCII, # Image title
155                                 'Make' => self::ASCII, # Image input equipment manufacturer
156                                 'Model' => self::ASCII, # Image input equipment model
157                                 'Software' => self::ASCII, # Software used
158                                 'Artist' => self::ASCII, # Person who created the image
159                                 'Copyright' => self::ASCII, # Copyright holder
160                         ],
161
162                         # Exif IFD Attribute Information (p30-31)
163                         'EXIF' => [
164                                 # @todo NOTE: Nonexistence of this field is taken to mean nonconformance
165                                 # to the Exif 2.1 AND 2.2 standards
166                                 'ExifVersion' => self::UNDEFINED, # Exif version
167                                 'FlashPixVersion' => self::UNDEFINED, # Supported Flashpix version #p32
168
169                                 # Tags relating to Image Data Characteristics
170                                 'ColorSpace' => self::SHORT, # Color space information #p32
171
172                                 # Tags relating to image configuration
173                                 'ComponentsConfiguration' => self::UNDEFINED, # Meaning of each component #p33
174                                 'CompressedBitsPerPixel' => self::RATIONAL, # Image compression mode
175                                 'PixelYDimension' => self::SHORT_OR_LONG, # Valid image height
176                                 'PixelXDimension' => self::SHORT_OR_LONG, # Valid image width
177
178                                 # Tags relating to related user information
179                                 'MakerNote' => self::IGNORE, # Manufacturer notes
180                                 'UserComment' => self::UNDEFINED, # User comments #p34
181
182                                 # Tags relating to related file information
183                                 'RelatedSoundFile' => self::ASCII, # Related audio file
184
185                                 # Tags relating to date and time
186                                 'DateTimeOriginal' => self::ASCII, # Date and time of original data generation #p36
187                                 'DateTimeDigitized' => self::ASCII, # Date and time of original data generation
188                                 'SubSecTime' => self::ASCII, # DateTime subseconds
189                                 'SubSecTimeOriginal' => self::ASCII, # DateTimeOriginal subseconds
190                                 'SubSecTimeDigitized' => self::ASCII, # DateTimeDigitized subseconds
191
192                                 # Tags relating to picture-taking conditions (p31)
193                                 'ExposureTime' => self::RATIONAL, # Exposure time
194                                 'FNumber' => self::RATIONAL, # F Number
195                                 'ExposureProgram' => self::SHORT, # Exposure Program #p38
196                                 'SpectralSensitivity' => self::ASCII, # Spectral sensitivity
197                                 'ISOSpeedRatings' => self::SHORT, # ISO speed rating
198                                 'OECF' => self::IGNORE,
199                                 # Optoelectronic conversion factor. Note: We don't have support for this atm.
200                                 'ShutterSpeedValue' => self::SRATIONAL, # Shutter speed
201                                 'ApertureValue' => self::RATIONAL, # Aperture
202                                 'BrightnessValue' => self::SRATIONAL, # Brightness
203                                 'ExposureBiasValue' => self::SRATIONAL, # Exposure bias
204                                 'MaxApertureValue' => self::RATIONAL, # Maximum land aperture
205                                 'SubjectDistance' => self::RATIONAL, # Subject distance
206                                 'MeteringMode' => self::SHORT, # Metering mode #p40
207                                 'LightSource' => self::SHORT, # Light source #p40-41
208                                 'Flash' => self::SHORT, # Flash #p41-42
209                                 'FocalLength' => self::RATIONAL, # Lens focal length
210                                 'SubjectArea' => [ self::SHORT, 4 ], # Subject area
211                                 'FlashEnergy' => self::RATIONAL, # Flash energy
212                                 'SpatialFrequencyResponse' => self::IGNORE, # Spatial frequency response. Not supported atm.
213                                 'FocalPlaneXResolution' => self::RATIONAL, # Focal plane X resolution
214                                 'FocalPlaneYResolution' => self::RATIONAL, # Focal plane Y resolution
215                                 'FocalPlaneResolutionUnit' => self::SHORT, # Focal plane resolution unit #p46
216                                 'SubjectLocation' => [ self::SHORT, 2 ], # Subject location
217                                 'ExposureIndex' => self::RATIONAL, # Exposure index
218                                 'SensingMethod' => self::SHORT, # Sensing method #p46
219                                 'FileSource' => self::UNDEFINED, # File source #p47
220                                 'SceneType' => self::UNDEFINED, # Scene type #p47
221                                 'CFAPattern' => self::IGNORE, # CFA pattern. not supported atm.
222                                 'CustomRendered' => self::SHORT, # Custom image processing #p48
223                                 'ExposureMode' => self::SHORT, # Exposure mode #p48
224                                 'WhiteBalance' => self::SHORT, # White Balance #p49
225                                 'DigitalZoomRatio' => self::RATIONAL, # Digital zoom ration
226                                 'FocalLengthIn35mmFilm' => self::SHORT, # Focal length in 35 mm film
227                                 'SceneCaptureType' => self::SHORT, # Scene capture type #p49
228                                 'GainControl' => self::SHORT, # Scene control #p49-50
229                                 'Contrast' => self::SHORT, # Contrast #p50
230                                 'Saturation' => self::SHORT, # Saturation #p50
231                                 'Sharpness' => self::SHORT, # Sharpness #p50
232                                 'DeviceSettingDescription' => self::IGNORE,
233                                 # Device settings description. This could maybe be supported. Need to find an
234                                 # example file that uses this to see if it has stuff of interest in it.
235                                 'SubjectDistanceRange' => self::SHORT, # Subject distance range #p51
236
237                                 'ImageUniqueID' => self::ASCII, # Unique image ID
238                         ],
239
240                         # GPS Attribute Information (p52)
241                         'GPS' => [
242                                 'GPSVersion' => self::UNDEFINED,
243                                 # Should be an array of 4 Exif::BYTE's. However php treats it as an undefined
244                                 # Note exif standard calls this GPSVersionID, but php doesn't like the id suffix
245                                 'GPSLatitudeRef' => self::ASCII, # North or South Latitude #p52-53
246                                 'GPSLatitude' => [ self::RATIONAL, 3 ], # Latitude
247                                 'GPSLongitudeRef' => self::ASCII, # East or West Longitude #p53
248                                 'GPSLongitude' => [ self::RATIONAL, 3 ], # Longitude
249                                 'GPSAltitudeRef' => self::UNDEFINED,
250                                 # Altitude reference. Note, the exif standard says this should be an EXIF::Byte,
251                                 # but php seems to disagree.
252                                 'GPSAltitude' => self::RATIONAL, # Altitude
253                                 'GPSTimeStamp' => [ self::RATIONAL, 3 ], # GPS time (atomic clock)
254                                 'GPSSatellites' => self::ASCII, # Satellites used for measurement
255                                 'GPSStatus' => self::ASCII, # Receiver status #p54
256                                 'GPSMeasureMode' => self::ASCII, # Measurement mode #p54-55
257                                 'GPSDOP' => self::RATIONAL, # Measurement precision
258                                 'GPSSpeedRef' => self::ASCII, # Speed unit #p55
259                                 'GPSSpeed' => self::RATIONAL, # Speed of GPS receiver
260                                 'GPSTrackRef' => self::ASCII, # Reference for direction of movement #p55
261                                 'GPSTrack' => self::RATIONAL, # Direction of movement
262                                 'GPSImgDirectionRef' => self::ASCII, # Reference for direction of image #p56
263                                 'GPSImgDirection' => self::RATIONAL, # Direction of image
264                                 'GPSMapDatum' => self::ASCII, # Geodetic survey data used
265                                 'GPSDestLatitudeRef' => self::ASCII, # Reference for latitude of destination #p56
266                                 'GPSDestLatitude' => [ self::RATIONAL, 3 ], # Latitude destination
267                                 'GPSDestLongitudeRef' => self::ASCII, # Reference for longitude of destination #p57
268                                 'GPSDestLongitude' => [ self::RATIONAL, 3 ], # Longitude of destination
269                                 'GPSDestBearingRef' => self::ASCII, # Reference for bearing of destination #p57
270                                 'GPSDestBearing' => self::RATIONAL, # Bearing of destination
271                                 'GPSDestDistanceRef' => self::ASCII, # Reference for distance to destination #p57-58
272                                 'GPSDestDistance' => self::RATIONAL, # Distance to destination
273                                 'GPSProcessingMethod' => self::UNDEFINED, # Name of GPS processing method
274                                 'GPSAreaInformation' => self::UNDEFINED, # Name of GPS area
275                                 'GPSDateStamp' => self::ASCII, # GPS date
276                                 'GPSDifferential' => self::SHORT, # GPS differential correction
277                         ],
278                 ];
279
280                 $this->file = $file;
281                 $this->basename = wfBaseName( $this->file );
282                 if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) {
283                         $this->byteOrder = $byteOrder;
284                 } else {
285                         // Only give a warning for b/c, since originally we didn't
286                         // require this. The number of things affected by this is
287                         // rather small.
288                         wfWarn( 'Exif class did not have byte order specified. ' .
289                                 'Some properties may be decoded incorrectly.' );
290                         $this->byteOrder = 'BE'; // BE seems about twice as popular as LE in jpg's.
291                 }
292
293                 $this->debugFile( $this->basename, __FUNCTION__, true );
294                 if ( function_exists( 'exif_read_data' ) ) {
295                         MediaWiki\suppressWarnings();
296                         $data = exif_read_data( $this->file, 0, true );
297                         MediaWiki\restoreWarnings();
298                 } else {
299                         throw new MWException( "Internal error: exif_read_data not present. " .
300                                 "\$wgShowEXIF may be incorrectly set or not checked by an extension." );
301                 }
302                 /**
303                  * exif_read_data() will return false on invalid input, such as
304                  * when somebody uploads a file called something.jpeg
305                  * containing random gibberish.
306                  */
307                 $this->mRawExifData = $data ?: [];
308                 $this->makeFilteredData();
309                 $this->collapseData();
310                 $this->debugFile( __FUNCTION__, false );
311         }
312
313         /**
314          * Make $this->mFilteredExifData
315          */
316         function makeFilteredData() {
317                 $this->mFilteredExifData = [];
318
319                 foreach ( array_keys( $this->mRawExifData ) as $section ) {
320                         if ( !array_key_exists( $section, $this->mExifTags ) ) {
321                                 $this->debug( $section, __FUNCTION__, "'$section' is not a valid Exif section" );
322                                 continue;
323                         }
324
325                         foreach ( array_keys( $this->mRawExifData[$section] ) as $tag ) {
326                                 if ( !array_key_exists( $tag, $this->mExifTags[$section] ) ) {
327                                         $this->debug( $tag, __FUNCTION__, "'$tag' is not a valid tag in '$section'" );
328                                         continue;
329                                 }
330
331                                 $this->mFilteredExifData[$tag] = $this->mRawExifData[$section][$tag];
332                                 // This is ok, as the tags in the different sections do not conflict.
333                                 // except in computed and thumbnail section, which we don't use.
334
335                                 $value = $this->mRawExifData[$section][$tag];
336                                 if ( !$this->validate( $section, $tag, $value ) ) {
337                                         $this->debug( $value, __FUNCTION__, "'$tag' contained invalid data" );
338                                         unset( $this->mFilteredExifData[$tag] );
339                                 }
340                         }
341                 }
342         }
343
344         /**
345          * Collapse some fields together.
346          * This converts some fields from exif form, to a more friendly form.
347          * For example GPS latitude to a single number.
348          *
349          * The rationale behind this is that we're storing data, not presenting to the user
350          * For example a longitude is a single number describing how far away you are from
351          * the prime meridian. Well it might be nice to split it up into minutes and seconds
352          * for the user, it doesn't really make sense to split a single number into 4 parts
353          * for storage. (degrees, minutes, second, direction vs single floating point number).
354          *
355          * Other things this might do (not really sure if they make sense or not):
356          * Dates -> mediawiki date format.
357          * convert values that can be in different units to be in one standardized unit.
358          *
359          * As an alternative approach, some of this could be done in the validate phase
360          * if we make up our own types like Exif::DATE.
361          */
362         function collapseData() {
363                 $this->exifGPStoNumber( 'GPSLatitude' );
364                 $this->exifGPStoNumber( 'GPSDestLatitude' );
365                 $this->exifGPStoNumber( 'GPSLongitude' );
366                 $this->exifGPStoNumber( 'GPSDestLongitude' );
367
368                 if ( isset( $this->mFilteredExifData['GPSAltitude'] )
369                         && isset( $this->mFilteredExifData['GPSAltitudeRef'] )
370                 ) {
371                         // We know altitude data is a <num>/<denom> from the validation
372                         // functions ran earlier. But multiplying such a string by -1
373                         // doesn't work well, so convert.
374                         list( $num, $denom ) = explode( '/', $this->mFilteredExifData['GPSAltitude'] );
375                         $this->mFilteredExifData['GPSAltitude'] = $num / $denom;
376
377                         if ( $this->mFilteredExifData['GPSAltitudeRef'] === "\1" ) {
378                                 $this->mFilteredExifData['GPSAltitude'] *= -1;
379                         }
380                         unset( $this->mFilteredExifData['GPSAltitudeRef'] );
381                 }
382
383                 $this->exifPropToOrd( 'FileSource' );
384                 $this->exifPropToOrd( 'SceneType' );
385
386                 $this->charCodeString( 'UserComment' );
387                 $this->charCodeString( 'GPSProcessingMethod' );
388                 $this->charCodeString( 'GPSAreaInformation' );
389
390                 // ComponentsConfiguration should really be an array instead of a string...
391                 // This turns a string of binary numbers into an array of numbers.
392
393                 if ( isset( $this->mFilteredExifData['ComponentsConfiguration'] ) ) {
394                         $val = $this->mFilteredExifData['ComponentsConfiguration'];
395                         $ccVals = [];
396
397                         $strLen = strlen( $val );
398                         for ( $i = 0; $i < $strLen; $i++ ) {
399                                 $ccVals[$i] = ord( substr( $val, $i, 1 ) );
400                         }
401                         $ccVals['_type'] = 'ol'; // this is for formatting later.
402                         $this->mFilteredExifData['ComponentsConfiguration'] = $ccVals;
403                 }
404
405                 // GPSVersion(ID) is treated as the wrong type by php exif support.
406                 // Go through each byte turning it into a version string.
407                 // For example: "\x02\x02\x00\x00" -> "2.2.0.0"
408
409                 // Also change exif tag name from GPSVersion (what php exif thinks it is)
410                 // to GPSVersionID (what the exif standard thinks it is).
411
412                 if ( isset( $this->mFilteredExifData['GPSVersion'] ) ) {
413                         $val = $this->mFilteredExifData['GPSVersion'];
414                         $newVal = '';
415
416                         $strLen = strlen( $val );
417                         for ( $i = 0; $i < $strLen; $i++ ) {
418                                 if ( $i !== 0 ) {
419                                         $newVal .= '.';
420                                 }
421                                 $newVal .= ord( substr( $val, $i, 1 ) );
422                         }
423
424                         if ( $this->byteOrder === 'LE' ) {
425                                 // Need to reverse the string
426                                 $newVal2 = '';
427                                 for ( $i = strlen( $newVal ) - 1; $i >= 0; $i-- ) {
428                                         $newVal2 .= substr( $newVal, $i, 1 );
429                                 }
430                                 $this->mFilteredExifData['GPSVersionID'] = $newVal2;
431                         } else {
432                                 $this->mFilteredExifData['GPSVersionID'] = $newVal;
433                         }
434                         unset( $this->mFilteredExifData['GPSVersion'] );
435                 }
436         }
437
438         /**
439          * Do userComment tags and similar. See pg. 34 of exif standard.
440          * basically first 8 bytes is charset, rest is value.
441          * This has not been tested on any shift-JIS strings.
442          * @param string $prop Prop name
443          */
444         private function charCodeString( $prop ) {
445                 if ( isset( $this->mFilteredExifData[$prop] ) ) {
446                         if ( strlen( $this->mFilteredExifData[$prop] ) <= 8 ) {
447                                 // invalid. Must be at least 9 bytes long.
448
449                                 $this->debug( $this->mFilteredExifData[$prop], __FUNCTION__, false );
450                                 unset( $this->mFilteredExifData[$prop] );
451
452                                 return;
453                         }
454                         $charCode = substr( $this->mFilteredExifData[$prop], 0, 8 );
455                         $val = substr( $this->mFilteredExifData[$prop], 8 );
456
457                         switch ( $charCode ) {
458                                 case "\x4A\x49\x53\x00\x00\x00\x00\x00":
459                                         // JIS
460                                         $charset = "Shift-JIS";
461                                         break;
462                                 case "UNICODE\x00":
463                                         $charset = "UTF-16" . $this->byteOrder;
464                                         break;
465                                 default: // ascii or undefined.
466                                         $charset = "";
467                                         break;
468                         }
469                         if ( $charset ) {
470                                 MediaWiki\suppressWarnings();
471                                 $val = iconv( $charset, 'UTF-8//IGNORE', $val );
472                                 MediaWiki\restoreWarnings();
473                         } else {
474                                 // if valid utf-8, assume that, otherwise assume windows-1252
475                                 $valCopy = $val;
476                                 UtfNormal\Validator::quickIsNFCVerify( $valCopy ); // validates $valCopy.
477                                 if ( $valCopy !== $val ) {
478                                         MediaWiki\suppressWarnings();
479                                         $val = iconv( 'Windows-1252', 'UTF-8//IGNORE', $val );
480                                         MediaWiki\restoreWarnings();
481                                 }
482                         }
483
484                         // trim and check to make sure not only whitespace.
485                         $val = trim( $val );
486                         if ( strlen( $val ) === 0 ) {
487                                 // only whitespace.
488                                 $this->debug( $this->mFilteredExifData[$prop], __FUNCTION__, "$prop: Is only whitespace" );
489                                 unset( $this->mFilteredExifData[$prop] );
490
491                                 return;
492                         }
493
494                         // all's good.
495                         $this->mFilteredExifData[$prop] = $val;
496                 }
497         }
498
499         /**
500          * Convert an Exif::UNDEFINED from a raw binary string
501          * to its value. This is sometimes needed depending on
502          * the type of UNDEFINED field
503          * @param string $prop Name of property
504          */
505         private function exifPropToOrd( $prop ) {
506                 if ( isset( $this->mFilteredExifData[$prop] ) ) {
507                         $this->mFilteredExifData[$prop] = ord( $this->mFilteredExifData[$prop] );
508                 }
509         }
510
511         /**
512          * Convert gps in exif form to a single floating point number
513          * for example 10 degress 20`40`` S -> -10.34444
514          * @param string $prop A GPS coordinate exif tag name (like GPSLongitude)
515          */
516         private function exifGPStoNumber( $prop ) {
517                 $loc =& $this->mFilteredExifData[$prop];
518                 $dir =& $this->mFilteredExifData[$prop . 'Ref'];
519                 $res = false;
520
521                 if ( isset( $loc ) && isset( $dir )
522                         && ( $dir === 'N' || $dir === 'S' || $dir === 'E' || $dir === 'W' )
523                 ) {
524                         list( $num, $denom ) = explode( '/', $loc[0] );
525                         $res = $num / $denom;
526                         list( $num, $denom ) = explode( '/', $loc[1] );
527                         $res += ( $num / $denom ) * ( 1 / 60 );
528                         list( $num, $denom ) = explode( '/', $loc[2] );
529                         $res += ( $num / $denom ) * ( 1 / 3600 );
530
531                         if ( $dir === 'S' || $dir === 'W' ) {
532                                 $res *= -1; // make negative
533                         }
534                 }
535
536                 // update the exif records.
537
538                 if ( $res !== false ) { // using !== as $res could potentially be 0
539                         $this->mFilteredExifData[$prop] = $res;
540                         unset( $this->mFilteredExifData[$prop . 'Ref'] );
541                 } else { // if invalid
542                         unset( $this->mFilteredExifData[$prop] );
543                         unset( $this->mFilteredExifData[$prop . 'Ref'] );
544                 }
545         }
546
547         /**#@-*/
548
549         /**#@+
550          * @return array
551          */
552         /**
553          * Get $this->mRawExifData
554          * @return array
555          */
556         function getData() {
557                 return $this->mRawExifData;
558         }
559
560         /**
561          * Get $this->mFilteredExifData
562          * @return array
563          */
564         function getFilteredData() {
565                 return $this->mFilteredExifData;
566         }
567
568         /**#@-*/
569
570         /**
571          * The version of the output format
572          *
573          * Before the actual metadata information is saved in the database we
574          * strip some of it since we don't want to save things like thumbnails
575          * which usually accompany Exif data. This value gets saved in the
576          * database along with the actual Exif data, and if the version in the
577          * database doesn't equal the value returned by this function the Exif
578          * data is regenerated.
579          *
580          * @return int
581          */
582         public static function version() {
583                 return 2; // We don't need no bloddy constants!
584         }
585
586         /**
587          * Validates if a tag value is of the type it should be according to the Exif spec
588          *
589          * @param mixed $in The input value to check
590          * @return bool
591          */
592         private function isByte( $in ) {
593                 if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 255 ) {
594                         $this->debug( $in, __FUNCTION__, true );
595
596                         return true;
597                 } else {
598                         $this->debug( $in, __FUNCTION__, false );
599
600                         return false;
601                 }
602         }
603
604         /**
605          * @param mixed $in The input value to check
606          * @return bool
607          */
608         private function isASCII( $in ) {
609                 if ( is_array( $in ) ) {
610                         return false;
611                 }
612
613                 if ( preg_match( "/[^\x0a\x20-\x7e]/", $in ) ) {
614                         $this->debug( $in, __FUNCTION__, 'found a character not in our whitelist' );
615
616                         return false;
617                 }
618
619                 if ( preg_match( '/^\s*$/', $in ) ) {
620                         $this->debug( $in, __FUNCTION__, 'input consisted solely of whitespace' );
621
622                         return false;
623                 }
624
625                 return true;
626         }
627
628         /**
629          * @param mixed $in The input value to check
630          * @return bool
631          */
632         private function isShort( $in ) {
633                 if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 65536 ) {
634                         $this->debug( $in, __FUNCTION__, true );
635
636                         return true;
637                 } else {
638                         $this->debug( $in, __FUNCTION__, false );
639
640                         return false;
641                 }
642         }
643
644         /**
645          * @param mixed $in The input value to check
646          * @return bool
647          */
648         private function isLong( $in ) {
649                 if ( !is_array( $in ) && sprintf( '%d', $in ) == $in && $in >= 0 && $in <= 4294967296 ) {
650                         $this->debug( $in, __FUNCTION__, true );
651
652                         return true;
653                 } else {
654                         $this->debug( $in, __FUNCTION__, false );
655
656                         return false;
657                 }
658         }
659
660         /**
661          * @param mixed $in The input value to check
662          * @return bool
663          */
664         private function isRational( $in ) {
665                 $m = [];
666
667                 # Avoid division by zero
668                 if ( !is_array( $in )
669                         && preg_match( '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m )
670                 ) {
671                         return $this->isLong( $m[1] ) && $this->isLong( $m[2] );
672                 } else {
673                         $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );
674
675                         return false;
676                 }
677         }
678
679         /**
680          * @param mixed $in The input value to check
681          * @return bool
682          */
683         private function isUndefined( $in ) {
684                 $this->debug( $in, __FUNCTION__, true );
685
686                 return true;
687         }
688
689         /**
690          * @param mixed $in The input value to check
691          * @return bool
692          */
693         private function isSlong( $in ) {
694                 if ( $this->isLong( abs( $in ) ) ) {
695                         $this->debug( $in, __FUNCTION__, true );
696
697                         return true;
698                 } else {
699                         $this->debug( $in, __FUNCTION__, false );
700
701                         return false;
702                 }
703         }
704
705         /**
706          * @param mixed $in The input value to check
707          * @return bool
708          */
709         private function isSrational( $in ) {
710                 $m = [];
711
712                 # Avoid division by zero
713                 if ( !is_array( $in ) &&
714                         preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m )
715                 ) {
716                         return $this->isSlong( $m[0] ) && $this->isSlong( $m[1] );
717                 } else {
718                         $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' );
719
720                         return false;
721                 }
722         }
723
724         /**#@-*/
725
726         /**
727          * Validates if a tag has a legal value according to the Exif spec
728          *
729          * @param string $section Section where tag is located.
730          * @param string $tag The tag to check.
731          * @param mixed $val The value of the tag.
732          * @param bool $recursive True if called recursively for array types.
733          * @return bool
734          */
735         private function validate( $section, $tag, $val, $recursive = false ) {
736                 $debug = "tag is '$tag'";
737                 $etype = $this->mExifTags[$section][$tag];
738                 $ecount = 1;
739                 if ( is_array( $etype ) ) {
740                         list( $etype, $ecount ) = $etype;
741                         if ( $recursive ) {
742                                 $ecount = 1; // checking individual elements
743                         }
744                 }
745                 $count = count( $val );
746                 if ( $ecount != $count ) {
747                         $this->debug( $val, __FUNCTION__, "Expected $ecount elements for $tag but got $count" );
748
749                         return false;
750                 }
751                 if ( $count > 1 ) {
752                         foreach ( $val as $v ) {
753                                 if ( !$this->validate( $section, $tag, $v, true ) ) {
754                                         return false;
755                                 }
756                         }
757
758                         return true;
759                 }
760                 // Does not work if not typecast
761                 switch ( (string)$etype ) {
762                         case (string)self::BYTE:
763                                 $this->debug( $val, __FUNCTION__, $debug );
764
765                                 return $this->isByte( $val );
766                         case (string)self::ASCII:
767                                 $this->debug( $val, __FUNCTION__, $debug );
768
769                                 return $this->isASCII( $val );
770                         case (string)self::SHORT:
771                                 $this->debug( $val, __FUNCTION__, $debug );
772
773                                 return $this->isShort( $val );
774                         case (string)self::LONG:
775                                 $this->debug( $val, __FUNCTION__, $debug );
776
777                                 return $this->isLong( $val );
778                         case (string)self::RATIONAL:
779                                 $this->debug( $val, __FUNCTION__, $debug );
780
781                                 return $this->isRational( $val );
782                         case (string)self::SHORT_OR_LONG:
783                                 $this->debug( $val, __FUNCTION__, $debug );
784
785                                 return $this->isShort( $val ) || $this->isLong( $val );
786                         case (string)self::UNDEFINED:
787                                 $this->debug( $val, __FUNCTION__, $debug );
788
789                                 return $this->isUndefined( $val );
790                         case (string)self::SLONG:
791                                 $this->debug( $val, __FUNCTION__, $debug );
792
793                                 return $this->isSlong( $val );
794                         case (string)self::SRATIONAL:
795                                 $this->debug( $val, __FUNCTION__, $debug );
796
797                                 return $this->isSrational( $val );
798                         case (string)self::IGNORE:
799                                 $this->debug( $val, __FUNCTION__, $debug );
800
801                                 return false;
802                         default:
803                                 $this->debug( $val, __FUNCTION__, "The tag '$tag' is unknown" );
804
805                                 return false;
806                 }
807         }
808
809         /**
810          * Convenience function for debugging output
811          *
812          * @param mixed $in Arrays will be processed with print_r().
813          * @param string $fname Function name to log.
814          * @param string|bool|null $action Default null.
815          */
816         private function debug( $in, $fname, $action = null ) {
817                 if ( !$this->log ) {
818                         return;
819                 }
820                 $type = gettype( $in );
821                 $class = ucfirst( __CLASS__ );
822                 if ( is_array( $in ) ) {
823                         $in = print_r( $in, true );
824                 }
825
826                 if ( $action === true ) {
827                         wfDebugLog( $this->log, "$class::$fname: accepted: '$in' (type: $type)" );
828                 } elseif ( $action === false ) {
829                         wfDebugLog( $this->log, "$class::$fname: rejected: '$in' (type: $type)" );
830                 } elseif ( $action === null ) {
831                         wfDebugLog( $this->log, "$class::$fname: input was: '$in' (type: $type)" );
832                 } else {
833                         wfDebugLog( $this->log, "$class::$fname: $action (type: $type; content: '$in')" );
834                 }
835         }
836
837         /**
838          * Convenience function for debugging output
839          *
840          * @param string $fname The name of the function calling this function
841          * @param bool $io Specify whether we're beginning or ending
842          */
843         private function debugFile( $fname, $io ) {
844                 if ( !$this->log ) {
845                         return;
846                 }
847                 $class = ucfirst( __CLASS__ );
848                 if ( $io ) {
849                         wfDebugLog( $this->log, "$class::$fname: begin processing: '{$this->basename}'" );
850                 } else {
851                         wfDebugLog( $this->log, "$class::$fname: end processing: '{$this->basename}'" );
852                 }
853         }
854 }