<?php
+/**
+ * Simulation of Microsoft Internet Explorer's MIME type detection algorithm.
+ *
+ * @file
+ * @todo Define the exact license of this file.
+ */
/**
- * This class simulates Microsoft Internet Explorer's terribly broken and
+ * This class simulates Microsoft Internet Explorer's terribly broken and
* insecure MIME type detection algorithm. It can be used to check web uploads
- * with an apparently safe type, to see if IE will reinterpret them to produce
+ * with an apparently safe type, to see if IE will reinterpret them to produce
* something dangerous.
*
- * It is full of bugs and strange design choices should not under any
- * circumstances be used to determine a MIME type to present to a user or
+ * It is full of bugs and strange design choices and should not under any
+ * circumstances be used to determine a MIME type to present to a user or
* client. (Apple Safari developers, this means you too.)
*
- * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
- * attempted to ensure that this code works in exactly the same way as Internet
- * Explorer, it does not share any source code, or creative choices such as
- * variable names, thus I (Tim Starling) claim copyright on it.
+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
+ * attempted to ensure that this code works in exactly the same way as Internet
+ * Explorer, it does not share any source code, or creative choices such as
+ * variable names, thus I (Tim Starling) claim copyright on it.
*
* It may be redistributed without restriction. To aid reuse, this class does
* not depend on any MediaWiki module.
/**
* Relevant data taken from the type table in IE 5
*/
- protected $baseTypeTable = array(
- 'ambiguous' /*1*/ => array(
- 'text/plain',
- 'application/octet-stream',
+ protected $baseTypeTable = [
+ 'ambiguous' /*1*/ => [
+ 'text/plain',
+ 'application/octet-stream',
'application/x-netcdf', // [sic]
- ),
- 'text' /*3*/ => array(
+ ],
+ 'text' /*3*/ => [
'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
- ),
- 'binary' /*4*/ => array(
+ ],
+ 'binary' /*4*/ => [
'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
- 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
- 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
+ 'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
+ 'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
'application/x-msdownload'
- ),
- 'html' /*5*/ => array( 'text/html' ),
- );
+ ],
+ 'html' /*5*/ => [ 'text/html' ],
+ ];
/**
* Changes to the type table in later versions of IE
*/
- protected $addedTypes = array(
- 'ie07' => array(
- 'text' => array( 'text/xml', 'application/xml' )
- ),
- );
+ protected $addedTypes = [
+ 'ie07' => [
+ 'text' => [ 'text/xml', 'application/xml' ]
+ ],
+ ];
/**
* An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
* typical Windows installation.
*
* Used for extension to MIME type mapping if detection fails.
*/
- protected $registry = array(
+ protected $registry = [
'.323' => 'text/h323',
'.3g2' => 'video/3gpp2',
'.3gp' => 'video/3gpp',
'.xml' => 'text/xml',
'.xps' => 'application/vnd.ms-xpsdocument',
'.xsl' => 'text/xml',
- );
+ ];
- /**
- * IE versions which have been analysed to bring you this class, and for
- * which some substantive difference exists. These will appear as keys
+ /**
+ * IE versions which have been analysed to bring you this class, and for
+ * which some substantive difference exists. These will appear as keys
* in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
*/
- protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
+ protected $versions = [ 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ];
/**
- * Type table with versions expanded
+ * Type table with versions expanded
*/
- protected $typeTable = array();
+ protected $typeTable = [];
/** constructor */
function __construct() {
- // Construct versioned type arrays from the base type array plus additions
+ // Construct versioned type arrays from the base type array plus additions
$types = $this->baseTypeTable;
foreach ( $this->versions as $version ) {
if ( isset( $this->addedTypes[$version] ) ) {
}
/**
- * Get the MIME types from getMimesFromData(), but convert the result from IE's
+ * Get the MIME types from getMimesFromData(), but convert the result from IE's
* idiosyncratic private types into something other apps will understand.
*
- * @param $fileName String: the file name (unused at present)
- * @param $chunk String: the first 256 bytes of the file
- * @param $proposed String: the MIME type proposed by the server
+ * @param string $fileName the file name (unused at present)
+ * @param string $chunk the first 256 bytes of the file
+ * @param string $proposed the MIME type proposed by the server
*
- * @return Array: map of IE version to detected mime type
+ * @return array map of IE version to detected MIME type
*/
public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
$types = $this->getMimesFromData( $fileName, $chunk, $proposed );
- $types = array_map( array( $this, 'translateMimeType' ), $types );
+ $types = array_map( [ $this, 'translateMimeType' ], $types ); // single-array array_map() keeps the IE-version keys
return $types;
}
/**
* Translate a MIME type from IE's idiosyncratic private types into
* more commonly understood type strings
+ * @param string $type
+ * @return string
*/
public function translateMimeType( $type ) {
- static $table = array(
+ static $table = [
'image/pjpeg' => 'image/jpeg',
'image/x-png' => 'image/png',
'image/x-wmf' => 'application/x-msmetafile',
'application/x-compressed' => 'application/x-compress',
'application/x-gzip-compressed' => 'application/x-gzip',
'audio/mid' => 'audio/midi',
- );
+ ];
if ( isset( $table[$type] ) ) {
$type = $table[$type];
}
/**
* Get the untranslated MIME types for all known versions
*
- * @param $fileName String: the file name (unused at present)
- * @param $chunk String: the first 256 bytes of the file
- * @param $proposed String: the MIME type proposed by the server
+ * @param string $fileName the file name (unused at present)
+ * @param string $chunk the first 256 bytes of the file
+ * @param string $proposed the MIME type proposed by the server
*
- * @return Array: map of IE version to detected mime type
+ * @return array map of IE version to detected MIME type
*/
public function getMimesFromData( $fileName, $chunk, $proposed ) {
- $types = array();
+ $types = [];
foreach ( $this->versions as $version ) {
$types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
}
/**
* Get the MIME type for a given named version
+ * @param string $version
+ * @param string $fileName
+ * @param string $chunk
+ * @param string $proposed
+ * @return bool|string
*/
protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
// Strip text after a semicolon
$proposedFormat = $this->getDataFormat( $version, $proposed );
if ( $proposedFormat == 'unknown'
&& $proposed != 'multipart/mixed'
- && $proposed != 'multipart/x-mixed-replace' )
- {
+ && $proposed != 'multipart/x-mixed-replace'
+ ) {
return $proposed;
}
if ( strval( $chunk ) === '' ) {
// Truncate chunk at 255 bytes
$chunk = substr( $chunk, 0, 255 );
- // IE does the Check*Headers() calls last, and instead does the following image
- // type checks by directly looking for the magic numbers. What I do here should
+ // IE does the Check*Headers() calls last, and instead does the following image
+ // type checks by directly looking for the magic numbers. What I do here should
// have the same effect since the magic number checks are identical in both cases.
$result = $this->sampleData( $version, $chunk );
$sampleFound = $result['found'];
return 'image/gif';
}
if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
- && $binaryType == 'image/pjpeg' )
- {
+ && $binaryType == 'image/pjpeg'
+ ) {
return $proposed;
}
// PNG check added in IE 7
if ( $version >= 'ie07'
&& ( $proposed == 'image/x-png' || $proposed == 'image/png' )
- && $binaryType == 'image/x-png' )
- {
+ && $binaryType == 'image/x-png'
+ ) {
return $proposed;
}
return 'application/x-cdf';
}
- // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
+ // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
// previous versions
if ( isset( $sampleFound['rss'] ) ) {
return 'application/rss+xml';
}
if ( isset( $sampleFound['rdf-tag'] )
&& isset( $sampleFound['rdf-url'] )
- && isset( $sampleFound['rdf-purl'] ) )
- {
+ && isset( $sampleFound['rdf-purl'] )
+ ) {
return 'application/rss+xml';
}
if ( isset( $sampleFound['atom'] ) ) {
// Freaky heuristics to determine if the data is text or binary
// The heuristic is of course broken for non-ASCII text
- if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
- < ( $counters['ctrl'] + $counters['high'] ) * 16 )
- {
+ if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
+ < ( $counters['ctrl'] + $counters['high'] ) * 16
+ ) {
$kindOfBinary = true;
$type = $binaryType ? $binaryType : $textType;
if ( $type === false ) {
return $this->registry[$ext];
}
- // TODO: If the extension has an application registered to it, IE will return
- // application/octet-stream. We'll skip that, so we could erroneously
+ // TODO: If the extension has an application registered to it, IE will return
+ // application/octet-stream. We'll skip that, so we could erroneously
// return text/plain or application/x-netcdf where application/octet-stream
// would be correct.
/**
* Check for text headers at the start of the chunk
* Confirmed same in 5 and 7.
+ * @param string $version
+ * @param string $chunk
+ * @return bool|string
*/
private function checkTextHeaders( $version, $chunk ) {
$chunk2 = substr( $chunk, 0, 2 );
/**
* Check for binary headers at the start of the chunk
* Confirmed same in 5 and 7.
+ * @param string $version
+ * @param string $chunk
+ * @return bool|string
*/
private function checkBinaryHeaders( $version, $chunk ) {
$chunk2 = substr( $chunk, 0, 2 );
return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
}
- if ( $chunk2 == 'BM'
+ if ( $chunk2 == 'BM'
&& substr( $chunk, 6, 2 ) == "\000\000"
- && substr( $chunk, 8, 2 ) == "\000\000" )
- {
+ && substr( $chunk, 8, 2 ) == "\000\000"
+ ) {
return 'image/bmp'; // another non-standard MIME
}
- if ( $chunk4 == 'RIFF'
- && substr( $chunk, 8, 4 ) == 'WAVE' )
- {
+ if ( $chunk4 == 'RIFF'
+ && substr( $chunk, 8, 4 ) == 'WAVE'
+ ) {
return 'audio/wav';
}
// These were integer literals in IE
if ( $chunk4 == ".sd\000"
|| $chunk4 == ".snd"
|| $chunk4 == "\000ds."
- || $chunk4 == "dns." )
- {
+ || $chunk4 == "dns."
+ ) {
return 'audio/basic';
}
if ( $chunk3 == "MM\000" ) {
return 'video/mpeg';
}
if ( $chunk4 == "\001\000\000\000"
- && substr( $chunk, 40, 4 ) == ' EMF' )
- {
+ && substr( $chunk, 40, 4 ) == ' EMF'
+ ) {
return 'image/x-emf';
}
if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
/**
* Do heuristic checks on the bulk of the data sample.
* Search for HTML tags.
+ * @param string $version
+ * @param string $chunk
+ * @return array
*/
protected function sampleData( $version, $chunk ) {
- $found = array();
- $counters = array(
+ $found = [];
+ $counters = [
'ctrl' => 0,
'high' => 0,
'low' => 0,
'lf' => 0,
'cr' => 0,
'ff' => 0
- );
- $htmlTags = array(
+ ];
+ $htmlTags = [
'html',
'head',
'title',
'img',
'plaintext',
'table'
- );
+ ];
$rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
$rdfPurl = 'http://purl.org/rss/1.0/';
$xbmMagic1 = '#define';
$xbmMagic2 = '_width';
$xbmMagic3 = '_bits';
$binhexMagic = 'converted with BinHex';
+ $chunkLength = strlen( $chunk );
- for ( $offset = 0; $offset < strlen( $chunk ); $offset++ ) {
+ for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
$curChar = $chunk[$offset];
if ( $curChar == "\x0a" ) {
$counters['lf']++;
if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
$found['rdf-url'] = true;
if ( isset( $found['rdf-tag'] )
- && isset( $found['rdf-purl'] ) ) // [sic]
- {
+ && isset( $found['rdf-purl'] ) // [sic]
+ ) {
break;
}
continue;
}
if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
- if ( isset( $found['rdf-tag'] )
- && isset( $found['rdf-url'] ) ) // [sic]
- {
+ if ( isset( $found['rdf-tag'] )
+ && isset( $found['rdf-url'] ) // [sic]
+ ) {
break;
}
continue;
$found['binhex'] = true;
}
}
- return array( 'found' => $found, 'counters' => $counters );
+ return [ 'found' => $found, 'counters' => $counters ];
}
+ /**
+ * @param string $version
+ * @param string|null $type
+ * @return int|string
+ */
protected function getDataFormat( $version, $type ) {
$types = $this->typeTable[$version];
if ( $type == '(null)' || strval( $type ) === '' ) {
return 'unknown';
}
}
-