Gilles has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/350405 )
Change subject: Store original media dimensions as additional header
......................................................................
Store original media dimensions as additional header
Bug: T150741
For storage repos that support headers (such as Swift), this will store the
original media dimensions as an extra custom header, X-Content-Dimensions.
The header is formatted to minimize its length when dealing with multipage
documents, by expressing the information as page ranges keyed by dimensions.
Example for a multipage document with some pages of different sizes:
X-Content-Dimensions: 1903x899:1-9,11/1903x873:10
Example for a single page document:
X-Content-Dimensions: 800x600:1
Change-Id: Ic4c6a86557b3705cf75d074753e9ce2ee070a6df
---
M includes/filerepo/file/File.php
M includes/filerepo/file/LocalFile.php
M includes/libs/filebackend/FileBackendStore.php
M includes/media/Exif.php
M includes/media/ExifBitmap.php
M includes/media/GIFMetadataExtractor.php
M includes/media/MediaHandler.php
M includes/media/PNGMetadataExtractor.php
M includes/media/XCF.php
M maintenance/importImages.php
M maintenance/refreshFileHeaders.php
M tests/phpunit/includes/media/MediaHandlerTest.php
12 files changed, 139 insertions(+), 24 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/05/350405/1
diff --git a/includes/filerepo/file/File.php b/includes/filerepo/file/File.php
index e367812..1585602 100644
--- a/includes/filerepo/file/File.php
+++ b/includes/filerepo/file/File.php
@@ -2151,10 +2151,10 @@
/**
* @return array HTTP header name/value map to use for HEAD/GET request
responses
*/
- function getStreamHeaders() {
+ function getContentHeaders() {
$handler = $this->getHandler();
if ( $handler ) {
- return $handler->getStreamHeaders( $this->getMetadata()
);
+ return $handler->getContentHeaders(
$this->getMetadata() );
} else {
return [];
}
diff --git a/includes/filerepo/file/LocalFile.php
b/includes/filerepo/file/LocalFile.php
index 292fc80..b9ae962 100644
--- a/includes/filerepo/file/LocalFile.php
+++ b/includes/filerepo/file/LocalFile.php
@@ -1200,7 +1200,7 @@
$options = [];
$handler = MediaHandler::getHandler( $props['mime'] );
if ( $handler ) {
- $options['headers'] = $handler->getStreamHeaders(
$props['metadata'] );
+ $options['headers'] = $handler->getContentHeaders(
$props['metadata'] );
} else {
$options['headers'] = [];
}
diff --git a/includes/libs/filebackend/FileBackendStore.php
b/includes/libs/filebackend/FileBackendStore.php
index 039bd42..e2f7886 100644
--- a/includes/libs/filebackend/FileBackendStore.php
+++ b/includes/libs/filebackend/FileBackendStore.php
@@ -1250,7 +1250,7 @@
* @return array
*/
protected function sanitizeOpHeaders( array $op ) {
- static $longs = [ 'content-disposition' ];
+ static $longs = [ 'content-disposition', 'x-content-dimensions'
];
if ( isset( $op['headers'] ) ) { // op sets HTTP headers
$newHeaders = [];
diff --git a/includes/media/Exif.php b/includes/media/Exif.php
index 95fa859..621a4aa 100644
--- a/includes/media/Exif.php
+++ b/includes/media/Exif.php
@@ -117,6 +117,11 @@
* @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification
*/
$this->mExifTags = [
+ 'COMPUTED' => [
+ 'Width' => Exif::SHORT_OR_LONG, # Image width
+ 'Height' => Exif::SHORT_OR_LONG, # Image height
+ ],
+
# TIFF Rev. 6.0 Attribute Information (p22)
'IFD0' => [
# Tags relating to image structure
diff --git a/includes/media/ExifBitmap.php b/includes/media/ExifBitmap.php
index 7aeefa0..2f81045 100644
--- a/includes/media/ExifBitmap.php
+++ b/includes/media/ExifBitmap.php
@@ -314,4 +314,25 @@
return true;
}
+
+ /**
+ * Get useful response headers for GET/HEAD requests for a file with the
given metadata
+ * @param $metadata mixed Result this handlers getMetadata() for a file
+ * @return Array
+ */
+ public function getContentHeaders( $metadata ) {
+ wfSuppressWarnings();
+ $metadata = unserialize( $metadata );
+ wfRestoreWarnings();
+
+ if ( !isset( $metadata['Width'] ) || !isset(
$metadata['Height'] ) ) {
+ return [];
+ }
+
+ $dimensionsMetadata = [];
+ $dimensionsMetadata['width'] = $metadata['Width'];
+ $dimensionsMetadata['height'] = $metadata['Height'];
+
+ return parent::getContentHeaders( serialize(
$dimensionsMetadata ) );
+ }
}
diff --git a/includes/media/GIFMetadataExtractor.php
b/includes/media/GIFMetadataExtractor.php
index de409e7..13dba57 100644
--- a/includes/media/GIFMetadataExtractor.php
+++ b/includes/media/GIFMetadataExtractor.php
@@ -41,7 +41,7 @@
/** @var string */
private static $gifTerm;
- const VERSION = 1;
+ const VERSION = 2;
// Each sub-block is less than or equal to 255 bytes.
// Most of the time its 255 bytes, except for in XMP
@@ -54,9 +54,9 @@
* @return array
*/
static function getMetadata( $filename ) {
- self::$gifFrameSep = pack( "C", ord( "," ) );
- self::$gifExtensionSep = pack( "C", ord( "!" ) );
- self::$gifTerm = pack( "C", ord( ";" ) );
+ self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
+ self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
+ self::$gifTerm = pack( "C", ord( ";" ) ); // 3B
$frameCount = 0;
$duration = 0.0;
@@ -82,8 +82,11 @@
throw new Exception( "Not a valid GIF file; header:
$buf" );
}
- // Skip over width and height.
- fread( $fh, 4 );
+ // Read width and height.
+ $buf = fread( $fh, 2 );
+ $width = unpack( 'v', $buf )[1];
+ $buf = fread( $fh, 2 );
+ $height = unpack( 'v', $buf )[1];
// Read BPP
$buf = fread( $fh, 1 );
@@ -251,6 +254,8 @@
'duration' => $duration,
'xmp' => $xmp,
'comment' => $comment,
+ 'width' => $width,
+ 'height' => $height,
];
}
diff --git a/includes/media/MediaHandler.php b/includes/media/MediaHandler.php
index 6a23bd6..d6ec7c1 100644
--- a/includes/media/MediaHandler.php
+++ b/includes/media/MediaHandler.php
@@ -305,16 +305,6 @@
}
/**
- * Get useful response headers for GET/HEAD requests for a file with
the given metadata
- *
- * @param mixed $metadata Result of the getMetadata() function of this
handler for a file
- * @return array
- */
- public function getStreamHeaders( $metadata ) {
- return [];
- }
-
- /**
* True if the handled types can be transformed
*
* @param File $file
@@ -868,4 +858,70 @@
public function getWarningConfig( $file ) {
return null;
}
+
+ /**
+ * Converts a dimensions array about a potentially multipage document
from an
+ * exhaustive list of ordered page numbers to a list of page ranges
+ * @param Array $pagesByDimensions
+ * @return String
+ */
+ public static function getPageRangesByDimensions( $pagesByDimensions ) {
+ $pageRangesByDimensions = [];
+
+ foreach ( $pagesByDimensions as $dimensions => $pageList ) {
+ $ranges = [];
+ $firstPage = $pageList[0];
+ $lastPage = $firstPage - 1;
+
+ foreach ( $pageList as $page ) {
+ if ( $page > $lastPage + 1 ) {
+ if ( $firstPage != $lastPage ) {
+ $ranges []=
"$firstPage-$lastPage";
+ } else {
+ $ranges []= "$firstPage";
+ }
+
+ $firstPage = $page;
+ }
+
+ $lastPage = $page;
+ }
+
+ if ( $firstPage != $lastPage ) {
+ $ranges []= "$firstPage-$lastPage";
+ } else{
+ $ranges []= "$firstPage";
+ }
+
+ $pageRangesByDimensions[ $dimensions ] = $ranges;
+ }
+
+ $dimensionsString = [];
+ foreach ( $pageRangesByDimensions as $dimensions => $pageRanges
) {
+ $dimensionsString []= "$dimensions:" . implode( ',',
$pageRanges );
+ }
+
+ return implode( '/', $dimensionsString );
+ }
+
+ /**
+ * Get useful response headers for GET/HEAD requests for a file with the
given metadata
+ * @param $metadata mixed Result this handlers getMetadata() for a file
+ * @return Array
+ */
+ public function getContentHeaders( $metadata ) {
+ wfSuppressWarnings();
+ $metadata = unserialize( $metadata );
+ wfRestoreWarnings();
+
+ if ( !isset( $metadata['width'] ) || !isset(
$metadata['height'] ) ) {
+ return [];
+ }
+
+ $dimensionString = $metadata['width'] . 'x' .
$metadata['height'];
+ $pagesByDimensions = [ $dimensionString => [ 1 ] ];
+ $pageRangesByDimensions =
MediaHandler::getPageRangesByDimensions( $pagesByDimensions );
+
+ return [ 'X-Content-Dimensions' => $pageRangesByDimensions ];
+ }
}
diff --git a/includes/media/PNGMetadataExtractor.php
b/includes/media/PNGMetadataExtractor.php
index d0517d7..75a91f5 100644
--- a/includes/media/PNGMetadataExtractor.php
+++ b/includes/media/PNGMetadataExtractor.php
@@ -40,7 +40,7 @@
/** @var array */
private static $textChunks;
- const VERSION = 1;
+ const VERSION = 2;
const MAX_CHUNK_SIZE = 3145728; // 3 megabytes
static function getMetadata( $filename ) {
@@ -121,6 +121,8 @@
if ( !$buf || strlen( $buf ) < $chunk_size ) {
throw new Exception( __METHOD__ . ":
Read error" );
}
+ $width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
+ $height = unpack( 'N', substr( $buf, 4, 4 )
)[1];
$bitDepth = ord( substr( $buf, 8, 1 ) );
// Detect the color type in British English as
per the spec
// https://www.w3.org/TR/PNG/#11IHDR
@@ -404,6 +406,8 @@
'text' => $text,
'bitDepth' => $bitDepth,
'colorType' => $colorType,
+ 'width' => $width,
+ 'height' => $height,
];
}
diff --git a/includes/media/XCF.php b/includes/media/XCF.php
index c419524..bc1e2fb 100644
--- a/includes/media/XCF.php
+++ b/includes/media/XCF.php
@@ -175,6 +175,9 @@
$metadata['colorType'] = 'unknown';
}
+
+ $metadata['width'] = $header['width'];
+ $metadata['height'] = $header['height'];
} else {
// Marker to prevent repeated attempted extraction
$metadata['error'] = true;
diff --git a/maintenance/importImages.php b/maintenance/importImages.php
index 23bdb3f..ac07106 100644
--- a/maintenance/importImages.php
+++ b/maintenance/importImages.php
@@ -307,7 +307,7 @@
$publishOptions = [];
$handler = MediaHandler::getHandler(
$props['mime'] );
if ( $handler ) {
- $publishOptions['headers'] =
$handler->getStreamHeaders( $props['metadata'] );
+ $publishOptions['headers'] =
$handler->getContentHeaders( $props['metadata'] );
} else {
$publishOptions['headers'] = [];
}
diff --git a/maintenance/refreshFileHeaders.php
b/maintenance/refreshFileHeaders.php
index e075501..f922055 100644
--- a/maintenance/refreshFileHeaders.php
+++ b/maintenance/refreshFileHeaders.php
@@ -57,13 +57,13 @@
__METHOD__, [ 'LIMIT' => $this->mBatchSize,
'ORDER BY' => 'img_name ASC' ] );
foreach ( $res as $row ) {
$file = $repo->newFileFromRow( $row );
- $headers = $file->getStreamHeaders();
+ $headers = $file->getContentHeaders();
if ( count( $headers ) ) {
$this->updateFileHeaders( $file,
$headers );
}
// Do all of the older file versions...
foreach ( $file->getHistory() as $oldFile ) {
- $headers = $oldFile->getStreamHeaders();
+ $headers =
$oldFile->getContentHeaders();
if ( count( $headers ) ) {
$this->updateFileHeaders(
$oldFile, $headers );
}
diff --git a/tests/phpunit/includes/media/MediaHandlerTest.php
b/tests/phpunit/includes/media/MediaHandlerTest.php
index 7a052f6..4589fa5 100644
--- a/tests/phpunit/includes/media/MediaHandlerTest.php
+++ b/tests/phpunit/includes/media/MediaHandlerTest.php
@@ -65,4 +65,25 @@
}
return $result;
}
+
+ /**
+ * @covers MediaHandler::getPageRangesByDimensions
+ *
+ * @dataProvider provideTestGetPageRangesByDimensions
+ */
+ public function testGetPageRangesByDimensions( $pagesByDimensions,
$expected ) {
+ $this->assertEquals( $expected,
MediaHandler::getPageRangesByDimensions( $pagesByDimensions ) );
+ }
+
+ public static function provideTestGetPageRangesByDimensions() {
+ return [
+ [ [ '123x456' => [ 1 ] ], '123x456:1' ],
+ [ [ '123x456' => [ 1, 2 ] ], '123x456:1-2' ],
+ [ [ '123x456' => [ 1, 2, 3 ] ], '123x456:1-3' ],
+ [ [ '123x456' => [ 1, 2, 3, 5 ] ], '123x456:1-3,5' ],
+ [ [ '123x456' => [ 1, 3 ] ], '123x456:1,3' ],
+ [ [ '123x456' => [ 1, 2, 3, 5, 6, 7 ] ],
'123x456:1-3,5-7' ],
+ [ [ '123x456' => [ 1, 2, 3, 5, 6, 7 ], '789x789' => [
4, 8, 9 ] ], '123x456:1-3,5-7/789x789:4,8-9' ],
+ ];
+ }
}
--
To view, visit https://gerrit.wikimedia.org/r/350405
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic4c6a86557b3705cf75d074753e9ce2ee070a6df
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Gilles <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits