http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100535
Revision: 100535 Author: aaron Date: 2011-10-23 09:36:35 +0000 (Sun, 23 Oct 2011) Log Message: ----------- Added a basic thumb-handler.php file, configured via thumb.config.php. The code is based on the wmf thumb handler, but simplified. It is disabled by default. * The thumb.php parameter extraction can also be overridden by the config to handle more complex setups and things like OggHandler and PagedTiffHandler. * A simple 404 error page is also included. It can be overridden by the config. * Additional HTTP headers can be passed through cURL via the config. Added Paths: ----------- trunk/phase3/404.php trunk/phase3/thumb-handler.php trunk/phase3/thumb.config.sample Added: trunk/phase3/404.php =================================================================== --- trunk/phase3/404.php (rev 0) +++ trunk/phase3/404.php 2011-10-23 09:36:35 UTC (rev 100535) @@ -0,0 +1,29 @@ +<?php + +header( 'HTTP/1.1 404 Not Found' ); +header( 'Content-Type: text/html;charset=utf-8' ); + +# $_SERVER['REQUEST_URI'] has two different definitions depending on PHP version +if ( preg_match( '!^([a-z]*://)([a-z.]*)(/.*)$!', $_SERVER['REQUEST_URI'], $matches ) ) { + $prot = $matches[1]; + $serv = $matches[2]; + $loc = $matches[3]; +} else { + $prot = "http://"; + $serv = strlen( $_SERVER['HTTP_HOST'] ) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME']; + $loc = $_SERVER["REQUEST_URI"]; +} +$encUrl = htmlspecialchars( $prot . $serv . 
$loc ); + +// Looks like a typical apache2 error +$standard_404 = <<<ENDTEXT +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> +<html><head> +<title>404 Not Found</title> +</head><body> +<h1>Not Found</h1> +<p>The requested URL $encUrl was not found on this server.</p> +</body></html> +ENDTEXT; + +echo $standard_404; Property changes on: trunk/phase3/404.php ___________________________________________________________________ Added: svn:eol-style + native Added: trunk/phase3/thumb-handler.php =================================================================== --- trunk/phase3/thumb-handler.php (rev 0) +++ trunk/phase3/thumb-handler.php 2011-10-23 09:36:35 UTC (rev 100535) @@ -0,0 +1,230 @@ +<?php + +# Valid web server entry point +define( 'THUMB_HANDLER', true ); + +# Load thumb-handler configuration. We don't want to use +# WebStart.php or the like as it would kill performance. +$configPath = dirname( __FILE__ ) . "/thumb.config.php"; +if ( !file_exists( $configPath ) ) { + die( "Thumb-handler.php is not enabled for this wiki.\n" ); +} +require( $configPath ); + +function wfHandleThumb404() { + global $thgThumb404File; + + # lighttpd puts the original request in REQUEST_URI, while + # sjs sets that to the 404 handler, and puts the original + # request in REDIRECT_URL. + if ( isset( $_SERVER['REDIRECT_URL'] ) ) { + # The URL is un-encoded, so put it back how it was. + $uri = str_replace( "%2F", "/", urlencode( $_SERVER['REDIRECT_URL'] ) ); + } else { + $uri = $_SERVER['REQUEST_URI']; + } + + # Extract thumb.php params from the URI. 
+ if ( function_exists( 'wfCustomExtractThumbParams' ) ) { + $params = wfCustomExtractThumbParams( $uri ); // overridden by configuration + } else { + $params = wfExtractThumbParams( $uri ); // basic wiki URL param extracting + } + if ( $params === null ) { // not a valid thumb request + header( 'X-Debug: no regex match' ); // useful for debugging + require_once( $thgThumb404File ); // standard 404 message + return; + } + + # Do some basic checks on the filename... + if ( preg_match( '/[\x80-\xff]/', $uri ) ) { + header( 'HTTP/1.0 400 Bad request' ); + header( 'Content-Type: text/html' ); + echo "<html><head><title>Bad request</title></head><body>" . + "The URI contained bytes with the high bit set, this is not allowed." . + "</body></html>"; + return; + } elseif ( strpos( $params['f'], '%20' ) !== false ) { + header( 'HTTP/1.0 404 Not found' ); + header( 'Content-Type: text/html' ); + header( 'X-Debug: filename contains a space' ); // useful for debugging + echo "<html><head><title>Not found</title></head><body>" . + "The URL contained spaces, we don't have any thumbnail files with spaces." . + "</body></html>"; + return; + } + + wfStreamThumbViaCurl( $params, $uri ); +} + +/** + * Extract the required params for thumb.php from the thumbnail request URI. + * At least 'width' and 'f' should be set if the result is an array. + * + * @param $uri String Thumbnail request URI + * @return Array|null associative params array or null + */ +function wfExtractThumbParams( $uri ) { + global $thgThumbServer, $thgThumbFragment, $thgThumbHashFragment; + + $thumbRegex = '!^(?:' . preg_quote( $thgThumbServer ) . ')?/' . + preg_quote( $thgThumbFragment ) . '(/archive|/temp|)/' . + $thgThumbHashFragment . '([^/]*)/' . '(page(\d*)-)*(\d*)px-([^/]*)$!'; + + # Is this a thumbnail? 
+ if ( preg_match( $thumbRegex, $uri, $matches ) ) { + list( $all, $archOrTemp, $filename, $pagefull, $pagenum, $size, $fn2 ) = $matches; + $params = array( 'f' => $filename, 'width' => $size ); + if ( $pagenum ) { + $params['page'] = $pagenum; + } + if ( $archOrTemp == '/archive' ) { + $params['archived'] = 1; + } elseif ( $archOrTemp == '/temp' ) { + $params['temp'] = 1; + } + } else { + $params = null; + } + + return $params; +} + +/** + * cURL to thumb.php and stream back the resulting file or give an error message. + * + * @param $params Array Parameters to thumb.php + * @param $uri String Thumbnail request URI + * @return void + */ +function wfStreamThumbViaCurl( array $params, $uri ) { + global $thgThumbScriptPath, $thgThumbCurlProxy, $thgThumbCurlTimeout; + + if ( !function_exists( 'curl_init' ) ) { + header( 'HTTP/1.0 404 Not found' ); + header( 'Content-Type: text/html' ); + header( 'X-Debug: cURL is not enabled' ); // useful for debugging + echo "<html><head><title>Not found</title></head><body>" . + "cURL is not enabled for PHP on this wiki. Unable to send request thumb.php." . + "</body></html>"; + return; + } + + # Build up the request URL to use with CURL... + $reqURL = "{$thgThumbScriptPath}?"; + $first = true; + foreach ( $params as $name => $value ) { + if ( $first ) { + $first = false; + } else { + $reqURL .= '&'; + } + // Note: value is already urlencoded + $reqURL .= "$name=$value"; + } + + $ch = curl_init( $reqURL ); + if ( $thgThumbCurlProxy ) { + curl_setopt( $ch, CURLOPT_PROXY, $thgThumbCurlProxy ); + } + + $headers = array(); // HTTP headers + # Set certain headers... + $headers[] = "X-Original-URI: " . str_replace( "\n", '', $uri ); + if ( function_exists( 'wfCustomThumbRequestHeaders' ) ) { + wfCustomThumbRequestHeaders( $headers ); // add on any custom headers (like XFF) + } + # Pass through some other headers... 
+ $passthrough = array( 'If-Modified-Since', 'Referer', 'User-Agent' ); + foreach ( $passthrough as $headerName ) { + $serverVarName = 'HTTP_' . str_replace( '-', '_', strtoupper( $headerName ) ); + if ( !empty( $_SERVER[$serverVarName] ) ) { + $headers[] = $headerName . ': ' . + str_replace( "\n", '', $_SERVER[$serverVarName] ); + } + } + + curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + curl_setopt( $ch, CURLOPT_TIMEOUT, $thgThumbCurlTimeout ); + + # Actually make the request + $text = curl_exec( $ch ); + + # Send it on to the client + $errno = curl_errno( $ch ); + $contentType = curl_getinfo( $ch, CURLINFO_CONTENT_TYPE ); + $httpCode = curl_getinfo( $ch, CURLINFO_HTTP_CODE ); + if ( $errno ) { + header( 'HTTP/1.1 500 Internal server error' ); + header( 'Cache-Control: no-cache' ); + list( $text, $contentType ) = wfCurlErrorText( $ch ); + } elseif ( $httpCode == 304 ) { + header( 'HTTP/1.1 304 Not modified' ); + $contentType = ''; + $text = ''; + } elseif ( strval( $text ) == '' ) { + header( 'HTTP/1.1 500 Internal server error' ); + header( 'Cache-Control: no-cache' ); + list( $text, $contentType ) = wfCurlEmptyText( $ch ); + } elseif ( $httpCode == 404 ) { + header( 'HTTP/1.1 404 Not found' ); + header( 'Cache-Control: s-maxage=300, must-revalidate, max-age=0' ); + } elseif ( $httpCode != 200 + || substr( $contentType, 0, 9 ) == 'text/html' + || substr( $text, 0, 5 ) == '<html' ) + { + # Error message, suppress cache + header( 'HTTP/1.1 500 Internal server error' ); + header( 'Cache-Control: no-cache' ); + } + + if ( !$contentType ) { + header( 'Content-Type:' ); + } else { + header( "Content-Type: $contentType" ); + } + + print $text; // thumb data or error text + + curl_close( $ch ); +} + +/** + * Get error message and content type for when the cURL response is an error. 
+ * + * @param $ch cURL handle + * @return Array (error html, content type) + */ +function wfCurlErrorText( $ch ) { + $contentType = 'text/html'; + $error = htmlspecialchars( curl_error( $ch ) ); + $text = <<<EOT +<html> +<head><title>Thumbnail error</title></head> +<body>Error retrieving thumbnail from scaling server: $error</body> +</html> +EOT; + return array( $text, $contentType ); +} + +/** + * Get error message and content type for when the cURL response is empty. + * + * @param $ch cURL handle + * @return Array (error html, content type) + */ +function wfCurlEmptyText( $ch ) { + $contentType = 'text/html'; + $error = htmlspecialchars( curl_error( $ch ) ); + $text = <<<EOT +<html> +<head><title>Thumbnail error</title></head> +<body>Error retrieving thumbnail from scaling server: empty response</body> +</html> +EOT; + return array( $text, $contentType ); +} + +# Entry point +wfHandleThumb404(); Property changes on: trunk/phase3/thumb-handler.php ___________________________________________________________________ Added: svn:eol-style + native Added: trunk/phase3/thumb.config.sample =================================================================== --- trunk/phase3/thumb.config.sample (rev 0) +++ trunk/phase3/thumb.config.sample 2011-10-23 09:36:35 UTC (rev 100535) @@ -0,0 +1,39 @@ +<?php +/** + * @cond file_level_code + * This is not a valid entry point, perform no further processing unless THUMB_HANDLER is defined + */ +if ( !defined( 'THUMB_HANDLER' ) ) { + echo "This file is part of MediaWiki and is not a valid entry point\n"; + die( 1 ); +} + +/** + * Sample configuration file for thumb-handler.php. + * In order to use thumb-handler.php: + * 1) Copy this file to thumb.config.php and modify the settings. + * 2) The webserver must be set up to have thumb-handler.php as a 404 handler. + * This can be done in apache by editing .htaccess in the /thumb directory by adding: + * ErrorDocument 404 /path/to/thumb-handler.php + */ + +# URL name of the server (e.g. 
"upload.wikipedia.org"). +$thgThumbServer = "http://localhost"; +# URL fragment after the server name to the thumb directory +$thgThumbFragment = "MW_trunk/images/thumb"; +# URL regex fragment corresponding to the directory hashing of thumbnails. +# This must correspond to $wgLocalFileRepo['hashLevels']. +$thgThumbHashFragment = '\w/\w\w/'; // 2-level directory hashing + +# The URL to thumb.php, accessible from the web server. +$thgThumbScriptPath = "http://localhost/MW_trunk/thumb.php"; + +# Timeout to use for cURL request to thumb.php. +# Leave it long enough to generate a ulimit timeout in ordinary +# cases, but short enough to avoid a local PHP timeout. +$thgThumbCurlTimeout = 53; +# Optional proxy server to use to access thumb.php +$thgThumbCurlProxy = null; // proxy to thumb.php + +# File path to a PHP file that gives a 404 error message +$thgThumb404File = "404.php"; _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs