MarkAHershberger has uploaded a new change for review. https://gerrit.wikimedia.org/r/307882
Change subject: Script to import images using ForeignAPIRepo ...................................................................... Script to import images using ForeignAPIRepo In some environments (e.g. web servers behind firewalls), InstantCommons isn't available or isn't desired. Meanwhile, templates are still imported from Wikipedia and, without InstantCommons, there isn't an easy way to import the files associated with those templates. The script here will produce a list of wanted files and (as a separate step outside the firewall if necessary) download those images into a directory. The script can also import them directly to the wiki in a single step if desired. This patch also includes a small adjustment to ForeignAPIRepo for DRY. Change-Id: Ib3c2145be59c86b77bcec5fc8bcea657c6822c70 --- M includes/filerepo/ForeignAPIRepo.php A maintenance/importForeignFileList.php 2 files changed, 195 insertions(+), 16 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/82/307882/1 diff --git a/includes/filerepo/ForeignAPIRepo.php b/includes/filerepo/ForeignAPIRepo.php index 8619ba6..9f79700 100644 --- a/includes/filerepo/ForeignAPIRepo.php +++ b/includes/filerepo/ForeignAPIRepo.php @@ -253,6 +253,18 @@ return $ret; } + public function getImageData( $name, $width = -1, $height = -1, $otherParams = '', $etc = [] ) { + return $this->fetchImageQuery( + array_merge( [ + 'titles' => 'File:' . $name, + 'iiprop' => self::getIIProps(), + 'iiurlwidth' => $width, + 'iiurlheight' => $height, + 'iiurlparam' => $otherParams, + 'prop' => 'imageinfo' + ], $etc ) ); + } + /** * @param string $name * @param int $width @@ -263,13 +275,7 @@ * @return bool */ function getThumbUrl( $name, $width = -1, $height = -1, &$result = null, $otherParams = '' ) { - $data = $this->fetchImageQuery( [ - 'titles' => 'File:' . 
$name, - 'iiprop' => self::getIIProps(), - 'iiurlwidth' => $width, - 'iiurlheight' => $height, - 'iiurlparam' => $otherParams, - 'prop' => 'imageinfo' ] ); + $data = $this->getImageData( $name, $width, $height, $otherParams ); $info = $this->getImageInfo( $data ); if ( $data && $info && isset( $info['thumburl'] ) ) { @@ -292,15 +298,7 @@ * @since 1.22 */ function getThumbError( $name, $width = -1, $height = -1, $otherParams = '', $lang = null ) { - $data = $this->fetchImageQuery( [ - 'titles' => 'File:' . $name, - 'iiprop' => self::getIIProps(), - 'iiurlwidth' => $width, - 'iiurlheight' => $height, - 'iiurlparam' => $otherParams, - 'prop' => 'imageinfo', - 'uselang' => $lang, - ] ); + $data = $this->getImageData( $name, $width, $height, $otherParams, [ 'uselang' => $lang ] ); $info = $this->getImageInfo( $data ); if ( $data && $info && isset( $info['thumberror'] ) ) { diff --git a/maintenance/importForeignFileList.php b/maintenance/importForeignFileList.php new file mode 100644 index 0000000..2f55f57 --- /dev/null +++ b/maintenance/importForeignFileList.php @@ -0,0 +1,181 @@ +<?php +/** + * Import a list of files from a foreigh api repo + * + * @file + * @ingroup Maintenance + * @author Mark A. Hershberger + */ + +require_once __DIR__ . '/Maintenance.php'; +class ImportForeignFileList extends Maintenance { + protected $repo; + protected $forceOverwrite; + protected $onlyShowWanted = false; + + static protected $apiURL = 'https://commons.wikimedia.org/w/api.php'; + + public function __construct() { + parent::__construct(); + $this->mDescription = "Import foreign file list"; + + $this->addOption( 'apiUrl', 'URL to retrieve from (' .self::$apiURL. ')', + false, true, 'u' ); + $this->addOption( 'wantedFiles', 'Only print out wanted files', false, false, 'w' ); + $this->addOption( 'downloadDir', 'If provided, the files will only be downloaded to ' . 
+ 'this directory and not imported.', false, true, 'd' ); + $this->addOption( 'force', 'Force overwriting existing files.', false, false, 'f' ); + + $this->addArg( 'list', 'The file to read the wanted list from. If ' . + 'not specified, uses the list from ' . + '[[Special:WantedFiles]].', false, true ); + } + + public function execute() { + $this->init(); + foreach ( $this->getFileList() as $item ) { + $file = $this->getFileName( $item ); + if ( $this->onlyShowWanted ) { + $this->output( "$file\n" ); + } else if ( $this->fileAlreadyExists( $file ) ) { + $this->output( "File exists, not overwriting without --force: $file\n" ); + } else if ( $file !== false ) { + $this->output( "Trying to retrieve: $file ... " ); + $info = $this->getImageInfo( $file ); + $fileName = $this->fetchFile( $file, $info ); + if ( $fileName ) { + $this->output( "success. " ); + if ( $this->importFile( $file, $fileName ) ) { + $this->output( "Successfully imported." ); + } + $this->output( "\n" ); + } else { + $this->output( "Not found on remote, skipping.\n" ); + } + } else { + $this->output( "Problem turning $item into proper filename" ); + } + } + } + + public function init() { + global $wgShowExceptionDetails, $wgForeignFileRepos, $wgUploadDirectory; + $wgShowExceptionDetails = true; + + $this->failed = 0; + $this->dir = $this->getOption( 'downloadDir' ); + $this->onlyShowWanted = $this->getOption( 'wantedFiles' ); + $this->forceOverwrite = $this->getOption( 'force' ) ? true : false; + if ( $this->dir && ! ( is_dir( $this->dir ) && is_readable( $this->dir ) ) ) { + $this->error( 'Directory (' . $this->dir . ') cannot be used. ' . + 'Make sure it exists and is writable.', 1 ); + } else if ( !$this->dir ) { + $this->dir = false; // php would eval null as false, so + // make sure we really have false. 
+ } + $wgForeignFileRepos[] = [ + 'class' => 'ForeignAPIRepo', + 'name' => '_thisrepo', + 'backend' => '_thisrepo-backend', + 'hashLevels' => 2, + 'directory' => $wgUploadDirectory, // b/c (copied from Setup.php) + ]; + $this->repo = new \ForeignAPIRepo( + [ 'name' => 'iter', + 'apibase' => $this->getOption( 'apiUrl', self::$apiURL ), + 'backend' => '_thisrepo-backend' + ] + ); + } + + public function getFileList() { + $file = $this->getArg( 0 ); + if ( $file && is_readable( $file ) ) { + return explode("\n", file_get_contents( $file ) ); + } + + $files = new \WantedFilesPage(); + return $files->reallyDoQuery( false ); + } + + + public function getImageInfo( $name ) { + $data = $this->repo->getImageData( $name ); + return $this->repo->getImageInfo( $data ); + } + + public function fetchFile( $name, $info ) { + if ( isset( $info['url'] ) ) { + $content = Http::get( $info['url'], [ 'userAgent' => 'MW importImages cli' ] ); + if ( $content !== false ) { + $file = $this->dir . "/$name"; + if ( $this->dir === false ) { + $file = tempnam( wfTempDir(), "import" ); + } + $fh = fopen( $file, "w" ); + fwrite( $fh, $content ); + fclose( $fh ); + return $file; + } + } + return false; + } + + public function importFile( $file, $fileName ) { + if ( file_exists( $fileName ) && $this->dir === false ) { + // Copied from importImages.php + $props = FSFile::getPropsFromPath( $fileName ); + $flags = 0; + $publishOptions = []; + $handler = MediaHandler::getHandler( $props['mime'] ); + if ( $handler ) { + $publishOptions['headers'] = $handler->getStreamHeaders( $props['metadata'] ); + } else { + $publishOptions['headers'] = []; + } + $image = wfLocalFile( $file ); + $archive = $image->publish( $fileName, $flags, $publishOptions ); + if ( !$archive->isGood() ) { + $this->error( "failed. (" . $archive->getWikiText( false, false, 'en' ) . 
")" ); + $this->failed++; + } else { + $summary = $commentText = "Imported via MW importImages CLI"; + $timestamp = false; + $image->recordUpload2( + $archive->value, + $summary, + $commentText, + $props, + $timestamp + ); + } + unlink( $fileName ); + } + return false; + } + + public function getFileName( $file ) { + if ( is_object( $file ) ) { + $file = $file->title; + } + if ( substr( $file, 0, 5 ) === "File:" ) { + $file = substr( $file, 5 ); + } + + return $file; + } + + public function fileAlreadyExists( $file ) { + if ( $this->dir && file_exists( $this->dir . "/$file" ) && !$this->forceOverwrite ) { + return true; + } else if ( !$this->dir ) { + $image = wfLocalFile( $file ); + return $image->exists(); + } + return false; + } + +} + +$maintClass = "ImportForeignFileList"; +require_once RUN_MAINTENANCE_IF_MAIN; -- To view, visit https://gerrit.wikimedia.org/r/307882 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib3c2145be59c86b77bcec5fc8bcea657c6822c70 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: MarkAHershberger <m...@nichework.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits