MarkAHershberger has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/307882

Change subject: Script to import images using ForeignAPIRepo
......................................................................

Script to import images using ForeignAPIRepo

In some environments (e.g. web servers behind firewalls), InstantCommons
isn't available or isn't desired.  Meanwhile, templates are still
imported from Wikipedia and, without InstantCommons, there isn't an easy
way to import the files associated with those templates.

The script here will produce a list of wanted files and (as a separate
step outside the firewall if necessary) download those images into a
directory.  The script can also import them directly to the wiki in a
single step if desired.

This patch also includes a small adjustment to ForeignAPIRepo for DRY.

Change-Id: Ib3c2145be59c86b77bcec5fc8bcea657c6822c70
---
M includes/filerepo/ForeignAPIRepo.php
A maintenance/importForeignFileList.php
2 files changed, 195 insertions(+), 16 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/82/307882/1

diff --git a/includes/filerepo/ForeignAPIRepo.php 
b/includes/filerepo/ForeignAPIRepo.php
index 8619ba6..9f79700 100644
--- a/includes/filerepo/ForeignAPIRepo.php
+++ b/includes/filerepo/ForeignAPIRepo.php
@@ -253,6 +253,18 @@
                return $ret;
        }
 
+    /**
+     * Query the foreign API for the imageinfo of a single file.
+     *
+     * @param string $name File name; "File:" is prepended before querying
+     * @param int $width Sent as iiurlwidth
+     * @param int $height Sent as iiurlheight
+     * @param string $otherParams Sent as iiurlparam
+     * @param array $etc Extra query parameters; on a key clash these
+     *   override the defaults built here
+     * @return mixed Whatever fetchImageQuery() returns for the query
+     */
+    public function getImageData( $name, $width = -1, $height = -1, $otherParams = '', $etc = [] ) {
+        $query = [
+            'titles' => 'File:' . $name,
+            'iiprop' => self::getIIProps(),
+            'iiurlwidth' => $width,
+            'iiurlheight' => $height,
+            'iiurlparam' => $otherParams,
+            'prop' => 'imageinfo',
+        ];
+
+        return $this->fetchImageQuery( array_merge( $query, $etc ) );
+    }
+
        /**
         * @param string $name
         * @param int $width
@@ -263,13 +275,7 @@
         * @return bool
         */
        function getThumbUrl( $name, $width = -1, $height = -1, &$result = 
null, $otherParams = '' ) {
-               $data = $this->fetchImageQuery( [
-                       'titles' => 'File:' . $name,
-                       'iiprop' => self::getIIProps(),
-                       'iiurlwidth' => $width,
-                       'iiurlheight' => $height,
-                       'iiurlparam' => $otherParams,
-                       'prop' => 'imageinfo' ] );
+               $data = $this->getImageData( $name, $width, $height, 
$otherParams );
                $info = $this->getImageInfo( $data );
 
                if ( $data && $info && isset( $info['thumburl'] ) ) {
@@ -292,15 +298,7 @@
         * @since 1.22
         */
        function getThumbError( $name, $width = -1, $height = -1, $otherParams 
= '', $lang = null ) {
-               $data = $this->fetchImageQuery( [
-                       'titles' => 'File:' . $name,
-                       'iiprop' => self::getIIProps(),
-                       'iiurlwidth' => $width,
-                       'iiurlheight' => $height,
-                       'iiurlparam' => $otherParams,
-                       'prop' => 'imageinfo',
-                       'uselang' => $lang,
-               ] );
+               $data = $this->getImageData( $name, $width, $height, 
$otherParams, [ 'uselang' => $lang ] );
                $info = $this->getImageInfo( $data );
 
                if ( $data && $info && isset( $info['thumberror'] ) ) {
diff --git a/maintenance/importForeignFileList.php 
b/maintenance/importForeignFileList.php
new file mode 100644
index 0000000..2f55f57
--- /dev/null
+++ b/maintenance/importForeignFileList.php
@@ -0,0 +1,181 @@
+<?php
+/**
+ * Import a list of files from a foreign API repo
+ *
+ * @file
+ * @ingroup Maintenance
+ * @author Mark A. Hershberger
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+/**
+ * Maintenance script that fetches files from a foreign API repo.
+ *
+ * Depending on the options it either prints the list of file names,
+ * downloads the files into a directory (--downloadDir), or downloads them
+ * to a temporary file and publishes them into the local wiki.
+ */
+class ImportForeignFileList extends Maintenance {
+       /** @var ForeignAPIRepo Repo used to query the remote API; set by init() */
+       protected $repo;
+
+       /** @var bool True when --force was given */
+       protected $forceOverwrite = false;
+
+       /** @var bool True when --wantedFiles was given (only print names) */
+       protected $onlyShowWanted = false;
+
+       /** @var string|bool Download directory, or false to import into the wiki */
+       protected $dir = false;
+
+       /** @var int Number of files that could not be stored or imported */
+       protected $failed = 0;
+
+       /** @var string API endpoint used when --apiUrl is not given */
+       protected static $apiURL = 'https://commons.wikimedia.org/w/api.php';
+
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Import foreign file list";
+
+               $this->addOption( 'apiUrl', 'URL to retrieve from (' . self::$apiURL . ')',
+                       false, true, 'u' );
+               $this->addOption( 'wantedFiles', 'Only print out wanted files', false, false, 'w' );
+               $this->addOption( 'downloadDir', 'If provided, the files will only be downloaded to ' .
+                       'this directory and not imported.', false, true, 'd' );
+               $this->addOption( 'force', 'Force overwriting existing files.', false, false, 'f' );
+
+               $this->addArg( 'list', 'The file to read the wanted list from.  If ' .
+                       'not specified, uses the list from ' .
+                       '[[Special:WantedFiles]].', false, true );
+       }
+
+       /**
+        * Walk the file list and print, download or import every entry.
+        */
+       public function execute() {
+               $this->init();
+               foreach ( $this->getFileList() as $item ) {
+                       $file = $this->getFileName( $item );
+                       // Validate first: every later step needs a usable name.
+                       if ( $file === false ) {
+                               $shown = is_scalar( $item ) ? $item : gettype( $item );
+                               $this->output( "Problem turning $shown into proper filename\n" );
+                               continue;
+                       }
+                       if ( $this->onlyShowWanted ) {
+                               $this->output( "$file\n" );
+                               continue;
+                       }
+                       if ( $this->fileAlreadyExists( $file ) ) {
+                               $this->output( "File exists, not overwriting without --force: $file\n" );
+                               continue;
+                       }
+
+                       $this->output( "Trying to retrieve: $file ... " );
+                       $info = $this->getImageInfo( $file );
+                       $fileName = $this->fetchFile( $file, $info );
+                       if ( !$fileName ) {
+                               // Covers a missing remote file, a failed HTTP request and a
+                               // failed local write alike.
+                               $this->output( "Could not retrieve, skipping.\n" );
+                               continue;
+                       }
+
+                       $this->output( "success. " );
+                       if ( $this->importFile( $file, $fileName ) ) {
+                               $this->output( "Successfully imported." );
+                       }
+                       $this->output( "\n" );
+               }
+
+               if ( $this->failed > 0 ) {
+                       $this->output( "{$this->failed} file(s) could not be stored or imported.\n" );
+               }
+       }
+
+       /**
+        * Read the command line options and set up the foreign repo.
+        *
+        * Exits through error() when --downloadDir is given but is not a
+        * writable directory.
+        */
+       public function init() {
+               global $wgShowExceptionDetails, $wgForeignFileRepos, $wgUploadDirectory;
+               $wgShowExceptionDetails = true;
+
+               $this->failed = 0;
+               $this->onlyShowWanted = (bool)$this->getOption( 'wantedFiles' );
+               $this->forceOverwrite = (bool)$this->getOption( 'force' );
+
+               // Normalise "not given" and empty values to a real false, because
+               // other methods compare $this->dir with === false.
+               $dir = $this->getOption( 'downloadDir' );
+               $this->dir = $dir ? $dir : false;
+               // Files get written into this directory, so it must be writable.
+               if ( $this->dir !== false && !( is_dir( $this->dir ) && is_writable( $this->dir ) ) ) {
+                       $this->error( 'Directory (' . $this->dir . ') cannot be used. ' .
+                               'Make sure it exists and is writable.', 1 );
+               }
+
+               // NOTE(review): presumably registered so that the '_thisrepo-backend'
+               // backend named below can be resolved -- confirm.
+               $wgForeignFileRepos[] = [
+                       'class' => 'ForeignAPIRepo',
+                       'name' => '_thisrepo',
+                       'backend' => '_thisrepo-backend',
+                       'hashLevels' => 2,
+                       'directory' => $wgUploadDirectory, // b/c (copied from Setup.php)
+               ];
+               $this->repo = new \ForeignAPIRepo(
+                       [
+                               'name' => 'iter',
+                               'apibase' => $this->getOption( 'apiUrl', self::$apiURL ),
+                               'backend' => '_thisrepo-backend'
+                       ]
+               );
+       }
+
+       /**
+        * Get the entries to process.
+        *
+        * @return array|Traversable Non-empty, trimmed lines of the list file
+        *   when one was given as first argument, otherwise the result of the
+        *   WantedFilesPage query.
+        */
+       public function getFileList() {
+               $file = $this->getArg( 0 );
+               if ( $file ) {
+                       // An explicitly named list must be usable; silently switching
+                       // to Special:WantedFiles would process the wrong set of files.
+                       if ( !is_readable( $file ) ) {
+                               $this->error( "Cannot read file list: $file", 1 );
+                       }
+                       $lines = array_map( 'trim', explode( "\n", file_get_contents( $file ) ) );
+                       // Drop blank lines (e.g. the one after a trailing newline).
+                       return array_values( array_filter( $lines, 'strlen' ) );
+               }
+
+               $files = new \WantedFilesPage();
+               return $files->reallyDoQuery( false );
+       }
+
+       /**
+        * Fetch the imageinfo of a file from the foreign repo.
+        *
+        * @param string $name File name without the "File:" prefix
+        * @return mixed Result of ForeignAPIRepo::getImageInfo() for the query
+        */
+       public function getImageInfo( $name ) {
+               $data = $this->repo->getImageData( $name );
+               return $this->repo->getImageInfo( $data );
+       }
+
+       /**
+        * Download the file described by $info.
+        *
+        * The content is written to "<downloadDir>/<name>" when a download
+        * directory is set, otherwise to a new temporary file.
+        *
+        * @param string $name File name without the "File:" prefix
+        * @param array|bool $info Image info; only the 'url' key is used
+        * @return string|bool Path of the written file, false on failure
+        */
+       public function fetchFile( $name, $info ) {
+               if ( !isset( $info['url'] ) ) {
+                       return false;
+               }
+               $content = Http::get( $info['url'], [ 'userAgent' => 'MW importImages cli' ] );
+               if ( $content === false ) {
+                       return false;
+               }
+
+               $useTemp = ( $this->dir === false );
+               $file = $useTemp ? tempnam( wfTempDir(), "import" ) : $this->dir . "/$name";
+               if ( $file === false || file_put_contents( $file, $content ) === false ) {
+                       if ( $useTemp && $file !== false && file_exists( $file ) ) {
+                               // Do not leave an empty temporary file behind.
+                               unlink( $file );
+                       }
+                       $this->error( "Could not write downloaded file for $name." );
+                       $this->failed++;
+                       return false;
+               }
+               return $file;
+       }
+
+       /**
+        * Publish a downloaded temporary file into the local wiki and record
+        * the upload. The temporary file is removed afterwards.
+        *
+        * Does nothing in download-only mode (a download directory is set).
+        *
+        * @param string $file File name without the "File:" prefix
+        * @param string $fileName Path of the downloaded file
+        * @return bool True when the file was published and recorded
+        */
+       public function importFile( $file, $fileName ) {
+               if ( $this->dir !== false || !file_exists( $fileName ) ) {
+                       return false;
+               }
+
+               $imported = false;
+               $image = wfLocalFile( $file );
+               if ( !$image ) {
+                       $this->error( "failed. (Could not create a local file object for $file)" );
+                       $this->failed++;
+               } else {
+                       // Copied from importImages.php
+                       $props = FSFile::getPropsFromPath( $fileName );
+                       $flags = 0;
+                       $handler = MediaHandler::getHandler( $props['mime'] );
+                       $publishOptions = [
+                               'headers' => $handler ? $handler->getStreamHeaders( $props['metadata'] ) : []
+                       ];
+                       $archive = $image->publish( $fileName, $flags, $publishOptions );
+                       if ( !$archive->isGood() ) {
+                               $this->error( "failed. (" . $archive->getWikiText( false, false, 'en' ) . ")" );
+                               $this->failed++;
+                       } else {
+                               $summary = $commentText = "Imported via MW importImages CLI";
+                               $timestamp = false;
+                               $recorded = $image->recordUpload2(
+                                       $archive->value,
+                                       $summary,
+                                       $commentText,
+                                       $props,
+                                       $timestamp
+                               );
+                               // Accept both a plain boolean and a Status object as result.
+                               $imported = $recorded instanceof Status ? $recorded->isOK() : (bool)$recorded;
+                               if ( !$imported ) {
+                                       $this->error( "failed. (Could not record upload of $file)" );
+                                       $this->failed++;
+                               }
+                       }
+               }
+               unlink( $fileName );
+               return $imported;
+       }
+
+       /**
+        * Turn a list entry into a bare file name.
+        *
+        * @param string|object $file A name, optionally prefixed with "File:",
+        *   or a row object with a "title" property
+        * @return string|bool Name without the "File:" prefix, or false when
+        *   no non-empty name can be derived
+        */
+       public function getFileName( $file ) {
+               if ( is_object( $file ) ) {
+                       $file = isset( $file->title ) ? $file->title : false;
+               }
+               if ( !is_string( $file ) ) {
+                       return false;
+               }
+
+               $file = trim( $file );
+               if ( substr( $file, 0, 5 ) === "File:" ) {
+                       $file = substr( $file, 5 );
+               }
+
+               return $file === '' ? false : $file;
+       }
+
+       /**
+        * Check whether processing $file would overwrite an existing file.
+        *
+        * Always false with --force. Otherwise looks in the download
+        * directory when one is set, else at the local wiki file.
+        *
+        * @param string $file File name without the "File:" prefix
+        * @return bool
+        */
+       public function fileAlreadyExists( $file ) {
+               if ( $this->forceOverwrite ) {
+                       return false;
+               }
+               if ( $this->dir ) {
+                       return file_exists( $this->dir . "/$file" );
+               }
+
+               $image = wfLocalFile( $file );
+               return $image ? $image->exists() : false;
+       }
+
+}
+
+$maintClass = "ImportForeignFileList";
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/307882
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib3c2145be59c86b77bcec5fc8bcea657c6822c70
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: MarkAHershberger <m...@nichework.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to