Andrew-WMDE has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/382730 )

Change subject: [WIP] Support continuation in ApiDetailRetriever
......................................................................

[WIP] Support continuation in ApiDetailRetriever

Bug: T161014
Change-Id: I1781eda903f47fee3281aba2c3637ce01f180ba5
---
M src/Remote/MediaWiki/ApiDetailRetriever.php
1 file changed, 103 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/FileImporter refs/changes/30/382730/1

diff --git a/src/Remote/MediaWiki/ApiDetailRetriever.php 
b/src/Remote/MediaWiki/ApiDetailRetriever.php
index ec7002e..9d86b55 100644
--- a/src/Remote/MediaWiki/ApiDetailRetriever.php
+++ b/src/Remote/MediaWiki/ApiDetailRetriever.php
@@ -77,15 +77,14 @@
        }
 
        /**
-        * @param SourceUrl $sourceUrl
+        * @param string $apiUrl
+        * @param array $params
         *
-        * @return ImportDetails
+        * @return array
         * @throws ImportException
         */
-       public function getImportDetails( SourceUrl $sourceUrl ) {
-               $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
-
-               $requestUrl = $apiUrl . '?' . http_build_query( 
$this->getParams( $sourceUrl ) );
+       private function sendAPIRequest( $apiUrl, $params ) {
+               $requestUrl = $apiUrl . '?' . http_build_query( $params );
                try {
                        $imageInfoRequest = 
$this->httpRequestExecutor->execute( $requestUrl );
                } catch ( HttpRequestException $e ) {
@@ -94,18 +93,20 @@
                        );
                }
                $requestData = json_decode( $imageInfoRequest->getContent(), 
true );
+               return [ $requestUrl, $requestData ];
+       }
 
-               if ( array_key_exists( 'continue', $requestData ) ) {
-                       $this->logger->warning(
-                               'API returned continue data',
-                               [
-                                       'sourceUrl' => $sourceUrl->getUrl(),
-                                       'requestUrl' => $requestUrl,
-                               ]
-                       );
-                       // TODO support continuation
-                       throw new LocalizedImportException( 
'fileimporter-api-toomanyrevisions' );
-               }
+       /**
+        * @param SourceUrl $sourceUrl
+        *
+        * @return ImportDetails
+        * @throws ImportException
+        */
+       public function getImportDetails( SourceUrl $sourceUrl ) {
+               $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
+               $params = $this->getParams( $sourceUrl );
+
+               list($requestUrl, $requestData) = $this->sendAPIRequest( 
$apiUrl, $params );
 
                if ( count( $requestData['query']['pages'] ) !== 1 ) {
                        $this->logger->warning(
@@ -145,6 +146,48 @@
                                ]
                        );
                        throw new LocalizedImportException( 
'fileimporter-api-badinfo' );
+               }
+
+               $maxRev = 1000;
+               $textRevCount = count( $pageInfoData['revisions'] );
+               $fileRevCount = count( $pageInfoData['imageinfo'] );
+
+               while ( array_key_exists( 'continue', $requestData ) ) {
+                       $rvContinue = array_key_exists( 'rvcontinue', 
$requestData['continue'] ) ?
+                               $requestData['continue']['rvcontinue'] : null;
+
+                       $iiContinue = array_key_exists( 'iistart', 
$requestData['continue'] ) ?
+                               $requestData['continue']['iistart'] : null;
+
+                       $params = $this->getParams( $sourceUrl, [ $rvContinue, 
$iiContinue ] );
+
+                       list( $requestUrl, $requestData ) = 
$this->sendAPIRequest( $apiUrl, $params );
+
+                       $newPageInfoData = array_pop( 
$requestData['query']['pages'] );
+
+                       if ( array_key_exists( 'revisions', $newPageInfoData ) 
) {
+                               $textRevCount += count( 
$newPageInfoData['revisions'] );
+                               $pageInfoData['revisions'] =
+                                       array_merge( 
$pageInfoData['revisions'], $newPageInfoData['revisions'] );
+                       }
+
+                       if ( array_key_exists( 'imageinfo', $newPageInfoData ) 
) {
+                               $fileRevCount += count( 
$newPageInfoData['imageinfo'] );
+                               $pageInfoData['imageinfo'] =
+                                       array_merge( 
$pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] );
+                       }
+
+                       if ( $textRevCount > $maxRev || $fileRevCount > $maxRev 
) {
+                               $this->logger->warning(
+                                       'Too many revisions were being fetched',
+                                       [
+                                               'sourceUrl' => 
$sourceUrl->getUrl(),
+                                               'requestUrl' => $requestUrl,
+                                       ]
+                               );
+
+                               throw new LocalizedImportException( 
'fileimporter-api-toomanyrevisions' );
+                       }
                }
 
                $imageInfoData = $pageInfoData['imageinfo'];
@@ -260,14 +303,31 @@
                return new TextRevisions( $revisions );
        }
 
-       private function getParams( SourceUrl $sourceUrl ) {
-               return [
+       private function getParams( SourceUrl $sourceUrl, $params=null ) {
+               $base = [
                        'action' => 'query',
                        'format' => 'json',
-                       'prop' => 'imageinfo|revisions',
                        'titles' => $this->getTitleFromSourceUrl( $sourceUrl ),
-                       'iilimit' => '500',
+               ];
+
+               $rv = [
                        'rvlimit' => '500',
+                       'rvprop' => implode(
+                               '|',
+                               [
+                                       'flags',
+                                       'timestamp',
+                                       'user',
+                                       'sha1',
+                                       'contentmodel',
+                                       'comment',
+                                       'content',
+                               ]
+                       ),
+               ];
+
+               $ii = [
+                       'iilimit' => '500',
                        'iiurlwidth' => '800',
                        'iiurlheight' => '400',
                        'iiprop' => implode(
@@ -283,19 +343,29 @@
                                        'sha1',
                                ]
                        ),
-                       'rvprop' => implode(
-                               '|',
-                               [
-                                       'flags',
-                                       'timestamp',
-                                       'user',
-                                       'sha1',
-                                       'contentmodel',
-                                       'comment',
-                                       'content',
-                               ]
-                       ),
                ];
+
+               if ( !$params ) {
+                       return $base + [ 'prop' => 'imageinfo|revisions', ] + 
$ii + $rv;
+               }
+
+               list( $rvContinue, $iiContinue ) = $params;
+
+               $base += [ 'prop' => '', ];
+
+               if ( $rvContinue ) {
+                       $base['prop'] = "revisions";
+                       $base['rvcontinue'] = $rvContinue;
+                       $base += $rv;
+               }
+
+               if ( $iiContinue ) {
+                       $base['prop'] .= ( $base['prop'] ) ? "|imageinfo" : 
"imageinfo";
+                       $base['iistart'] = $iiContinue;
+                       $base += $ii;
+               }
+
+               return $base;
        }
 
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/382730
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I1781eda903f47fee3281aba2c3637ce01f180ba5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/FileImporter
Gerrit-Branch: master
Gerrit-Owner: Andrew-WMDE <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to