Andrew-WMDE has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/382730 )
Change subject: [WIP] Support continuation in ApiDetailRetriever
......................................................................
[WIP] Support continuation in ApiDetailRetriever
Bug: T161014
Change-Id: I1781eda903f47fee3281aba2c3637ce01f180ba5
---
M src/Remote/MediaWiki/ApiDetailRetriever.php
1 file changed, 103 insertions(+), 33 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/FileImporter refs/changes/30/382730/1
diff --git a/src/Remote/MediaWiki/ApiDetailRetriever.php b/src/Remote/MediaWiki/ApiDetailRetriever.php
index ec7002e..9d86b55 100644
--- a/src/Remote/MediaWiki/ApiDetailRetriever.php
+++ b/src/Remote/MediaWiki/ApiDetailRetriever.php
@@ -77,15 +77,14 @@
}
/**
- * @param SourceUrl $sourceUrl
+ * @param string $apiUrl
+ * @param array $params
*
- * @return ImportDetails
+ * @return array
* @throws ImportException
*/
- public function getImportDetails( SourceUrl $sourceUrl ) {
- $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
-
- $requestUrl = $apiUrl . '?' . http_build_query(
$this->getParams( $sourceUrl ) );
+ private function sendAPIRequest( $apiUrl, $params ) {
+ $requestUrl = $apiUrl . '?' . http_build_query( $params );
try {
$imageInfoRequest =
$this->httpRequestExecutor->execute( $requestUrl );
} catch ( HttpRequestException $e ) {
@@ -94,18 +93,20 @@
);
}
$requestData = json_decode( $imageInfoRequest->getContent(),
true );
+ return [ $requestUrl, $requestData ];
+ }
- if ( array_key_exists( 'continue', $requestData ) ) {
- $this->logger->warning(
- 'API returned continue data',
- [
- 'sourceUrl' => $sourceUrl->getUrl(),
- 'requestUrl' => $requestUrl,
- ]
- );
- // TODO support continuation
- throw new LocalizedImportException(
'fileimporter-api-toomanyrevisions' );
- }
+ /**
+ * @param SourceUrl $sourceUrl
+ *
+ * @return ImportDetails
+ * @throws ImportException
+ */
+ public function getImportDetails( SourceUrl $sourceUrl ) {
+ $apiUrl = $this->httpApiLookup->getApiUrl( $sourceUrl );
+ $params = $this->getParams( $sourceUrl );
+
+ list($requestUrl, $requestData) = $this->sendAPIRequest(
$apiUrl, $params );
if ( count( $requestData['query']['pages'] ) !== 1 ) {
$this->logger->warning(
@@ -145,6 +146,48 @@
]
);
throw new LocalizedImportException(
'fileimporter-api-badinfo' );
+ }
+
+ $maxRev = 1000;
+ $textRevCount = count( $pageInfoData['revisions'] );
+ $fileRevCount = count( $pageInfoData['imageinfo'] );
+
+ while ( array_key_exists( 'continue', $requestData ) ) {
+ $rvContinue = array_key_exists( 'rvcontinue',
$requestData['continue'] ) ?
+ $requestData['continue']['rvcontinue'] : null;
+
+ $iiContinue = array_key_exists( 'iistart',
$requestData['continue'] ) ?
+ $requestData['continue']['iistart'] : null;
+
+ $params = $this->getParams( $sourceUrl, [ $rvContinue,
$iiContinue ] );
+
+ list( $requestUrl, $requestData ) =
$this->sendAPIRequest( $apiUrl, $params );
+
+ $newPageInfoData = array_pop(
$requestData['query']['pages'] );
+
+ if ( array_key_exists( 'revisions', $newPageInfoData )
) {
+ $textRevCount += count(
$newPageInfoData['revisions'] );
+ $pageInfoData['revisions'] =
+ array_merge(
$pageInfoData['revisions'], $newPageInfoData['revisions'] );
+ }
+
+ if ( array_key_exists( 'imageinfo', $newPageInfoData )
) {
+ $fileRevCount += count(
$newPageInfoData['imageinfo'] );
+ $pageInfoData['imageinfo'] =
+ array_merge(
$pageInfoData['imageinfo'], $newPageInfoData['imageinfo'] );
+ }
+
+ if ( $textRevCount > $maxRev || $fileRevCount > $maxRev
) {
+ $this->logger->warning(
+ 'Too many revisions were being fetched',
+ [
+ 'sourceUrl' =>
$sourceUrl->getUrl(),
+ 'requestUrl' => $requestUrl,
+ ]
+ );
+
+ throw new LocalizedImportException(
'fileimporter-api-toomanyrevisions' );
+ }
}
$imageInfoData = $pageInfoData['imageinfo'];
@@ -260,14 +303,31 @@
return new TextRevisions( $revisions );
}
- private function getParams( SourceUrl $sourceUrl ) {
- return [
+ private function getParams( SourceUrl $sourceUrl, $params=null ) {
+ $base = [
'action' => 'query',
'format' => 'json',
- 'prop' => 'imageinfo|revisions',
'titles' => $this->getTitleFromSourceUrl( $sourceUrl ),
- 'iilimit' => '500',
+ ];
+
+ $rv = [
'rvlimit' => '500',
+ 'rvprop' => implode(
+ '|',
+ [
+ 'flags',
+ 'timestamp',
+ 'user',
+ 'sha1',
+ 'contentmodel',
+ 'comment',
+ 'content',
+ ]
+ ),
+ ];
+
+ $ii = [
+ 'iilimit' => '500',
'iiurlwidth' => '800',
'iiurlheight' => '400',
'iiprop' => implode(
@@ -283,19 +343,29 @@
'sha1',
]
),
- 'rvprop' => implode(
- '|',
- [
- 'flags',
- 'timestamp',
- 'user',
- 'sha1',
- 'contentmodel',
- 'comment',
- 'content',
- ]
- ),
];
+
+ if ( !$params ) {
+ return $base + [ 'prop' => 'imageinfo|revisions', ] +
$ii + $rv;
+ }
+
+ list( $rvContinue, $iiContinue ) = $params;
+
+ $base += [ 'prop' => '', ];
+
+ if ( $rvContinue ) {
+ $base['prop'] = "revisions";
+ $base['rvcontinue'] = $rvContinue;
+ $base += $rv;
+ }
+
+ if ( $iiContinue ) {
+ $base['prop'] .= ( $base['prop'] ) ? "|imageinfo" :
"imageinfo";
+ $base['iistart'] = $iiContinue;
+ $base += $ii;
+ }
+
+ return $base;
}
}
--
To view, visit https://gerrit.wikimedia.org/r/382730
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I1781eda903f47fee3281aba2c3637ce01f180ba5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/FileImporter
Gerrit-Branch: master
Gerrit-Owner: Andrew-WMDE <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits