EBernhardson has uploaded a new change for review. https://gerrit.wikimedia.org/r/192744
Change subject: Convert LQT import to one class per file ...................................................................... Convert LQT import to one class per file Change-Id: Iae5f6ecac74b4a37abf14fe32824b34c9c92b655 --- M autoload.php A includes/Import/LiquidThreadsApi/ApiBackend.php A includes/Import/LiquidThreadsApi/CachedApiData.php M includes/Import/LiquidThreadsApi/CachedData.php A includes/Import/LiquidThreadsApi/CachedPageData.php A includes/Import/LiquidThreadsApi/CachedThreadData.php A includes/Import/LiquidThreadsApi/ImportHeader.php A includes/Import/LiquidThreadsApi/ImportPost.php A includes/Import/LiquidThreadsApi/ImportRevision.php A includes/Import/LiquidThreadsApi/ImportSource.php A includes/Import/LiquidThreadsApi/ImportSummary.php A includes/Import/LiquidThreadsApi/ImportTopic.php D includes/Import/LiquidThreadsApi/Iterators.php A includes/Import/LiquidThreadsApi/LocalApiBackend.php A includes/Import/LiquidThreadsApi/MovedImportPost.php A includes/Import/LiquidThreadsApi/MovedImportRevision.php A includes/Import/LiquidThreadsApi/MovedImportTopic.php D includes/Import/LiquidThreadsApi/Objects.php A includes/Import/LiquidThreadsApi/PageRevisionedObject.php A includes/Import/LiquidThreadsApi/RemoteApiBackend.php A includes/Import/LiquidThreadsApi/ReplyIterator.php A includes/Import/LiquidThreadsApi/RevisionIterator.php A includes/Import/LiquidThreadsApi/ScriptedImportRevision.php D includes/Import/LiquidThreadsApi/Source.php A includes/Import/LiquidThreadsApi/TopicIterator.php 25 files changed, 1,480 insertions(+), 1,225 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow refs/changes/44/192744/1 diff --git a/autoload.php b/autoload.php index 7170dde..ca19f68 100644 --- a/autoload.php +++ b/autoload.php @@ -24,7 +24,7 @@ 'ApiFlowViewTopicSummary' => __DIR__ . '/includes/api/ApiFlowViewTopicSummary.php', 'ApiParsoidUtilsFlow' => __DIR__ . '/includes/api/ApiParsoidUtilsFlow.php', 'ApiQueryPropFlowInfo' => __DIR__ . 
'/includes/api/ApiQueryPropFlowInfo.php', - 'ComposerAutoloaderInit6dec29097855247b3d86a90060e1e364' => __DIR__ . '/vendor/composer/autoload_real.php', + 'ComposerAutoloaderInit43ee8fafd8706e266eea3204b1f3f979' => __DIR__ . '/vendor/composer/autoload_real.php', 'Composer\\Autoload\\ClassLoader' => __DIR__ . '/vendor/composer/ClassLoader.php', 'FlowHooks' => __DIR__ . '/Hooks.php', 'Flow\\Actions\\CompareHeaderRevisionsAction' => __DIR__ . '/includes/Actions/CompareHeaderRevisionsAction.php', @@ -188,29 +188,29 @@ 'Flow\\Import\\ImportSourceStore' => __DIR__ . '/includes/Import/ImportSourceStore.php', 'Flow\\Import\\ImportSourceStoreException' => __DIR__ . '/includes/Import/Exception.php', 'Flow\\Import\\Importer' => __DIR__ . '/includes/Import/Importer.php', - 'Flow\\Import\\LiquidThreadsApi\\ApiBackend' => __DIR__ . '/includes/Import/LiquidThreadsApi/Source.php', + 'Flow\\Import\\LiquidThreadsApi\\ApiBackend' => __DIR__ . '/includes/Import/LiquidThreadsApi/ApiBackend.php', 'Flow\\Import\\LiquidThreadsApi\\ApiNotFoundException' => __DIR__ . '/includes/Import/LiquidThreadsApi/Exception.php', - 'Flow\\Import\\LiquidThreadsApi\\CachedApiData' => __DIR__ . '/includes/Import/LiquidThreadsApi/CachedData.php', + 'Flow\\Import\\LiquidThreadsApi\\CachedApiData' => __DIR__ . '/includes/Import/LiquidThreadsApi/CachedApiData.php', 'Flow\\Import\\LiquidThreadsApi\\CachedData' => __DIR__ . '/includes/Import/LiquidThreadsApi/CachedData.php', - 'Flow\\Import\\LiquidThreadsApi\\CachedPageData' => __DIR__ . '/includes/Import/LiquidThreadsApi/CachedData.php', - 'Flow\\Import\\LiquidThreadsApi\\CachedThreadData' => __DIR__ . '/includes/Import/LiquidThreadsApi/CachedData.php', + 'Flow\\Import\\LiquidThreadsApi\\CachedPageData' => __DIR__ . '/includes/Import/LiquidThreadsApi/CachedPageData.php', + 'Flow\\Import\\LiquidThreadsApi\\CachedThreadData' => __DIR__ . '/includes/Import/LiquidThreadsApi/CachedThreadData.php', 'Flow\\Import\\LiquidThreadsApi\\ConversionStrategy' => __DIR__ . 
'/includes/Import/LiquidThreadsApi/ConversionStrategy.php', - 'Flow\\Import\\LiquidThreadsApi\\ImportHeader' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\ImportPost' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\ImportRevision' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\ImportSource' => __DIR__ . '/includes/Import/LiquidThreadsApi/Source.php', - 'Flow\\Import\\LiquidThreadsApi\\ImportSummary' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\ImportTopic' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\LocalApiBackend' => __DIR__ . '/includes/Import/LiquidThreadsApi/Source.php', - 'Flow\\Import\\LiquidThreadsApi\\MovedImportPost' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\MovedImportRevision' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\MovedImportTopic' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\PageRevisionedObject' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\RemoteApiBackend' => __DIR__ . '/includes/Import/LiquidThreadsApi/Source.php', - 'Flow\\Import\\LiquidThreadsApi\\ReplyIterator' => __DIR__ . '/includes/Import/LiquidThreadsApi/Iterators.php', - 'Flow\\Import\\LiquidThreadsApi\\RevisionIterator' => __DIR__ . '/includes/Import/LiquidThreadsApi/Iterators.php', - 'Flow\\Import\\LiquidThreadsApi\\ScriptedImportRevision' => __DIR__ . '/includes/Import/LiquidThreadsApi/Objects.php', - 'Flow\\Import\\LiquidThreadsApi\\TopicIterator' => __DIR__ . '/includes/Import/LiquidThreadsApi/Iterators.php', + 'Flow\\Import\\LiquidThreadsApi\\ImportHeader' => __DIR__ . 
'/includes/Import/LiquidThreadsApi/ImportHeader.php', + 'Flow\\Import\\LiquidThreadsApi\\ImportPost' => __DIR__ . '/includes/Import/LiquidThreadsApi/ImportPost.php', + 'Flow\\Import\\LiquidThreadsApi\\ImportRevision' => __DIR__ . '/includes/Import/LiquidThreadsApi/ImportRevision.php', + 'Flow\\Import\\LiquidThreadsApi\\ImportSource' => __DIR__ . '/includes/Import/LiquidThreadsApi/ImportSource.php', + 'Flow\\Import\\LiquidThreadsApi\\ImportSummary' => __DIR__ . '/includes/Import/LiquidThreadsApi/ImportSummary.php', + 'Flow\\Import\\LiquidThreadsApi\\ImportTopic' => __DIR__ . '/includes/Import/LiquidThreadsApi/ImportTopic.php', + 'Flow\\Import\\LiquidThreadsApi\\LocalApiBackend' => __DIR__ . '/includes/Import/LiquidThreadsApi/LocalApiBackend.php', + 'Flow\\Import\\LiquidThreadsApi\\MovedImportPost' => __DIR__ . '/includes/Import/LiquidThreadsApi/MovedImportPost.php', + 'Flow\\Import\\LiquidThreadsApi\\MovedImportRevision' => __DIR__ . '/includes/Import/LiquidThreadsApi/MovedImportRevision.php', + 'Flow\\Import\\LiquidThreadsApi\\MovedImportTopic' => __DIR__ . '/includes/Import/LiquidThreadsApi/MovedImportTopic.php', + 'Flow\\Import\\LiquidThreadsApi\\PageRevisionedObject' => __DIR__ . '/includes/Import/LiquidThreadsApi/PageRevisionedObject.php', + 'Flow\\Import\\LiquidThreadsApi\\RemoteApiBackend' => __DIR__ . '/includes/Import/LiquidThreadsApi/RemoteApiBackend.php', + 'Flow\\Import\\LiquidThreadsApi\\ReplyIterator' => __DIR__ . '/includes/Import/LiquidThreadsApi/ReplyIterator.php', + 'Flow\\Import\\LiquidThreadsApi\\RevisionIterator' => __DIR__ . '/includes/Import/LiquidThreadsApi/RevisionIterator.php', + 'Flow\\Import\\LiquidThreadsApi\\ScriptedImportRevision' => __DIR__ . '/includes/Import/LiquidThreadsApi/ScriptedImportRevision.php', + 'Flow\\Import\\LiquidThreadsApi\\TopicIterator' => __DIR__ . '/includes/Import/LiquidThreadsApi/TopicIterator.php', 'Flow\\Import\\NullImportSourceStore' => __DIR__ . 
'/includes/Import/ImportSourceStore.php', 'Flow\\Import\\PageImportState' => __DIR__ . '/includes/Import/Importer.php', 'Flow\\Import\\Plain\\ImportHeader' => __DIR__ . '/includes/Import/Plain/ImportHeader.php', diff --git a/includes/Import/LiquidThreadsApi/ApiBackend.php b/includes/Import/LiquidThreadsApi/ApiBackend.php new file mode 100644 index 0000000..254cfc1 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ApiBackend.php @@ -0,0 +1,181 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ApiBase; +use ApiMain; +use Exception; +use FauxRequest; +use Flow\Container; +use Flow\Import\ImportException; +use Flow\Import\IImportSource; +use Flow\Import\ApiNullResponseException; +use Http; +use RequestContext; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; +use UsageException; +use User; + +abstract class ApiBackend implements LoggerAwareInterface { + + /** + * @var LoggerInterface + */ + protected $logger; + + public function __construct() { + $this->logger = new NullLogger; + } + + public function setLogger( LoggerInterface $logger ) { + $this->logger = $logger; + } + + /** + * Retrieves LiquidThreads data from the API + * + * @param array $conditions The parameters to pass to select the threads. Usually used in two ways: with thstartid/thpage, or with ththreadid + * @return array Data as returned under query.threads by the API + * @throws ApiNotFoundException Thrown when the remote api reports that the provided conditions + * have no matching records. + * @throws ImportException When an error is received from the remote api. This is often either + * a bad request or lqt threw an exception trying to respond to a valid request. 
+ */ + public function retrieveThreadData( array $conditions ) { + $params = array( + 'action' => 'query', + 'list' => 'threads', + 'thprop' => 'id|subject|page|parent|ancestor|created|modified|author|summaryid|type|rootid|replies', + 'format' => 'json', + 'limit' => ApiBase::LIMIT_BIG1, + ); + $data = $this->apiCall( $params + $conditions ); + + if ( ! isset( $data['query']['threads'] ) ) { + if ( $this->isNotFoundError( $data ) ) { + $message = "Did not find thread with conditions: " . json_encode( $conditions ); + $this->logger->debug( __METHOD__ . ": $message" ); + throw new ApiNotFoundException( $message ); + } else { + $this->logger->error( __METHOD__ . ': Failed API call against ' . $this->getKey() . ' with conditions : ' . json_encode( $conditions ) ); + throw new ImportException( "Null response from API module:" . json_encode( $data ) ); + } + } + + $firstThread = reset( $data['query']['threads'] ); + if ( ! isset( $firstThread['replies'] ) ) { + throw new ImportException( "Foreign API does not support reply exporting:" . json_encode( $data ) ); + } + + return $data['query']['threads']; + } + + /** + * Retrieves data about a set of pages from the API + * + * @param array $pageIds Page IDs to return data for. + * @return array The query.pages part of the API response. + * @throws \MWException + */ + public function retrievePageDataById( array $pageIds ) { + if ( !$pageIds ) { + throw new \MWException( 'At least one page id must be provided' ); + } + + return $this->retrievePageData( array( + 'pageids' => implode( '|', $pageIds ), + ) ); + } + + /** + * Retrieves data about the latest revision of the titles + * from the API + * + * @param string[] $titles Titles to return data for + * @return array The query.pages part of the API response. 
+ * @throws \MWException + * @throws ImportException + */ + public function retrieveTopRevisionByTitle( array $titles ) { + if ( !$titles ) { + throw new \MWException( 'At least one title must be provided' ); + } + + return $this->retrievePageData( array( + 'titles' => implode( '|', $titles ), + 'rvlimit' => 1, + 'rvdir' => 'older', + ), true ); + } + + /** + * Retrieves data about a set of pages from the API + * + * @param array $conditions Conditions to retrieve pages by; to be sent to the API. + * @param bool $expectContinue Pass true here when caller expects more revisions to exist than + * they are requesting information about. + * @return array The query.pages part of the API response. + * @throws ApiNotFoundException Thrown when the remote api reports that the provided conditions + * have no matching records. + * @throws ImportException When an error is received from the remote api. This is often either + * a bad request or lqt threw an exception trying to respond to a valid request. + * @throws ImportException When more revisions are available than can be returned in a single + * query and the calling code does not set $expectContinue to true. + */ + public function retrievePageData( array $conditions, $expectContinue = false ) { + $conditions += array( + 'action' => 'query', + 'prop' => 'revisions', + 'rvprop' => 'timestamp|user|content|ids', + 'format' => 'json', + 'rvlimit' => 5000, + 'rvdir' => 'newer', + 'continue' => '', + 'limit' => ApiBase::LIMIT_BIG1, + ); + $data = $this->apiCall( $conditions ); + + if ( ! isset( $data['query'] ) ) { + $this->logger->error( __METHOD__ . ': Failed API call against ' . $this->getKey() . ' with conditions : ' . json_encode( $conditions ) ); + if ( $this->isNotFoundError( $data ) ) { + $message = "Did not find pages: " . json_encode( $conditions ); + $this->logger->debug( __METHOD__ . 
": $message" ); + throw new ApiNotFoundException( $message ); + } else { + throw new ImportException( "Null response from API module: " . json_encode( $data ) ); + } + } elseif ( !$expectContinue && isset( $data['continue'] ) ) { + throw new ImportException( "More revisions than can be retrieved for conditions, import would be incomplete: " . json_encode( $conditions ) ); + } + + return $data['query']['pages']; + } + + /** + * Calls the remote API + * + * @param array $params The API request to send + * @param int $retry Retry the request on failure this many times + * @return array API return value, decoded from JSON into an array. + */ + abstract function apiCall( array $params, $retry = 1 ); + + /** + * @return string A unique identifier for this backend. + */ + abstract function getKey(); + + /** + * @param array $apiResponse + * @return bool + */ + protected function isNotFoundError( $apiResponse ) { + // LQT has some bugs where not finding the requested item in the database throws + // returns this exception. + $expect = 'Exception Caught: DatabaseBase::makeList: empty input for field thread_parent'; + return false !== strpos( $apiResponse['error']['info'], $expect ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/CachedApiData.php b/includes/Import/LiquidThreadsApi/CachedApiData.php new file mode 100644 index 0000000..83a6d70 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/CachedApiData.php @@ -0,0 +1,18 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Iterator; + +/** + * Abstract class to store ID-indexed cached data. 
+ */ +abstract class CachedApiData extends CachedData { + protected $backend; + + function __construct( ApiBackend $backend ) { + $this->backend = $backend; + } +} + diff --git a/includes/Import/LiquidThreadsApi/CachedData.php b/includes/Import/LiquidThreadsApi/CachedData.php index ccca036..5f3504e 100644 --- a/includes/Import/LiquidThreadsApi/CachedData.php +++ b/includes/Import/LiquidThreadsApi/CachedData.php @@ -88,87 +88,3 @@ } } -abstract class CachedApiData extends CachedData { - protected $backend; - - function __construct( ApiBackend $backend ) { - $this->backend = $backend; - } -} - -/** - * Cached LiquidThreads thread data. - */ -class CachedThreadData extends CachedApiData { - protected $topics = array(); - - protected function addData( array $data ) { - parent::addData( $data ); - - foreach( $data as $thread ) { - if ( self::isTopic( $thread ) ) { - $this->topics[$thread['id']] = true; - } - } - ksort( $this->topics ); - } - - /** - * Get the IDs of loaded threads that are top-level topics. - * - * @return array List of thread IDs in ascending order. - */ - public function getTopics() { - return array_keys( $this->topics ); - } - - /** - * Create an iterator for the contained topic ids in ascending order - * - * @return Iterator<integer> - */ - public function getTopicIdIterator() { - return new ArrayIterator( $this->getTopics() ); - } - - /** - * Retrieve data for threads from the given page starting with the provided - * offset. 
- * - * @param string $pageName - * @param integer $startId - * @return array Associative result array - */ - public function getFromPage( $pageName, $startId = 0 ) { - $data = $this->backend->retrieveThreadData( array( - 'thpage' => $pageName, - 'thstartid' => $startId - ) ); - $this->addData( $data ); - - return $data; - } - - protected function retrieve( array $ids ) { - return $this->backend->retrieveThreadData( array( - 'thid' => implode( '|', $ids ), - ) ); - } - - /** - * @param array $thread - * @return bool - */ - public static function isTopic( array $thread ) { - return $thread['parent'] === null; - } -} - -/** - * Cached MediaWiki page data. - */ -class CachedPageData extends CachedApiData { - protected function retrieve( array $ids ) { - return $this->backend->retrievePageDataByID( $ids ); - } -} diff --git a/includes/Import/LiquidThreadsApi/CachedPageData.php b/includes/Import/LiquidThreadsApi/CachedPageData.php new file mode 100644 index 0000000..1d29fc3 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/CachedPageData.php @@ -0,0 +1,16 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Iterator; + +/** + * Cached MediaWiki page data. + */ +class CachedPageData extends CachedApiData { + protected function retrieve( array $ids ) { + return $this->backend->retrievePageDataByID( $ids ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/CachedThreadData.php b/includes/Import/LiquidThreadsApi/CachedThreadData.php new file mode 100644 index 0000000..b843cea --- /dev/null +++ b/includes/Import/LiquidThreadsApi/CachedThreadData.php @@ -0,0 +1,75 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Iterator; + +/** + * Cached LiquidThreads thread data. 
+ */ +class CachedThreadData extends CachedApiData { + protected $topics = array(); + + protected function addData( array $data ) { + parent::addData( $data ); + + foreach( $data as $thread ) { + if ( self::isTopic( $thread ) ) { + $this->topics[$thread['id']] = true; + } + } + ksort( $this->topics ); + } + + /** + * Get the IDs of loaded threads that are top-level topics. + * + * @return array List of thread IDs in ascending order. + */ + public function getTopics() { + return array_keys( $this->topics ); + } + + /** + * Create an iterator for the contained topic ids in ascending order + * + * @return Iterator<integer> + */ + public function getTopicIdIterator() { + return new ArrayIterator( $this->getTopics() ); + } + + /** + * Retrieve data for threads from the given page starting with the provided + * offset. + * + * @param string $pageName + * @param integer $startId + * @return array Associative result array + */ + public function getFromPage( $pageName, $startId = 0 ) { + $data = $this->backend->retrieveThreadData( array( + 'thpage' => $pageName, + 'thstartid' => $startId + ) ); + $this->addData( $data ); + + return $data; + } + + protected function retrieve( array $ids ) { + return $this->backend->retrieveThreadData( array( + 'thid' => implode( '|', $ids ), + ) ); + } + + /** + * @param array $thread + * @return bool + */ + public static function isTopic( array $thread ) { + return $thread['parent'] === null; + } +} + diff --git a/includes/Import/LiquidThreadsApi/ImportHeader.php b/includes/Import/LiquidThreadsApi/ImportHeader.php new file mode 100644 index 0000000..bff28eb --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ImportHeader.php @@ -0,0 +1,101 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use 
Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class ImportHeader extends PageRevisionedObject implements IImportHeader { + /** @var ApiBackend **/ + protected $api; + /** @var string **/ + protected $title; + /** @var array **/ + protected $pageData; + /** @var ImportSource **/ + protected $source; + /** + * User used for script-originated actions, such as cleanup edits. + * Does not apply to actual posts, which retain their original users. + * + * @var User + */ + protected $destinationScriptUser; + + public function __construct( ApiBackend $api, ImportSource $source, $title, User $destinationScriptUser ) { + $this->api = $api; + $this->title = $title; + $this->source = $source; + $this->pageData = null; + $this->destinationScriptUser = $destinationScriptUser; + } + + public function getRevisions() { + if ( $this->pageData === null ) { + // Previous revisions of the header are preserved in the underlying wikitext + // page history. Only the top revision is imported. 
+ $response = $this->api->retrieveTopRevisionByTitle( array( $this->title ) ); + $this->pageData = reset( $response ); + } + + $revisions = array(); + + if ( isset( $this->pageData['revisions'] ) && count( $this->pageData['revisions'] ) > 0 ) { + $lastLqtRevision = new ImportRevision( end( $this->pageData['revisions'] ), $this ); + + $titleObject = Title::newFromText( $this->title ); + $cleanupRevision = $this->createHeaderCleanupRevision( $lastLqtRevision, $titleObject ); + + $revisions = array( $lastLqtRevision, $cleanupRevision ); + } + + return new ArrayIterator( $revisions ); + } + + /** + * @param IObjectRevision $lastRevision last imported header revision + * @param Title $archiveTitle archive page title associated with header + * @return IObjectRevision generated revision for cleanup edit + */ + protected function createHeaderCleanupRevision( IObjectRevision $lastRevision, Title $archiveTitle ) { + $wikitextForLastRevision = $lastRevision->getText(); + // This will remove all instances, without attempting to check if it's in + // nowiki, etc. It also ignores case and spaces in places where it doesn't + // matter. + $newWikitext = preg_replace( + '/{{\s*#useliquidthreads:\s*1\s*}}/i', + '', + $wikitextForLastRevision + ); + $templateName = wfMessage( 'flow-importer-lqt-converted-template' )->inContentLanguage()->plain(); + $arguments = implode( '|', array( + 'archive=' . $archiveTitle->getPrefixedText(), + 'date=' . 
MWTimestamp::getInstance()->timestamp->format( 'Y-m-d' ), + ) ); + + $newWikitext .= "\n\n{{{$templateName}|$arguments}}"; + $cleanupRevision = new ScriptedImportRevision( + $this, + $this->destinationScriptUser, + $newWikitext, + $lastRevision->getTimestamp() + ); + return $cleanupRevision; + } + + public function getObjectKey() { + return $this->source->getObjectKey( 'header_for', $this->title ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/ImportPost.php b/includes/Import/LiquidThreadsApi/ImportPost.php new file mode 100644 index 0000000..49b05ac --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ImportPost.php @@ -0,0 +1,104 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class ImportPost extends PageRevisionedObject implements IImportPost { + + /** + * @var array + */ + protected $apiResponse; + + /** + * @param ImportSource $source + * @param array $apiResponse + */ + public function __construct( ImportSource $source, array $apiResponse ) { + parent::__construct( $source, $apiResponse['rootid'] ); + $this->apiResponse = $apiResponse; + } + + /** + * @return string + */ + public function getAuthor() { + return $this->apiResponse['author']['name']; + } + + /** + * @return string|false + */ + public function getCreatedTimestamp() { + return wfTimestamp( TS_MW, $this->apiResponse['created'] ); + } + + /** + * @return string|false + */ + public function getModifiedTimestamp() { + return wfTimestamp( TS_MW, $this->apiResponse['modified'] ); + } + + /** + * @return string + */ + public function getText() { + $pageData = $this->importSource->getPageData( $this->apiResponse['rootid'] ); + $revision = 
$pageData['revisions'][0]; + if ( defined( 'ApiResult::META_CONTENT' ) ) { + $contentKey = isset( $revision[ApiResult::META_CONTENT] ) + ? $revision[ApiResult::META_CONTENT] + : '*'; + } else { + $contentKey = '*'; + } + + return $revision[$contentKey]; + } + + public function getTitle() { + $pageData = $this->importSource->getPageData( $this->apiResponse['rootid'] ); + + return Title::newFromText( $pageData['title'] ); + } + + /** + * @return Iterator<IImportPost> + */ + public function getReplies() { + return new ReplyIterator( $this ); + } + + /** + * @return array + */ + public function getApiResponse() { + return $this->apiResponse; + } + + /** + * @return ImportSource + */ + public function getSource() { + return $this->importSource; + } + + public function getObjectKey() { + return $this->importSource->getObjectKey( 'thread_id', $this->apiResponse['id'] ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/ImportRevision.php b/includes/Import/LiquidThreadsApi/ImportRevision.php new file mode 100644 index 0000000..217d1ac --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ImportRevision.php @@ -0,0 +1,64 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class ImportRevision implements IObjectRevision { + /** @var IImportObject **/ + protected $parentObject; + + /** @var array **/ + protected $apiResponse; + + /** + * Creates an ImportRevision based on a MW page revision + * + * @param array $apiResponse An element from api.query.revisions + * @param IImportObject $parentObject + */ + function __construct( array $apiResponse, IImportObject $parentObject ) { + $this->apiResponse = $apiResponse; + $this->parent = 
$parentObject; + } + + /** + * @return string + */ + public function getText() { + if ( defined( 'ApiResult::META_CONTENT' ) ) { + $contentKey = isset( $this->apiResponse[ApiResult::META_CONTENT] ) + ? $this->apiResponse[ApiResult::META_CONTENT] + : '*'; + } else { + $contentKey = '*'; + } + + return $this->apiResponse[$contentKey]; + } + + public function getTimestamp() { + return wfTimestamp( TS_MW, $this->apiResponse['timestamp'] ); + } + + public function getAuthor() { + return $this->apiResponse['user']; + } + + public function getObjectKey() { + return $this->parent->getObjectKey() . ':rev:' . $this->apiResponse['revid']; + } +} + diff --git a/includes/Import/LiquidThreadsApi/ImportSource.php b/includes/Import/LiquidThreadsApi/ImportSource.php new file mode 100644 index 0000000..837461f --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ImportSource.php @@ -0,0 +1,170 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ApiBase; +use ApiMain; +use Exception; +use FauxRequest; +use Flow\Container; +use Flow\Import\ImportException; +use Flow\Import\IImportSource; +use Flow\Import\ApiNullResponseException; +use Http; +use RequestContext; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; +use UsageException; +use User; + +class ImportSource implements IImportSource { + // Thread types defined by LQT which are returned via api + const THREAD_TYPE_NORMAL = 0; + const THREAD_TYPE_MOVED = 1; + const THREAD_TYPE_DELETED = 2; + const THREAD_TYPE_HIDDEN = 4; + + /** + * @var ApiBackend + */ + protected $api; + + /** + * @var string + */ + protected $pageName; + + /** + * @var CachedThreadData + */ + protected $threadData; + + /** + * @var CachedPageData + */ + protected $pageData; + + /** + * @param ApiBackend $apiBackend + * @param string $pageName + */ + public function __construct( ApiBackend $apiBackend, $pageName ) { + $this->api = $apiBackend; + $this->pageName = $pageName; + + $this->threadData = new 
CachedThreadData( $this->api ); + $this->pageData = new CachedPageData( $this->api ); + } + + /** + * {@inheritDoc} + */ + public function getHeader() { + $controller = Container::get( 'occupation_controller' ); + + return new ImportHeader( $this->api, $this, $this->pageName, $controller->getTalkpageManager() ); + } + + /** + * {@inheritDoc} + */ + public function getTopics() { + return new TopicIterator( $this, $this->threadData, $this->pageName ); + } + + /** + * @param integer $id + * @return ImportTopic|null + */ + public function getTopic( $id ) { + $data = $this->threadData->get( $id ); + switch ( $data['type'] ) { + // Standard thread + case self::THREAD_TYPE_NORMAL: + return new ImportTopic( $this, $data ); + + // The topic no longer exists at the queried location, but + // a stub was left behind pointing to it. This modified + // version of ImportTopic gracefully adjusts the #REDIRECT + // into a template to keep a similar output to lqt. + case self::THREAD_TYPE_MOVED: + return new MovedImportTopic( $this, $data ); + + // To get these back from the api we would have to send the `showdeleted` + // query param. As we are not requesting them, just ignore for now. + case self::THREAD_TYPE_DELETED: + return null; + + // Was assigned but never used by LQT. 
+ case self::THREAD_TYPE_HIDDEN: + return null; + } + } + + /** + * @param integer $id + * @return ImportPost + */ + public function getPost( $id ) { + return new ImportPost( $this, $this->threadData->get( $id ) ); + } + + /** + * @param integer $id + * @return array + */ + public function getThreadData( $id ) { + if ( is_array( $id ) ) { + return $this->threadData->getMulti( $id ); + } else { + return $this->threadData->get( $id ); + } + } + + /** + * @param integer[]|integer $pageIds + * @return array + */ + public function getPageData( $pageIds ) { + if ( is_array( $pageIds ) ) { + return $this->pageData->getMulti( $pageIds ); + } else { + return $this->pageData->get( $pageIds ); + } + } + + /** + * @param string $pageName + * @param integer $startId + * @return array + */ + public function getFromPage( $pageName, $startId = 0 ) { + return $this->threadData->getFromPage( $pageName, $startId ); + } + + /** + * Gets a unique identifier for the wiki being imported + * @return string Usually either a string 'local' or an API URL + */ + public function getApiKey() { + return $this->api->getKey(); + } + + /** + * Returns a key uniquely representing an object determined by arguments. 
+ * Parameters: Zero or more strings that uniquely represent the object + * for this ImportSource + * + * @return string Unique key + */ + public function getObjectKey( /* $args */ ) { + $components = array_merge( + array( 'lqt-api', $this->getApiKey() ), + func_get_args() + ); + + return implode( ':', $components ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/ImportSummary.php b/includes/Import/LiquidThreadsApi/ImportSummary.php new file mode 100644 index 0000000..bde0a9e --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ImportSummary.php @@ -0,0 +1,36 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class ImportSummary extends PageRevisionedObject implements IImportSummary { + /** @var ImportSource **/ + protected $source; + + /** + * @param array $apiResponse + * @param ImportSource $source + * @throws ImportException + */ + public function __construct( array $apiResponse, ImportSource $source ) { + parent::__construct( $source, $apiResponse['pageid'] ); + } + + public function getObjectKey() { + return $this->importSource->getObjectKey( 'summary_id', $this->pageId ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/ImportTopic.php b/includes/Import/LiquidThreadsApi/ImportTopic.php new file mode 100644 index 0000000..726b100 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ImportTopic.php @@ -0,0 +1,89 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use 
Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class ImportTopic extends ImportPost implements IImportTopic, IObjectRevision { + /** + * @return string + */ + public function getText() { + return $this->apiResponse['subject']; + } + + public function getAuthor() { + return $this->apiResponse['author']['name']; + } + + public function getRevisions() { + // we only have access to a single revision of the topic + return new ArrayIterator( array( $this ) ); + } + + public function getReplies() { + $topPost = new ImportPost( $this->importSource, $this->apiResponse ); + return new ArrayIterator( array( $topPost ) ); + } + + public function getTimestamp() { + return wfTimestamp( TS_MW, $this->apiResponse['created'] ); + } + + /** + * @return IImportSummary|null + */ + public function getTopicSummary() { + $id = $this->getSummaryId(); + if ( $id > 0 ) { + $data = $this->importSource->getPageData( $id ); + if ( isset( $data['revisions'][0] ) ) { + return new ImportSummary( $data, $this->importSource ); + } else { + return null; + } + } else { + return null; + } + } + + /** + * @return integer + */ + protected function getSummaryId() { + return $this->apiResponse['summaryid']; + } + + /** + * This needs to have a different value than the same apiResponse in an ImportPost. + * The ImportPost version refers to the first response to the topic. + */ + public function getObjectKey() { + return 'topic' . 
$this->importSource->getObjectKey( 'thread_id', $this->apiResponse['id'] ); + } + + public function getLogType() { + return "lqt-to-flow-topic"; + } + + public function getLogParameters() { + return array( + 'lqt_thread_id' => $this->apiResponse['id'], + 'lqt_orig_title' => $this->getTitle()->getPrefixedText(), + 'lqt_subject' => $this->getText(), + ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/Iterators.php b/includes/Import/LiquidThreadsApi/Iterators.php deleted file mode 100644 index e86a8b5..0000000 --- a/includes/Import/LiquidThreadsApi/Iterators.php +++ /dev/null @@ -1,253 +0,0 @@ -<?php - -namespace Flow\Import\LiquidThreadsApi; - -use ArrayIterator; -use Flow\Import\IImportObject; -use Iterator; - -class TopicIterator implements Iterator { - /** - * @var ImportSource - */ - protected $importSource; - - /** - * @var CachedThreadData Access point for api data - */ - protected $threadData; - - /** - * @var integer|false|null Lqt id of the current topic, false if no current topic, null if unknown. - */ - protected $current = false; - - /** - * @var ImportTopic The current topic. - */ - protected $currentTopic = null; - - /** - * @var string Name of the remote page the topics exist on - */ - protected $pageName; - - /** - * @var Iterator A list of topic ids. Iterator used to simplify maintaining - * an explicit position within the list. 
- */ - protected $topicIdIterator; - - /** - * @var integer The maximum id received by self::loadMore - */ - protected $maxId; - - /** - * @param ImportSource $source - * @param CachedThreadData $threadData - * @param string $pageName - */ - public function __construct( ImportSource $source, CachedThreadData $threadData, $pageName ) { - $this->importSource = $source; - $this->threadData = $threadData; - $this->pageName = $pageName; - $this->topicIdIterator = new ArrayIterator( $threadData->getTopics() ); - $this->rewind(); - } - - /** - * @return ImportTopic - */ - public function current() { - if ( $this->current === false ) { - return null; - } - return $this->currentTopic; - } - - /** - * @return integer - */ - public function key() { - return $this->current; - } - - public function next() { - if ( !$this->valid() ) { - return; - } - - $lastOffset = $this->key(); - do { - while( $this->topicIdIterator->valid() ) { - $topicId = $this->topicIdIterator->current(); - $this->topicIdIterator->next(); - - // this topic id has been seen before. 
- if ( $topicId <= $lastOffset ) { - continue; - } - - // hidden and deleted threads come back as null - $topic = $this->importSource->getTopic( $topicId ); - if ( $topic === null ) { - continue; - } - - $this->current = $topicId; - $this->currentTopic = $topic; - return; - } - } while( $this->loadMore() ); - - // nothing found, nothing more to load - $this->current = false; - } - - public function rewind() { - $this->current = null; - $this->topicIdIterator->rewind(); - $this->next(); - } - - /** - * @return bool - */ - public function valid() { - return $this->current !== false; - } - - /** - * @return bool True when more topics were loaded - */ - protected function loadMore() { - try { - // + 1 to not return the existing max topic - $output = $this->threadData->getFromPage( $this->pageName, $this->maxId + 1 ); - } catch ( ApiNotFoundException $e ) { - // No more results, end loop - return false; - } - - $this->maxId = max( array_keys( $output ) ); - $this->topicIdIterator = new ArrayIterator( $this->threadData->getTopics() ); - $this->topicIdIterator->rewind(); - - // Keep looping until we get a not found error - return true; - } -} - -class ReplyIterator implements Iterator { - /** @var ImportPost **/ - protected $post; - /** @var array Array of thread IDs **/ - protected $threadReplies; - /** @var int **/ - protected $replyIndex; - /** @var ImportPost|null */ - protected $current; - - public function __construct( ImportPost $post ) { - $this->post = $post; - $this->replyIndex = 0; - - $apiResponse = $post->getApiResponse(); - $this->threadReplies = array_values( $apiResponse['replies'] ); - } - - /** - * @return ImportPost|null - */ - public function current() { - return $this->current; - } - - /** - * @return integer - */ - public function key() { - return $this->replyIndex; - } - - public function next() { - while( ++$this->replyIndex < count( $this->threadReplies ) ) { - try { - $replyId = $this->threadReplies[$this->replyIndex]['id']; - $this->current = 
$this->post->getSource()->getPost( $replyId ); - return; - } catch ( ApiNotFoundException $e ) { - // while loop fall-through handles our error case - } - } - - // Nothing found, set current to null - $this->current = null; - } - - public function rewind() { - $this->replyIndex = -1; - $this->next(); - } - - public function valid() { - return $this->current !== null; - } -} - -/** - * Iterates over the revisions of a foreign page to produce - * revisions of a Flow object. - */ -class RevisionIterator implements Iterator { - /** @var array **/ - protected $pageData; - - /** @var int **/ - protected $pointer; - - /** @var IImportObject **/ - protected $parent; - - public function __construct( array $pageData, IImportObject $parent, $factory = null ) { - $this->pageData = $pageData; - $this->pointer = 0; - $this->parent = $parent; - $this->factory = $factory ?: function( $data, $parent ) { - return new ImportRevision( $data, $parent ); - }; - } - - protected function getRevisionCount() { - if ( isset( $this->pageData['revisions'] ) ) { - return count( $this->pageData['revisions'] ); - } else { - return 0; - } - } - - public function valid() { - return $this->pointer < $this->getRevisionCount(); - } - - public function next() { - ++$this->pointer; - } - - public function key() { - return $this->pointer; - } - - public function rewind() { - $this->pointer = 0; - } - - public function current() { - return call_user_func( - $this->factory, - $this->pageData['revisions'][$this->pointer], - $this->parent - ); - } -} diff --git a/includes/Import/LiquidThreadsApi/LocalApiBackend.php b/includes/Import/LiquidThreadsApi/LocalApiBackend.php new file mode 100644 index 0000000..76a5773 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/LocalApiBackend.php @@ -0,0 +1,65 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ApiBase; +use ApiMain; +use Exception; +use FauxRequest; +use Flow\Container; +use Flow\Import\ImportException; +use Flow\Import\IImportSource; +use 
Flow\Import\ApiNullResponseException; +use Http; +use RequestContext; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; +use UsageException; +use User; + +class LocalApiBackend extends ApiBackend { + /** + * @var User|null + */ + protected $user; + + public function __construct( User $user = null ) { + parent::__construct(); + $this->user = $user; + } + + public function getKey() { + return 'local'; + } + + public function apiCall( array $params, $retry = 1 ) { + try { + $context = new RequestContext; + $context->setRequest( new FauxRequest( $params ) ); + if ( $this->user ) { + $context->setUser( $this->user ); + } + + $api = new ApiMain( $context ); + $api->execute(); + if ( defined( 'ApiResult::META_CONTENT' ) ) { + return ApiResult::removeMetadata( $api->getResult()->getResultData() ); + } else { + return $api->getResult()->getData(); + } + } catch ( UsageException $exception ) { + // Mimic the behaviour when called remotely + return array( 'error' => $exception->getMessageArray() ); + } catch ( Exception $exception ) { + // Mimic behaviour when called remotely + return array( + 'error' => array( + 'code' => 'internal_api_error_' . get_class( $exception ), + 'info' => 'Exception Caught: ' . 
$exception->getMessage(), + ), + ); + } + } +} + diff --git a/includes/Import/LiquidThreadsApi/MovedImportPost.php b/includes/Import/LiquidThreadsApi/MovedImportPost.php new file mode 100644 index 0000000..90550cf --- /dev/null +++ b/includes/Import/LiquidThreadsApi/MovedImportPost.php @@ -0,0 +1,28 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class MovedImportPost extends ImportPost { + public function getRevisions() { + $factory = function( $data, $parent ) { + return new MovedImportRevision( $data, $parent ); + }; + $pageData = $this->importSource->getPageData( $this->pageId ); + return new RevisionIterator( $pageData, $this, $factory ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/MovedImportRevision.php b/includes/Import/LiquidThreadsApi/MovedImportRevision.php new file mode 100644 index 0000000..bb027f2 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/MovedImportRevision.php @@ -0,0 +1,47 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class MovedImportRevision extends ImportRevision { + /** + * Rewrites the '#REDIRECT [[...]]' of an autogenerated lqt moved + * thread stub into a template. 
While we don't re-write the link + * here, after importing the referenced thread LqtRedirector will + * make that Thread page a redirect to the Flow topic, essentially + * making these links still work. + */ + public function getText() { + $text = parent::getText(); + $content = \ContentHandler::makeContent( $text, null, CONTENT_MODEL_WIKITEXT ); + $target = $content->getRedirectTarget(); + if ( !$target ) { + throw new ImportException( "Could not detect redirect within: $text" ); + } + + // To get the new talk page that this belongs to we would need to query the api + // for the new topic, for now not bothering. + $template = wfMessage( 'flow-importer-lqt-moved-thread-template' )->inContentLanguage()->plain(); + $arguments = implode( '|', array( + 'author=' . parent::getAuthor(), + 'date=' . MWTimestamp::getInstance( $this->apiResponse['timestamp'] )->timestamp->format( 'Y-m-d' ), + 'title=' . $target->getPrefixedText(), + ) ); + + return "{{{$template}|$arguments}}"; + } +} + diff --git a/includes/Import/LiquidThreadsApi/MovedImportTopic.php b/includes/Import/LiquidThreadsApi/MovedImportTopic.php new file mode 100644 index 0000000..c802e12 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/MovedImportTopic.php @@ -0,0 +1,25 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +class MovedImportTopic extends ImportTopic { + public function getReplies() { + $topPost = new MovedImportPost( $this->importSource, $this->apiResponse ); + return new ArrayIterator( array( $topPost ) ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/Objects.php b/includes/Import/LiquidThreadsApi/Objects.php deleted file mode 100644 index 
b198e2e..0000000 --- a/includes/Import/LiquidThreadsApi/Objects.php +++ /dev/null @@ -1,446 +0,0 @@ -<?php - -namespace Flow\Import\LiquidThreadsApi; - -use ArrayIterator; -use Flow\Import\IImportHeader; -use Flow\Import\IImportObject; -use Flow\Import\IImportPost; -use Flow\Import\IImportSummary; -use Flow\Import\IImportTopic; -use Flow\Import\ImportException; -use Flow\Import\IObjectRevision; -use Flow\Import\IRevisionableObject; -use Iterator; -use MWTimestamp; -use Title; -use User; - -abstract class PageRevisionedObject implements IRevisionableObject { - /** @var int **/ - protected $pageId; - - /** - * @var ImportSource - */ - protected $importSource; - - /** - * @param ImportSource $source - * @param int $pageId ID of the remote page - */ - function __construct( $source, $pageId ) { - $this->importSource = $source; - $this->pageId = $pageId; - } - - public function getRevisions() { - $pageData = $this->importSource->getPageData( $this->pageId ); - return new RevisionIterator( $pageData, $this ); - } -} - -class ImportPost extends PageRevisionedObject implements IImportPost { - - /** - * @var array - */ - protected $apiResponse; - - /** - * @param ImportSource $source - * @param array $apiResponse - */ - public function __construct( ImportSource $source, array $apiResponse ) { - parent::__construct( $source, $apiResponse['rootid'] ); - $this->apiResponse = $apiResponse; - } - - /** - * @return string - */ - public function getAuthor() { - return $this->apiResponse['author']['name']; - } - - /** - * @return string|false - */ - public function getCreatedTimestamp() { - return wfTimestamp( TS_MW, $this->apiResponse['created'] ); - } - - /** - * @return string|false - */ - public function getModifiedTimestamp() { - return wfTimestamp( TS_MW, $this->apiResponse['modified'] ); - } - - /** - * @return string - */ - public function getText() { - $pageData = $this->importSource->getPageData( $this->apiResponse['rootid'] ); - $revision = $pageData['revisions'][0]; - if 
( defined( 'ApiResult::META_CONTENT' ) ) { - $contentKey = isset( $revision[ApiResult::META_CONTENT] ) - ? $revision[ApiResult::META_CONTENT] - : '*'; - } else { - $contentKey = '*'; - } - - return $revision[$contentKey]; - } - - public function getTitle() { - $pageData = $this->importSource->getPageData( $this->apiResponse['rootid'] ); - - return Title::newFromText( $pageData['title'] ); - } - - /** - * @return Iterator<IImportPost> - */ - public function getReplies() { - return new ReplyIterator( $this ); - } - - /** - * @return array - */ - public function getApiResponse() { - return $this->apiResponse; - } - - /** - * @return ImportSource - */ - public function getSource() { - return $this->importSource; - } - - public function getObjectKey() { - return $this->importSource->getObjectKey( 'thread_id', $this->apiResponse['id'] ); - } -} - -/** - * This is a bit of a weird model, acting as a revision of itself. - */ -class ImportTopic extends ImportPost implements IImportTopic, IObjectRevision { - /** - * @return string - */ - public function getText() { - return $this->apiResponse['subject']; - } - - public function getAuthor() { - return $this->apiResponse['author']['name']; - } - - public function getRevisions() { - // we only have access to a single revision of the topic - return new ArrayIterator( array( $this ) ); - } - - public function getReplies() { - $topPost = new ImportPost( $this->importSource, $this->apiResponse ); - return new ArrayIterator( array( $topPost ) ); - } - - public function getTimestamp() { - return wfTimestamp( TS_MW, $this->apiResponse['created'] ); - } - - /** - * @return IImportSummary|null - */ - public function getTopicSummary() { - $id = $this->getSummaryId(); - if ( $id > 0 ) { - $data = $this->importSource->getPageData( $id ); - if ( isset( $data['revisions'][0] ) ) { - return new ImportSummary( $data, $this->importSource ); - } else { - return null; - } - } else { - return null; - } - } - - /** - * @return integer - */ - 
protected function getSummaryId() { - return $this->apiResponse['summaryid']; - } - - /** - * This needs to have a different value than the same apiResponse in an ImportPost. - * The ImportPost version refers to the first response to the topic. - */ - public function getObjectKey() { - return 'topic' . $this->importSource->getObjectKey( 'thread_id', $this->apiResponse['id'] ); - } - - public function getLogType() { - return "lqt-to-flow-topic"; - } - - public function getLogParameters() { - return array( - 'lqt_thread_id' => $this->apiResponse['id'], - 'lqt_orig_title' => $this->getTitle()->getPrefixedText(), - 'lqt_subject' => $this->getText(), - ); - } -} - -class ImportSummary extends PageRevisionedObject implements IImportSummary { - /** @var ImportSource **/ - protected $source; - - /** - * @param array $apiResponse - * @param ImportSource $source - * @throws ImportException - */ - public function __construct( array $apiResponse, ImportSource $source ) { - parent::__construct( $source, $apiResponse['pageid'] ); - } - - public function getObjectKey() { - return $this->importSource->getObjectKey( 'summary_id', $this->pageId ); - } -} - -class ImportRevision implements IObjectRevision { - /** @var IImportObject **/ - protected $parentObject; - - /** @var array **/ - protected $apiResponse; - - /** - * Creates an ImportRevision based on a MW page revision - * - * @param array $apiResponse An element from api.query.revisions - * @param IImportObject $parentObject - */ - function __construct( array $apiResponse, IImportObject $parentObject ) { - $this->apiResponse = $apiResponse; - $this->parent = $parentObject; - } - - /** - * @return string - */ - public function getText() { - if ( defined( 'ApiResult::META_CONTENT' ) ) { - $contentKey = isset( $this->apiResponse[ApiResult::META_CONTENT] ) - ? 
$this->apiResponse[ApiResult::META_CONTENT] - : '*'; - } else { - $contentKey = '*'; - } - - return $this->apiResponse[$contentKey]; - } - - public function getTimestamp() { - return wfTimestamp( TS_MW, $this->apiResponse['timestamp'] ); - } - - public function getAuthor() { - return $this->apiResponse['user']; - } - - public function getObjectKey() { - return $this->parent->getObjectKey() . ':rev:' . $this->apiResponse['revid']; - } -} - -// The Moved* series of topics handle the LQT move stubs. They need to -// have their revision content rewriten from #REDIRECT to a template that -// has visible output like lqt generated per-request. -class MovedImportTopic extends ImportTopic { - public function getReplies() { - $topPost = new MovedImportPost( $this->importSource, $this->apiResponse ); - return new ArrayIterator( array( $topPost ) ); - } -} - -class MovedImportPost extends ImportPost { - public function getRevisions() { - $factory = function( $data, $parent ) { - return new MovedImportRevision( $data, $parent ); - }; - $pageData = $this->importSource->getPageData( $this->pageId ); - return new RevisionIterator( $pageData, $this, $factory ); - } -} - -class MovedImportRevision extends ImportRevision { - /** - * Rewrites the '#REDIRECT [[...]]' of an autogenerated lqt moved - * thread stub into a template. While we don't re-write the link - * here, after importing the referenced thread LqtRedirector will - * make that Thread page a redirect to the Flow topic, essentially - * making these links still work. - */ - public function getText() { - $text = parent::getText(); - $content = \ContentHandler::makeContent( $text, null, CONTENT_MODEL_WIKITEXT ); - $target = $content->getRedirectTarget(); - if ( !$target ) { - throw new ImportException( "Could not detect redirect within: $text" ); - } - - // To get the new talk page that this belongs to we would need to query the api - // for the new topic, for now not bothering. 
- $template = wfMessage( 'flow-importer-lqt-moved-thread-template' )->inContentLanguage()->plain(); - $arguments = implode( '|', array( - 'author=' . parent::getAuthor(), - 'date=' . MWTimestamp::getInstance( $this->apiResponse['timestamp'] )->timestamp->format( 'Y-m-d' ), - 'title=' . $target->getPrefixedText(), - ) ); - - return "{{{$template}|$arguments}}"; - } -} - -// Represents a revision the script makes on its own behalf, using a script user -class ScriptedImportRevision implements IObjectRevision { - /** @var IImportObject **/ - protected $parentObject; - - /** @var User */ - protected $destinationScriptUser; - - /** @var string */ - protected $revisionText; - - /** @var string */ - protected $timestamp; - - /** - * Creates a ScriptedImportRevision with the current timestamp, given a script user - * and arbitrary text. - * - * @param IImportObject $parentObject Object this is a revision of - * @param User $destinationScriptUser User that performed this scripted edit - * @param string $revisionText Text of revision - */ - function __construct( IImportObject $parentObject, User $destinationScriptUser, $revisionText ) { - $this->parent = $parentObject; - $this->destinationScriptUser = $destinationScriptUser; - $this->revisionText = $revisionText; - $this->timestamp = wfTimestampNow(); - } - - public function getText() { - return $this->revisionText; - } - - public function getTimestamp() { - return $this->timestamp; - } - - public function getAuthor() { - return $this->destinationScriptUser->getName(); - } - - // XXX: This is called but never used, but if it were, including getText and getAuthor in - // the key might not be desirable, because we don't necessarily want to re-import - // the revision when these change. - public function getObjectKey() { - return $this->parent->getObjectKey() . ':rev:scripted:' . md5( $this->getText() . 
$this->getAuthor() ); - } -} - -class ImportHeader extends PageRevisionedObject implements IImportHeader { - /** @var ApiBackend **/ - protected $api; - /** @var string **/ - protected $title; - /** @var array **/ - protected $pageData; - /** @var ImportSource **/ - protected $source; - /** - * User used for script-originated actions, such as cleanup edits. - * Does not apply to actual posts, which retain their original users. - * - * @var User - */ - protected $destinationScriptUser; - - public function __construct( ApiBackend $api, ImportSource $source, $title, User $destinationScriptUser ) { - $this->api = $api; - $this->title = $title; - $this->source = $source; - $this->pageData = null; - $this->destinationScriptUser = $destinationScriptUser; - } - - public function getRevisions() { - if ( $this->pageData === null ) { - // Previous revisions of the header are preserved in the underlying wikitext - // page history. Only the top revision is imported. - $response = $this->api->retrieveTopRevisionByTitle( array( $this->title ) ); - $this->pageData = reset( $response ); - } - - $revisions = array(); - - if ( isset( $this->pageData['revisions'] ) && count( $this->pageData['revisions'] ) > 0 ) { - $lastLqtRevision = new ImportRevision( end( $this->pageData['revisions'] ), $this ); - - $titleObject = Title::newFromText( $this->title ); - $cleanupRevision = $this->createHeaderCleanupRevision( $lastLqtRevision, $titleObject ); - - $revisions = array( $lastLqtRevision, $cleanupRevision ); - } - - return new ArrayIterator( $revisions ); - } - - /** - * @param IObjectRevision $lastRevision last imported header revision - * @param Title $archiveTitle archive page title associated with header - * @return IObjectRevision generated revision for cleanup edit - */ - protected function createHeaderCleanupRevision( IObjectRevision $lastRevision, Title $archiveTitle ) { - $wikitextForLastRevision = $lastRevision->getText(); - // This is will remove all instances, without attempting 
to check if it's in - // nowiki, etc. It also ignores case and spaces in places where it doesn't - // matter. - $newWikitext = preg_replace( - '/{{\s*#useliquidthreads:\s*1\s*}}/i', - '', - $wikitextForLastRevision - ); - $templateName = wfMessage( 'flow-importer-lqt-converted-template' )->inContentLanguage()->plain(); - $arguments = implode( '|', array( - 'archive=' . $archiveTitle->getPrefixedText(), - 'date=' . MWTimestamp::getInstance()->timestamp->format( 'Y-m-d' ), - ) ); - - $newWikitext .= "\n\n{{{$templateName}|$arguments}}"; - $cleanupRevision = new ScriptedImportRevision( - $this, - $this->destinationScriptUser, - $newWikitext, - $lastRevision->getTimestamp() - ); - return $cleanupRevision; - } - - public function getObjectKey() { - return $this->source->getObjectKey( 'header_for', $this->title ); - } -} diff --git a/includes/Import/LiquidThreadsApi/PageRevisionedObject.php b/includes/Import/LiquidThreadsApi/PageRevisionedObject.php new file mode 100644 index 0000000..2b9b490 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/PageRevisionedObject.php @@ -0,0 +1,42 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportHeader; +use Flow\Import\IImportObject; +use Flow\Import\IImportPost; +use Flow\Import\IImportSummary; +use Flow\Import\IImportTopic; +use Flow\Import\ImportException; +use Flow\Import\IObjectRevision; +use Flow\Import\IRevisionableObject; +use Iterator; +use MWTimestamp; +use Title; +use User; + +abstract class PageRevisionedObject implements IRevisionableObject { + /** @var int **/ + protected $pageId; + + /** + * @var ImportSource + */ + protected $importSource; + + /** + * @param ImportSource $source + * @param int $pageId ID of the remote page + */ + function __construct( $source, $pageId ) { + $this->importSource = $source; + $this->pageId = $pageId; + } + + public function getRevisions() { + $pageData = $this->importSource->getPageData( $this->pageId ); + return new RevisionIterator( 
$pageData, $this ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/RemoteApiBackend.php b/includes/Import/LiquidThreadsApi/RemoteApiBackend.php new file mode 100644 index 0000000..e0756d6 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/RemoteApiBackend.php @@ -0,0 +1,66 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ApiBase; +use ApiMain; +use Exception; +use FauxRequest; +use Flow\Container; +use Flow\Import\ImportException; +use Flow\Import\IImportSource; +use Flow\Import\ApiNullResponseException; +use Http; +use RequestContext; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; +use UsageException; +use User; + +class RemoteApiBackend extends ApiBackend { + /** + * @param string + */ + protected $apiUrl; + + /** + * @param string|null + */ + protected $cacheDir; + + /** + * @param string $apiUrl + * @param string|null $cacheDir + */ + public function __construct( $apiUrl, $cacheDir = null ) { + parent::__construct(); + $this->apiUrl = $apiUrl; + $this->cacheDir = $cacheDir; + } + + public function getKey() { + return $this->apiUrl; + } + + public function apiCall( array $params, $retry = 1 ) { + $params['format'] = 'json'; + $url = wfAppendQuery( $this->apiUrl, $params ); + $file = $this->cacheDir . '/' . md5( $url ) . '.cache'; + $this->logger->debug( __METHOD__ . 
": $url" ); + if ( $this->cacheDir && file_exists( $file ) ) { + $result = file_get_contents( $file ); + } else { + do { + $result = Http::get( $url ); + } while ( $result === false && --$retry >= 0 ); + + if ( $this->cacheDir && file_put_contents( $file, $result ) === false ) { + $this->logger->warning( "Failed writing cached api result to $file" ); + } + } + + return json_decode( $result, true ); + } +} + diff --git a/includes/Import/LiquidThreadsApi/ReplyIterator.php b/includes/Import/LiquidThreadsApi/ReplyIterator.php new file mode 100644 index 0000000..5a23f10 --- /dev/null +++ b/includes/Import/LiquidThreadsApi/ReplyIterator.php @@ -0,0 +1,65 @@ +<?php + +namespace Flow\Import\LiquidThreadsApi; + +use ArrayIterator; +use Flow\Import\IImportObject; +use Iterator; + +class ReplyIterator implements Iterator { + /** @var ImportPost **/ + protected $post; + /** @var array Array of thread IDs **/ + protected $threadReplies; + /** @var int **/ + protected $replyIndex; + /** @var ImportPost|null */ + protected $current; + + public function __construct( ImportPost $post ) { + $this->post = $post; + $this->replyIndex = 0; + + $apiResponse = $post->getApiResponse(); + $this->threadReplies = array_values( $apiResponse['replies'] ); + } + + /** + * @return ImportPost|null + */ + public function current() { + return $this->current; + } + + /** + * @return integer + */ + public function key() { + return $this->replyIndex; + } + + public function next() { + while( ++$this->replyIndex < count( $this->threadReplies ) ) { + try { + $replyId = $this->threadReplies[$this->replyIndex]['id']; + $this->current = $this->post->getSource()->getPost( $replyId ); + return; + } catch ( ApiNotFoundException $e ) { + // while loop fall-through handles our error case + } + } + + // Nothing found, set current to null + $this->current = null; + } + + public function rewind() { + $this->replyIndex = -1; + $this->next(); + } + + public function valid() { + return $this->current !== null; + } +} 
+
diff --git a/includes/Import/LiquidThreadsApi/RevisionIterator.php b/includes/Import/LiquidThreadsApi/RevisionIterator.php
new file mode 100644
index 0000000..501077d
--- /dev/null
+++ b/includes/Import/LiquidThreadsApi/RevisionIterator.php
@@ -0,0 +1,79 @@
+<?php
+
+namespace Flow\Import\LiquidThreadsApi;
+
+use ArrayIterator;
+use Flow\Import\IImportObject;
+use Iterator;
+
+/**
+ * Iterates over the 'revisions' entry of a page data array, passing each entry
+ * through a factory callback to build the object returned by current().
+ */
+class RevisionIterator implements Iterator {
+	/** @var array Page data; revisions are read from its 'revisions' key **/
+	protected $pageData;
+
+	/** @var int Offset of the current revision within $pageData['revisions'] **/
+	protected $pointer;
+
+	/** @var IImportObject Passed to the factory as the parent of every revision **/
+	protected $parent;
+
+	/** @var callable Builds the value returned by current() **/
+	protected $factory;
+
+	/**
+	 * @param array $pageData Page data, optionally holding a 'revisions' list
+	 * @param IImportObject $parent Parent object handed to the factory
+	 * @param callable|null $factory Called with ( $data, $parent ), returns the
+	 *  revision object. Defaults to constructing an ImportRevision.
+	 */
+	public function __construct( array $pageData, IImportObject $parent, $factory = null ) {
+		$this->pageData = $pageData;
+		$this->pointer = 0;
+		$this->parent = $parent;
+		$this->factory = $factory ?: function( $data, $parent ) {
+			return new ImportRevision( $data, $parent );
+		};
+	}
+
+	/**
+	 * @return int Number of revisions in the page data, 0 when the key is absent
+	 */
+	protected function getRevisionCount() {
+		if ( isset( $this->pageData['revisions'] ) ) {
+			return count( $this->pageData['revisions'] );
+		} else {
+			return 0;
+		}
+	}
+
+	public function valid() {
+		return $this->pointer < $this->getRevisionCount();
+	}
+
+	public function next() {
+		++$this->pointer;
+	}
+
+	public function key() {
+		return $this->pointer;
+	}
+
+	public function rewind() {
+		$this->pointer = 0;
+	}
+
+	/**
+	 * @return mixed Result of the factory for the revision at the current offset
+	 */
+	public function current() {
+		return call_user_func(
+			$this->factory,
+			$this->pageData['revisions'][$this->pointer],
+			$this->parent
+		);
+	}
+}
+
diff --git a/includes/Import/LiquidThreadsApi/ScriptedImportRevision.php b/includes/Import/LiquidThreadsApi/ScriptedImportRevision.php
new file mode 100644
index 0000000..8cb9166
--- /dev/null
+++ b/includes/Import/LiquidThreadsApi/ScriptedImportRevision.php
@@ -0,0 +1,84 @@
+<?php
+
+namespace Flow\Import\LiquidThreadsApi;
+
+use ArrayIterator;
+use Flow\Import\IImportHeader;
+use Flow\Import\IImportObject;
+use Flow\Import\IImportPost;
+use Flow\Import\IImportSummary;
+use Flow\Import\IImportTopic;
+use Flow\Import\ImportException;
+use Flow\Import\IObjectRevision;
+use Flow\Import\IRevisionableObject;
+use Iterator;
+use MWTimestamp;
+use Title;
+use User;
+
+/**
+ * Revision whose text is supplied by the import script and whose author is
+ * the script user, timestamped at construction.
+ */
+class ScriptedImportRevision implements IObjectRevision {
+	/** @var IImportObject **/
+	protected $parentObject;
+
+	/** @var User */
+	protected $destinationScriptUser;
+
+	/** @var string */
+	protected $revisionText;
+
+	/** @var string */
+	protected $timestamp;
+
+	/**
+	 * Creates a ScriptedImportRevision with the current timestamp, given a script user
+	 * and arbitrary text.
+	 *
+	 * @param IImportObject $parentObject Object this is a revision of
+	 * @param User $destinationScriptUser User that performed this scripted edit
+	 * @param string $revisionText Text of revision
+	 */
+	public function __construct( IImportObject $parentObject, User $destinationScriptUser, $revisionText ) {
+		// Store on the declared property; getObjectKey() reads it back from there.
+		$this->parentObject = $parentObject;
+		$this->destinationScriptUser = $destinationScriptUser;
+		$this->revisionText = $revisionText;
+		$this->timestamp = wfTimestampNow();
+	}
+
+	/**
+	 * @return string Text passed to the constructor
+	 */
+	public function getText() {
+		return $this->revisionText;
+	}
+
+	/**
+	 * @return string Value of wfTimestampNow() at construction time
+	 */
+	public function getTimestamp() {
+		return $this->timestamp;
+	}
+
+	/**
+	 * @return string Name of the script user
+	 */
+	public function getAuthor() {
+		return $this->destinationScriptUser->getName();
+	}
+
+	/**
+	 * XXX: This is called but never used, but if it were, including getText and getAuthor in
+	 * the key might not be desirable, because we don't necessarily want to re-import
+	 * the revision when these change.
+	 *
+	 * @return string Parent object key suffixed with ':rev:scripted:' and an md5 of text and author
+	 */
+	public function getObjectKey() {
+		return $this->parentObject->getObjectKey() . ':rev:scripted:' . md5( $this->getText() . $this->getAuthor() );
+	}
+}
+
diff --git a/includes/Import/LiquidThreadsApi/Source.php b/includes/Import/LiquidThreadsApi/Source.php deleted file mode 100644 index dfe4264..0000000 --- a/includes/Import/LiquidThreadsApi/Source.php +++ /dev/null @@ -1,421 +0,0 @@ -<?php - -namespace Flow\Import\LiquidThreadsApi; - -use ApiBase; -use ApiMain; -use Exception; -use FauxRequest; -use Flow\Container; -use Flow\Import\ImportException; -use Flow\Import\IImportSource; -use Flow\Import\ApiNullResponseException; -use Http; -use RequestContext; -use Psr\Log\LoggerAwareInterface; -use Psr\Log\LoggerInterface; -use Psr\Log\NullLogger; -use UsageException; -use User; - -class ImportSource implements IImportSource { - // Thread types defined by LQT which are returned via api - const THREAD_TYPE_NORMAL = 0; - const THREAD_TYPE_MOVED = 1; - const THREAD_TYPE_DELETED = 2; - const THREAD_TYPE_HIDDEN = 4; - - /** - * @var ApiBackend - */ - protected $api; - - /** - * @var string - */ - protected $pageName; - - /** - * @var CachedThreadData - */ - protected $threadData; - - /** - * @var CachedPageData - */ - protected $pageData; - - /** - * @param ApiBackend $apiBackend - * @param string $pageName - */ - public function __construct( ApiBackend $apiBackend, $pageName ) { - $this->api = $apiBackend; - $this->pageName = $pageName; - - $this->threadData = new CachedThreadData( $this->api ); - $this->pageData = new CachedPageData( $this->api ); - } - - /** - * {@inheritDoc} - */ - public function getHeader() { - $controller = Container::get( 'occupation_controller' ); - - return new ImportHeader( $this->api, $this, $this->pageName, $controller->getTalkpageManager() ); - } - - /** - * {@inheritDoc} - */ - public function getTopics() { - return new TopicIterator( $this, $this->threadData, $this->pageName ); - } - - /** - * @param integer $id - * @return ImportTopic|null - */ - public function getTopic( $id ) { - $data = $this->threadData->get( $id ); - switch ( $data['type'] ) { - // Standard
thread - case self::THREAD_TYPE_NORMAL: - return new ImportTopic( $this, $data ); - - // The topic no longer exists at the queried location, but - // a stub was left behind pointing to it. This modified - // version of ImportTopic gracefully adjusts the #REDIRECT - // into a template to keep a similar output to lqt. - case self::THREAD_TYPE_MOVED: - return new MovedImportTopic( $this, $data ); - - // To get these back from the api we would have to send the `showdeleted` - // query param. As we are not requesting them, just ignore for now. - case self::THREAD_TYPE_DELETED: - return null; - - // Was assigned but never used by LQT. - case self::THREAD_TYPE_HIDDEN: - return null; - } - } - - /** - * @param integer $id - * @return ImportPost - */ - public function getPost( $id ) { - return new ImportPost( $this, $this->threadData->get( $id ) ); - } - - /** - * @param integer $id - * @return array - */ - public function getThreadData( $id ) { - if ( is_array( $id ) ) { - return $this->threadData->getMulti( $id ); - } else { - return $this->threadData->get( $id ); - } - } - - /** - * @param integer[]|integer $pageIds - * @return array - */ - public function getPageData( $pageIds ) { - if ( is_array( $pageIds ) ) { - return $this->pageData->getMulti( $pageIds ); - } else { - return $this->pageData->get( $pageIds ); - } - } - - /** - * @param string $pageName - * @param integer $startId - * @return array - */ - public function getFromPage( $pageName, $startId = 0 ) { - return $this->threadData->getFromPage( $pageName, $startId ); - } - - /** - * Gets a unique identifier for the wiki being imported - * @return string Usually either a string 'local' or an API URL - */ - public function getApiKey() { - return $this->api->getKey(); - } - - /** - * Returns a key uniquely representing an object determined by arguments. 
- * Parameters: Zero or more strings that uniquely represent the object - * for this ImportSource - * - * @return string Unique key - */ - public function getObjectKey( /* $args */ ) { - $components = array_merge( - array( 'lqt-api', $this->getApiKey() ), - func_get_args() - ); - - return implode( ':', $components ); - } -} - -abstract class ApiBackend implements LoggerAwareInterface { - - /** - * @var LoggerInterface - */ - protected $logger; - - public function __construct() { - $this->logger = new NullLogger; - } - - public function setLogger( LoggerInterface $logger ) { - $this->logger = $logger; - } - - /** - * Retrieves LiquidThreads data from the API - * - * @param array $conditions The parameters to pass to select the threads. Usually used in two ways: with thstartid/thpage, or with ththreadid - * @return array Data as returned under query.threads by the API - * @throws ApiNotFoundException Thrown when the remote api reports that the provided conditions - * have no matching records. - * @throws ImportException When an error is received from the remote api. This is often either - * a bad request or lqt threw an exception trying to respond to a valid request. - */ - public function retrieveThreadData( array $conditions ) { - $params = array( - 'action' => 'query', - 'list' => 'threads', - 'thprop' => 'id|subject|page|parent|ancestor|created|modified|author|summaryid|type|rootid|replies', - 'format' => 'json', - 'limit' => ApiBase::LIMIT_BIG1, - ); - $data = $this->apiCall( $params + $conditions ); - - if ( ! isset( $data['query']['threads'] ) ) { - if ( $this->isNotFoundError( $data ) ) { - $message = "Did not find thread with conditions: " . json_encode( $conditions ); - $this->logger->debug( __METHOD__ . ": $message" ); - throw new ApiNotFoundException( $message ); - } else { - $this->logger->error( __METHOD__ . ': Failed API call against ' . $this->getKey() . ' with conditions : ' . 
json_encode( $conditions ) ); - throw new ImportException( "Null response from API module:" . json_encode( $data ) ); - } - } - - $firstThread = reset( $data['query']['threads'] ); - if ( ! isset( $firstThread['replies'] ) ) { - throw new ImportException( "Foreign API does not support reply exporting:" . json_encode( $data ) ); - } - - return $data['query']['threads']; - } - - /** - * Retrieves data about a set of pages from the API - * - * @param array $pageIds Page IDs to return data for. - * @return array The query.pages part of the API response. - * @throws \MWException - */ - public function retrievePageDataById( array $pageIds ) { - if ( !$pageIds ) { - throw new \MWException( 'At least one page id must be provided' ); - } - - return $this->retrievePageData( array( - 'pageids' => implode( '|', $pageIds ), - ) ); - } - - /** - * Retrieves data about the latest revision of the titles - * from the API - * - * @param string[] $titles Titles to return data for - * @return array The query.pages prt of the API response. - * @throws \MWException - * @throws ImportException - */ - public function retrieveTopRevisionByTitle( array $titles ) { - if ( !$titles ) { - throw new \MWException( 'At least one title must be provided' ); - } - - return $this->retrievePageData( array( - 'titles' => implode( '|', $titles ), - 'rvlimit' => 1, - 'rvdir' => 'older', - ), true ); - } - - /** - * Retrieves data about a set of pages from the API - * - * @param array $conditions Conditions to retrieve pages by; to be sent to the API. - * @param bool $expectContinue Pass true here when caller expects more revisions to exist than - * they are requesting information about. - * @return array The query.pages part of the API response. - * @throws ApiNotFoundException Thrown when the remote api reports that the provided conditions - * have no matching records. - * @throws ImportException When an error is received from the remote api. 
This is often either - * a bad request or lqt threw an exception trying to respond to a valid request. - * @throws ImportException When more revisions are available than can be returned in a single - * query and the calling code does not set $expectContinue to true. - */ - public function retrievePageData( array $conditions, $expectContinue = false ) { - $conditions += array( - 'action' => 'query', - 'prop' => 'revisions', - 'rvprop' => 'timestamp|user|content|ids', - 'format' => 'json', - 'rvlimit' => 5000, - 'rvdir' => 'newer', - 'continue' => '', - 'limit' => ApiBase::LIMIT_BIG1, - ); - $data = $this->apiCall( $conditions ); - - if ( ! isset( $data['query'] ) ) { - $this->logger->error( __METHOD__ . ': Failed API call against ' . $this->getKey() . ' with conditions : ' . json_encode( $conditions ) ); - if ( $this->isNotFoundError( $data ) ) { - $message = "Did not find pages: " . json_encode( $conditions ); - $this->logger->debug( __METHOD__ . ": $message" ); - throw new ApiNotFoundException( $message ); - } else { - throw new ImportException( "Null response from API module: " . json_encode( $data ) ); - } - } elseif ( !$expectContinue && isset( $data['continue'] ) ) { - throw new ImportException( "More revisions than can be retrieved for conditions, import would be incomplete: " . json_encode( $conditions ) ); - } - - return $data['query']['pages']; - } - - /** - * Calls the remote API - * - * @param array $params The API request to send - * @param int $retry Retry the request on failure this many times - * @return array API return value, decoded from JSON into an array. - */ - abstract function apiCall( array $params, $retry = 1 ); - - /** - * @return string A unique identifier for this backend. - */ - abstract function getKey(); - - /** - * @param array $apiResponse - * @return bool - */ - protected function isNotFoundError( $apiResponse ) { - // LQT has some bugs where not finding the requested item in the database throws - // returns this exception. 
- $expect = 'Exception Caught: DatabaseBase::makeList: empty input for field thread_parent'; - return false !== strpos( $apiResponse['error']['info'], $expect ); - } -} - -class RemoteApiBackend extends ApiBackend { - /** - * @param string - */ - protected $apiUrl; - - /** - * @param string|null - */ - protected $cacheDir; - - /** - * @param string $apiUrl - * @param string|null $cacheDir - */ - public function __construct( $apiUrl, $cacheDir = null ) { - parent::__construct(); - $this->apiUrl = $apiUrl; - $this->cacheDir = $cacheDir; - } - - public function getKey() { - return $this->apiUrl; - } - - public function apiCall( array $params, $retry = 1 ) { - $params['format'] = 'json'; - $url = wfAppendQuery( $this->apiUrl, $params ); - $file = $this->cacheDir . '/' . md5( $url ) . '.cache'; - $this->logger->debug( __METHOD__ . ": $url" ); - if ( $this->cacheDir && file_exists( $file ) ) { - $result = file_get_contents( $file ); - } else { - do { - $result = Http::get( $url ); - } while ( $result === false && --$retry >= 0 ); - - if ( $this->cacheDir && file_put_contents( $file, $result ) === false ) { - $this->logger->warning( "Failed writing cached api result to $file" ); - } - } - - return json_decode( $result, true ); - } -} - -class LocalApiBackend extends ApiBackend { - /** - * @var User|null - */ - protected $user; - - public function __construct( User $user = null ) { - parent::__construct(); - $this->user = $user; - } - - public function getKey() { - return 'local'; - } - - public function apiCall( array $params, $retry = 1 ) { - try { - $context = new RequestContext; - $context->setRequest( new FauxRequest( $params ) ); - if ( $this->user ) { - $context->setUser( $this->user ); - } - - $api = new ApiMain( $context ); - $api->execute(); - if ( defined( 'ApiResult::META_CONTENT' ) ) { - return ApiResult::removeMetadata( $api->getResult()->getResultData() ); - } else { - return $api->getResult()->getData(); - } - } catch ( UsageException $exception ) { - // 
Mimic the behaviour when called remotely - return array( 'error' => $exception->getMessageArray() ); - } catch ( Exception $exception ) { - // Mimic behaviour when called remotely - return array( - 'error' => array( - 'code' => 'internal_api_error_' . get_class( $exception ), - 'info' => 'Exception Caught: ' . $exception->getMessage(), - ), - ); - } - } -}
diff --git a/includes/Import/LiquidThreadsApi/TopicIterator.php b/includes/Import/LiquidThreadsApi/TopicIterator.php
new file mode 100644
index 0000000..a647968
--- /dev/null
+++ b/includes/Import/LiquidThreadsApi/TopicIterator.php
@@ -0,0 +1,158 @@
+<?php
+
+namespace Flow\Import\LiquidThreadsApi;
+
+use ArrayIterator;
+use Flow\Import\IImportObject;
+use Iterator;
+
+/**
+ * Iterates over the importable topics of a page, asking CachedThreadData for
+ * further batches of threads once the already known topic ids are exhausted.
+ */
+class TopicIterator implements Iterator {
+	/**
+	 * @var ImportSource
+	 */
+	protected $importSource;
+
+	/**
+	 * @var CachedThreadData Access point for api data
+	 */
+	protected $threadData;
+
+	/**
+	 * @var integer|false|null Lqt id of the current topic, false if no current topic, null if unknown.
+	 */
+	protected $current = false;
+
+	/**
+	 * @var ImportTopic The current topic.
+	 */
+	protected $currentTopic = null;
+
+	/**
+	 * @var string Name of the remote page the topics exist on
+	 */
+	protected $pageName;
+
+	/**
+	 * @var Iterator A list of topic ids. Iterator used to simplify maintaining
+	 *  an explicit position within the list.
+	 */
+	protected $topicIdIterator;
+
+	/**
+	 * @var integer The maximum id received by self::loadMore, 0 before the first load
+	 */
+	protected $maxId = 0;
+
+	/**
+	 * @param ImportSource $source
+	 * @param CachedThreadData $threadData
+	 * @param string $pageName
+	 */
+	public function __construct( ImportSource $source, CachedThreadData $threadData, $pageName ) {
+		$this->importSource = $source;
+		$this->threadData = $threadData;
+		$this->pageName = $pageName;
+		$this->topicIdIterator = new ArrayIterator( $threadData->getTopics() );
+		$this->rewind();
+	}
+
+	/**
+	 * @return ImportTopic|null The current topic, null when there is no current topic
+	 */
+	public function current() {
+		if ( $this->current === false ) {
+			return null;
+		}
+		return $this->currentTopic;
+	}
+
+	/**
+	 * @return integer|false|null Lqt id of the current topic, false when there is none
+	 */
+	public function key() {
+		return $this->current;
+	}
+
+	/**
+	 * Advances to the next topic id for which ImportSource::getTopic() returns
+	 * a topic, loading more threads whenever the known ids run out.
+	 */
+	public function next() {
+		if ( !$this->valid() ) {
+			return;
+		}
+
+		$lastOffset = $this->key();
+		do {
+			while( $this->topicIdIterator->valid() ) {
+				$topicId = $this->topicIdIterator->current();
+				$this->topicIdIterator->next();
+
+				// this topic id has been seen before.
+				if ( $topicId <= $lastOffset ) {
+					continue;
+				}
+
+				// hidden and deleted threads come back as null
+				$topic = $this->importSource->getTopic( $topicId );
+				if ( $topic === null ) {
+					continue;
+				}
+
+				$this->current = $topicId;
+				$this->currentTopic = $topic;
+				return;
+			}
+		} while( $this->loadMore() );
+
+		// nothing found, nothing more to load
+		$this->current = false;
+	}
+
+	/**
+	 * Resets the position and advances to the first available topic.
+	 */
+	public function rewind() {
+		$this->current = null;
+		$this->topicIdIterator->rewind();
+		$this->next();
+	}
+
+	/**
+	 * @return bool
+	 */
+	public function valid() {
+		return $this->current !== false;
+	}
+
+	/**
+	 * @return bool True when more topics were loaded
+	 */
+	protected function loadMore() {
+		try {
+			// + 1 to not return the existing max topic
+			$output = $this->threadData->getFromPage( $this->pageName, $this->maxId + 1 );
+		} catch ( ApiNotFoundException $e ) {
+			// No more results, end loop
+			return false;
+		}
+
+		if ( !$output ) {
+			// An empty batch carries no new ids: max() has nothing to work on and
+			// retrying with the same offset would never make progress.
+			return false;
+		}
+
+		$this->maxId = max( array_keys( $output ) );
+		$this->topicIdIterator = new ArrayIterator( $this->threadData->getTopics() );
+		$this->topicIdIterator->rewind();
+
+		// Keep looping until we get a not found error
+		return true;
+	}
+}
+
-- To view, visit https://gerrit.wikimedia.org/r/192744 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iae5f6ecac74b4a37abf14fe32824b34c9c92b655 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Flow Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits