Brion VIBBER has uploaded a new change for review. https://gerrit.wikimedia.org/r/314433
Change subject: Revert "Rewrite discovery of TimedText tracks" ...................................................................... Revert "Rewrite discovery of TimedText tracks" This reverts commit 1383299c828c479f537c6dfc060dce79c639e8c5. This reverts commit 518a6a9c3394a28245e956ada8c05d73ee17f374. Too flaky, had several errors in production. Needs more testing on the LB local remote case. Change-Id: Id801ff4ff2298121cc210ad8b59ec914b15f549b --- M TimedMediaHandler.php M handlers/TextHandler/TextHandler.php 2 files changed, 140 insertions(+), 164 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/TimedMediaHandler refs/changes/33/314433/1 diff --git a/TimedMediaHandler.php b/TimedMediaHandler.php index ada8f33..1d4faf1 100644 --- a/TimedMediaHandler.php +++ b/TimedMediaHandler.php @@ -290,6 +290,8 @@ $wgAutoloadClasses['WAVHandler'] = "$timedMediaDir/handlers/WAVHandler/WAVHandler.php"; // Text handler +$wgAutoloadClasses['ForeignApiQueryAllPages'] = + "$timedMediaDir/handlers/TextHandler/TextHandler.php"; $wgAutoloadClasses['TextHandler'] = "$timedMediaDir/handlers/TextHandler/TextHandler.php"; $wgAutoloadClasses['TimedTextPage'] = "$timedMediaDir/TimedTextPage.php"; diff --git a/handlers/TextHandler/TextHandler.php b/handlers/TextHandler/TextHandler.php index 3eb284e..605fbe2 100644 --- a/handlers/TextHandler/TextHandler.php +++ b/handlers/TextHandler/TextHandler.php @@ -8,10 +8,61 @@ * TODO On "new" timedtext language save purge all pages where file exists */ +/** + * Subclass ApiMain but query other db + */ +class ForeignApiQueryAllPages extends ApiQueryAllPages { + public function __construct( $mDb, $query, $moduleName ) { + global $wgTimedTextForeignNamespaces; + + $this->foreignDb = $mDb; + + $wikiID = $this->foreignDb->getWikiID(); + if ( isset( $wgTimedTextForeignNamespaces[ $wikiID ] ) ) { + $this->foreignNs = $wgTimedTextForeignNamespaces[ $wikiID ]; + } else { + $this->foreignNs = NS_TIMEDTEXT; + } + parent::__construct( $query, $moduleName, 'ap' ); + } + + protected function getDB() { + return $this->foreignDb; + } + + protected function parseMultiValue( $valueName, $value, $allowMultiple, $allowedValues ) { + // foreignnNs might not be defined localy, + // catch the undefined error here + if ( $valueName == 'apnamespace' + && $value == $this->foreignNs + && $allowMultiple == false + ) { + return $this->foreignNs; + } + return parent::parseMultiValue( $valueName, $value, $allowMultiple, $allowedValues ); + } + + /** + * An alternative to titleToKey() that doesn't trim trailing spaces + * + * + * @FIXME: I'M A BIG HACK + * + * @param string $titlePart Title part with spaces + * @return string Title part with underscores + */ + public function titlePartToKey( $titlePart, $defaultNamespace = NS_MAIN ) { + $t = Title::newFromText( $titlePart . 'x' ); + if ( !$t ) { + $this->dieUsageMsg( [ 'invalidtitle', $titlePart ] ); + } + return substr( $t->getPrefixedDBkey(), 0, -1 ); + } +} + class TextHandler { // lazy init remote Namespace number public $remoteNs = null; - public $remoteNsName = null; /** * @var File @@ -69,11 +120,8 @@ if ( isset( $data['query'] ) && isset( $data['query']['namespaces'] ) ) { // get the ~last~ timed text namespace defined foreach ( $data['query']['namespaces'] as $ns ) { - if ( isset( $ns['canonical'] ) && $ns['canonical'] === 'TimedText' ) { + if ( $ns['*'] == 'TimedText' ) { $this->remoteNs = $ns['id']; - $this->remoteNsName = $ns['*']; - wfDebug( "Discovered remoteNs: $this->remoteNs and name: $this->remoteNsName \n" ); - break; } } } @@ -83,59 +131,25 @@ } /** - * Retrieve a list of TimedText pages in the database that start with - * the name of the file associated with this handler. - * - * If the file is on a foreign repo, will query the ForeignDb - * - * @return ResultWrapper|bool - */ - function getTextPages() { - $ns = $this->getTimedTextNamespace(); - if ( $ns === false ) { - wfDebug( 'Repo: ' . $this->file->repo->getName() . " does not have a TimedText namespace \n" ); - // No timed text namespace, don't try to look up timed text tracks - return false; - } - $dbr = $this->file->getRepo()->getSlaveDB(); - $prefix = $this->file->getTitle()->getDBkey(); - return $dbr->select( - 'page', - [ 'page_namespace', 'page_title' ], - [ - 'page_namespace' => $ns, - 'page_title ' . $dbr->buildLike( $prefix, $dbr->anyString() ) - ], - __METHOD__, - [ - 'LIMIT' => 300, - 'ORDER BY' => 'page_title' - ] - ); - } - - /** - * Build the api query to find TimedText pages belonging to a remote file * @return array|bool */ - function getRemoteTextPagesQuery() { + function getTextPagesQuery() { $ns = $this->getTimedTextNamespace(); if ( $ns === false ) { - wfDebug( 'Repo: ' . $this->file->repo->getName() . " does not have a TimedText namespace \n" ); + wfDebug( "Repo: " . $this->file->repo->getName() . " does not have a TimedText namesapce \n" ); // No timed text namespace, don't try to look up timed text tracks return false; } return [ 'action' => 'query', - 'titles' => $this->file->getTitle()->getPrefixedDBkey(), - 'prop' => 'videoinfo', - 'viprop' => 'timedtext', - 'formatversion' => '2', + 'list' => 'allpages', + 'apnamespace' => $ns, + 'aplimit' => 300, + 'apprefix' => $this->file->getTitle()->getDBkey() ]; } /** - * Retrieve the text sources belonging to a remote file * @return array|mixed */ function getRemoteTextSources() { @@ -154,7 +168,7 @@ wfDebug( "miss\n" ); } wfDebug( "Get text tracks from remote api \n" ); - $query = $this->getRemoteTextPagesQuery(); + $query = $this->getTextPagesQuery(); // Error in getting timed text namespace return empty array; if ( $query === false ) { @@ -169,111 +183,99 @@ } /** - * Retrieve the text sources belonging to a foreign db accessible file - * @return array - */ - function getForeignDBTextSources() { - $data = $this->getTextPages(); - if ( $data !== false ) { - return $this->getTextTracksFromRows( $data ); - } - return []; - } - - /** - * Retrieve the text sources belonging to a local file * @return array */ function getLocalTextSources() { global $wgEnableLocalTimedText; if ( $wgEnableLocalTimedText ) { - $data = $this->getTextPages(); - if ( $data !== false ) { - return $this->getTextTracksFromRows( $data ); + // Init $this->textTracks + $params = new FauxRequest( $this->getTextPagesQuery() ); + $api = new ApiMain( $params ); + $api->execute(); + if ( defined( 'ApiResult::META_CONTENT' ) ) { + $data = $api->getResult()->getResultData( null, [ 'Strip' => 'all' ] ); + } else { + $data = $api->getResultData(); } + wfDebug( print_r( $data, true ) ); + // Get the list of language Names + return $this->getTextTracksFromData( $data ); + } else { + return []; } - return []; } /** - * Build an array of track information using a Database result - * Handles both local and foreign Db results - * - * @param ResultWrapper $data Database result with page titles - * @return array + * @return array|mixed */ - function getTextTracksFromRows( ResultWrapper $data ) { - $textTracks = []; - $providerName = $this->file->repo->getName(); - // commons is called shared in production. normalize it to wikimediacommons - if ( $providerName === 'shared' ) { - $providerName = 'wikimediacommons'; - } - // Provider name should be the same as the interwiki map + function getForeignDBTextSources() { + // Init $this->textTracks + $params = new FauxRequest( $this->getTextPagesQuery() ); + $api = new ApiMain( $params ); + $api->profileIn(); + $query = new ApiQuery( $api, 'foo', 'bar' ); + $query->profileIn(); + $module = new ForeignApiQueryAllPages( $this->file->getRepo()->getSlaveDB(), $query, 'allpages' ); + $module->profileIn(); + $module->execute(); + $module->profileOut(); + $query->profileOut(); + $api->profileOut(); - if ( !$this->file->isLocal() ) { - $namespaceName = $this->getForeignNamespaceName(); + if ( defined( 'ApiResult::META_CONTENT' ) ) { + $data = $module->getResult()->getResultData( null, [ 'Strip' => 'all' ] ); + } else { + $data = $module->getResultData(); } - $langNames = Language::fetchLanguageNames( null, 'mw' ); - - foreach ( $data as $row ) { - // Note, the namespace ID of this title might be 'unknown' - // to our configuration if this is called in ForeignDb situations - if ( $this->file->isLocal() ) { - $subTitle = Title::newFromRow( $row ); - } else { - $subTitle = new ForeignTitle( $row->page_namespace, $namespaceName, $row->page_title ); - } - $titleParts = explode( '.', $row->page_title ); - if ( count( $titleParts ) >= 3 ) { - $timedTextExtension = array_pop( $titleParts ); - $languageKey = array_pop( $titleParts ); - $contentType = $this->getContentType( $timedTextExtension ); - } else { - continue; - } - // If there is no valid language continue: - if ( !isset( $langNames[ $languageKey ] ) ) { - continue; - } - - $textTracks[] = [ - // @todo Should eventually add special entry point and output proper WebVTT format: - // http://www.whatwg.org/specs/web-apps/current-work/webvtt.html - 'src' => $this->getFullURL( $subTitle, $contentType ), - 'kind' => 'subtitles', - 'type' => $contentType, - 'title' => $this->getPrefixedDBkey( $subTitle ), - 'provider' => $providerName, - 'srclang' => $languageKey, - 'dir' => Language::factory( $languageKey )->getDir(), - 'label' => wfMessage( 'timedmedia-subtitle-language', - $langNames[ $languageKey ], - $languageKey )->text() - ]; - } - return $textTracks; + // Get the list of language Names + return $this->getTextTracksFromData( $data ); } /** - * Build an array of track information using an API result - * @param mixed $data JSON decoded result from a query API request + * @param $data * @return array */ function getTextTracksFromData( $data ) { $textTracks = []; - if ( $data !== null && $data['query'] && $data['query']['pages'] ) { - foreach ( $data['query']['pages'] as $page ) { - if ( $page['videoinfo'] ) { - foreach ( $page['videoinfo'] as $info ) { - if ( $info['timedtext'] ) { - foreach ( $info['timedtext'] as $track ) { - // Add validation ? - $textTracks[] = $track; - } - } - } + $providerName = $this->file->repo->getName(); + // commons is called shared in production. normalize it to wikimediacommons + if ( $providerName == 'shared' ) { + $providerName = 'wikimediacommons'; + } + // Provider name should be the same as the interwiki map + // @@todo more testing with this: + + $langNames = Language::fetchLanguageNames( null, 'mw' ); + if ( $data['query'] && $data['query']['allpages'] ) { + foreach ( $data['query']['allpages'] as $page ) { + $subTitle = Title::newFromText( $page['title'] ); + $tileParts = explode( '.', $page['title'] ); + if ( count( $tileParts ) >= 3 ) { + $timedTextExtension = array_pop( $tileParts ); + $languageKey = array_pop( $tileParts ); + $contentType = $this->getContentType( $timedTextExtension ); + } else { + continue; } + // If there is no valid language continue: + if ( !isset( $langNames[ $languageKey ] ) ) { + continue; + } + $namespacePrefix = "TimedText:"; + $textTracks[] = [ + // @todo Should eventually add special entry point and output proper WebVTT format: + // http://www.whatwg.org/specs/web-apps/current-work/webvtt.html + 'src' => $this->getFullURL( $page['title'], $contentType ), + 'kind' => 'subtitles', + 'type' => $contentType, + 'title' => $namespacePrefix . $subTitle->getDBkey(), + 'provider' => $providerName, + 'srclang' => $languageKey, + 'dir' => Language::factory( $languageKey )->getDir(), + 'label' => wfMessage( 'timedmedia-subtitle-language', + $langNames[ $languageKey ], + $languageKey )->text() + ]; } } return $textTracks; @@ -288,43 +290,16 @@ return ''; } - function getForeignNamespaceName() { - if ( $this->remoteNs !== null ) { - return $this->remoteNsName; - } - /* Else, we use the canonical namespace, since we can't look up the actual one */ - return strtr( MWNamespace::getCanonicalName( NS_TIMEDTEXT ), ' ', '_' ); - } - - /** - * Retrieve a namespace prefixed and underscored title - * @param Title|ForeignTitle $pageTitle - * @return string - */ - function getPrefixedDBkey( $pageTitle ) { - if ( $pageTitle instanceof Title ) { - return TitleFormatter::getPrefixedDBkey( $pageTitle ); - } elseif ( $pageTitle instanceof ForeignTitle ) { - return $pageTitle->getFullText(); - } - return null; - } - - /** - * Retrieve a url to the raw subtitle file - * Only use for local and foreignDb requests - * - * @param Title|ForeignTitle $pageTitle - * @return string - */ function getFullURL( $pageTitle, $contentType ) { - if ( $pageTitle instanceof Title ) { - return $pageTitle->getFullURL( [ + if ( $this->file->isLocal() ) { + $subTitle = Title::newFromText( $pageTitle ); + return $subTitle->getFullURL( [ 'action' => 'raw', 'ctype' => $contentType ] ); - } elseif ( $pageTitle instanceof ForeignTitle ) { - $query = 'title=' . wfUrlencode( $pageTitle->getFullText() ) . '&'; + // } elseif ( $this->file->repo instanceof ForeignDBViaLBRepo ) { + } else { + $query = 'title=' . wfUrlencode( $pageTitle ) . '&'; $query .= wfArrayToCgi( [ 'action' => 'raw', 'ctype' => $contentType @@ -332,6 +307,5 @@ // Note: This will return false if scriptDirUrl is not set for repo. return $this->file->repo->makeUrl( $query ); } - return null; } } -- To view, visit https://gerrit.wikimedia.org/r/314433 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id801ff4ff2298121cc210ad8b59ec914b15f549b Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/TimedMediaHandler Gerrit-Branch: wmf/1.28.0-wmf.21 Gerrit-Owner: Brion VIBBER <br...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits