jenkins-bot has submitted this change and it was merged. Change subject: Use Wikimedia REST API for accessing page data in Content Translation ......................................................................
Use Wikimedia REST API for accessing page data in Content Translation Bug: T92359 Change-Id: I85f5bf4005075326791c87cdadbaeac07316e03c --- M config.defaults.js M pageloader/PageLoader.js 2 files changed, 34 insertions(+), 6 deletions(-) Approvals: KartikMistry: Looks good to me, approved jenkins-bot: Verified diff --git a/config.defaults.js b/config.defaults.js index bb1b583..c7275b1 100644 --- a/config.defaults.js +++ b/config.defaults.js @@ -10,6 +10,7 @@ allowCORS: '*', // Parsoid API URL 'parsoid.api': 'http://parsoid-lb.eqiad.wikimedia.org', + 'restbase.url': 'https://$lang.wikipedia.org/api/rest_v1/page/html/$title', // Apertium web API URL 'mt.apertium.api': 'http://apertium.wmflabs.org', 'mt.yandex.api': 'https://translate.yandex.net', diff --git a/pageloader/PageLoader.js b/pageloader/PageLoader.js index 49a9ac2..2f985ed 100644 --- a/pageloader/PageLoader.js +++ b/pageloader/PageLoader.js @@ -13,6 +13,26 @@ conf = require( __dirname + '/../utils/Conf.js' ); /** + * Cheap body extraction. + * + * This is safe as we know that the HTML we are receiving from Parsoid is + * serialized as XML. + * Restbase does not support body only retrieval of content. + * See https://phabricator.wikimedia.org/T95199 + * @param {string} html + * @return {string} body of the html passed, wrapped in <body> tag. + */ +function cheapBodyInnerHTML( html ) { + var match = /<body[^>]*>([\s\S]*)<\/body>/.exec( html ); + + if ( !match ) { + throw new Error( 'No HTML body found!' ); + } else { + return '<body>' + match[ 1 ] + '</body>'; + } +} + +/** * @class ParsoidPageLoader * * @param {string} page @@ -28,9 +48,14 @@ var url, deferred = Q.defer(); - url = conf( 'parsoid.api' ) + '/' + this.sourceLanguage + 'wiki/' + - encodeURIComponent( this.page ) + '?body=1'; - + if ( conf( 'restbase.url' ) ) { + url = conf( 'restbase.url' ) + .replace( '$lang', this.sourceLanguage ) + .replace( '$title', encodeURIComponent( this.page ) ); + } else { + url = conf( 'parsoid.api' ) + '/' + this.sourceLanguage + 'wiki/' + + encodeURIComponent( this.page ); + } request( url, function ( error, response, body ) { if ( error ) { @@ -41,10 +66,12 @@ deferred.reject( new Error( 'Error while fetching page: ' + body ) ); return; } - deferred.resolve( { - body: response.body, - revision: response.headers[ 'content-revision-id' ] + body: cheapBodyInnerHTML( response.body ), + // Restbase returns revision ID in etag header. + // Example: + // ETag: "123456/c4e494da-ee8f-11e4-83a1-8b80de1cde5f" + revision: response.headers.etag.split( '/' )[ 0 ].replace( '"', '' ) } ); } ); -- To view, visit https://gerrit.wikimedia.org/r/207039 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I85f5bf4005075326791c87cdadbaeac07316e03c Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/services/cxserver Gerrit-Branch: master Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com> Gerrit-Reviewer: Alexandros Kosiaris <akosia...@wikimedia.org> Gerrit-Reviewer: KartikMistry <kartik.mis...@gmail.com> Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com> Gerrit-Reviewer: Santhosh <santhosh.thottin...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits