jenkins-bot has submitted this change and it was merged.

Change subject: Use Wikimedia REST API for accessing page data in Content Translation
......................................................................


Use Wikimedia REST API for accessing page data in Content Translation

Bug: T92359
Change-Id: I85f5bf4005075326791c87cdadbaeac07316e03c
---
M config.defaults.js
M pageloader/PageLoader.js
2 files changed, 34 insertions(+), 6 deletions(-)

Approvals:
  KartikMistry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/config.defaults.js b/config.defaults.js
index bb1b583..c7275b1 100644
--- a/config.defaults.js
+++ b/config.defaults.js
@@ -10,6 +10,7 @@
        allowCORS: '*',
        // Parsoid API URL
        'parsoid.api': 'http://parsoid-lb.eqiad.wikimedia.org',
+       'restbase.url': 'https://$lang.wikipedia.org/api/rest_v1/page/html/$title',
        // Apertium web API URL
        'mt.apertium.api': 'http://apertium.wmflabs.org',
        'mt.yandex.api': 'https://translate.yandex.net',
diff --git a/pageloader/PageLoader.js b/pageloader/PageLoader.js
index 49a9ac2..2f985ed 100644
--- a/pageloader/PageLoader.js
+++ b/pageloader/PageLoader.js
@@ -13,6 +13,26 @@
        conf = require( __dirname + '/../utils/Conf.js' );
 
 /**
+ * Cheap body extraction.
+ *
+ * This is safe as we know that the HTML we are receiving from Parsoid is
+ * serialized as XML.
+ * Restbase does not support body only retrieval of content.
+ * See https://phabricator.wikimedia.org/T95199
+ * @param {string} html
+ * @return {string} body of the html passed, wrapped in <body> tag.
+ */
+function cheapBodyInnerHTML( html ) {
+       var match = /<body[^>]*>([\s\S]*)<\/body>/.exec( html );
+
+       if ( !match ) {
+               throw new Error( 'No HTML body found!' );
+       } else {
+               return '<body>' + match[ 1 ] + '</body>';
+       }
+}
+
+/**
  * @class ParsoidPageLoader
  *
  * @param {string} page
@@ -28,9 +48,14 @@
        var url,
                deferred = Q.defer();
 
-       url = conf( 'parsoid.api' ) + '/' + this.sourceLanguage + 'wiki/' +
-               encodeURIComponent( this.page ) + '?body=1';
-
+       if ( conf( 'restbase.url' ) ) {
+               url = conf( 'restbase.url' )
+                       .replace( '$lang', this.sourceLanguage )
+                       .replace( '$title', encodeURIComponent( this.page ) );
+       } else {
+               url = conf( 'parsoid.api' ) + '/' + this.sourceLanguage + 'wiki/' +
+                       encodeURIComponent( this.page );
+       }
        request( url,
                function ( error, response, body ) {
                        if ( error ) {
@@ -41,10 +66,12 @@
                                deferred.reject( new Error( 'Error while fetching page: ' + body ) );
                                return;
                        }
-
                        deferred.resolve( {
-                               body: response.body,
-                               revision: response.headers[ 'content-revision-id' ]
+                               body: cheapBodyInnerHTML( response.body ),
+                               // Restbase returns revision ID in etag  header.
+                               // Example:
+                               //     ETag: "123456/c4e494da-ee8f-11e4-83a1-8b80de1cde5f"
+                               revision: response.headers.etag.split( '/' )[ 0 ].replace( '"', '' )
                        } );
                }
        );

-- 
To view, visit https://gerrit.wikimedia.org/r/207039
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I85f5bf4005075326791c87cdadbaeac07316e03c
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com>
Gerrit-Reviewer: Alexandros Kosiaris <akosia...@wikimedia.org>
Gerrit-Reviewer: KartikMistry <kartik.mis...@gmail.com>
Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com>
Gerrit-Reviewer: Santhosh <santhosh.thottin...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to