Jsahleen has uploaded a new change for review.
https://gerrit.wikimedia.org/r/160059
Change subject: Publishing: Get revision id from Parsoid content-revision-id
header
......................................................................
Publishing: Get revision id from Parsoid content-revision-id header
* Modifies PageLoader::getRevision to get the revision id from the Parsoid
content-revision-id header if present, falling back to extracting it from
the about attribute on the opening html tag of response.body.
* Modifies PageLoader::load so it returns the whole response, not just the body.
* Modifies the callback function in the page load route in ContentTranslationService.js
so it handles the entire response and passes only the body to segmentation.
Bug: 70755
Change-Id: I141fb5ddf527320925e985af53cca29acd22a40b
---
M ContentTranslationService.js
M pageloader/PageLoader.js
2 files changed, 18 insertions(+), 11 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver
refs/changes/59/160059/1
diff --git a/ContentTranslationService.js b/ContentTranslationService.js
index 15af905..abc454e 100644
--- a/ContentTranslationService.js
+++ b/ContentTranslationService.js
@@ -55,12 +55,12 @@
pageloader = new PageLoader( title, sourceLanguage );
pageloader.load().then(
- function ( data ) {
+ function ( response ) {
var segmenter, segmentedContent, revision;
try {
logger.debug( 'Page fetched' );
- revision = pageloader.getRevision( data );
- segmenter = new CXSegmenter( data,
sourceLanguage );
+ revision = pageloader.getRevision( response );
+ segmenter = new CXSegmenter( response.body,
sourceLanguage );
segmenter.segment();
segmentedContent =
segmenter.getSegmentedContent();
} catch ( error ) {
diff --git a/pageloader/PageLoader.js b/pageloader/PageLoader.js
index 849a47f..30c38d7 100644
--- a/pageloader/PageLoader.js
+++ b/pageloader/PageLoader.js
@@ -39,7 +39,7 @@
return;
}
- deferred.resolve( body );
+ deferred.resolve( response );
}
);
@@ -47,16 +47,23 @@
};
/**
- * Gets article revision number from Parsoid html
- * @param {string} html The html returned from Parsoid
+ * Gets article revision id from Parsoid content-revision-id header.
+ * If header does not exist, extracts revision id from html about attr.
+ * @param {json} response The response returned from Parsoid
* @return {integer} the revison id
*/
-PageLoader.prototype.getRevision = function ( data ) {
- var snippet, revision;
+PageLoader.prototype.getRevision = function ( response ) {
+ var snippet, revision, body;
- // Cut down data string to make regex more efficient
- snippet = data.substr( data.indexOf( '<html' ), data.indexOf( '<head' )
);
- revision = snippet.match( /about=".*\/revision\/(.*?)">/ )[ 1 ];
+ if( response.headers[ 'content-revision-id' ] ) {
+ revision = response.headers[ 'content-revision-id' ];
+ } else {
+ body = response.body;
+ // Cut body string down to just the html opening tag
+ // to make the regex more efficient
+ snippet = body.substr( body.indexOf( '<html' ), body.indexOf(
'<head' ) );
+ revision = snippet.match( /about=".*\/revision\/(.*?)">/ )[ 1 ];
+ }
return parseInt( revision );
};
--
To view, visit https://gerrit.wikimedia.org/r/160059
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I141fb5ddf527320925e985af53cca29acd22a40b
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Jsahleen <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits