Santhosh has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/363804 )
Change subject: WIP: Adaptation in cxserver ...................................................................... WIP: Adaptation in cxserver Change-Id: I8f748fb290767cf3ca913448d66695a0dec00736 --- A adapt/Adapter.js A adapt/TranslationUnits/Image.js A adapt/TranslationUnits/Link.js A adapt/TranslationUnits/Reference.js A adapt/TranslationUnits/index.js M lineardoc/Doc.js M lineardoc/TextBlock.js 7 files changed, 231 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver refs/changes/04/363804/1 diff --git a/adapt/Adapter.js b/adapt/Adapter.js new file mode 100644 index 0000000..d71b5dc --- /dev/null +++ b/adapt/Adapter.js @@ -0,0 +1,53 @@ +'use strict'; + +var LinearDoc = require( '../lineardoc' ); + +function Adapter( content, sourceLanguage, targetLanguage ) { + this.parser = new LinearDoc.Parser(); + this.parser.init(); + this.sourceLanguage = sourceLanguage; + this.targetLanguage = targetLanguage; + this.content = content; + this.originalDoc = null; + this.adaptedDoc = null; +} + +Adapter.prototype.adapt = function () { + this.parser.write( this.content ); + this.originalDoc = this.parser.builder.doc; + this.adaptedDoc = this.originalDoc.adapt( this.getAdapter.bind( this ) ); + return this.adaptedDoc; +}; + +/** + * Get the adapter for the given tag(translation unit). 
+ */ +Adapter.prototype.getAdapter = function ( element ) { + var name, match = false, translationUnit, translationUnits, context; + translationUnits = require( __dirname + '/TranslationUnits/' ); + for ( name in translationUnits ) { + translationUnit = translationUnits[ name ]; + if ( translationUnit.matchTagNames ) { + match = translationUnit.matchTagNames.includes( element.name ); + } + if ( translationUnit.matchRdfaTypes ) { + match = translationUnit.matchRdfaTypes.includes( element.attributes.rel ) || translationUnit.matchRdfaTypes.includes( element.attributes.typeof ); + } + if ( match ) { + break; + } + } + if ( !match ) { + console.log( 'No adapter for' ); + console.dir( element ); + return null; + } + context = { + sourceLanguage: this.sourceLanguage, + targetLanguage: this.targetLanguage + }; + console.log( 'Adapting: ' + translationUnit.name ); + return new translationUnits[ name ]( element, context ); +}; + +module.exports.Adapter = Adapter; diff --git a/adapt/TranslationUnits/Image.js b/adapt/TranslationUnits/Image.js new file mode 100644 index 0000000..1715ee3 --- /dev/null +++ b/adapt/TranslationUnits/Image.js @@ -0,0 +1,78 @@ +function MWImage( node, context ) { + this.node = node; + this.context = context; +} + +MWImage.name = 'image'; +MWImage.matchTagNames = [ 'figure' ]; +MWImage.matchRdfaTypes = [ 'mw:Image/Thumb' ]; + +MWImage.prototype.adapt = function() { + var imageSource; + + this.node.attributes[ 'adapted' ] = 'true'; + console.dir( this.node ); + /* + this.sourceImage = this.sourceDocument.getElementsByTagName( 'img' )[ 0 ]; + this.sourceResource = this.sourceImage.attributes[ 'resource' ); + imageSource = this.sourceImage.getAttribute( 'src' ); + if ( !this.isCommonsImage( imageSource ) ) { + // Create an empty paragraph + this.targetDocument = document.createElement( 'p' ); + this.setTargetId( this.targetDocument ); + mw.log( '[CX] Could not adapt non Commons image ' + imageSource ); + this.emit( 'adapt', this.targetDocument, 
this.MTProvider ); + return; + } + + mw.log( '[CX] Adapting image ' + imageSource );*/ + return this.node; +}; + +/** + * Adapt the image namespace to target language. + * @return {Promise} Result of the getNamespaceAlias().then() chain that sets the target image's resource attribute + */ +MWImage.prototype.adaptNamespace = function () { + return this.requestManager.getNamespaceAlias( + this.targetLanguage, + 'File' + ).then( function ( namespaceAlias ) { + this.targetResource = this.sourceResource.replace( + /(\.\/)*(.+)(:)/g, + '$1' + namespaceAlias + '$3' + ); + this.targetImage.setAttribute( 'resource', this.targetResource ); + }.bind( this ) ); +}; + +/** + * Check if an image is coming from Commons or not. Uses the URL pattern of the common file + * repository to determine whether the image is stored there. + * @static + * @param {string} imageSrc + * @return {boolean} + */ +MWImage.isCommonsImage = function ( imageSrc ) { + return imageSrc.indexOf( '//upload.wikimedia.org/wikipedia/commons/' ) === 0; +}; + +/** + * Adapt the image's alignment settings for the target language. + * + * @param {Element} targetFigure + */ +MWImage.prototype.adaptImageAlignment = function ( targetFigure ) { + if ( $.uls.data.getDir( this.sourceLanguage ) === $.uls.data.getDir( this.targetLanguage ) ) { + // If the target language's direction is the same, there's nothing to do + return; + } + + // If the image has an explicit alignment class in HTML, + // this means that it has explicit alignment defined in wiki syntax. + // It must be explicitly flipped if the target language's direction is different. 
+ targetFigure.classList.toggle( 'mw-halign-left' ); + targetFigure.classList.toggle( 'mw-halign-right' ); +}; + +module.exports = MWImage; diff --git a/adapt/TranslationUnits/Link.js b/adapt/TranslationUnits/Link.js new file mode 100644 index 0000000..d73092a --- /dev/null +++ b/adapt/TranslationUnits/Link.js @@ -0,0 +1,15 @@ +function MWLink( node, context ) { + this.node = node; + this.context = context; +} + +MWLink.name = 'link'; +MWLink.matchTagNames = [ 'a' ]; +MWLink.matchRdfaTypes = [ 'mw:WikiLink' ]; + +MWLink.prototype.adapt = function() { + this.node.attributes[ 'adapted' ] = 'true'; + return this.node; +}; + +module.exports = MWLink; diff --git a/adapt/TranslationUnits/Reference.js b/adapt/TranslationUnits/Reference.js new file mode 100644 index 0000000..33e5c65 --- /dev/null +++ b/adapt/TranslationUnits/Reference.js @@ -0,0 +1,15 @@ +function MWReference( node, context ) { + this.node = node; + this.context = context; +} + +MWReference.name = 'reference'; +MWReference.matchTagNames = [ 'span' ]; +MWReference.matchRdfaTypes = [ 'dc:references', 'mw:Extension/ref' ]; + +MWReference.prototype.adapt = function() { + this.node.attributes[ 'adapted' ] = 'true'; + return this.node; +}; + +module.exports = MWReference; diff --git a/adapt/TranslationUnits/index.js b/adapt/TranslationUnits/index.js new file mode 100644 index 0000000..6a64596 --- /dev/null +++ b/adapt/TranslationUnits/index.js @@ -0,0 +1,5 @@ +module.exports = { + Link: require( './Link.js' ), + Image: require( './Image.js' ), + Reference: require( './Reference.js' ) +}; diff --git a/lineardoc/Doc.js b/lineardoc/Doc.js index a5d1661..cc51ff9 100644 --- a/lineardoc/Doc.js +++ b/lineardoc/Doc.js @@ -131,6 +131,50 @@ }; /** + * Adapt the document by applying per-tag adapters to its tags + * + * @method + * @param {Function} getAdapter Function taking a tag, returning an adapter for it (or null) + * @return {Doc} Adapted version of document TODO: warning: *shallow copied*. 
+ */ +Doc.prototype.adapt = function ( getAdapter ) { + var i, len, item, tag, textBlock, adapter, + newDoc = new Doc(); + + if ( this.wrapperTag ) { + adapter = getAdapter( this.wrapperTag ); + if ( adapter ) { + newDoc = new Doc( adapter.adapt() ); + } + } + for ( i = 0, len = this.items.length; i < len; i++ ) { + item = this.items[ i ]; + if ( this.items[ i ].type === 'open' ) { + tag = Utils.cloneOpenTag( item.item ); + if ( i + 1 < len && this.items[ i + 1 ].type === 'textblock' ) { + tag.children = this.items[ i + 1 ].item; + } + // TODO: Make async + adapter = getAdapter( tag ); + if ( adapter ) { + newDoc.addItem( item.type, adapter.adapt() ); + } else { + newDoc.addItem( item.type, tag ); + } + } else if ( this.items[ i ].type !== 'textblock' ) { + newDoc.addItem( item.type, item.item ); + } else { + textBlock = item.item; + newDoc.addItem( + 'textblock', + textBlock.adapt( getAdapter ) + ); + } + } + return newDoc; +}; + +/** * Dump an XML version of the linear representation, for debugging * * @method diff --git a/lineardoc/TextBlock.js b/lineardoc/TextBlock.js index 0845095..1b77970 100644 --- a/lineardoc/TextBlock.js +++ b/lineardoc/TextBlock.js @@ -412,4 +412,25 @@ return dump; }; +TextBlock.prototype.adapt = function ( getAdapter ) { + var i, j, chunk, tags, len, adapter; + for ( i = 0, len = this.textChunks.length; i < len; i++ ) { + chunk = this.textChunks[ i ]; + tags = chunk.tags; + for ( j = 0; j < tags.length; j++ ) { + adapter = getAdapter( tags[ j ] ); + if ( adapter ) { + tags[ j ] = adapter.adapt(); + } + } + if ( chunk.inlineContent ) { + if ( chunk.inlineContent.adapt ) { + // sub-doc: adapt recursively + chunk.inlineContent = chunk.inlineContent.adapt( getAdapter ); + } + } + } + return this; +}; + module.exports = TextBlock; -- To view, visit https://gerrit.wikimedia.org/r/363804 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8f748fb290767cf3ca913448d66695a0dec00736 
Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/cxserver Gerrit-Branch: master Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits