Santhosh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/200516

Change subject: Youdao MT client
......................................................................

Youdao MT client

Proof of concept. Translates with a valid key, but cannot do
annotation mapping.

Change-Id: Ie904d29f5cb698a70aeb946e237ad5e345d33485
---
M config.defaults.js
M index.js
M mt/MTClient.js
A mt/Youdao.js
M mt/index.js
A tests/mt/Youdao.test.js
6 files changed, 170 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver 
refs/changes/16/200516/4

diff --git a/config.defaults.js b/config.defaults.js
index ff5e2be..fb2330b 100644
--- a/config.defaults.js
+++ b/config.defaults.js
@@ -14,6 +14,8 @@
        'mt.apertium.api': 'http://apertium.wmflabs.org',
        'mt.yandex.api': 'https://translate.yandex.net',
        'mt.yandex.key': null,
+       'mt.youdao.api': 'https://fanyi.youdao.com/paidapi/fanyiapi',
+       'mt.youdao.key': null,
        // Use SSL?
        secure: false,
        // SSL key filename
@@ -22,8 +24,8 @@
        cert: null,
        // Service registry
        registry: {
-               source: [ 'af', 'an', 'ar', 'az', 'bg', 'bs', 'ca', 'cr', 'cy', 
'en', 'eo', 'es', 'fr', 'gl', 'gu', 'hi', 'hr', 'id', 'ja', 'kk', 'km', 'kn', 
'ky', 'kz', 'min', 'mk', 'ms', 'mt', 'nl', 'no', 'nn', 'oc', 'pa', 'pl', 'pt', 
'ru', 'sh', 'simple', 'sl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'xh', 'zh' ],
-               target: [ 'af', 'an', 'ar', 'az', 'bg', 'bs', 'ca', 'cr', 'cy', 
'eo', 'es', 'fr', 'gl', 'gu', 'hi', 'hr', 'id', 'ja', 'kk', 'km', 'kn', 'ky', 
'kz', 'min', 'mk', 'ms', 'mt', 'nl', 'no', 'nn', 'oc', 'pa', 'pl', 'pt', 'ru', 
'sh', 'simple', 'sl', 'tt', 'tr', 'uk', 'ur', 'uz', 'vi', 'xh', 'zh' ],
+               source: [ 'af', 'an', 'ar', 'az', 'bg', 'bs', 'ca', 'cr', 'cy', 
'en', 'eo', 'es', 'fr', 'gl', 'gu', 'hi', 'hr', 'id', 'ja', 'kk', 'km', 'kn', 
'ky', 'kz', 'min', 'mk', 'ms', 'mt', 'nl', 'no', 'nn', 'oc', 'pa', 'pl', 'pt', 
'ru', 'sh', 'simple', 'sl', 'tr', 'tt', 'uk', 'ur', 'uz', 'vi', 'xh', 'zh', 
'zh-Hans', 'zh-Hant' ],
+               target: [ 'af', 'an', 'ar', 'az', 'bg', 'bs', 'ca', 'cr', 'cy', 
'eo', 'es', 'fr', 'gl', 'gu', 'hi', 'hr', 'id', 'ja', 'kk', 'km', 'kn', 'ky', 
'kz', 'min', 'mk', 'ms', 'mt', 'nl', 'no', 'nn', 'oc', 'pa', 'pl', 'pt', 'ru', 
'sh', 'simple', 'sl', 'tt', 'tr', 'uk', 'ur', 'uz', 'vi', 'xh', 'zh', 
'zh-Hans', 'zh-Hant' ],
                mt: {
                        Apertium: {
                                af: [ 'nl' ],
@@ -62,6 +64,14 @@
                        Yandex: {
                                en: [ 'ru' ]
                        },
+                       Youdao: {
+                               en: [ 'zh', 'zh-Hans', 'ja' ],
+                               simple: [ 'zh', 'zh-Hans' ],
+                               ja: [ 'zh-Hans', 'zh' ],
+                               zh: [ 'en', 'ja' ],
+                               'zh-Hans': [ 'en', 'ja' ],
+                               'zh-Hant': [ 'en', 'ja' ]
+                       },
                        defaults: {
                                'en-ru': 'Yandex'
                        }
diff --git a/index.js b/index.js
index 4051b72..e27bbfa 100644
--- a/index.js
+++ b/index.js
@@ -2,6 +2,7 @@
        Segmenter: require( './segmentation/CXSegmenter.js' ).CXSegmenter,
        Apertium: require( './mt/Apertium.js' ),
        Yandex: require( './mt/Yandex.js' ),
+       Youdao: require( './mt/Youdao.js' ),
        MTClient: require( './mt/MTClient.js' ),
        LinearDoc: require( './lineardoc' ),
        Dictionary: require( './dictionary' )
diff --git a/mt/MTClient.js b/mt/MTClient.js
index 3477a93..53c0ef6 100644
--- a/mt/MTClient.js
+++ b/mt/MTClient.js
@@ -177,18 +177,23 @@
  */
 MTClient.prototype.translateLines = function ( sourceLang, targetLang, 
sourceLines ) {
        var sourceLinesText,
+               self = this,
                deferred = Q.defer();
 
        // Join lines into single string. Separator must break sentences and 
pass through unchanged
        // Using Devangari seperator Double Danda twice.
-       sourceLinesText = sourceLines.join( '.॥॥.' );
+       sourceLinesText = sourceLines.join( this.getSentenceDelimiter() );
 
        this.translateText(
                sourceLang,
                targetLang,
                sourceLinesText
        ).then( function ( targetLinesText ) {
-               var targetText = targetLinesText.split( /\.॥॥\./g );
+               var targetText;
+
+               targetText = Array.isArray( targetLinesText ) ?
+                       targetLinesText :
+                       targetLinesText.split( self.getSentenceDelimiter() );
                deferred.resolve( targetText );
        }, function ( error ) {
                logger.error( error.toString() );
@@ -197,6 +202,10 @@
        return deferred.promise;
 };
 
+MTClient.prototype.getSentenceDelimiter = function () {
+       return '.॥॥.';
+};
+
 /**
  * Create variants of the text, with a different annotation uppercased in each.
  * @param {string} lang Language code
diff --git a/mt/Youdao.js b/mt/Youdao.js
new file mode 100644
index 0000000..8b982dd
--- /dev/null
+++ b/mt/Youdao.js
@@ -0,0 +1,87 @@
+var languageCodes, Q = require( 'q' ),
+       util = require( 'util' ),
+       request = require( 'request' ),
+       conf = require( __dirname + '/../utils/Conf.js' ),
+       MTClient = require( './MTClient.js' );
+
+languageCodes = {
+       'en>zh': 'EN2ZH_CN', // English to Chinese Simplified
+       'en>ja': 'EN2JA', // English to Japanese
+       'simple>zh': 'EN2ZH_CN', // Simple English to Chinese Simplified
+       'en>zh-Hans': 'EN2ZH_CN', // English to Chinese Simplified
+       'ja>zh-Hans': 'JA2ZH_CN', // Japanese to Chinese Simplified
+       'ja>zh': 'JA2ZH_CN', // Japanese to Chinese Simplified
+       'ko>zh-Hans': 'KR2ZH_CN', // Korean to Chinese Simplified
+       'fr>zh-Hans': 'FR2ZH_CN', // French to Chinese Simplified
+       'ru>zh-Hans': 'RU2ZH_CN', // Russian to Chinese Simplified
+       'es>zh-Hans': 'SP2ZH_CN', // Spanish to Chinese Simplified
+       'zh-Hans>en': 'ZH_CN2EN', // Chinese Simplified to English
+       'zh-Hant>en': 'ZH_CN2EN', // Chinese Traditional to English
+       'zh>ja': 'ZH_CN2JA' // Chinese Simplified to Japanese
+};
+// TODO: There are some more languages. See Youdao API documentation
+
+function Youdao() {
+
+}
+
+util.inherits( Youdao, MTClient );
+
+/**
+ * Youdao expects multiple sentences delimited by a newline character
+ */
+Youdao.prototype.getSentenceDelimiter = function () {
+       return '\n';
+};
+
+/**
+ * Translate plain text with Youdao API
+ * Youdao is not capable of HTML translation with all annotation
+ * mapping. For translating HTML, it uses CX's annotation mapping on top
+ * of the plaintext translation. Hence it inherits the translateHTML method
+ * of MTClient.
+ * @param {string} sourceLang Source language code
+ * @param {string} targetLang Target language code
+ * @param {string} sourceText Source language text
+ * @return {Object} Deferred promise: Target language text
+ */
+Youdao.prototype.translateText = function ( sourceLang, targetLang, sourceText 
) {
+       var deferred = Q.defer(),
+               postData;
+
+       postData = {
+               url: conf( 'mt.youdao.api' ),
+               form: {
+                       key: conf( 'mt.youdao.key' ),
+                       doctype: 'json',
+                       type: 'data',
+                       q: sourceText,
+                       l: languageCodes[ sourceLang + '>' + targetLang ],
+                       transtype: 'translate'
+               }
+       };
+
+       // The Youdao paid API accepts POST requests; the open API accepts GET requests.
+       // url = postData.url + '?' + querystring.stringify( postData.form );
+       request.post( postData,
+               function ( error, response, body ) {
+                       var message;
+
+                       if ( error ) {
+                               deferred.reject( new Error( error ) );
+                               return;
+                       }
+                       if ( response.statusCode !== 200 ) {
+                               message = 'Error ' + response.statusCode;
+                               message += ' sourceText={' + sourceText + '}, 
body={' + body + '}';
+                               deferred.reject( new Error( message ) );
+                               return;
+                       }
+
+                       deferred.resolve( JSON.parse( body ).translation[ 0 ] );
+               }
+       );
+       return deferred.promise;
+};
+
+module.exports = Youdao;
diff --git a/mt/index.js b/mt/index.js
index acf5730..b6b20d6 100644
--- a/mt/index.js
+++ b/mt/index.js
@@ -1,4 +1,5 @@
 module.exports = {
        Apertium: require( './Apertium.js' ),
-       Yandex: require( './Yandex.js' )
+       Yandex: require( './Yandex.js' ),
+       Youdao: require( './Youdao.js' )
 };
diff --git a/tests/mt/Youdao.test.js b/tests/mt/Youdao.test.js
new file mode 100644
index 0000000..65bf988
--- /dev/null
+++ b/tests/mt/Youdao.test.js
@@ -0,0 +1,57 @@
+QUnit.module( 'Youdao' );
+
+var tests;
+
+tests = [
+       {
+               sourceLang: 'en',
+               targetLang: 'zh-Hans',
+               title: 'English to Chinese',
+               source: '<p>A <b>Japanese</b> <i>BBC</i> article</p>',
+               target: '<p>日本广播公司的一篇文章</p>',
+       },
+       {
+               sourceLang: 'zh-Hant',
+               targetLang: 'en',
+               title: 'Chinese Traditional to English',
+               source: '<p>我們寫字</p>',
+               target: '<p>We write</p>'
+    },
+       {
+               sourceLang: 'zh-Hans',
+               targetLang: 'en',
+               title: 'Chinese Simplified to English',
+               source: '<p>我们写字</p>',
+               target: '<p>We write</p>'
+    }
+];
+
+QUnit.test( 'Youdao wrapper tests', function ( assert ) {
+       QUnit.expect( tests.length );
+
+       function resumeTests( i ) {
+               var test,
+                       youdao = new CX.Youdao();
+
+               if ( i >= tests.length ) {
+                       return;
+               }
+               test = tests[ i ];
+
+               QUnit.stop();
+               youdao.translate( test.sourceLang, test.targetLang, test.source 
).then( function ( target ) {
+                       assert.strictEqual(
+                               target,
+                               test.target,
+                               test.title
+                       );
+                       QUnit.start();
+                       resumeTests( i + 1 );
+               }, function ( error ) {
+                       assert.ok( false, test.title + ': ' + error );
+                       QUnit.start();
+                       resumeTests( i + 1 );
+               } );
+       }
+       resumeTests( 0 );
+} );

-- 
To view, visit https://gerrit.wikimedia.org/r/200516
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie904d29f5cb698a70aeb946e237ad5e345d33485
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com>
Gerrit-Reviewer: Amire80 <amir.ahar...@mail.huji.ac.il>
Gerrit-Reviewer: Arrbee <run...@gmail.com>
Gerrit-Reviewer: Divec <da...@troi.org>
Gerrit-Reviewer: KartikMistry <kartik.mis...@gmail.com>
Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to