Nikerabbit has submitted this change and it was merged.

Change subject: Add ApiQueryContentTranslationCorpora
......................................................................


Add ApiQueryContentTranslationCorpora

To be used together with ApiQueryPublishedTranslations with meta
data for testing or incremental collection. Not intended for mass
usage, for which we will provide dumps in different formats.

Change-Id: I951187a95b031575caa759510a81cdce73a47570
---
A api/ApiQueryContentTranslationCorpora.php
M extension.json
M i18n/api/en.json
M i18n/api/qqq.json
A includes/CorporaLookup.php
5 files changed, 182 insertions(+), 2 deletions(-)

Approvals:
  Santhosh: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/api/ApiQueryContentTranslationCorpora.php 
b/api/ApiQueryContentTranslationCorpora.php
new file mode 100644
index 0000000..f05a4dc
--- /dev/null
+++ b/api/ApiQueryContentTranslationCorpora.php
@@ -0,0 +1,86 @@
+<?php
+/**
+ * Api module for querying Content Translation parallel corpora.
+ *
+ * @file
+ * @copyright See AUTHORS.txt
+ * @license GPL-2.0+
+ */
+
+use ContentTranslation\Database;
+use ContentTranslation\CorporaLookup;
+
+/**
+ * Api module for querying Content Translation parallel corpora.
+ *
+ * @ingroup API ContentTranslationAPI
+ */
+class ApiQueryContentTranslationCorpora extends ApiQueryBase {
+       protected $types = array(
+               CorporaLookup::TYPE_SOURCE,
+               CorporaLookup::TYPE_MT,
+               CorporaLookup::TYPE_USER,
+       );
+
+       public function execute() {
+               $params = $this->extractRequestParams();
+               $result = $this->getResult();
+
+               $db = Database::getConnection( DB_SLAVE );
+               $lookup = new CorporaLookup( $db );
+               $data = $lookup->getByTranslationId( $params['translationid'] );
+
+               $types = array_flip( $params['types'] );
+               $data = $this->filterTypes( $data, $types );
+
+               if ( $params['striphtml'] ) {
+                       $data = $this->stripHtml( $data );
+               }
+
+               $result->addValue( array( 'query', $this->getModuleName() ), 
'sections', $data );
+       }
+
+       protected function filterTypes( array $data, array $prop ) {
+               foreach ( $data as $id => $section ) {
+                       foreach ( $this->types as $type ) {
+                               if ( !isset( $prop[$type] ) ) {
+                                       unset( $data[$id][$type] );
+                               }
+                       }
+               }
+
+               return $data;
+       }
+
+       protected function stripHtml( array $data ) {
+               foreach ( $data as $id => $section ) {
+                       foreach ( $this->types as $type ) {
+                               if ( isset( $data[$id][$type] ) ) {
+                                       $data[$id][$type]['content'] = 
Sanitizer::stripAllTags( $data[$id][$type]['content'] );
+                               }
+                       }
+               }
+
+               return $data;
+       }
+
+       public function getAllowedParams() {
+               $params = array(
+                       'translationid' => array(
+                               ApiBase::PARAM_TYPE => 'integer',
+                               ApiBase::PARAM_REQUIRED => true,
+                       ),
+                       'striphtml' => array(
+                               ApiBase::PARAM_TYPE => 'boolean',
+                               ApiBase::PARAM_DFLT => false,
+                       ),
+                       'types' => array(
+                               ApiBase::PARAM_TYPE => array( 'source', 'mt', 
'user' ),
+                               ApiBase::PARAM_DFLT => 'source|mt|user',
+                               ApiBase::PARAM_ISMULTI => true,
+                       ),
+               );
+
+               return $params;
+       }
+}
diff --git a/extension.json b/extension.json
index 9b56517..8c42e66 100644
--- a/extension.json
+++ b/extension.json
@@ -46,9 +46,10 @@
        },
        "APIListModules": {
                "contenttranslation": "ApiQueryContentTranslation",
-               "contenttranslationsuggestions": 
"ApiQueryContentTranslationSuggestions",
-               "contenttranslationstats": "ApiQueryContentTranslationStats",
+               "contenttranslationcorpora": 
"ApiQueryContentTranslationCorpora",
                "contenttranslationlangtrend": 
"ApiQueryContentTranslationLanguageTrend",
+               "contenttranslationstats": "ApiQueryContentTranslationStats",
+               "contenttranslationsuggestions": 
"ApiQueryContentTranslationSuggestions",
                "cxpublishedtranslations": "ApiQueryPublishedTranslations"
        },
        "MessagesDirs": {
@@ -67,11 +68,13 @@
                "ApiContentTranslationSuggestionList": 
"api/ApiContentTranslationSuggestionList.php",
                "ApiContentTranslationToken": 
"api/ApiContentTranslationToken.php",
                "ApiQueryContentTranslation": 
"api/ApiQueryContentTranslation.php",
+               "ApiQueryContentTranslationCorpora": 
"api/ApiQueryContentTranslationCorpora.php",
                "ApiQueryContentTranslationSuggestions": 
"api/ApiQueryContentTranslationSuggestions.php",
                "ApiQueryContentTranslationLanguageTrend": 
"api/ApiQueryContentTranslationLanguageTrend.php",
                "ApiQueryContentTranslationStats": 
"api/ApiQueryContentTranslationStats.php",
                "ApiQueryPublishedTranslations": 
"api/ApiQueryPublishedTranslations.php",
                "ContentTranslationHooks": "ContentTranslation.hooks.php",
+               "ContentTranslation\\CorporaLookup": 
"includes/CorporaLookup.php",
                "ContentTranslation\\Database": "includes/Database.php",
                "ContentTranslation\\Draft": "includes/Draft.php",
                "ContentTranslation\\EchoNotificationPresentationModel": 
"includes/EchoNotificationPresentationModel.php",
diff --git a/i18n/api/en.json b/i18n/api/en.json
index 0ceab83..0723af9 100644
--- a/i18n/api/en.json
+++ b/i18n/api/en.json
@@ -33,6 +33,9 @@
        "apihelp-query+contenttranslation-example-1": "Get translations started 
by the current user.",
        "apihelp-query+contenttranslation-example-2": "Get translations draft 
by ID.",
        "apihelp-query+contenttranslation-example-3": "Find any translation for 
the given title between given language pair",
+       "apihelp-query+contenttranslationcorpora-description": "Get the section 
aligned parallel text for a given translation. See also 
<code>list=cxpublishedtranslations</code>. Dumps are provided in different 
formats for high volume access.",
+       "apihelp-query+contenttranslationcorpora-param-translationid": "ID of 
the translation.",
+       "apihelp-query+contenttranslationcorpora-param-striphtml": "Whether to 
strip all HTML tags to return plaintext.",
        "apihelp-query+contenttranslationstats-description": "Get Content 
Translation statistics.",
        "apihelp-query+contenttranslationstats-example-1": "Get Content 
Translation statistics for all languages.",
        "apihelp-cxconfiguration-description": "Fetch the Content Translation 
configuration json for the given language pair.",
diff --git a/i18n/api/qqq.json b/i18n/api/qqq.json
index 652cd76..ea605a6 100644
--- a/i18n/api/qqq.json
+++ b/i18n/api/qqq.json
@@ -28,6 +28,9 @@
        "apihelp-query+contenttranslation-example-1": 
"{{doc-apihelp-example|query+contenttranslation}}",
        "apihelp-query+contenttranslation-example-2": 
"{{doc-apihelp-example|query+contenttranslation}}",
        "apihelp-query+contenttranslation-example-3": 
"{{doc-apihelp-example|query+contenttranslation}}",
+       "apihelp-query+contenttranslationcorpora-description": 
"{{doc-apihelp-description|query+contenttranslationcorpora}}",
+       "apihelp-query+contenttranslationcorpora-param-translationid": 
"{{doc-apihelp-param|query+contenttranslationcorpora|translationid}}",
+       "apihelp-query+contenttranslationcorpora-param-striphtml": 
"{{doc-apihelp-param|query+contenttranslationcorpora|striphtml}}",
        "apihelp-query+contenttranslationstats-description": 
"{{doc-apihelp-description|query+contenttranslationstats}}",
        "apihelp-query+contenttranslationstats-example-1": 
"{{doc-apihelp-example|query+contenttranslationstats}}",
        "apihelp-cxconfiguration-description": 
"{{doc-apihelp-description|cxconfiguration}}",
diff --git a/includes/CorporaLookup.php b/includes/CorporaLookup.php
new file mode 100644
index 0000000..6767eaf
--- /dev/null
+++ b/includes/CorporaLookup.php
@@ -0,0 +1,85 @@
+<?php
+/**
+ * Lookup data from corpora table.
+ *
+ * @file
+ * @copyright See AUTHORS.txt
+ * @license GPL-2.0+
+ */
+
+namespace ContentTranslation;
+
+class CorporaLookup {
+       const TYPE_SOURCE = 'source';
+       const TYPE_MT = 'mt';
+       const TYPE_USER = 'user';
+
+       /**
+        * @var \IDatabase
+        */
+       protected $db;
+
+       public function __construct( \IDatabase $db ) {
+               $this->db = $db;
+       }
+
+       /**
+        * @param int $id Translation id
+        * @return array
+        */
+       public function getByTranslationId( $id ) {
+               $fields = array(
+                       'cxc_translation_id',
+                       'cxc_origin',
+                       'cxc_section_id',
+                       'cxc_timestamp',
+                       'cxc_sequence_id',
+                       'cxc_content',
+               );
+
+               $conds = array(
+                       'cxc_translation_id' => intval( $id ),
+               );
+
+               $res = $this->db->select( 'cx_corpora', $fields, $conds, 
__METHOD__ );
+
+               return self::format( $res );
+       }
+
+       protected static function format( \ResultWrapper $rows ) {
+               $sections = array();
+
+               foreach ( $rows as $row ) {
+                       // Here I am assuming sequence ids are unique and won't 
be re-used
+                       $id = $row->cxc_section_id;
+                       $type = self::isMT( $row->cxc_origin ) ? self::TYPE_MT 
: $row->cxc_origin;
+
+                       if ( !isset( $sections[$id] ) ) {
+                               $sections[$id] = array(
+                                       'sequenceid' => 
(int)$row->cxc_sequence_id,
+                                       self::TYPE_SOURCE => null,
+                                       self::TYPE_MT => null,
+                                       self::TYPE_USER => null,
+                               );
+                       }
+
+                       $blob = array(
+                               'engine' => $type === self::TYPE_MT ? 
$row->cxc_origin : null,
+                               'content' => $row->cxc_content,
+                               // TS_ISO_8601 was chosen because it includes 
explicit timezone
+                               'timestamp' => wfTimestamp( TS_ISO_8601, 
$row->cxc_timestamp ),
+                       );
+
+                       // In the future 'user' could be an array, but for now 
to keep it simple and consistent,
+                       // just allow one blob (the latest & final user version)
+                       $sections[$id][$type] = $blob;
+               }
+
+               return $sections;
+       }
+
+       protected static function isMT( $type ) {
+               return $type !== self::TYPE_SOURCE && $type !== self::TYPE_USER;
+       }
+
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/257287
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I951187a95b031575caa759510a81cdce73a47570
Gerrit-PatchSet: 9
Gerrit-Project: mediawiki/extensions/ContentTranslation
Gerrit-Branch: master
Gerrit-Owner: Nikerabbit <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: Siebrand <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to