Nikerabbit has submitted this change and it was merged.
Change subject: Add ApiQueryContentTranslationCorpora
......................................................................
Add ApiQueryContentTranslationCorpora
To be used together with ApiQueryPublishedTranslations with meta
data for testing or incremental collection. Not intended for mass
usage, for which we will provide dumps in different formats.
Change-Id: I951187a95b031575caa759510a81cdce73a47570
---
A api/ApiQueryContentTranslationCorpora.php
M extension.json
M i18n/api/en.json
M i18n/api/qqq.json
A includes/CorporaLookup.php
5 files changed, 182 insertions(+), 2 deletions(-)
Approvals:
Santhosh: Looks good to me, approved
jenkins-bot: Verified
diff --git a/api/ApiQueryContentTranslationCorpora.php
b/api/ApiQueryContentTranslationCorpora.php
new file mode 100644
index 0000000..f05a4dc
--- /dev/null
+++ b/api/ApiQueryContentTranslationCorpora.php
@@ -0,0 +1,86 @@
+<?php
+/**
+ * Api module for querying Content Translation parallel corpora.
+ *
+ * @file
+ * @copyright See AUTHORS.txt
+ * @license GPL-2.0+
+ */
+
+use ContentTranslation\Database;
+use ContentTranslation\CorporaLookup;
+
+/**
+ * Api module for querying Content Translation parallel corpora.
+ *
+ * @ingroup API ContentTranslationAPI
+ */
+class ApiQueryContentTranslationCorpora extends ApiQueryBase {
+ protected $types = array(
+ CorporaLookup::TYPE_SOURCE,
+ CorporaLookup::TYPE_MT,
+ CorporaLookup::TYPE_USER,
+ );
+
+ public function execute() {
+ $params = $this->extractRequestParams();
+ $result = $this->getResult();
+
+ $db = Database::getConnection( DB_SLAVE );
+ $lookup = new CorporaLookup( $db );
+ $data = $lookup->getByTranslationId( $params['translationid'] );
+
+ $types = array_flip( $params['types'] );
+ $data = $this->filterTypes( $data, $types );
+
+ if ( $params['striphtml'] ) {
+ $data = $this->stripHtml( $data );
+ }
+
+ $result->addValue( array( 'query', $this->getModuleName() ),
'sections', $data );
+ }
+
+ protected function filterTypes( array $data, array $prop ) {
+ foreach ( $data as $id => $section ) {
+ foreach ( $this->types as $type ) {
+ if ( !isset( $prop[$type] ) ) {
+ unset( $data[$id][$type] );
+ }
+ }
+ }
+
+ return $data;
+ }
+
+ protected function stripHtml( array $data ) {
+ foreach ( $data as $id => $section ) {
+ foreach ( $this->types as $type ) {
+ if ( isset( $data[$id][$type] ) ) {
+ $data[$id][$type]['content'] =
Sanitizer::stripAllTags( $data[$id][$type]['content'] );
+ }
+ }
+ }
+
+ return $data;
+ }
+
+ public function getAllowedParams() {
+ $params = array(
+ 'translationid' => array(
+ ApiBase::PARAM_TYPE => 'integer',
+ ApiBase::PARAM_REQUIRED => true,
+ ),
+ 'striphtml' => array(
+ ApiBase::PARAM_TYPE => 'boolean',
+ ApiBase::PARAM_DFLT => false,
+ ),
+ 'types' => array(
+ ApiBase::PARAM_TYPE => array( 'source', 'mt',
'user' ),
+ ApiBase::PARAM_DFLT => 'source|mt|user',
+ ApiBase::PARAM_ISMULTI => true,
+ ),
+ );
+
+ return $params;
+ }
+}
diff --git a/extension.json b/extension.json
index 9b56517..8c42e66 100644
--- a/extension.json
+++ b/extension.json
@@ -46,9 +46,10 @@
},
"APIListModules": {
"contenttranslation": "ApiQueryContentTranslation",
- "contenttranslationsuggestions":
"ApiQueryContentTranslationSuggestions",
- "contenttranslationstats": "ApiQueryContentTranslationStats",
+ "contenttranslationcorpora":
"ApiQueryContentTranslationCorpora",
"contenttranslationlangtrend":
"ApiQueryContentTranslationLanguageTrend",
+ "contenttranslationstats": "ApiQueryContentTranslationStats",
+ "contenttranslationsuggestions":
"ApiQueryContentTranslationSuggestions",
"cxpublishedtranslations": "ApiQueryPublishedTranslations"
},
"MessagesDirs": {
@@ -67,11 +68,13 @@
"ApiContentTranslationSuggestionList":
"api/ApiContentTranslationSuggestionList.php",
"ApiContentTranslationToken":
"api/ApiContentTranslationToken.php",
"ApiQueryContentTranslation":
"api/ApiQueryContentTranslation.php",
+ "ApiQueryContentTranslationCorpora":
"api/ApiQueryContentTranslationCorpora.php",
"ApiQueryContentTranslationSuggestions":
"api/ApiQueryContentTranslationSuggestions.php",
"ApiQueryContentTranslationLanguageTrend":
"api/ApiQueryContentTranslationLanguageTrend.php",
"ApiQueryContentTranslationStats":
"api/ApiQueryContentTranslationStats.php",
"ApiQueryPublishedTranslations":
"api/ApiQueryPublishedTranslations.php",
"ContentTranslationHooks": "ContentTranslation.hooks.php",
+ "ContentTranslation\\CorporaLookup":
"includes/CorporaLookup.php",
"ContentTranslation\\Database": "includes/Database.php",
"ContentTranslation\\Draft": "includes/Draft.php",
"ContentTranslation\\EchoNotificationPresentationModel":
"includes/EchoNotificationPresentationModel.php",
diff --git a/i18n/api/en.json b/i18n/api/en.json
index 0ceab83..0723af9 100644
--- a/i18n/api/en.json
+++ b/i18n/api/en.json
@@ -33,6 +33,9 @@
"apihelp-query+contenttranslation-example-1": "Get translations started
by the current user.",
"apihelp-query+contenttranslation-example-2": "Get translations draft
by ID.",
"apihelp-query+contenttranslation-example-3": "Find any translation for
the given title between given language pair",
+ "apihelp-query+contenttranslationcorpora-description": "Get the
section-aligned parallel text for a given translation. See also
<code>list=cxpublishedtranslations</code>. Dumps are provided in different
formats for high volume access.",
+ "apihelp-query+contenttranslationcorpora-param-translationid": "ID of
the translation.",
+ "apihelp-query+contenttranslationcorpora-param-striphtml": "Whether to
strip all HTML tags to return plaintext.",
"apihelp-query+contenttranslationstats-description": "Get Content
Translation statistics.",
"apihelp-query+contenttranslationstats-example-1": "Get Content
Translation statistics for all languages.",
"apihelp-cxconfiguration-description": "Fetch the Content Translation
configuration json for the given language pair.",
diff --git a/i18n/api/qqq.json b/i18n/api/qqq.json
index 652cd76..ea605a6 100644
--- a/i18n/api/qqq.json
+++ b/i18n/api/qqq.json
@@ -28,6 +28,9 @@
"apihelp-query+contenttranslation-example-1":
"{{doc-apihelp-example|query+contenttranslation}}",
"apihelp-query+contenttranslation-example-2":
"{{doc-apihelp-example|query+contenttranslation}}",
"apihelp-query+contenttranslation-example-3":
"{{doc-apihelp-example|query+contenttranslation}}",
+ "apihelp-query+contenttranslationcorpora-description":
"{{doc-apihelp-description|query+contenttranslationcorpora}}",
+ "apihelp-query+contenttranslationcorpora-param-translationid":
"{{doc-apihelp-param|query+contenttranslationcorpora|translationid}}",
+ "apihelp-query+contenttranslationcorpora-param-striphtml":
"{{doc-apihelp-param|query+contenttranslationcorpora|striphtml}}",
"apihelp-query+contenttranslationstats-description":
"{{doc-apihelp-description|query+contenttranslationstats}}",
"apihelp-query+contenttranslationstats-example-1":
"{{doc-apihelp-example|query+contenttranslationstats}}",
"apihelp-cxconfiguration-description":
"{{doc-apihelp-description|cxconfiguration}}",
diff --git a/includes/CorporaLookup.php b/includes/CorporaLookup.php
new file mode 100644
index 0000000..6767eaf
--- /dev/null
+++ b/includes/CorporaLookup.php
@@ -0,0 +1,85 @@
+<?php
+/**
+ * Lookup data from corpora table.
+ *
+ * @file
+ * @copyright See AUTHORS.txt
+ * @license GPL-2.0+
+ */
+
+namespace ContentTranslation;
+
+class CorporaLookup {
+ const TYPE_SOURCE = 'source';
+ const TYPE_MT = 'mt';
+ const TYPE_USER = 'user';
+
+ /**
+ * @var \IDatabase
+ */
+ protected $db;
+
+ public function __construct( \IDatabase $db ) {
+ $this->db = $db;
+ }
+
+ /**
+ * @param int $id Translation id
+ * @return array
+ */
+ public function getByTranslationId( $id ) {
+ $fields = array(
+ 'cxc_translation_id',
+ 'cxc_origin',
+ 'cxc_section_id',
+ 'cxc_timestamp',
+ 'cxc_sequence_id',
+ 'cxc_content',
+ );
+
+ $conds = array(
+ 'cxc_translation_id' => intval( $id ),
+ );
+
+ $res = $this->db->select( 'cx_corpora', $fields, $conds,
__METHOD__ );
+
+ return self::format( $res );
+ }
+
+ protected static function format( \ResultWrapper $rows ) {
+ $sections = array();
+
+ foreach ( $rows as $row ) {
+ // Here I am assuming sequence ids are unique and won't
be re-used
+ $id = $row->cxc_section_id;
+ $type = self::isMT( $row->cxc_origin ) ? self::TYPE_MT
: $row->cxc_origin;
+
+ if ( !isset( $sections[$id] ) ) {
+ $sections[$id] = array(
+ 'sequenceid' =>
(int)$row->cxc_sequence_id,
+ self::TYPE_SOURCE => null,
+ self::TYPE_MT => null,
+ self::TYPE_USER => null,
+ );
+ }
+
+ $blob = array(
+ 'engine' => $type === self::TYPE_MT ?
$row->cxc_origin : null,
+ 'content' => $row->cxc_content,
+ // TS_ISO_8601 was chosen because it includes
explicit timezone
+ 'timestamp' => wfTimestamp( TS_ISO_8601,
$row->cxc_timestamp ),
+ );
+
+ // In the future 'user' could be an array, but for now
to keep it simple and consistent,
+ // just allow one blob (the latest & final user version)
+ $sections[$id][$type] = $blob;
+ }
+
+ return $sections;
+ }
+
+ protected static function isMT( $type ) {
+ return $type !== self::TYPE_SOURCE && $type !== self::TYPE_USER;
+ }
+
+}
--
To view, visit https://gerrit.wikimedia.org/r/257287
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I951187a95b031575caa759510a81cdce73a47570
Gerrit-PatchSet: 9
Gerrit-Project: mediawiki/extensions/ContentTranslation
Gerrit-Branch: master
Gerrit-Owner: Nikerabbit <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: Siebrand <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits