Petrb has uploaded a new change for review. https://gerrit.wikimedia.org/r/186338
Change subject: Implemented a new XML format ...................................................................... Implemented a new XML format which is actually just a pure XML - a copy of androidXML with no special android specific customizations Change-Id: I8345e61e79530cc570ec711c9ac6c14cb8bc51a0 --- M Autoload.php A ffs/SimpleXml.php 2 files changed, 209 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Translate refs/changes/38/186338/1 diff --git a/Autoload.php b/Autoload.php index d58f480..c1ca56a 100644 --- a/Autoload.php +++ b/Autoload.php @@ -184,6 +184,7 @@ $al['RubyYamlFFS'] = "$dir/ffs/RubyYamlFFS.php"; $al['ShapadoJsFFS'] = "$dir/ffs/JavaScriptFFS.php"; $al['SimpleFFS'] = "$dir/ffs/SimpleFFS.php"; +$al['SimpleXmlFFS'] = "$dir/ffs/SimpleXmlFFS.php"; $al['XliffFFS'] = "$dir/ffs/XliffFFS.php"; $al['YamlFFS'] = "$dir/ffs/YamlFFS.php"; /**@}*/ diff --git a/ffs/SimpleXml.php b/ffs/SimpleXml.php new file mode 100644 index 0000000..fd4e0a6 --- /dev/null +++ b/ffs/SimpleXml.php @@ -0,0 +1,208 @@ +<?php +/** + * Support for a simple XML format that is basically just a standard XML file + * which contains localization in format of: + * <?xml version="1.0" encoding="UTF-8"?> + * <resources> + * <string name="name">English</string> + * <!-- more keys --> + * </resources> + * This format is similar to Android format and is almost compatible, but unlike + * that one, it uses standard Canonical XML Version 1.0, according to + * http://www.ietf.org/rfc/rfc3076.txt + * + * @file + * @author Petr Bena + * @license GPL-2.0+ + */ + +/** + * Support for XML translations + * @since 2015-01-23 + * @ingroup FFS + */ +class SimpleXmlFFS extends SimpleFFS { + protected static $pluralWords = array( + 'zero' => 1, + 'one' => 1, + 'two' => 1, + 'few' => 1, + 'many' => 1, + 'other' => 1, + ); + + public function supportsFuzzy() { + return 'yes'; + } + + public function getFileExtensions() { + return array( '.xml' ); + } + + /** + * @param 
string $data + * @return array Parsed data. + */ + public function readFromVariable( $data ) { + $reader = new SimpleXMLElement( $data ); + + $messages = array(); + $mangler = $this->group->getMangler(); + + /** @var SimpleXMLElement $element */ + foreach ( $reader as $element ) { + $key = (string)$element['name']; + + if ( $element->getName() === 'string' ) { + $value = $this->readElementContents( $element ); + } elseif ( $element->getName() === 'plurals' ) { + $forms = array(); + foreach ( $element as $item ) { + $forms[(string)$item['quantity']] = $item; + } + $value = $this->flattenPlural( $forms ); + } else { + wfDebug( __METHOD__ . ': Unknown XML element name.' ); + continue; + } + + if ( isset( $element['fuzzy'] ) && (string)$element['fuzzy'] === 'true' ) { + $value = TRANSLATE_FUZZY . $value; + } + + $messages[$key] = $value; + } + + return array( + 'AUTHORS' => array(), // @todo + 'MESSAGES' => $mangler->mangle( $messages ), + ); + } + + protected function writeReal( MessageCollection $collection ) { + $template = <<<XML +<?xml version="1.0" encoding="utf-8"?> +<resources></resources> +XML; + + $writer = new SimpleXMLElement( $template ); + $mangler = $this->group->getMangler(); + + $collection->filter( 'hastranslation', false ); + if ( count( $collection ) === 0 ) { + return ''; + } + + /** + * @var $m TMessage + */ + foreach ( $collection as $key => $m ) { + $key = $mangler->unmangle( $key ); + + $value = $m->translation(); + $value = str_replace( TRANSLATE_FUZZY, '', $value ); + + // Handle plurals + if ( strpos( $value, '{{PLURAL' ) === false ) { + $element = $writer->addChild( 'string', $value ); + } else { + $element = $writer->addChild( 'plurals' ); + $forms = $this->unflattenPlural( $value ); + foreach ( $forms as $quantity => $content ) { + $item = $element->addChild( 'item', $content ); + $item->addAttribute( 'quantity', $quantity ); + } + } + + $element->addAttribute( 'name', $key ); + // This is non-standard + if ( $m->hasTag( 'fuzzy' ) ) { + 
$element->addAttribute( 'fuzzy', 'true' ); + } + } + + // Make the output pretty with DOMDocument + $dom = new DOMDocument( '1.0' ); + $dom->formatOutput = true; + $dom->loadXML( $writer->asXML() ); + + return $dom->saveXML(); + } + + /** + * Flattens array of plurals into string. + * + * @param array $forms Map of quantity => plural form + * @return string + */ + protected function flattenPlural( array $forms ) { + $pls = '{{PLURAL'; + foreach ( $forms as $key => $value ) { + $pls .= "|$key=$value"; + } + + $pls .= "}}"; + return $pls; + } + + /** + * Converts the flattened plural into messages + * + * @param string $message + * @return array + */ + protected function unflattenPlural( $message ) { + $regex = '~\{\{PLURAL\|(.*?)}}~s'; + $matches = array(); + $match = array(); + + while ( preg_match( $regex, $message, $match ) ) { + $uniqkey = TranslateUtils::getPlaceholder(); + $matches[$uniqkey] = $match; + $message = preg_replace( $regex, $uniqkey, $message, 1 ); + } + + // No plurals, should not happen. + if ( !count( $matches ) ) { + return array(); + } + + // The final array of alternative plurals forms. + $alts = array(); + + /* + * Then loop through each plural block, replacing the placeholders + * to construct the alternatives. Produces invalid output if there are + * multiple plural blocks which don't have the same set of keys. 
+ */ + $pluralChoice = implode( '|', array_keys( self::$pluralWords ) ); + $regex = "~($pluralChoice)\s*=\s*(.+)~s"; + foreach ( $matches as $ph => $plu ) { + $forms = explode( '|', $plu[1] ); + + foreach ( $forms as $form ) { + if ( $form === '' ) { + continue; + } + + $match = array(); + if ( !preg_match( $regex, $form, $match ) ) { + // No quantity key was provided + continue; + } + + $formWord = $match[1]; + $value = $match[2]; + if ( !isset( $alts[$formWord] ) ) { + $alts[$formWord] = $message; + } + + $string = $alts[$formWord]; + + $alts[$formWord] = str_replace( $ph, $value, $string ); + } + } + + return $alts; + } +} -- To view, visit https://gerrit.wikimedia.org/r/186338 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8345e61e79530cc570ec711c9ac6c14cb8bc51a0 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Translate Gerrit-Branch: master Gerrit-Owner: Petrb <benap...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits