Nikerabbit has uploaded a new change for review. https://gerrit.wikimedia.org/r/184148
Change subject: Implement message group configuration validation ...................................................................... Implement message group configuration validation * Validation is optional, but automatically performed if the metayaml library is available. Added it to suggest section in composer.json. * All twn Yaml files are validated under 200 ms, which is fast enough in my opinion. * Moved some code out of TranslateYaml to MessageGroupConfigurationParser where I also placed the new validation code. * Added MetaYamlSchemaExtender interface, which I unfortunately can't use because there is no function to check whether a class implements and interface. There are functions that provide the information but they are harder to use and could be slower, so I went for duck typing instead. Left the class in, however, in case this wll change in the future as well as for other use cases. Constructing these objects is non-trivial, in case you wonder why I don't use instanceof. Change-Id: I09e481d30d068177db3f71d2ebfffa87c394060c --- M Autoload.php A MessageGroupConfigurationParser.php M MessageGroups.php A MetaYamlSchemaExtender.php M composer.json A data/group-yaml-schema.yaml M ffs/FlatPhpFFS.php M ffs/GettextFFS.php M ffs/JavaFFS.php M ffs/YamlFFS.php M messagegroups/FileBasedMessageGroup.php M stringmangler/StringMatcher.php M utils/TranslateYaml.php 13 files changed, 437 insertions(+), 46 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Translate refs/changes/48/184148/1 diff --git a/Autoload.php b/Autoload.php index d58f480..11edc22 100644 --- a/Autoload.php +++ b/Autoload.php @@ -22,6 +22,8 @@ $al['MessageCollection'] = "$dir/MessageCollection.php"; $al['MessageDefinitions'] = "$dir/MessageCollection.php"; $al['MessageGroups'] = "$dir/MessageGroups.php"; +$al['MessageGroupConfigurationParser'] = "$dir/MessageGroupConfigurationParser.php"; +$al['MetaYamlSchemaExtender'] = "$dir/MetaYamlSchemaExtender.php"; $al['TMessage'] = "$dir/Message.php"; $al['ThinMessage'] = "$dir/Message.php"; $al['TranslateEditAddons'] = "$dir/TranslateEditAddons.php"; diff --git a/MessageGroupConfigurationParser.php b/MessageGroupConfigurationParser.php new file mode 100644 index 0000000..5f5a37e --- /dev/null +++ b/MessageGroupConfigurationParser.php @@ -0,0 +1,155 @@ +<?php +/** + * + * @file + * @author Niklas Laxström + * @license GPL-2.0+ + */ + +/** + * Utility class to parse and validate message group configurations. + * @since 2014.01 + */ +class MessageGroupConfigurationParser { + protected $baseSchema; + + public function __construct() { + // Don't perform validations if library not available + if ( class_exists( 'RomaricDrigon\MetaYaml\MetaYaml' ) ) { + $this->baseSchema = $this->getBaseSchema(); + } + } + + /** + * Easy to use function to get valid group configurations from YAML. Those not matching + * schema will be ignored, if schema validation is enabled. + * + * @param string $data Yaml + * @param callable $callback Optional callback which is called on errors. Parameters are + * document index, processed configuration and error message. + * @return array Group configurations indexed by message group id. + */ + public function getHopefullyValidConfigurations( $data, $callback = null ) { + if ( !is_callable( $callback ) ) { + $callback = function () {/*noop*/}; + } + + $documents = self::getDocumentsFromYaml( $data ); + $configurations = self::parseDocuments( $documents ); + $groups = array(); + + if ( is_array( $this->baseSchema ) ) { + foreach ( $configurations as $index => $config ) { + try { + $this->validate( $config ); + $groups[$config['BASIC']['id']] = $config; + } catch ( Exception $e ) { + $callback( $index, $config, $e->getMessage() ); + } + } + } else { + foreach ( $configurations as $index => $config ) { + if ( isset( $config['BASIC']['id'] ) ) { + $groups[$config['BASIC']['id']] = $config; + } else { + $callback( $index, $config, 'id is missing' ); + } + } + } + + return $groups; + } + + /** + * Given a Yaml string, returns the non-empty documents as an array. + * + * @param string $data + * @return string[] + */ + public function getDocumentsFromYaml( $data ) { + return preg_split( "/^---$/m", $data, -1, PREG_SPLIT_NO_EMPTY ); + } + + /** + * Returns group configurations from YAML documents. If there is document containing template, + * it will be merged with other configurations. + * + * @param string $data + * @return array Unvalidated group configurations + */ + public function parseDocuments( array $documents ) { + $groups = array(); + $template = false; + foreach ( $documents as $document ) { + $document = TranslateYaml::loadString( $document ); + + if ( isset( $document['TEMPLATE'] ) ) { + $template = $document['TEMPLATE']; + } else { + $groups[] = $document; + } + } + + if ( $template ) { + foreach ( $groups as $i => $group ) { + $groups[$i] = self::mergeTemplate( $template, $group ); + // Little hack to allow aggregate groups to be defined in same file with other groups. + if ( $groups[$i]['BASIC']['class'] === 'AggregateMessageGroup' ) { + unset( $groups[$i]['FILES'] ); + } + } + } + + return $groups; + } + + public function getBaseSchema() { + return TranslateYaml::load( __DIR__ . '/data/group-yaml-schema.yaml' ); + } + + /** + * Validates group configuration against schema. + * + * @param array $config + * @throws Exception If configuration is not valid. + */ + public function validate( array $config ) { + $schema = $this->baseSchema; + + foreach ( $config as $sectionName => $section ) { + if ( !isset( $section['class'] ) ) { + continue; + } + + $class = $section['class']; + // There is no sane way to check whether *class* implements interface in PHP + if ( !method_exists( $class, 'getExtraSchema' ) ) { + continue; + } + + $extra = call_user_func( array( $class, 'getExtraSchema' ) ); + $schema = array_replace_recursive( $schema, $extra ); + } + + $schema = new RomaricDrigon\MetaYaml\MetaYaml( $schema ); + $schema->validate( $config ); + } + + /** + * Merges a document template (base) to actual definition (specific) + * @param array $base + * @param array $specific + * @return array + */ + public static function mergeTemplate( array $base, array $specific ) { + foreach ( $specific as $key => $value ) { + if ( is_array( $value ) && isset( $base[$key] ) && is_array( $base[$key] ) ) { + $base[$key] = self::mergeTemplate( $base[$key], $value ); + } else { + $base[$key] = $value; + } + } + + return $base; + } +} diff --git a/MessageGroups.php b/MessageGroups.php index 4adfd34..c35f92a 100644 --- a/MessageGroups.php +++ b/MessageGroups.php @@ -119,10 +119,17 @@ wfProfileOut( __METHOD__ . '-hook' ); wfProfileIn( __METHOD__ . '-yaml' ); + $parser = new MessageGroupConfigurationParser(); foreach ( $wgTranslateGroupFiles as $configFile ) { - wfDebug( $configFile . "\n" ); $deps[] = new FileDependency( realpath( $configFile ) ); - $fgroups = TranslateYaml::parseGroupFile( $configFile ); + + $yaml = file_get_contents( $configFile ); + $fgroups = $parser->getHopefullyValidConfigurations( + $yaml, + function ( $index, $config, $errmsg ) use ( $configFile ) { + trigger_error( "Document $index in $configFile is invalid: $errmsg", E_USER_WARNING ); + } + ); foreach ( $fgroups as $id => $conf ) { if ( !empty( $conf['AUTOLOAD'] ) && is_array( $conf['AUTOLOAD'] ) ) { diff --git a/MetaYamlSchemaExtender.php b/MetaYamlSchemaExtender.php new file mode 100644 index 0000000..8891f83 --- /dev/null +++ b/MetaYamlSchemaExtender.php @@ -0,0 +1,25 @@ +<?php + +/** + * Message groups are usually configured in YAML, though the actual storage format does not matter, + * because they are parsed to PHP arrays anyway. The configuration consists of sections, and in some + * section there is key 'class' which defines the class implementing that part of behavior. These + * classes can take custom parameters, so in essense our configuration format is open-ended. To + * implement proper validation, those classes can extend the schema runtime by implemeting this + * interface. Validation is implemented with the MetaYaml library. + * + * Because neither is_a nor instanceof accept class names, validation code will check directly + * whether this method exists, whether the class implements the interface or not. + * + * @see https://github.com/romaricdrigon/MetaYaml + * @see https://www.mediawiki.org/wiki/Help:Extension:Translate/Group_configuration + * @since 2014.01 + */ +interface MetaYamlSchemaExtender { + /** + * Return a data structure that will be merged with the base schema. It is not possible to remove + * things. + * @return array + */ + public static function getExtraSchema(); +} diff --git a/composer.json b/composer.json index 0df5528..0786359 100644 --- a/composer.json +++ b/composer.json @@ -41,7 +41,8 @@ "suggest": { "mediawiki/babel": "Users can easily indicate their language proficiency on their user page", "mediawiki/translation-notifications": "Manage communication with translators", - "mustangostang/spyc": "More recent version of the bundled spyc library" + "mustangostang/spyc": "More recent version of the bundled spyc library", + "romaricdrigon/metayaml": "If you want to validate message group configurations" }, "autoload": { "files": ["Translate.php"] diff --git a/data/group-yaml-schema.yaml b/data/group-yaml-schema.yaml new file mode 100644 index 0000000..ed13ff9 --- /dev/null +++ b/data/group-yaml-schema.yaml @@ -0,0 +1,78 @@ +root: + _type: array + _children: + BASIC: + _type: array + _required: true + _children: + class: + _type: text + _not_empty: true + codeBrowser: + _type: text + description: + _type: text + icon: + _type: text + id: + _type: text + _not_empty: true + label: + _type: text + _not_empty: true + meta: + _type: boolean + namespace: + _type: text + sourcelanguage: + _type: text + _description: defaults to "en" + MANGLER: + _type: array + _children: + class: + _type: text + _not_empty: true + CHECKER: + _type: array + _children: + class: + _type: text + _not_empty: true + checks: + _type: prototype + _prototype: + _type: text + INSERTABLES: + _type: array + _children: + class: + _type: text + _not_empty: true + TAGS: + _type: prototype + _prototype: + _type: prototype + _max_items: 99999 # default is 200, which is too little + _prototype: + _type: text + AUTOLOAD: + _type: array + _ignore_extra_keys: true + _children: [] + GROUPS: + _type: prototype + _min_items: 1 + _prototype: + _type: text + LANGUAGES: + _type: array + _children: + whitelist: + _type: prototype + _prototype: + _type: text + blacklist: + _type: prototype + _prototype: + _type: text diff --git a/ffs/FlatPhpFFS.php b/ffs/FlatPhpFFS.php index 55a49e7..4dbab6b 100644 --- a/ffs/FlatPhpFFS.php +++ b/ffs/FlatPhpFFS.php @@ -13,7 +13,7 @@ * Implements file format support for PHP files which consist of multiple * variable assignments. */ -class FlatPhpFFS extends SimpleFFS { +class FlatPhpFFS extends SimpleFFS implements MetaYamlSchemaExtender { public function getFileExtensions() { return array( '.php' ); } @@ -136,4 +136,24 @@ return $output; } + + public static function getExtraSchema() { + $schema = array( + 'root' => array( + '_type' => 'array', + '_children' => array( + 'FILES' => array( + '_type' => 'array', + '_children' => array( + 'header' => array( + '_type' => 'text', + ), + ) + ) + ) + ) + ); + + return $schema; + } } diff --git a/ffs/GettextFFS.php b/ffs/GettextFFS.php index 50c0a9a..f1e8e4d 100644 --- a/ffs/GettextFFS.php +++ b/ffs/GettextFFS.php @@ -19,7 +19,7 @@ * New-style FFS class that implements support for gettext file format. * @ingroup FFS */ -class GettextFFS extends SimpleFFS { +class GettextFFS extends SimpleFFS implements MetaYamlSchemaExtender { public function supportsFuzzy() { return 'yes'; } @@ -664,4 +664,31 @@ return $splitPlurals; } + + public static function getExtraSchema() { + $schema = array( + 'root' => array( + '_type' => 'array', + '_children' => array( + 'FILES' => array( + '_type' => 'array', + '_children' => array( + 'header' => array( + '_type' => 'text', + ), + 'keyAlgorithm' => array( + '_type' => 'enum', + '_values' => array( 'simple', 'legacy' ), + ), + 'CtxtAsKey' => array( + '_type' => 'boolean', + ), + ) + ) + ) + ) + ); + + return $schema; + } } diff --git a/ffs/JavaFFS.php b/ffs/JavaFFS.php index 66badcd..227d27d 100644 --- a/ffs/JavaFFS.php +++ b/ffs/JavaFFS.php @@ -9,7 +9,7 @@ * \c keySeparator which defaults to '='. * @ingroup FFS */ -class JavaFFS extends SimpleFFS { +class JavaFFS extends SimpleFFS implements MetaYamlSchemaExtender { public function supportsFuzzy() { return 'write'; } @@ -250,4 +250,27 @@ return $output; } + + public static function getExtraSchema() { + $schema = array( + 'root' => array( + '_type' => 'array', + '_children' => array( + 'FILES' => array( + '_type' => 'array', + '_children' => array( + 'header' => array( + '_type' => 'text', + ), + 'keySeparator' => array( + '_type' => 'text', + ), + ) + ) + ) + ) + ); + + return $schema; + } } diff --git a/ffs/YamlFFS.php b/ffs/YamlFFS.php index 26320e2..3a64f2e 100644 --- a/ffs/YamlFFS.php +++ b/ffs/YamlFFS.php @@ -7,7 +7,7 @@ * If it is set to true, all messages will under language code. * @ingroup FFS */ -class YamlFFS extends SimpleFFS { +class YamlFFS extends SimpleFFS implements MetaYamlSchemaExtender { public function getFileExtensions() { return array( '.yaml', '.yml' ); } @@ -247,4 +247,24 @@ public function unflattenPlural( $key, $value ) { return array( $key => $value ); } + + public static function getExtraSchema() { + $schema = array( + 'root' => array( + '_type' => 'array', + '_children' => array( + 'FILES' => array( + '_type' => 'array', + '_children' => array( + 'codeAsRoot' => array( + '_type' => 'boolean', + ), + ) + ) + ) + ) + ); + + return $schema; + } } diff --git a/messagegroups/FileBasedMessageGroup.php b/messagegroups/FileBasedMessageGroup.php index a25703e..df64233 100644 --- a/messagegroups/FileBasedMessageGroup.php +++ b/messagegroups/FileBasedMessageGroup.php @@ -16,7 +16,7 @@ * custom type of message groups. * @ingroup MessageGroup */ -class FileBasedMessageGroup extends MessageGroupBase { +class FileBasedMessageGroup extends MessageGroupBase implements MetaYamlSchemaExtender { protected $reverseCodeMap; /** @@ -146,4 +146,40 @@ return $code; } } + + public static function getExtraSchema() { + $schema = array( + 'root' => array( + '_type' => 'array', + '_children' => array( + 'FILES' => array( + '_type' => 'array', + '_children' => array( + 'class' => array( + '_type' => 'text', + '_not_empty' => true, + ), + 'codeMap' => array( + '_type' => 'array', + '_ignore_extra_keys' => true, + '_children' => array(), + ), + 'definitionFile' => array( + '_type' => 'text', + ), + 'sourcePattern' => array( + '_type' => 'text', + '_not_empty' => true, + ), + 'targetPattern' => array( + '_type' => 'text', + ), + ) + ) + ) + ) + ); + + return $schema; + } } diff --git a/stringmangler/StringMatcher.php b/stringmangler/StringMatcher.php index f58d8af..88c12c6 100644 --- a/stringmangler/StringMatcher.php +++ b/stringmangler/StringMatcher.php @@ -11,7 +11,7 @@ * It supports exact matches and patterns with any-wildcard (*). * All matching strings are prefixed with the same prefix. */ -class StringMatcher implements StringMangler { +class StringMatcher implements StringMangler, MetaYamlSchemaExtender { /// Prefix for mangled message keys protected $sPrefix = ''; /// Exact message keys @@ -218,4 +218,31 @@ // Also check that the indexing starts from zero return !array_key_exists( 0, $array ); } + + public static function getExtraSchema() { + $schema = array( + 'root' => array( + '_type' => 'array', + '_children' => array( + 'MANGLER' => array( + '_type' => 'array', + '_children' => array( + 'prefix' => array( + '_type' => 'text', + '_not_empty' => true + ), + 'patterns' => array( + '_type' => 'array', + '_required' => true, + '_ignore_extra_keys' => true, + '_children' => array(), + ), + ) + ) + ) + ) + ); + + return $schema; + } } diff --git a/utils/TranslateYaml.php b/utils/TranslateYaml.php index d3ae363..9d7d592 100644 --- a/utils/TranslateYaml.php +++ b/utils/TranslateYaml.php @@ -15,53 +15,23 @@ */ class TranslateYaml { /** - * @param $filename string - * @return array + * @deprecated in 2014.01 */ public static function parseGroupFile( $filename ) { $data = file_get_contents( $filename ); - $documents = preg_split( "/^---$/m", $data, -1, PREG_SPLIT_NO_EMPTY ); - $groups = array(); - $template = false; - foreach ( $documents as $document ) { - $document = self::loadString( $document ); + wfDeprecated( 'Use MessageGroupConfigurationParser' ); + $parser = new MessageGroupConfigurationParser(); - if ( isset( $document['TEMPLATE'] ) ) { - $template = $document['TEMPLATE']; - } else { - if ( !isset( $document['BASIC']['id'] ) ) { - $error = "No path ./BASIC/id (group id not defined) "; - $error .= "in YAML document located in $filename"; - trigger_error( $error ); - continue; - } - $groups[$document['BASIC']['id']] = $document; - } - } - - foreach ( $groups as $i => $group ) { - $groups[$i] = self::mergeTemplate( $template, $group ); - } - - return $groups; + return $parser->getHopefullyValidConfigurations( $data ); } /** - * Merges a document template (base) to actual definition (specific) - * @param $base - * @param $specific - * @return array + * @deprecated in 2014.01 */ public static function mergeTemplate( $base, $specific ) { - foreach ( $specific as $key => $value ) { - if ( is_array( $value ) && isset( $base[$key] ) && is_array( $base[$key] ) ) { - $base[$key] = self::mergeTemplate( $base[$key], $value ); - } else { - $base[$key] = $value; - } - } + wfDeprecated( 'Use MessageGroupConfigurationParser' ); - return $base; + return MessageGroupConfigurationParser::mergeTemplate( $base, $specific ); } /** -- To view, visit https://gerrit.wikimedia.org/r/184148 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I09e481d30d068177db3f71d2ebfffa87c394060c Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Translate Gerrit-Branch: master Gerrit-Owner: Nikerabbit <niklas.laxst...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits