Michele Locati <[email protected]> writes: > - we need to integrate something like the cldr-to-gettext-plural-rules > tool of mine: that's not a big problem - just a rewrite from php to c.
I've tried that to familiarize myself with CLDR. See the attached patch, which adds a 'cldr-plural' utility to gettext-tools. To make the comparison easier, I'm also attaching the generated output for the languages currently defined in plural-table.c. > - we have to take for sure that the CLDR repository structure does not > change: that's a problem (for instance, the CLDR team moved the json > data from http://unicode.org/Public/cldr/ to GitHub) Unlike the JSON files, I suppose the XML files will remain available from the canonical location. If that is not the case, we can make the location customizable or provide a shell-script wrapper like /usr/share/gettext/projects/*/team-address. Regards, -- Daiki Ueno
>From 314bd1b1b8f487c3dd9a4761ee451323bda3680d Mon Sep 17 00:00:00 2001 From: Daiki Ueno <[email protected]> Date: Thu, 21 May 2015 13:03:50 +0900 Subject: [PATCH] gettext-tools: Add a new utility cldr-plural * Makefile.am (noinst_PROGRAMS): Add cldr-plural. (cldr_plural_SOURCES): New variable. (cldr_plural_LDADD): New variable. * cldr-plural-exp.h: New file. * cldr-plural-exp.c: New file. * cldr-plural.y: New file. --- gettext-tools/src/Makefile.am | 4 +- gettext-tools/src/cldr-plural-exp.c | 571 ++++++++++++++++++++++++++++++++++++ gettext-tools/src/cldr-plural-exp.h | 133 +++++++++ gettext-tools/src/cldr-plural.y | 465 +++++++++++++++++++++++++++++ 4 files changed, 1172 insertions(+), 1 deletion(-) create mode 100644 gettext-tools/src/cldr-plural-exp.c create mode 100644 gettext-tools/src/cldr-plural-exp.h create mode 100644 gettext-tools/src/cldr-plural.y diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 9a23be0..81c47b1 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -29,7 +29,7 @@ msgcmp msgfmt msgmerge msgunfmt xgettext \ msgattrib msgcat msgcomm msgconv msgen msgexec msgfilter msggrep msginit msguniq \ recode-sr-latin -noinst_PROGRAMS = hostname urlget +noinst_PROGRAMS = hostname urlget cldr-plural lib_LTLIBRARIES = libgettextsrc.la @@ -236,6 +236,8 @@ endif recode_sr_latin_SOURCES = recode-sr-latin.c filter-sr-latin.c hostname_SOURCES = hostname.c urlget_SOURCES = urlget.c +cldr_plural_SOURCES = cldr-plural-exp.c cldr-plural.y +cldr_plural_LDADD = $(LDADD) -lm # How to build libgettextsrc.la. # Need ../gnulib-lib/libgettextlib.la. diff --git a/gettext-tools/src/cldr-plural-exp.c b/gettext-tools/src/cldr-plural-exp.c new file mode 100644 index 0000000..61902f1 --- /dev/null +++ b/gettext-tools/src/cldr-plural-exp.c @@ -0,0 +1,571 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. 
+ + This file was written by Daiki Ueno <[email protected]>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "unistr.h" +#include "xalloc.h" +#include <math.h> + +#include "cldr-plural-exp.h" +#include "cldr-plural.h" + +/* The grammar of Unicode CLDR plural rules is defined at: + http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax + + This implementation only supports the "preferred" form, which + doesn't support obsolete keywords "in", "is", "not", and "within". + + Unlike gettext, CLDR allows an unsigned decimal value as an + operand, in addition to unsigned integers. For simplicity, we + treat decimal relations as if it is a constant truth value. 
+ + The implementation is largely based on the idea of Michele Locati's + cldr-to-gettext-plural-rules: + https://github.com/mlocati/cldr-to-gettext-plural-rules */ + +void +cldr_plural_range_free (struct cldr_plural_range_ty *range) +{ + if (range->start != range->end) + free (range->start); + free (range->end); + free (range); +} + +void +cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges) +{ + while (ranges->nitems-- > 0) + cldr_plural_range_free (ranges->items[ranges->nitems]); + free (ranges->items); + free (ranges); +} + +void +cldr_plural_condition_free (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND + || condition->type == CLDR_PLURAL_CONDITION_OR) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + cldr_plural_relation_free (condition->value.relation); + free (condition); +} + +void +cldr_plural_relation_free (struct cldr_plural_relation_ty *relation) +{ + free (relation->expression); + cldr_plural_range_list_free (relation->ranges); + free (relation); +} + +static void +cldr_plural_rule_free (struct cldr_plural_rule_ty *rule) +{ + free (rule->name); + cldr_plural_condition_free (rule->condition); + free (rule); +} + +static void +cldr_plural_rule_list_free (struct cldr_plural_rule_list_ty *rules) +{ + while (rules->nitems-- > 0) + cldr_plural_rule_free (rules->items[rules->nitems]); + free (rules->items); + free (rules); +} + +static struct cldr_plural_rule_list_ty * +cldr_plural_parse (const char *input) +{ + struct cldr_plural_parse_args arg; + + memset (&arg, 0, sizeof (struct cldr_plural_parse_args)); + arg.cp = input; + arg.cp_end = input + strlen (input); + arg.result = XMALLOC (struct cldr_plural_rule_list_ty); + memset (arg.result, 0, sizeof (struct cldr_plural_rule_list_ty)); + + if (yyparse (&arg) != 0) + return NULL; + + return 
arg.result; +} + +#define OPERAND_ZERO_P(o) \ + (((o)->type == CLDR_PLURAL_OPERAND_INTEGER \ + && (o)->value.ival == 0) \ + || ((o)->type == CLDR_PLURAL_OPERAND_DECIMAL \ + && (o)->value.dval.d == 0)) + +static enum cldr_plural_condition +eval_relation (struct cldr_plural_relation_ty *relation) +{ + switch (relation->expression->operand) + { + case 'n': case 'i': + { + /* Coerce decimal values in ranges into integers. */ + size_t i; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (range->start->type == CLDR_PLURAL_OPERAND_DECIMAL) + { + range->start->type = CLDR_PLURAL_OPERAND_INTEGER; + range->start->value.ival = ceil (range->start->value.dval.d); + } + if (range->end->type == CLDR_PLURAL_OPERAND_DECIMAL) + { + range->end->type = CLDR_PLURAL_OPERAND_INTEGER; + range->end->value.ival = floor (range->end->value.dval.d); + } + } + relation->expression->operand = 'i'; + } + break; + case 'f': case 't': + case 'v': case 'w': + { + /* Since plural expression in gettext only supports unsigned + integer, turn relations whose operand is either 'f', 't', + 'v', or 'w' into a constant truth value. */ + /* FIXME: check mod? 
*/ + size_t i; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if ((relation->type == CLDR_PLURAL_RELATION_EQUAL + && (!OPERAND_ZERO_P (range->start) + || !OPERAND_ZERO_P (range->end))) + || (relation->type == CLDR_PLURAL_RELATION_NOT_EQUAL + && (OPERAND_ZERO_P (range->start) + || OPERAND_ZERO_P (range->end)))) + return CLDR_PLURAL_CONDITION_FALSE; + } + return CLDR_PLURAL_CONDITION_TRUE; + } + break; + } + return CLDR_PLURAL_CONDITION_RELATION; +} + +static void +eval_condition (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + { + eval_condition (condition->value.conditions[0]); + eval_condition (condition->value.conditions[1]); + + if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE + || condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_FALSE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE + && condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_TRUE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[1]; + cldr_plural_condition_free (condition->value.conditions[0]); + condition->type = condition->value.conditions[1]->type; + condition->value = condition->value.conditions[1]->value; + free (original); + } + else if (condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[0]; + 
cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = condition->value.conditions[0]->type; + condition->value = condition->value.conditions[0]->value; + free (original); + } + } + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + { + eval_condition (condition->value.conditions[0]); + eval_condition (condition->value.conditions[1]); + + if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE + || condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_TRUE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE + && condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_FALSE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[1]; + cldr_plural_condition_free (condition->value.conditions[0]); + condition->type = condition->value.conditions[1]->type; + condition->value = condition->value.conditions[1]->value; + free (original); + } + else if (condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[0]; + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = condition->value.conditions[0]->type; + condition->value = condition->value.conditions[0]->value; + free (original); + } + } + else + { + enum cldr_plural_condition value = + eval_relation (condition->value.relation); + if (value == CLDR_PLURAL_CONDITION_TRUE + || value == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_relation_free 
(condition->value.relation); + condition->type = value; + } + } +} + +static void +print_expression (struct cldr_plural_expression_ty *expression, bool space) +{ + if (expression->mod == 0) + printf ("n"); + else + printf (space ? "n %% %d" : "n%%%d", expression->mod); +} + +static void +print_relation (struct cldr_plural_relation_ty *relation, + enum cldr_plural_condition parent, bool space) +{ + if (relation->type == CLDR_PLURAL_RELATION_EQUAL) + { + size_t i; + if (parent == CLDR_PLURAL_CONDITION_AND + && relation->ranges->nitems > 1) + putchar ('('); + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (i > 0) + printf (" || "); + if (range->start->value.ival == range->end->value.ival) + { + print_expression (relation->expression, space); + printf (space && relation->ranges->nitems == 1 + ? " == %d" : "==%d", + range->start->value.ival); + } + else if (range->start->value.ival == 0) + { + print_expression (relation->expression, false); + printf ("<=%d", range->end->value.ival); + } + else + { + if (parent == CLDR_PLURAL_CONDITION_OR + || relation->ranges->nitems > 1) + putchar ('('); + print_expression (relation->expression, false); + printf (">=%d", range->start->value.ival); + printf (" && "); + print_expression (relation->expression, false); + printf ("<=%d", range->end->value.ival); + if (parent == CLDR_PLURAL_CONDITION_OR + || relation->ranges->nitems > 1) + putchar (')'); + } + } + if (parent == CLDR_PLURAL_CONDITION_AND + && relation->ranges->nitems > 1) + putchar (')'); + } + else + { + size_t i; + if (parent == CLDR_PLURAL_CONDITION_OR + && relation->ranges->nitems > 1) + putchar ('('); + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (i > 0) + printf (" && "); + if (range->start->value.ival == range->end->value.ival) + { + print_expression (relation->expression, space); + printf (space && 
relation->ranges->nitems == 1 + ? " != %d" : "!=%d", range->start->value.ival); + } + else if (range->start->value.ival == 0) + { + print_expression (relation->expression, false); + printf (">%d", range->end->value.ival); + } + else + { + if (parent == CLDR_PLURAL_CONDITION_AND + || relation->ranges->nitems > 1) + putchar ('('); + print_expression (relation->expression, false); + printf ("<%d", range->start->value.ival); + printf (" || "); + print_expression (relation->expression, false); + printf (">%d", range->end->value.ival); + if (parent == CLDR_PLURAL_CONDITION_AND + || relation->ranges->nitems > 1) + putchar (')'); + } + } + if (parent == CLDR_PLURAL_CONDITION_OR + && relation->ranges->nitems > 1) + putchar (')'); + } +} + +static bool +print_condition (struct cldr_plural_condition_ty *condition, + enum cldr_plural_condition parent, bool space) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + { + if (parent == CLDR_PLURAL_CONDITION_OR) + putchar ('('); + print_condition (condition->value.conditions[0], + CLDR_PLURAL_CONDITION_AND, false); + printf (" && "); + print_condition (condition->value.conditions[1], + CLDR_PLURAL_CONDITION_AND, false); + if (parent == CLDR_PLURAL_CONDITION_OR) + putchar (')'); + return true; + } + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + { + if (parent == CLDR_PLURAL_CONDITION_AND) + putchar ('('); + print_condition (condition->value.conditions[0], + CLDR_PLURAL_CONDITION_OR, false); + printf (" || "); + print_condition (condition->value.conditions[1], + CLDR_PLURAL_CONDITION_OR, false); + if (parent == CLDR_PLURAL_CONDITION_AND) + putchar (')'); + return true; + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + { + print_relation (condition->value.relation, parent, space); + return true; + } + return false; +} + +#define RULE_PRINTABLE_P(r) \ + ((r)->condition->type != CLDR_PLURAL_CONDITION_TRUE \ + && (r)->condition->type != CLDR_PLURAL_CONDITION_FALSE) + +/* Convert n == N into n != N. 
*/ +static bool +print_condition_negation (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_RELATION + && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL + && condition->value.relation->ranges->nitems == 1 + && condition->value.relation->ranges->items[0]->start + == condition->value.relation->ranges->items[0]->end) + { + printf ("nplurals=2; plural=(n != %d);\n", + condition->value.relation->ranges->items[0]->start->value.ival); + return true; + } + return false; +} + +/* Convert n == 0,...,N into n > N. */ +static bool +print_condition_greater (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_RELATION + && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL) + { + int last = -1; + size_t i; + for (i = 0; i < condition->value.relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = + condition->value.relation->ranges->items[i]; + if (range->start->type != CLDR_PLURAL_OPERAND_INTEGER + || range->end->type != CLDR_PLURAL_OPERAND_INTEGER + || range->start->value.ival != last + 1) + break; + last = range->end->value.ival; + } + if (i == condition->value.relation->ranges->nitems) + { + struct cldr_plural_range_ty *range = + condition->value.relation->ranges->items[i - 1]; + printf ("nplurals=2; plural=(n > %d);\n", + range->end->value.ival); + return true; + } + } + return false; +} + +typedef bool (*print_condition_function_ty) (struct cldr_plural_condition_ty *); +static print_condition_function_ty print_condition_functions[] = + { + print_condition_negation, + print_condition_greater + }; + +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +static void +process_rule_list (struct cldr_plural_rule_list_ty *rules) +{ + size_t i; + size_t count; + size_t nplurals; + + /* Prune trivial conditions. 
*/ + for (i = 0, nplurals = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + eval_condition (rule->condition); + if (RULE_PRINTABLE_P (rules->items[i])) + nplurals++; + } + + /* Special case when rules is empty. */ + if (nplurals == 0) + { + printf ("nplurals=1; plural=0;\n"); + return; + } + + /* If we have only one printable rule, apply some heuristics. */ + if (nplurals == 1) + { + struct cldr_plural_condition_ty *condition; + size_t j; + + for (j = 0; j < rules->nitems; j++) + if (RULE_PRINTABLE_P (rules->items[j])) + break; + + condition = rules->items[j]->condition; + for (j = 0; j < SIZEOF (print_condition_functions); j++) + if (print_condition_functions[j] (condition)) + return; + } + + /* If there are more printable rules, build a tertiary operator. */ + printf ("nplurals=%zu; plural=(", nplurals + 1); + for (i = 0, count = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + if (print_condition (rule->condition, CLDR_PLURAL_CONDITION_FALSE, nplurals == 1) + && rules->nitems > 1) + { + bool printable_left = false; + size_t j; + + for (j = i + 1; j < rules->nitems; j++) + if (RULE_PRINTABLE_P (rules->items[j])) + printable_left = true; + + if (i < rules->nitems - 1 && printable_left) + printf (" ? %zu : ", count++); + } + } + if (rules->nitems > 1) + printf (" ? 
%zu : %zu", count, count + 1); + printf (");\n"); +} + +int +main (int argc, char **argv) +{ + char *line = NULL; + size_t line_size = 0; + + for (;;) + { + int line_len; + struct cldr_plural_rule_list_ty *result; + + line_len = getline (&line, &line_size, stdin); + if (line_len < 0) + break; + if (line_len > 0 && line[line_len - 1] == '\n') + line[--line_len] = '\0'; + + result = cldr_plural_parse (line); + if (result) + { + process_rule_list (result); + cldr_plural_rule_list_free (result); + } + } + + free (line); + return 0; +} diff --git a/gettext-tools/src/cldr-plural-exp.h b/gettext-tools/src/cldr-plural-exp.h new file mode 100644 index 0000000..1c0c70c --- /dev/null +++ b/gettext-tools/src/cldr-plural-exp.h @@ -0,0 +1,133 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <[email protected]>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _CLDR_PLURAL_EXP_H +#define _CLDR_PLURAL_EXP_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +enum cldr_plural_operand + { + CLDR_PLURAL_OPERAND_INTEGER, + CLDR_PLURAL_OPERAND_DECIMAL + }; + +struct cldr_plural_operand_ty +{ + enum cldr_plural_operand type; + union + { + int ival; + struct + { + double d; + int nfractions; + } dval; + } value; +}; + +enum cldr_plural_relation + { + CLDR_PLURAL_RELATION_EQUAL, + CLDR_PLURAL_RELATION_NOT_EQUAL + }; + +struct cldr_plural_range_ty +{ + struct cldr_plural_operand_ty *start; + struct cldr_plural_operand_ty *end; +}; + +struct cldr_plural_range_list_ty +{ + struct cldr_plural_range_ty **items; + size_t nitems; + size_t nitems_max; +}; + +struct cldr_plural_expression_ty +{ + /* 'n', 'i', 'f', 't', 'v', 'w' */ + int operand; + + /* 0 if not given */ + int mod; +}; + +struct cldr_plural_relation_ty +{ + struct cldr_plural_expression_ty *expression; + enum cldr_plural_relation type; + struct cldr_plural_range_list_ty *ranges; +}; + +enum cldr_plural_condition + { + CLDR_PLURAL_CONDITION_AND, + CLDR_PLURAL_CONDITION_OR, + CLDR_PLURAL_CONDITION_RELATION, + CLDR_PLURAL_CONDITION_TRUE, + CLDR_PLURAL_CONDITION_FALSE + }; + +struct cldr_plural_condition_ty +{ + enum cldr_plural_condition type; + union + { + struct cldr_plural_relation_ty *relation; + struct cldr_plural_condition_ty *conditions[2]; + } value; +}; + +struct cldr_plural_rule_ty +{ + char *name; + struct cldr_plural_condition_ty *condition; +}; + +struct cldr_plural_rule_list_ty +{ + struct cldr_plural_rule_ty **items; + size_t nitems; + size_t nitems_max; +}; + +struct cldr_plural_parse_args +{ + const char *cp; + const char *cp_end; + struct cldr_plural_rule_list_ty *result; +}; + +extern void +cldr_plural_range_free (struct cldr_plural_range_ty *range); +extern void +cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges); +extern void +cldr_plural_condition_free (struct cldr_plural_condition_ty *condition); +extern void 
+cldr_plural_relation_free (struct cldr_plural_relation_ty *relation); + +#ifdef __cplusplus +} +#endif + +#endif /* _CLDR_PLURAL_EXP_H */ diff --git a/gettext-tools/src/cldr-plural.y b/gettext-tools/src/cldr-plural.y new file mode 100644 index 0000000..9db4a67 --- /dev/null +++ b/gettext-tools/src/cldr-plural.y @@ -0,0 +1,465 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <[email protected]>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +%{ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "unistr.h" +#include "xalloc.h" + +#include "cldr-plural-exp.h" +#include "cldr-plural.h" + +/* Prototypes for local functions. */ +static int yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg); +static void yyerror (struct cldr_plural_parse_args *arg, const char *str); + +/* Allocation of expressions. 
*/ + +static struct cldr_plural_rule_ty * +new_rule (char *name, struct cldr_plural_condition_ty *condition) +{ + struct cldr_plural_rule_ty *result = + XMALLOC (struct cldr_plural_rule_ty); + result->name = name; + result->condition = condition; + return result; +} + +static struct cldr_plural_condition_ty * +new_leaf_condition (struct cldr_plural_relation_ty *relation) +{ + struct cldr_plural_condition_ty *result = + XMALLOC (struct cldr_plural_condition_ty); + result->type = CLDR_PLURAL_CONDITION_RELATION; + result->value.relation = relation; + return result; +} + +static struct cldr_plural_condition_ty * +new_branch_condition (enum cldr_plural_condition type, + struct cldr_plural_condition_ty *condition0, + struct cldr_plural_condition_ty *condition1) +{ + struct cldr_plural_condition_ty *result = + XMALLOC (struct cldr_plural_condition_ty); + result->type = type; + result->value.conditions[0] = condition0; + result->value.conditions[1] = condition1; + return result; +} + +static struct cldr_plural_relation_ty * +new_relation (struct cldr_plural_expression_ty *expression, + enum cldr_plural_relation type, + struct cldr_plural_range_list_ty *ranges) +{ + struct cldr_plural_relation_ty *result = + XMALLOC (struct cldr_plural_relation_ty); + result->expression = expression; + result->type = type; + result->ranges = ranges; + return result; +} + +static struct cldr_plural_expression_ty * +new_expression (int operand, int mod) +{ + struct cldr_plural_expression_ty *result = + XMALLOC (struct cldr_plural_expression_ty); + result->operand = operand; + result->mod = mod; + return result; +} + +static struct cldr_plural_range_list_ty * +add_range (struct cldr_plural_range_list_ty *ranges, + struct cldr_plural_range_ty *range) +{ + if (ranges->nitems == ranges->nitems_max) + { + ranges->nitems_max = ranges->nitems_max * 2 + 1; + ranges->items = xrealloc (ranges->items, + sizeof (struct cldr_plural_range_ty *) + * ranges->nitems_max); + } + ranges->items[ranges->nitems++] 
= range; + return ranges; +} + +static struct cldr_plural_range_ty * +new_range (struct cldr_plural_operand_ty *start, + struct cldr_plural_operand_ty *end) +{ + struct cldr_plural_range_ty *result = + XMALLOC (struct cldr_plural_range_ty); + result->start = start; + result->end = end; + return result; +} +%} + +%parse-param {struct cldr_plural_parse_args *arg} +%lex-param {struct cldr_plural_parse_args *arg} +%define api.pure full + +%union { + char *sval; + struct cldr_plural_condition_ty *cval; + struct cldr_plural_relation_ty *lval; + struct cldr_plural_expression_ty *eval; + struct cldr_plural_range_ty *gval; + struct cldr_plural_operand_ty *oval; + struct cldr_plural_range_list_ty *rval; + int ival; +} + +%destructor { free ($$); } <sval> +%destructor { cldr_plural_condition_free ($$); } <cval> +%destructor { cldr_plural_relation_free ($$); } <lval> +%destructor { free ($$); } <eval> +%destructor { cldr_plural_range_free ($$); } <gval> +%destructor { free ($$); } <oval> +%destructor { cldr_plural_range_list_free ($$); } <rval> +%destructor { } <ival> + +%token AND OR RANGE ELLIPSIS OTHER AT_INTEGER AT_DECIMAL +%token<sval> KEYWORD +%token<oval> INTEGER DECIMAL +%token<ival> OPERAND +%type<cval> condition and_condition +%type<lval> relation +%type<eval> expression +%type<gval> range range_or_integer +%type<rval> range_list + +%% + +rules: rule + | rules ';' rule + ; + +rule: KEYWORD ':' condition samples + { + struct cldr_plural_rule_ty *rule = new_rule ($1, $3); + struct cldr_plural_rule_list_ty *result = arg->result; + if (result->nitems == result->nitems_max) + { + result->nitems_max = result->nitems_max * 2 + 1; + result->items = xrealloc (result->items, + sizeof (struct cldr_plural_rule_ty *) + * result->nitems_max); + } + result->items[result->nitems++] = rule; + } + | OTHER ':' samples + ; + +condition: and_condition + { + $$ = $1; + } + | condition OR and_condition + { + $$ = new_branch_condition (CLDR_PLURAL_CONDITION_OR, $1, $3); + } + ; + 
+and_condition: relation + { + $$ = new_leaf_condition ($1); + } + | and_condition AND relation + { + $$ = new_branch_condition (CLDR_PLURAL_CONDITION_AND, + $1, + new_leaf_condition ($3)); + } + ; + +relation: expression '=' range_list + { + $$ = new_relation ($1, CLDR_PLURAL_RELATION_EQUAL, $3); + } + | expression '!' range_list + { + $$ = new_relation ($1, CLDR_PLURAL_RELATION_NOT_EQUAL, $3); + } + ; + +expression: OPERAND + { + $$ = new_expression ($1, 0); + } + | OPERAND '%' INTEGER + { + $$ = new_expression ($1, $3->value.ival); + } + ; + +range_list: range_or_integer + { + struct cldr_plural_range_list_ty *ranges = + XMALLOC (struct cldr_plural_range_list_ty); + memset (ranges, 0, sizeof (struct cldr_plural_range_list_ty)); + $$ = add_range (ranges, $1); + } + | range_list ',' range_or_integer + { + $$ = add_range ($1, $3); + } + ; + +range_or_integer: range + { + $$ = $1; + } + | INTEGER + { + $$ = new_range ($1, $1); + } + ; + +range: INTEGER RANGE INTEGER + { + $$ = new_range ($1, $3); + } + ; + +/* FIXME: collect samples */ +samples: at_integer at_decimal + ; + +at_integer: %empty + | AT_INTEGER sample_list + ; + +at_decimal: %empty + | AT_DECIMAL sample_list + ; + +sample_list: sample_list1 sample_ellipsis + ; +sample_list1: sample_range + | sample_list1 ',' sample_range + ; +sample_ellipsis: %empty + | ',' ELLIPSIS + ; + +sample_range: DECIMAL + | DECIMAL '~' DECIMAL + | INTEGER + | INTEGER '~' INTEGER + ; + +%% + +static int +yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg) +{ + const char *exp = arg->cp; + ucs4_t uc; + int length; + int result; + static char *buffer; + static size_t bufmax; + size_t bufpos; + + while (1) + { + if (exp[0] == '\0') + { + arg->cp = exp; + return YYEOF; + } + + if (exp[0] != ' ' && exp[0] != '\t') + break; + + ++exp; + } + + length = u8_mbtouc (&uc, (const uint8_t *) exp, arg->cp_end - exp); + if (uc == 0x2026) + { + arg->cp = exp + length; + return ELLIPSIS; + } + else if (strncmp ("...", exp, 3) == 0) + { + 
arg->cp = exp + 3; + return ELLIPSIS; + } + else if (strncmp ("..", exp, 2) == 0) + { + arg->cp = exp + 2; + return RANGE; + } + else if (strncmp ("other", exp, 5) == 0) + { + arg->cp = exp + 5; + return OTHER; + } + else if (strncmp ("@integer", exp, 8) == 0) + { + arg->cp = exp + 8; + return AT_INTEGER; + } + else if (strncmp ("@decimal", exp, 8) == 0) + { + arg->cp = exp + 8; + return AT_DECIMAL; + } + + result = *exp++; + switch (result) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + unsigned long int ival = result - '0'; + + while (exp[0] >= '0' && exp[0] <= '9') + { + ival *= 10; + ival += exp[0] - '0'; + ++exp; + } + + lval->oval = XMALLOC (struct cldr_plural_operand_ty); + if (exp[0] == '.' && exp[1] >= '0' && exp[1] <= '9') + { + double dval = ival; + int denominator = 10, nfractions = 0; + ++exp; + while (exp[0] >= '0' && exp[0] <= '9') + { + dval += (exp[0] - '0') / (double) denominator; + denominator *= 10; + ++nfractions; + ++exp; + } + lval->oval->type = CLDR_PLURAL_OPERAND_DECIMAL; + lval->oval->value.dval.d = dval; + lval->oval->value.dval.nfractions = nfractions; + result = DECIMAL; + } + else + { + lval->oval->type = CLDR_PLURAL_OPERAND_INTEGER; + lval->oval->value.ival = ival; + result = INTEGER; + } + } + break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + bufpos = 0; + for (;;) + { + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = result; + result = *exp; + switch (result) + { + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 
'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + ++exp; + continue; + default: + break; + } + break; + } + + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = '\0'; + + /* Operands. */ + if (bufpos == 1) + { + switch (buffer[0]) + { + case 'n': case 'i': case 'f': case 't': case 'v': case 'w': + arg->cp = exp; + lval->ival = buffer[0]; + return OPERAND; + default: + break; + } + } + + /* Keywords. */ + if (strcmp (buffer, "and") == 0) + { + arg->cp = exp; + return AND; + } + else if (strcmp (buffer, "or") == 0) + { + arg->cp = exp; + return OR; + } + + lval->sval = xstrdup (buffer); + result = KEYWORD; + break; + case '!': + if (exp[0] == '=') + { + ++exp; + result = '!'; + } + else + result = YYERRCODE; + break; + default: + break; + } + + arg->cp = exp; + + return result; +} + +static void +yyerror (struct cldr_plural_parse_args *arg, char const *s) +{ + fprintf (stderr, "%s\n", s); +} -- 2.1.0
ja nplurals=1; plural=0; vi nplurals=1; plural=0; ko nplurals=1; plural=0; en nplurals=2; plural=(n != 1); de nplurals=2; plural=(n != 1); nl nplurals=2; plural=(n != 1); sv nplurals=2; plural=(n != 1); da nplurals=2; plural=(n != 1); no nplurals=2; plural=(n != 1); nb nplurals=2; plural=(n != 1); nn nplurals=2; plural=(n != 1); fo nplurals=2; plural=(n != 1); es nplurals=2; plural=(n != 1); pt nplurals=2; plural=(n != 1); it nplurals=2; plural=(n != 1); bg nplurals=2; plural=(n != 1); el nplurals=2; plural=(n != 1); fi nplurals=2; plural=(n != 1); et nplurals=2; plural=(n != 1); he nplurals=4; plural=(n==1 ? 0 : n==2 ? 1 : n>10 && n%10==0 ? 2 : 3); eo nplurals=2; plural=(n != 1); hu nplurals=2; plural=(n != 1); tr nplurals=2; plural=(n != 1); pt_BR fr nplurals=2; plural=(n > 1); lv nplurals=3; plural=(n%10==0 || (n%100>=11 && n%100<=19) ? 0 : n%10==1 && n%100!=11 ? 1 : 2); ga nplurals=5; plural=(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4); ro nplurals=3; plural=(n==1 ? 0 : n==0 || (n!=1 && n%100>=1 && n%100<=19) ? 1 : 2); lt nplurals=3; plural=(n%10==1 && (n%100<11 || n%100>19) ? 0 : n%10>=2 && n%10<=9 && (n%100<11 || n%100>19) ? 1 : 2); ru nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n%100>=11 && n%100<=14) ? 2 : 3); uk nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n%100>=11 && n%100<=14) ? 2 : 3); be nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n%100>=11 && n%100<=14) ? 2 : 3); sr nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : 2); hr nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : 2); cs nplurals=3; plural=(n==1 ? 0 : n>=2 && n<=4 ? 1 : 2); sk nplurals=3; plural=(n==1 ? 
0 : n>=2 && n<=4 ? 1 : 2); pl nplurals=4; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : (n!=1 && n%10<=1) || (n%10>=5 && n%10<=9) || (n%100>=12 && n%100<=14) ? 2 : 3); sl nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100>=3 && n%100<=4 ? 2 : 3);
