Chris Lattner wrote:
On Nov 21, 2008, at 4:23 AM, Sebastian Redl wrote:
I was planning to implement a modifier that works like this:
"We have %0 %c0{1:mouse|:mice}"
i.e. a simple case statement. ("case parameter 0 in") The modifier picks an integral argument and executes the case. It could be more complicated for
languages that, say, have different forms for 0, 1, 2, and more cases:
"%c0{0:something|1:somethingelse|2:yetanotherthing|:finaloption}"

Sure, this works for me. A few requests: please use a longer modifier name than "c" (I don't have any specific suggestions though). My intent is for tricky things to use longer names (like 'select') and very few but common things to use short names like 's'. Second, please consider supporting ranges: %whatever{3-7:blah}, so we can handle Polish:
http://www.gnu.org/software/automake/manual/gettext/Plural-forms.html
Well, here's my patch. This was a fun little exercise. The modifier is called plural, and the syntax is explained at length in the comments.
Third, please make sure you have a testcase for this so that we know if something breaks.
I've got a small test program which uses a copy of the code, but how do I add a proper test case for this feature? My small program isn't run as part of the regressions, and is bound to go out of sync.

Sebastian
Index: lib/Basic/Diagnostic.cpp
===================================================================
--- lib/Basic/Diagnostic.cpp    (revision 59832)
+++ lib/Basic/Diagnostic.cpp    (working copy)
@@ -290,7 +290,136 @@
     OutStr.push_back('s');
 }
 
+// Helpers for HandlePluralModifier, declared up front because they are defined after it.
+static bool EvalPluralExpr(unsigned ValNo, const char *Start, const char *End); // evaluates one condition
+static bool TestPluralRange(unsigned Val, const char *&Start, const char *End); // parses a range and tests Val against it
+static unsigned PluralNumber(const char *&Start, const char *End); // parses a decimal number, advancing Start
 
+/// HandlePluralModifier - Handle the integer 'plural' modifier. This is used
+/// for complex plural forms, or in languages where all plurals are complex.
+/// The syntax is: %plural{cond1:form1|cond2:form2|:form3}, where condn are
+/// conditions that are tested in order, the form corresponding to the first
+/// that applies being emitted. The empty condition is always true, making the
+/// last form a default case. If no condition matches, an assert fires.
+/// Conditions are simple boolean expressions, where n is the number argument.
+/// Here are the rules.
+/// condition  := expression | empty
+/// empty      :=                             -> always true
+/// expression := or-expr | numeric
+/// or-expr    := numeric ',' expression      -> true if any alternative is true
+/// numeric    := range                       -> true if n in range
+///             | '%' number '=' range        -> true if n % number in range
+/// range      := number                      -> true if n equals number
+///             | '(' number ',' number ')'   -> ranges are inclusive at both ends
+///
+/// Here are some examples from the GNU gettext manual written in this form:
+/// English:
+/// {1:form0|:form1}
+/// Latvian:
+/// {0:form2|%100=11,%10=0,%10=(2,9):form1|:form0}
+/// Gaeilge:
+/// {1:form0|2:form1|:form2}
+/// Romanian:
+/// {1:form0|0,%100=(1,19):form1|:form2}
+/// Lithuanian:
+/// {%10=0,%100=(10,19):form2|%10=1:form0|:form1}
+/// Russian (requires repeated form):
+/// {%100=(11,14):form2|%10=1:form0|%10=(2,4):form1|:form2}
+/// Slovak:
+/// {1:form0|(2,4):form1|:form2}
+/// Polish (requires repeated form):
+/// {1:form0|%100=(10,20):form2|%10=(2,4):form1|:form2}
+static void HandlePluralModifier(unsigned ValNo, // despite the name, this is the value n itself: the caller passes (unsigned)Val
+                                 const char *Argument, unsigned ArgumentLen, // the "cond:form|cond:form" text, [Argument, Argument + ArgumentLen)
+                                 llvm::SmallVectorImpl<char> &OutStr) // the selected form is appended here
+{
+  const char *ArgumentEnd = Argument + ArgumentLen;
+  while (Argument != ArgumentEnd) { // one iteration per "cond:form" alternative
+    const char *ExprEnd = Argument;
+    while (*ExprEnd != ':') { // NOTE(review): *ExprEnd is read before the bounds assert below runs
+      assert(ExprEnd != ArgumentEnd && "Plural missing expression end");
+      ++ExprEnd;
+    }
+    if (EvalPluralExpr(ValNo, Argument, ExprEnd)) { // ExprEnd points at the ':' that ends the condition
+      Argument = ExprEnd + 1; // first character of the form text
+      while (ExprEnd != ArgumentEnd && *ExprEnd != '|') // the form runs to the next '|' or the end of the argument
+        ++ExprEnd;
+      OutStr.append(Argument, ExprEnd);
+      return;
+    }
+    Argument = std::find(Argument, ArgumentEnd - 1, '|') + 1; // skip to the next alternative; with no '|' left this yields ArgumentEnd and ends the loop
+  }
+  assert(false && "Plural expression didn't match."); // NOTE(review): with asserts disabled nothing is appended in this case
+}
+
+/// EvalPluralExpr - Evaluate one 'plural' condition spanning [Start, End) against ValNo; returns true if any comma-separated alternative matches, or if the condition is empty.
+static bool EvalPluralExpr(unsigned ValNo, const char *Start, const char *End)
+{
+  // Empty condition? HandlePluralModifier passes End pointing at the ':' that closes the condition.
+  if (*Start == ':')
+    return true;
+
+  while (1) { // one iteration per comma-separated alternative
+    char C = *Start;
+    if(C == '%') {
+      // Modulo expression: "%N=range" tests ValNo % N against the range.
+      ++Start;
+      unsigned Arg = PluralNumber(Start, End);
+      assert(*Start == '=' && "Bad plural expression syntax: expected =");
+      ++Start;
+      unsigned ValMod = ValNo % Arg; // NOTE(review): Arg == 0 (e.g. "%0=...") is a division by zero; nothing above rejects it
+      if (TestPluralRange(ValMod, Start, End))
+        return true;
+    } else if(C == '(' || (C >= '0' && C <= '9')) {
+      // Range expression: tests ValNo itself against the range.
+      if (TestPluralRange(ValNo, Start, End))
+        return true;
+    } else {
+      assert(false && "Bad plural expression syntax: unexpected character"); // NOTE(review): with asserts disabled, control falls through to the scan below
+    }
+
+    // Scan for next or-expr part; the range parsed above has already been consumed, so a ',' found here separates alternatives.
+    while (Start != End && *Start != ',')
+      ++Start;
+    if(Start == End)
+      break;
+    ++Start; // step over the ','
+  }
+  return false; // no alternative matched
+}
+
+/// TestPluralRange - Parse a range at Start (a single number, or "(low,high)") and return whether Val lies in it. Advances Start past the parsed range.
+static bool TestPluralRange(unsigned Val, const char *&Start, const char *End)
+{
+  if (*Start != '(') { // single number: matches only on equality. NOTE(review): no Start != End check before this read
+    unsigned Ref = PluralNumber(Start, End);
+    return Ref == Val;
+  }
+
+  ++Start; // step over '('
+  unsigned Low = PluralNumber(Start, End);
+  assert(*Start == ',' && "Bad plural expression syntax: expected ,");
+  ++Start;
+  unsigned High = PluralNumber(Start, End);
+  assert(*Start == ')' && "Bad plural expression syntax: expected )");
+  ++Start;
+  return Low <= Val && Val <= High; // inclusive at both ends
+}
+
+/// PluralNumber - Parse an unsigned decimal integer at Start, stopping at End or the first non-digit, and advance Start past it. Returns 0 if no digit is present.
+static unsigned PluralNumber(const char *&Start, const char *End)
+{
+  // Accumulate decimal digits, most significant first.
+  unsigned Val = 0;
+  while (Start != End && *Start >= '0' && *Start <= '9') {
+    Val *= 10; // NOTE(review): no overflow check; unsigned arithmetic wraps on very long digit strings
+    Val += *Start - '0';
+    ++Start;
+  }
+  return Val;
+}
+
+
 /// FormatDiagnostic - Format this diagnostic into a string, substituting the
 /// formal arguments into the %0 slots.  The result is appended onto the Str
 /// array.
@@ -372,6 +501,8 @@
         HandleSelectModifier((unsigned)Val, Argument, ArgumentLen, OutStr);
       } else if (ModifierIs(Modifier, ModifierLen, "s")) {
         HandleIntegerSModifier(Val, OutStr);
+      } else if (ModifierIs(Modifier, ModifierLen, "plural")) {
+        HandlePluralModifier((unsigned)Val, Argument, ArgumentLen, OutStr);
       } else {
         assert(ModifierLen == 0 && "Unknown integer modifier");
         // FIXME: Optimize
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to