This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-247
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-247 by this push:
new cb5028b WIP.
cb5028b is described below
commit cb5028b37c26444d07cb23b5d3e3a6c64a3c96da
Author: Aaron Radzinski <[email protected]>
AuthorDate: Tue Feb 23 23:57:50 2021 -0800
WIP.
---
.../nlpcraft/common/makro/NCMacroParser.scala | 2 +-
.../nlpcraft/common/makro/antlr4/NCMacroDsl.g4 | 12 ++++++--
.../common/makro/antlr4/NCMacroDslLexer.interp | 2 +-
.../common/makro/antlr4/NCMacroDslLexer.java | 33 +++++++++++-----------
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 8 +++---
.../common/makro/NCMacroCompilerSpec.scala | 2 ++
6 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
index 0f79cea..56d4b20 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
@@ -69,7 +69,7 @@ object NCMacroParser {
* - '{A|B|_}' denotes either 'A', or 'B' or nothing ('_').
* - '{A}[1,2]' denotes 'A' or 'A A'.
* - '{A}[0,1]' denotes 'A' or nothing (just like '{A|_}').
- * - '\' can be used only for escaping '{}\<>_[]|,' special symbols.
+ * - '\' must be used for escaping any of '{}\<>_[]|,' special symbols.
*
* Examples:
* "A {B|C}[1,2] D" ⇒ "A B D", "A C D", "A B B D", "A C C D"
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4 b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
index d214077..85ceabf 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
@@ -46,17 +46,25 @@ fragment ESC_CHAR: [{}\\<>_[\]|,];
fragment ESC: '\\' ESC_CHAR;
fragment TXT_CHAR
: [~!@#$%^&*()+.]
- | [-=<>/\\;:`]
+ | [-=<>/\\;:`'"]
| '\u00B7'
| 'A'..'Z'
| 'a'..'z'
| '0'..'9'
| '\u0300'..'\u036F'
+ | '\u00A0'..'\u00FF' /* Latin-1 Supplement. */
+ | '\u0100'..'\u017F' /* Latin Extended-A. */
+ | '\u0180'..'\u024F' /* Latin Extended-B. */
+ | '\u1E02'..'\u1EF3' /* Latin Extended Additional. */
+ | '\u0259'..'\u0292' /* IPA Extensions. */
+ | '\u02B0'..'\u02FF' /* Spacing modifier letters. */
| '\u203F'..'\u2040'
| '\u00C0'..'\u00D6'
| '\u00D8'..'\u00F6'
| '\u00F8'..'\u02FF'
- | '\u0370'..'\u037D'
+ | '\u0370'..'\u03FF' /* Greek and Coptic. */
+ | '\u1F01'..'\u1FFF' /* Greek Extended. */
+ | '\u0400'..'\u04FF' /* Cyrillic. */
| '\u037F'..'\u1FFF'
| '\u200C'..'\u200D'
| '\u2070'..'\u218F'
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
index 184581f..48cd14d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
@@ -50,4 +50,4 @@ mode names:
DEFAULT_MODE
atn:
-[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 13, 78, 8, 1,
4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8,
9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4,
14, 9, 14, 4, 15, 9, 15, 3, 2, 3, 2, 3, 3, 3, 3, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6,
3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 11, 5, 11,
52, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 57, 10, 12, 12, 12, 14, 12, 60, 11, 12,
5, 12, 62, 10, 12, 3, 13 [...]
\ No newline at end of file
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 13, 78, 8, 1,
4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8,
9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4,
14, 9, 14, 4, 15, 9, 15, 3, 2, 3, 2, 3, 3, 3, 3, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6,
3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 11, 5, 11,
52, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 57, 10, 12, 12, 12, 14, 12, 60, 11, 12,
5, 12, 62, 10, 12, 3, 13 [...]
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
index 889771c..6d8b92f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
@@ -113,23 +113,22 @@ public class NCMacroDslLexer extends Lexer {
"\f\3\f\7\f9\n\f\f\f\16\f<\13\f\5\f>\n\f\3\r\3\r\6\rB\n\r\r\r\16\rC\3\16"+
"\6\16G\n\16\r\16\16\16H\3\16\3\16\3\17\3\17\2\2\20\3\3\5\4\7\5\t\6\13"+
"\7\r\b\17\t\21\2\23\2\25\2\27\n\31\13\33\f\35\r\3\2\7\b\2..>>@@]_aa}\177"+
-
"\27\2##%(*-/@B\\^^``b|\u0080\u0080\u00b9\u00b9\u00c2\u00d8\u00da\u00f8"+
-
"\u00fa\u037f\u0381\u2001\u200e\u200f\u2041\u2042\u2072\u2191\u2c02\u2ff1"+
-
"\u3003\ud801\uf902\ufdd1\ufdf2\uffff\3\2\63;\4\2\62;aa\5\2\13\f\16\17"+
-
"\"\"\2O\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2"+
-
"\r\3\2\2\2\2\17\3\2\2\2\2\27\3\2\2\2\2\31\3\2\2\2\2\33\3\2\2\2\2\35\3"+
-
"\2\2\2\3\37\3\2\2\2\5!\3\2\2\2\7#\3\2\2\2\t%\3\2\2\2\13\'\3\2\2\2\r)\3"+
-
"\2\2\2\17+\3\2\2\2\21-\3\2\2\2\23/\3\2\2\2\25\63\3\2\2\2\27=\3\2\2\2\31"+
- "A\3\2\2\2\33F\3\2\2\2\35L\3\2\2\2\37 \7}\2\2
\4\3\2\2\2!\"\7\177\2\2\""+
-
"\6\3\2\2\2#$\7]\2\2$\b\3\2\2\2%&\7_\2\2&\n\3\2\2\2\'(\7~\2\2(\f\3\2\2"+
-
"\2)*\7.\2\2*\16\3\2\2\2+,\7a\2\2,\20\3\2\2\2-.\t\2\2\2.\22\3\2\2\2/\60"+
-
"\7^\2\2\60\61\5\21\t\2\61\24\3\2\2\2\62\64\t\3\2\2\63\62\3\2\2\2\64\26"+
-
"\3\2\2\2\65>\7\62\2\2\66:\t\4\2\2\679\t\5\2\28\67\3\2\2\29<\3\2\2\2:8"+
-
"\3\2\2\2:;\3\2\2\2;>\3\2\2\2<:\3\2\2\2=\65\3\2\2\2=\66\3\2\2\2>\30\3\2"+
-
"\2\2?B\5\25\13\2@B\5\23\n\2A?\3\2\2\2A@\3\2\2\2BC\3\2\2\2CA\3\2\2\2CD"+
-
"\3\2\2\2D\32\3\2\2\2EG\t\6\2\2FE\3\2\2\2GH\3\2\2\2HF\3\2\2\2HI\3\2\2\2"+
-
"IJ\3\2\2\2JK\b\16\2\2K\34\3\2\2\2LM\13\2\2\2M\36\3\2\2\2\t\2\63:=ACH\3"+
- "\b\2\2";
+
"\21\2#-/@B\\^^``b|\u0080\u0080\u00a2\u2001\u200e\u200f\u2041\u2042\u2072"+
+
"\u2191\u2c02\u2ff1\u3003\ud801\uf902\ufdd1\ufdf2\uffff\3\2\63;\4\2\62"+
+
";aa\5\2\13\f\16\17\"\"\2O\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2"+
+
"\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\27\3\2\2\2\2\31\3\2\2\2\2"+
+
"\33\3\2\2\2\2\35\3\2\2\2\3\37\3\2\2\2\5!\3\2\2\2\7#\3\2\2\2\t%\3\2\2\2"+
+
"\13\'\3\2\2\2\r)\3\2\2\2\17+\3\2\2\2\21-\3\2\2\2\23/\3\2\2\2\25\63\3\2"+
+ "\2\2\27=\3\2\2\2\31A\3\2\2\2\33F\3\2\2\2\35L\3\2\2\2\37
\7}\2\2 \4\3\2"+
+
"\2\2!\"\7\177\2\2\"\6\3\2\2\2#$\7]\2\2$\b\3\2\2\2%&\7_\2\2&\n\3\2\2\2"+
+
"\'(\7~\2\2(\f\3\2\2\2)*\7.\2\2*\16\3\2\2\2+,\7a\2\2,\20\3\2\2\2-.\t\2"+
+
"\2\2.\22\3\2\2\2/\60\7^\2\2\60\61\5\21\t\2\61\24\3\2\2\2\62\64\t\3\2\2"+
+
"\63\62\3\2\2\2\64\26\3\2\2\2\65>\7\62\2\2\66:\t\4\2\2\679\t\5\2\28\67"+
+
"\3\2\2\29<\3\2\2\2:8\3\2\2\2:;\3\2\2\2;>\3\2\2\2<:\3\2\2\2=\65\3\2\2\2"+
+
"=\66\3\2\2\2>\30\3\2\2\2?B\5\25\13\2@B\5\23\n\2A?\3\2\2\2A@\3\2\2\2BC"+
+
"\3\2\2\2CA\3\2\2\2CD\3\2\2\2D\32\3\2\2\2EG\t\6\2\2FE\3\2\2\2GH\3\2\2\2"+
+
"HF\3\2\2\2HI\3\2\2\2IJ\3\2\2\2JK\b\16\2\2K\34\3\2\2\2LM\13\2\2\2M\36\3"+
+ "\2\2\2\t\2\63:=ACH\3\b\2\2";
public static final ATN _ATN =
new ATNDeserializer().deserialize(_serializedATN.toCharArray());
static {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index d0ed457..9d3c93c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -48,13 +48,13 @@ object NCLimitEnricher extends NCProbeEnricher {
// - digits (like `25`),
// - word numbers (like `twenty two`) or
// - fuzzy numbers (like `few`).
- private final val CD = "[CD]"
+ private final val CD = "'CD'"
// Possible elements:
- // - Any macros,
- // - Special symbol CD (which designates obvious number or fuzzy number word)
+ // - Any macros.
+ // - Special symbol CD (which designates obvious number or fuzzy number word).
// - Any simple word.
- // Note that `CD` is optional (DFLT_LIMIT will be used)
+ // Note that `CD` is optional (DFLT_LIMIT will be used).
private final val SYNONYMS = Seq(
s"<TOP_WORDS> {of|_} {$CD|_} {<POST_WORDS>|_}",
s"$CD of",
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
index 0df7a4c..4560faa 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
@@ -98,6 +98,8 @@ class NCMacroCompilerSpec {
checkEq("""a {/abc.\{\}*/ |/d/} c""", Seq("""a /abc.\{\}*/ c""", "a /d/ c"))
checkEq("""a .{b\, |_}. c""", Seq("""a . b\, . c""", "a . . c"))
checkEq("a { {b|c}|_}.", Seq("a .", "a b .", "a c ."))
+ checkEq("°", Seq("°"))
+
}
@Test