This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-474
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-474 by this push:
new 4191d0d Create NCIDL.g4
4191d0d is described below
commit 4191d0d9867a0c4ea0bc0a227bbce90cf4c1e6a1
Author: Aaron Radzinski <[email protected]>
AuthorDate: Fri Jan 21 11:50:34 2022 -0800
Create NCIDL.g4
---
.../internal/intent/compiler/antlr4/NCIDL.g4 | 360 +++++++++++++++++++++
1 file changed, 360 insertions(+)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/compiler/antlr4/NCIDL.g4 b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/compiler/antlr4/NCIDL.g4
new file mode 100644
index 0000000..f3bb151
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/compiler/antlr4/NCIDL.g4
@@ -0,0 +1,360 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+grammar NCIDL;
+
+// Parser.
+idl: idlDecls EOF; // IDL entry point: one or more declarations followed by end of input.
+idlDecls // Left-recursive list of one or more declarations.
+ : idlDecl
+ | idlDecls idlDecl
+ ;
+idlDecl
+ : intent // Intent declaration.
+ | frag // Fragment declaration.
+ | imprt // External URL containing IDL declarations (recursive parsing).
+ ;
+imprt: 'import' LPAR qstring RPAR; // Syntax: import(<quoted string>)
+frag: fragId termDecls; // Fragment declaration: fragment ID followed by one or more terms or fragment references.
+fragId: FRAG ASSIGN id; // Syntax: fragment=<id>
+fragRef: FRAG LPAR id fragMeta? RPAR; // Syntax: fragment(<id>) or fragment(<id>, <JSON object>)
+fragMeta: COMMA jsonObj; // Optional JSON object argument of a fragment reference.
+intent: intentId optDecl? flowDecl? metaDecl? termDecls; // Optional sections are accepted only in this order: options, flow, meta.
+intentId: 'intent' ASSIGN id; // Syntax: intent=<id>
+mtdDecl: DIV mtdRef DIV; // Method reference enclosed in a pair of DIV tokens.
+flowDecl: 'flow' ASSIGN (qstring | mtdDecl); // Syntax: flow=<quoted string> or flow=<method declaration>
+metaDecl: 'meta' ASSIGN jsonObj; // Syntax: meta=<JSON object>
+optDecl: 'options' ASSIGN jsonObj; // Syntax: options=<JSON object>
+jsonObj // JSON-like object; may be empty.
+ : LBRACE jsonPair (COMMA jsonPair)* RBRACE
+ | LBRACE RBRACE
+ ;
+jsonPair: qstring COLON jsonVal; // Key is a qstring, i.e. either single- or double-quoted.
+jsonVal
+ : qstring
+ | MINUS? INT REAL? EXP? // Number: optional sign, integer part, optional fraction and exponent tokens.
+ | jsonObj
+ | jsonArr
+ | BOOL
+ | NULL
+ ;
+jsonArr // JSON-like array; may be empty.
+ : LBR jsonVal (COMMA jsonVal)* RBR
+ | LBR RBR
+ ;
+termDecls // Left-recursive list of one or more term declarations.
+ : termDecl
+ | termDecls termDecl;
+termDecl
+ : term
+ | fragRef // Fragment reference used in place of an inline term.
+ ;
+termEq
+ : ASSIGN // Do not use conversation.
+ | TILDA // Use conversation.
+ ;
+term: 'term' termId? termEq ((LBRACE vars? expr RBRACE) | mtdDecl) minMax?; // Body is either a braced expression (with optional variable declarations) or a method declaration; an optional minMax suffix follows.
+mtdRef: javaFqn? POUND id; // Optional dotted class name, then POUND and the method id.
+javaFqn // Dot-separated sequence of one or more javaClass segments.
+ : javaClass
+ | javaFqn DOT javaClass
+ ;
+javaClass
+ : id
+ // IDL keywords are lexed as their own tokens rather than ID, so they are listed here to remain usable as name segments.
+ | IMPORT
+ | INTENT
+ | OPTIONS
+ | FLOW
+ | META
+ | TERM
+ | FRAG
+ ;
+termId: LPAR id RPAR; // Term ID in parentheses: (<id>)
+expr
+ // NOTE: order of productions defines precedence (earlier alternatives bind tighter); AND and OR share one level.
+ : op=(MINUS | NOT) expr # unaryExpr
+ | LPAR expr RPAR # parExpr
+ | expr op=(MULT | DIV | MOD) expr # multDivModExpr
+ | expr op=(PLUS | MINUS) expr # plusMinusExpr
+ | expr op=(LTEQ | GTEQ | LT | GT) expr # compExpr
+ | expr op=(EQ | NEQ) expr # eqNeqExpr
+ | expr op=(AND | OR) expr # andOrExpr
+ | atom # atomExpr
+ | (FUN_NAME | POUND) LPAR paramList? RPAR # callExpr
+ | (FUN_NAME | POUND) # callExpr
+ | AT id # varRef
+ ;
+vars // Left-recursive list of one or more variable declarations.
+ : varDecl
+ | vars varDecl
+ ;
+varDecl: AT id ASSIGN expr; // Syntax: @<id> = <expr>
+paramList // Comma-separated list of one or more expressions.
+ : expr
+ | paramList COMMA expr
+ ;
+atom // Literal values usable inside expressions.
+ : NULL
+ | INT REAL? EXP? // Unsigned number; a leading MINUS is matched by the unaryExpr alternative of 'expr'.
+ | BOOL
+ | qstring
+ ;
+qstring // Single- or double-quoted string literal.
+ : SQSTRING
+ | DQSTRING
+ ;
+minMax // Optional suffix of 'term': either a one-character shortcut or an explicit range.
+ : minMaxShortcut
+ | minMaxRange
+ ;
+minMaxShortcut // One of PLUS, QUESTION or MULT.
+ : PLUS
+ | QUESTION
+ | MULT
+ ;
+minMaxRange: LBR INT COMMA INT RBR; // Syntax: [<int>,<int>]
+id
+ : ID
+ | FUN_NAME // Function name can overlap with ID so we detect both.
+ ;
+
+// Lexer.
+FUN_NAME // Fixed set of function names; declared before ID, so on equal-length matches these spellings lex as FUN_NAME rather than ID.
+ : 'meta_ent'
+ | 'meta_cfg'
+ | 'meta_intent'
+ | 'meta_req'
+ | 'meta_sys'
+ | 'meta_conv'
+ | 'meta_frag'
+ | 'json'
+ | 'if'
+ | 'tok_id'
+ | 'tok_lemma'
+ | 'tok_stem'
+ | 'tok_txt'
+ | 'tok_norm_txt'
+ | 'tok_req_id'
+ | 'tok_pos'
+ | 'tok_sparsity'
+ | 'tok_unid'
+ | 'tok_is_abstract'
+ | 'tok_is_bracketed'
+ | 'tok_is_direct'
+ | 'tok_is_permutated'
+ | 'tok_is_english'
+ | 'tok_is_freeword'
+ | 'tok_is_quoted'
+ | 'tok_is_stopword'
+ | 'tok_is_swear'
+ | 'tok_is_user'
+ | 'tok_is_wordnet'
+ | 'tok_index'
+ | 'tok_is_first'
+ | 'tok_is_last'
+ | 'tok_is_between_ids'
+ | 'tok_is_between_groups'
+ | 'tok_is_between_parents'
+ | 'tok_is_before_id'
+ | 'tok_is_before_group'
+ | 'tok_is_before_parent'
+ | 'tok_is_after_id'
+ | 'tok_is_after_group'
+ | 'tok_is_after_parent'
+ | 'tok_ancestors'
+ | 'tok_parent'
+ | 'tok_groups'
+ | 'tok_value'
+ | 'tok_aliases'
+ | 'tok_start_idx'
+ | 'tok_end_idx'
+ | 'tok_this'
+ | 'tok_find_part'
+ | 'tok_has_part'
+ | 'tok_find_parts'
+ | 'tok_count'
+ | 'tok_all'
+ | 'tok_all_for_id'
+ | 'tok_all_for_parent'
+ | 'tok_all_for_group'
+ | 'req_id'
+ | 'req_normtext'
+ | 'req_tstamp'
+ | 'req_addr'
+ | 'req_agent'
+ | 'user_id'
+ | 'user_fname'
+ | 'user_lname'
+ | 'user_email'
+ | 'user_admin'
+ | 'user_signup_tstamp'
+ | 'comp_id'
+ | 'comp_name'
+ | 'comp_website'
+ | 'comp_country'
+ | 'comp_region'
+ | 'comp_city'
+ | 'comp_addr'
+ | 'comp_postcode'
+ | 'trim'
+ | 'regex'
+ | 'strip'
+ | 'uppercase'
+ | 'lowercase'
+ | 'is_alpha'
+ | 'is_alphanum'
+ | 'is_whitespace'
+ | 'is_num'
+ | 'is_numspace'
+ | 'is_alphaspace'
+ | 'is_alphanumspace'
+ | 'split'
+ | 'split_trim'
+ | 'starts_with'
+ | 'ends_with'
+ | 'index_of'
+ | 'contains'
+ | 'substr'
+ | 'replace'
+ | 'abs'
+ | 'ceil'
+ | 'floor'
+ | 'rint'
+ | 'round'
+ | 'signum'
+ | 'sqrt'
+ | 'cbrt'
+ | 'pi'
+ | 'to_double'
+ | 'to_int'
+ | 'euler'
+ | 'acos'
+ | 'asin'
+ | 'atan'
+ | 'cos'
+ | 'sin'
+ | 'tan'
+ | 'cosh'
+ | 'sinh'
+ | 'tanh'
+ | 'atan2'
+ | 'degrees'
+ | 'radians'
+ | 'exp'
+ | 'expm1'
+ | 'hypot'
+ | 'log'
+ | 'log10'
+ | 'log1p'
+ | 'pow'
+ | 'rand'
+ | 'square'
+ | 'list'
+ | 'get'
+ | 'has'
+ | 'has_any'
+ | 'has_all'
+ | 'first'
+ | 'last'
+ | 'keys'
+ | 'values'
+ | 'length'
+ | 'count'
+ | 'size'
+ | 'sort'
+ | 'reverse'
+ | 'is_empty'
+ | 'non_empty'
+ | 'distinct'
+ | 'concat'
+ | 'to_string'
+ | 'max'
+ | 'min'
+ | 'avg'
+ | 'stdev'
+ | 'year'
+ | 'month'
+ | 'day_of_month'
+ | 'day_of_week'
+ | 'day_of_year'
+ | 'hour'
+ | 'minute'
+ | 'second'
+ | 'week_of_month'
+ | 'week_of_year'
+ | 'quarter'
+ | 'now'
+ | 'or_else'
+ ;
+
+IMPORT : 'import' ; // Keyword tokens are declared before ID so they take priority over ID on equal-length matches.
+INTENT : 'intent' ;
+OPTIONS : 'options' ;
+FLOW : 'flow' ;
+META : 'meta' ;
+TERM : 'term' ;
+FRAG: 'fragment'; // Token is named FRAG to resolve ambiguity with the ANTLR4 'fragment' keyword.
+SQSTRING: SQUOTE ((~'\'') | ('\\''\''))* SQUOTE; // Allow for \' (escaped single quote) in the string.
+DQSTRING: DQUOTE ((~'"') | ('\\''"'))* DQUOTE; // Allow for \" (escaped double quote) in the string.
+BOOL: 'true' | 'false';
+NULL: 'null';
+EQ: '==';
+NEQ: '!=';
+GTEQ: '>=';
+LTEQ: '<=';
+GT: '>';
+LT: '<';
+AND: '&&';
+OR: '||';
+VERT: '|'; // Not referenced by any parser rule in this grammar.
+NOT: '!';
+LPAR: '(';
+RPAR: ')';
+LBRACE: '{';
+RBRACE: '}';
+SQUOTE: '\'';
+DQUOTE: '"';
+TILDA: '~';
+LBR: '[';
+RBR: ']';
+POUND: '#';
+COMMA: ',';
+COLON: ':';
+MINUS: '-';
+DOT: '.';
+UNDERSCORE: '_';
+ASSIGN: '=';
+PLUS: '+';
+QUESTION: '?';
+MULT: '*';
+DIV: '/';
+MOD: '%';
+AT: '@';
+DOLLAR: '$';
+INT: '0' | [1-9] [_0-9]*; // Either a lone '0' or a non-zero digit followed by digits and/or underscores.
+REAL: DOT [0-9]+; // Fractional part only; the parser combines it with a preceding INT.
+EXP: [Ee] [+\-]? INT; // NOTE(review): declared before ID, so text such as 'e1' lexes as EXP rather than ID - confirm this is intended.
+fragment UNI_CHAR // International chars.
+ : ~[\u0000-\u007F\uD800-\uDBFF] // Covers all characters above 0x7F which are not a surrogate.
+ | [\uD800-\uDBFF] [\uDC00-\uDFFF] // Covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF.
+ ;
+fragment LETTER: [a-zA-Z];
+ID: (UNI_CHAR|UNDERSCORE|LETTER|DOLLAR)+(UNI_CHAR|DOLLAR|LETTER|[0-9]|COLON|MINUS|UNDERSCORE)*; // Must start with a letter, '_', '$' or international char; digits, ':' and '-' are allowed only after that.
+COMMENT : ('//' ~[\r\n]* '\r'? ('\n'| EOF) | '/*' .*? '*/' ) -> skip; // Line comments and block comments are discarded by the lexer.
+WS: [ \r\t\u000C\n]+ -> skip; // Whitespace (space, CR, tab, form feed, LF) is discarded by the lexer.
+ErrorChar: .; // Catch-all: any single character that no earlier lexer rule matches.