This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-474
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-474 by this push:
     new 4191d0d  Create NCIDL.g4
4191d0d is described below

commit 4191d0d9867a0c4ea0bc0a227bbce90cf4c1e6a1
Author: Aaron Radzinski <[email protected]>
AuthorDate: Fri Jan 21 11:50:34 2022 -0800

    Create NCIDL.g4
---
 .../internal/intent/compiler/antlr4/NCIDL.g4       | 360 +++++++++++++++++++++
 1 file changed, 360 insertions(+)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/compiler/antlr4/NCIDL.g4
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/compiler/antlr4/NCIDL.g4
new file mode 100644
index 0000000..f3bb151
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/compiler/antlr4/NCIDL.g4
@@ -0,0 +1,360 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+grammar NCIDL;
+
+// Parser.
+idl: idlDecls EOF; // Intent enty point.
+idlDecls
+    : idlDecl
+    | idlDecls idlDecl
+    ;
+idlDecl
+    : intent // Intent declaration.
+    | frag // Fragment declaration.
+    | imprt // External URL containing IDL declarations (recursive parsing).
+    ;
+imprt: 'import' LPAR qstring RPAR;
+frag: fragId termDecls;
+fragId: FRAG ASSIGN id;
+fragRef: FRAG LPAR id fragMeta? RPAR;
+fragMeta: COMMA jsonObj;
+intent: intentId optDecl? flowDecl? metaDecl? termDecls;
+intentId: 'intent' ASSIGN id;
+mtdDecl: DIV mtdRef DIV;
+flowDecl: 'flow' ASSIGN (qstring | mtdDecl);
+metaDecl: 'meta' ASSIGN jsonObj;
+optDecl: 'options' ASSIGN jsonObj;
+jsonObj
+    : LBRACE jsonPair (COMMA jsonPair)* RBRACE
+    | LBRACE RBRACE
+    ;
+jsonPair: qstring COLON jsonVal;
+jsonVal
+    : qstring
+    | MINUS? INT REAL? EXP?
+    | jsonObj
+    | jsonArr
+    | BOOL
+    | NULL
+    ;
+jsonArr
+    : LBR jsonVal (COMMA jsonVal)* RBR
+    | LBR RBR
+    ;
+termDecls
+    : termDecl
+    | termDecls termDecl;
+termDecl
+    : term
+    | fragRef
+    ;
+termEq
+    : ASSIGN // Do not use conversation.
+    | TILDA // Use conversation.
+    ;
+term: 'term' termId? termEq ((LBRACE vars? expr RBRACE) | mtdDecl) minMax?;
+mtdRef: javaFqn? POUND id;
+javaFqn
+    : javaClass
+    | javaFqn DOT javaClass
+    ;
+javaClass
+    : id
+    // We need to include keywords to make sure they don't conflict.
+    | IMPORT
+    | INTENT
+    | OPTIONS
+    | FLOW
+    | META
+    | TERM
+    | FRAG
+    ;
+termId: LPAR id RPAR;
+expr
+    // NOTE: order of productions defines precedence.
+    : op=(MINUS | NOT) expr # unaryExpr
+    | LPAR expr RPAR # parExpr
+    | expr op=(MULT | DIV | MOD) expr # multDivModExpr
+    | expr op=(PLUS | MINUS) expr # plusMinusExpr
+    | expr op=(LTEQ | GTEQ | LT | GT) expr # compExpr
+    | expr op=(EQ | NEQ) expr # eqNeqExpr
+    | expr op=(AND | OR) expr # andOrExpr
+    | atom # atomExpr
+    | (FUN_NAME | POUND) LPAR paramList? RPAR # callExpr
+    | (FUN_NAME | POUND) # callExpr
+    | AT id # varRef
+    ;
+vars
+    : varDecl
+    | vars varDecl
+    ;
+varDecl: AT id ASSIGN expr;
+paramList
+    : expr
+    | paramList COMMA expr
+    ;
+atom
+    : NULL
+    | INT REAL? EXP?
+    | BOOL
+    | qstring
+    ;
+qstring
+    : SQSTRING
+    | DQSTRING
+    ;
+minMax
+    : minMaxShortcut
+    | minMaxRange
+    ;
+minMaxShortcut
+    : PLUS
+    | QUESTION
+    | MULT
+    ;
+minMaxRange: LBR INT COMMA INT RBR;
+id
+    : ID
+    | FUN_NAME // Function name can overlap with ID so we detect both.
+    ;
+
+// Lexer.
+FUN_NAME
+    : 'meta_ent'
+    | 'meta_cfg'
+    | 'meta_intent'
+    | 'meta_req'
+    | 'meta_sys'
+    | 'meta_conv'
+    | 'meta_frag'
+    | 'json'
+    | 'if'
+    | 'tok_id'
+    | 'tok_lemma'
+    | 'tok_stem'
+    | 'tok_txt'
+    | 'tok_norm_txt'
+    | 'tok_req_id'
+    | 'tok_pos'
+    | 'tok_sparsity'
+    | 'tok_unid'
+    | 'tok_is_abstract'
+    | 'tok_is_bracketed'
+    | 'tok_is_direct'
+    | 'tok_is_permutated'
+    | 'tok_is_english'
+    | 'tok_is_freeword'
+    | 'tok_is_quoted'
+    | 'tok_is_stopword'
+    | 'tok_is_swear'
+    | 'tok_is_user'
+    | 'tok_is_wordnet'
+    | 'tok_index'
+    | 'tok_is_first'
+    | 'tok_is_last'
+    | 'tok_is_between_ids'
+    | 'tok_is_between_groups'
+    | 'tok_is_between_parents'
+    | 'tok_is_before_id'
+    | 'tok_is_before_group'
+    | 'tok_is_before_parent'
+    | 'tok_is_after_id'
+    | 'tok_is_after_group'
+    | 'tok_is_after_parent'
+    | 'tok_ancestors'
+    | 'tok_parent'
+    | 'tok_groups'
+    | 'tok_value'
+    | 'tok_aliases'
+    | 'tok_start_idx'
+    | 'tok_end_idx'
+    | 'tok_this'
+    | 'tok_find_part'
+    | 'tok_has_part'
+    | 'tok_find_parts'
+    | 'tok_count'
+    | 'tok_all'
+    | 'tok_all_for_id'
+    | 'tok_all_for_parent'
+    | 'tok_all_for_group'
+    | 'req_id'
+    | 'req_normtext'
+    | 'req_tstamp'
+    | 'req_addr'
+    | 'req_agent'
+    | 'user_id'
+    | 'user_fname'
+    | 'user_lname'
+    | 'user_email'
+    | 'user_admin'
+    | 'user_signup_tstamp'
+    | 'comp_id'
+    | 'comp_name'
+    | 'comp_website'
+    | 'comp_country'
+    | 'comp_region'
+    | 'comp_city'
+    | 'comp_addr'
+    | 'comp_postcode'
+    | 'trim'
+    | 'regex'
+    | 'strip'
+    | 'uppercase'
+    | 'lowercase'
+    | 'is_alpha'
+    | 'is_alphanum'
+    | 'is_whitespace'
+    | 'is_num'
+    | 'is_numspace'
+    | 'is_alphaspace'
+    | 'is_alphanumspace'
+    | 'split'
+    | 'split_trim'
+    | 'starts_with'
+    | 'ends_with'
+    | 'index_of'
+    | 'contains'
+    | 'substr'
+    | 'replace'
+    | 'abs'
+    | 'ceil'
+    | 'floor'
+    | 'rint'
+    | 'round'
+    | 'signum'
+    | 'sqrt'
+    | 'cbrt'
+    | 'pi'
+    | 'to_double'
+    | 'to_int'
+    | 'euler'
+    | 'acos'
+    | 'asin'
+    | 'atan'
+    | 'cos'
+    | 'sin'
+    | 'tan'
+    | 'cosh'
+    | 'sinh'
+    | 'tanh'
+    | 'atan2'
+    | 'degrees'
+    | 'radians'
+    | 'exp'
+    | 'expm1'
+    | 'hypot'
+    | 'log'
+    | 'log10'
+    | 'log1p'
+    | 'pow'
+    | 'rand'
+    | 'square'
+    | 'list'
+    | 'get'
+    | 'has'
+    | 'has_any'
+    | 'has_all'
+    | 'first'
+    | 'last'
+    | 'keys'
+    | 'values'
+    | 'length'
+    | 'count'
+    | 'size'
+    | 'sort'
+    | 'reverse'
+    | 'is_empty'
+    | 'non_empty'
+    | 'distinct'
+    | 'concat'
+    | 'to_string'
+    | 'max'
+    | 'min'
+    | 'avg'
+    | 'stdev'
+    | 'year'
+    | 'month'
+    | 'day_of_month'
+    | 'day_of_week'
+    | 'day_of_year'
+    | 'hour'
+    | 'minute'
+    | 'second'
+    | 'week_of_month'
+    | 'week_of_year'
+    | 'quarter'
+    | 'now'
+    | 'or_else'
+    ;
+
+IMPORT : 'import' ;
+INTENT : 'intent' ;
+OPTIONS : 'options' ;
+FLOW : 'flow' ;
+META : 'meta' ;
+TERM : 'term' ;
+FRAG: 'fragment'; // To resolve ambiguity with ANTLR4 keyword.
+SQSTRING: SQUOTE ((~'\'') | ('\\''\''))* SQUOTE; // Allow for \' (escaped 
single quote) in the string.
+DQSTRING: DQUOTE ((~'"') | ('\\''"'))* DQUOTE; // Allow for \" (escape double 
quote) in the string.
+BOOL: 'true' | 'false';
+NULL: 'null';
+EQ: '==';
+NEQ: '!=';
+GTEQ: '>=';
+LTEQ: '<=';
+GT: '>';
+LT: '<';
+AND: '&&';
+OR: '||';
+VERT: '|';
+NOT: '!';
+LPAR: '(';
+RPAR: ')';
+LBRACE: '{';
+RBRACE: '}';
+SQUOTE: '\'';
+DQUOTE: '"';
+TILDA: '~';
+LBR: '[';
+RBR: ']';
+POUND: '#';
+COMMA: ',';
+COLON: ':';
+MINUS: '-';
+DOT: '.';
+UNDERSCORE: '_';
+ASSIGN: '=';
+PLUS: '+';
+QUESTION: '?';
+MULT: '*';
+DIV: '/';
+MOD: '%';
+AT: '@';
+DOLLAR: '$';
+INT: '0' | [1-9] [_0-9]*;
+REAL: DOT [0-9]+;
+EXP: [Ee] [+\-]? INT;
+fragment UNI_CHAR // International chars.
+    : ~[\u0000-\u007F\uD800-\uDBFF] // Covers all characters above 0x7F which 
are not a surrogate.
+    | [\uD800-\uDBFF] [\uDC00-\uDFFF] // Covers UTF-16 surrogate pairs 
encodings for U+10000 to U+10FFFF.
+    ;
+fragment LETTER: [a-zA-Z];
+ID: 
(UNI_CHAR|UNDERSCORE|LETTER|DOLLAR)+(UNI_CHAR|DOLLAR|LETTER|[0-9]|COLON|MINUS|UNDERSCORE)*;
+COMMENT : ('//' ~[\r\n]* '\r'? ('\n'| EOF) | '/*' .*? '*/' ) -> skip;
+WS: [ \r\t\u000C\n]+ -> skip;
+ErrorChar: .;

Reply via email to