[incubator-nlpcraft] branch NLPCRAFT-504 updated: Synonyms processing bugfix.

sergeykamov Mon, 04 Jul 2022 01:34:55 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-504
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-504 by this push:
     new 47df111c Synonyms processing bugfix.
47df111c is described below

commit 47df111c0cf1221c3155da47cf9a1d4d1fd67e84
Author: Sergey Kamov <[email protected]>
AuthorDate: Mon Jul 4 11:34:44 2022 +0300

    Synonyms processing bugfix.
---
 .../entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala  | 6 +++---
 .../nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala    | 7 ++++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 8aa82f12..5dda3214 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -150,7 +150,7 @@ private[semantic] object NCSemanticSynonymsProcessor extends LazyLogging:
         macroParser: NCMacroParser,
         elemId: String,
         syns: Set[String]
-    ): Seq[Seq[NCSemanticSynonymChunk]] =
+    ): List[List[NCSemanticSynonymChunk]] =
         case class RegexHolder(text: String, var used: Boolean = false):
             private def stripSuffix(fix: String, s: String): String = 
s.slice(fix.length, s.length - fix.length)
 
@@ -195,8 +195,8 @@ private[semantic] object NCSemanticSynonymsProcessor extends LazyLogging:
                                 regex.used = true
                                 Option(regex.mkChunk())
                         case None => Option(NCSemanticSynonymChunk(TEXT, 
tok.getText, stemmer.stem(tok.getText.toLowerCase)))
-                ).toSeq
-            }).toSeq
+                ).toList
+            }).toList.filter(_.nonEmpty)
 
     /**
       *
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index ded8618a..3f35e7d1 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -49,7 +49,9 @@ class NCSemanticEntityParserSpec:
                 // Elements data.
                 E("t6", props = Map("testKey" -> "testValue")),
                 // Regex.
-                E("t7", synonyms = Set("x //[a-d]+//"))
+                E("t7", synonyms = Set("x //[a-d]+//")),
+                // Empty synonyms.
+                E("t8", synonyms = Set("{A|_} {B|_}"))
             )
         )
 
@@ -119,5 +121,8 @@ class NCSemanticEntityParserSpec:
         check("value the 5", "t5", value = Option("value5")) // With stopword 
inside.
         check("t6", "t6", elemData = Option(Map("testKey" -> "testValue")))
         check("the x abc x abe", "t7") // `x abc` should be matched, `x abe` 
shouldn't.
+        check("A B", "t8")
+        check("A", "t8")
+        check("B", "t8")
 
         checkMultiple("t1 the x abc the x the abc", "t1", "t7", "t7")
\ No newline at end of file

[incubator-nlpcraft] branch NLPCRAFT-504 updated: Synonyms processing bugfix.

Reply via email to