This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-504
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-504 by this push:
new 47df111c Synonyms processing bugfix.
47df111c is described below
commit 47df111c0cf1221c3155da47cf9a1d4d1fd67e84
Author: Sergey Kamov <[email protected]>
AuthorDate: Mon Jul 4 11:34:44 2022 +0300
Synonyms processing bugfix.
---
.../entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala | 6 +++---
.../nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala | 7 ++++++-
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 8aa82f12..5dda3214 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -150,7 +150,7 @@ private[semantic] object NCSemanticSynonymsProcessor extends LazyLogging:
macroParser: NCMacroParser,
elemId: String,
syns: Set[String]
- ): Seq[Seq[NCSemanticSynonymChunk]] =
+ ): List[List[NCSemanticSynonymChunk]] =
case class RegexHolder(text: String, var used: Boolean = false):
private def stripSuffix(fix: String, s: String): String =
s.slice(fix.length, s.length - fix.length)
@@ -195,8 +195,8 @@ private[semantic] object NCSemanticSynonymsProcessor extends LazyLogging:
regex.used = true
Option(regex.mkChunk())
case None => Option(NCSemanticSynonymChunk(TEXT,
tok.getText, stemmer.stem(tok.getText.toLowerCase)))
- ).toSeq
- }).toSeq
+ ).toList
+ }).toList.filter(_.nonEmpty)
/**
*
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index ded8618a..3f35e7d1 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -49,7 +49,9 @@ class NCSemanticEntityParserSpec:
// Elements data.
E("t6", props = Map("testKey" -> "testValue")),
// Regex.
- E("t7", synonyms = Set("x //[a-d]+//"))
+ E("t7", synonyms = Set("x //[a-d]+//")),
+ // Empty synonyms.
+ E("t8", synonyms = Set("{A|_} {B|_}"))
)
)
@@ -119,5 +121,8 @@ class NCSemanticEntityParserSpec:
check("value the 5", "t5", value = Option("value5")) // With stopword inside.
check("t6", "t6", elemData = Option(Map("testKey" -> "testValue")))
check("the x abc x abe", "t7") // `x abc` should be matched, `x abe` shouldn't.
+ check("A B", "t8")
+ check("A", "t8")
+ check("B", "t8")
checkMultiple("t1 the x abc the x the abc", "t1", "t7", "t7")
\ No newline at end of file