This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 40e7ced WIP.
40e7ced is described below
commit 40e7ced750349c44ac2052d129113b038f19ad1f
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Dec 29 23:32:56 2021 +0300
WIP.
---
.../nlp/entity/parser/semantic/impl/NCSynonym.scala | 18 ++++++++++--------
.../entity/parser/semantic/impl/NCSynonymChunk.scala | 2 ++
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala
index fa3b36e..0fe259b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala
@@ -36,14 +36,16 @@ case class NCSynonym(
if isText then
stem == toks.map(_.getStem).mkString(" ")
else
- chunks.zip(toks).forall { case (chunk, tok) =>
- if chunk.stem != null then
- chunk.stem == tok.getStem
- else
- def match0(tokTxt: String) =
chunk.regex.matcher(tokTxt).matches()
-
- match0(tok.getText) || match0(tok.getText.toLowerCase)
- }
+ chunks.zip(toks).
+ sortBy { case (chunk, _) => if chunk.isText then 0 else 1
}.
+ forall { (chunk, tok) =>
+ if chunk.isText then
+ chunk.stem == tok.getStem
+ else
+ def match0(txt: String) =
chunk.regex.matcher(txt).matches()
+
+ match0(tok.getText) ||
match0(tok.getText.toLowerCase)
+ }
)
override def compareTo(o: NCSynonym): Int = Integer.compare(regexCount,
o.regexCount)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala
index 9ac33ed..fc782c9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala
@@ -39,5 +39,7 @@ case class NCSynonymChunk(
require(text != null)
require(kind != null)
+ val isText: Boolean = text != null
+
override def toString = s"($text|$kind)"
}