[incubator-nlpcraft] branch NLPCRAFT-472 updated: WIP.

sergeykamov Wed, 29 Dec 2021 12:33:13 -0800

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
     new 40e7ced  WIP.
40e7ced is described below

commit 40e7ced750349c44ac2052d129113b038f19ad1f
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Dec 29 23:32:56 2021 +0300

    WIP.
---
 .../nlp/entity/parser/semantic/impl/NCSynonym.scala    | 18 ++++++++++--------
 .../entity/parser/semantic/impl/NCSynonymChunk.scala   |  2 ++
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala
index fa3b36e..0fe259b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonym.scala
@@ -36,14 +36,16 @@ case class NCSynonym(
             if isText then
                 stem == toks.map(_.getStem).mkString(" ")
             else
-                chunks.zip(toks).forall { case (chunk, tok) =>
-                    if chunk.stem != null then
-                        chunk.stem == tok.getStem
-                    else
-                        def match0(tokTxt: String) = chunk.regex.matcher(tokTxt).matches()
-
-                        match0(tok.getText) || match0(tok.getText.toLowerCase)
-                }
+                chunks.zip(toks).
+                    sortBy { case (chunk, _) => if chunk.isText then 0 else 1 }.
+                    forall { (chunk, tok) =>
+                        if chunk.isText then
+                            chunk.stem == tok.getStem
+                        else
+                            def match0(txt: String) = chunk.regex.matcher(txt).matches()
+
+                            match0(tok.getText) || match0(tok.getText.toLowerCase)
+                    }
         )
 
     override def compareTo(o: NCSynonym): Int = Integer.compare(regexCount, o.regexCount)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala
index 9ac33ed..fc782c9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSynonymChunk.scala
@@ -39,5 +39,7 @@ case class NCSynonymChunk(
     require(text != null)
     require(kind != null)
 
+    val isText: Boolean = text != null
+
     override def toString = s"($text|$kind)"
 }

[incubator-nlpcraft] branch NLPCRAFT-472 updated: WIP.

Reply via email to