This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new c338a38 WIP.
c338a38 is described below
commit c338a3896fd2078530421eefe924757993858bc9
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Dec 30 16:28:46 2021 +0300
WIP.
---
.../semantic/impl/NCSemanticEntityParserImpl.scala | 2 +-
.../semantic/impl/NCSemanticSynonymsProcessor.scala | 19 +++++++++++++++++--
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index 665f0a7..5f355e7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -34,7 +34,7 @@ import scala.jdk.CollectionConverters.*
object NCSemanticEntityParserImpl:
def apply(stemmer: NCSemanticTextStemmer, macros: Jmap[String, String],
elems: JList[NCSemanticElement]): NCSemanticEntityParserImpl =
require(stemmer != null)
- require(macros != null)
+ require(elems != null)
new NCSemanticEntityParserImpl(
stemmer,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 3374d8d..e296472 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -24,6 +24,7 @@ import org.apache.nlpcraft.internal.makro.NCMacroParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.*
import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.*
+import com.typesafe.scalalogging.LazyLogging
import java.io.InputStream
import java.util
import java.util.List as JList
@@ -39,12 +40,26 @@ private[impl] case class NCSemanticSynonymsHolder(
/**
*
*/
-private[impl] object NCSemanticSynonymsProcessor:
+private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
private final val SUSP_SYNS_CHARS = Seq("?", "*", "+")
private final val REGEX_FIX = "//"
+ // TODO: extend.
private def validate(macros: Map[String, String], elements: Seq[NCSemanticElement]): Unit =
- () // TODO:
+ if (elements == null || elements.isEmpty)
+ throw new NCException("Elements cannot be empty") // TODO:
+
+ for (e <- elements)
+ if (e.getSynonyms != null)
+ val susp = e.getSynonyms.asScala.filter(syn => !syn.contains("//") && SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
+
+ if susp.nonEmpty then
+ logger.warn(
+ s"Suspicious synonyms detected (use of ${SUSP_SYNS_CHARS.map(s => s"'$s'").mkString(", ")} chars) [" +
+ s"elementId=${e.getId}, " +
+ s"synonyms=[${susp.mkString(", ")}]" +
+ s"]"
+ )
private def startsAndEnds(fix: String, s: String): Boolean =
s.startsWith(fix) && s.endsWith(fix)
private def mkChunk(stemmer: NCSemanticTextStemmer, chunk: String):
NCSemanticSynonymChunk =