This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
new 1032b87 WIP.
1032b87 is described below
commit 1032b8786d4b2f64f4c9ce8261bbd47248f55182
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Dec 29 22:26:15 2021 +0300
WIP.
---
.../semantic/impl/NCSemanticEntityParserImpl.scala | 24 +++++++++++-----------
.../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 2 +-
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index 841e6f3..d05c85e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -17,19 +17,18 @@
package org.apache.nlpcraft.nlp.entity.parser.semantic.impl
-import java.util.List as JList
-import java.util.Map as Jmap
-import java.io.File
-import org.apache.nlpcraft.nlp.entity.parser.semantic.{NCSemanticElement, *}
+import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.makro.NCMacroParser
import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSynonymChunkKind.*
+import org.apache.nlpcraft.nlp.entity.parser.semantic.*
-import java.util.regex.{Pattern, PatternSyntaxException}
+import java.io.File
+import java.util.{List as JList, Map as Jmap}
+import java.util.regex.*
import scala.collection.mutable
import scala.jdk.CollectionConverters.*
-import NCSynonymChunkKind.*
-import com.typesafe.scalalogging.LazyLogging
object NCSemanticEntityParserImpl:
def apply(stemmer: NCStemmer, macros: Jmap[String, String], elems:
JList[NCSemanticElement]): NCSemanticEntityParserImpl =
@@ -110,7 +109,7 @@ object NCSemanticEntityParserImpl:
Piece(combo, stops4Delete.map(del => combo.filter(t =>
!del.contains(t))).filter(_.nonEmpty))
})
-import NCSemanticEntityParserImpl._
+import
org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticEntityParserImpl.*
class NCSemanticEntityParserImpl(stemmer: NCStemmer, macros: Map[String,
String], elements: Seq[NCSemanticElement]) extends NCEntityParser with
LazyLogging:
private var sortedSyns: Map[Int, Map[String, Seq[NCSynonym]]] = _
@@ -138,13 +137,13 @@ class NCSemanticEntityParserImpl(stemmer: NCStemmer, macros: Map[String, String]
s"]"
)
- // TODO: parsing
+ // TODO: NCSynonym + trim for lines etc
all += Holder(
e.getId,
syns.
flatMap(p.expand).
- map(_.split(" ").map(p => mkChunk(stemmer,
p)).toIndexedSeq).toSeq.
- map(chunks => NCSynonym(false, false, false, null,
chunks))
+ map(t => cfg.getTokenizer.tokenize(cfg,
t).asScala.map(w => mkChunk(stemmer, w.getText)).toSeq).
+ toSeq.map(chunks => NCSynonym(false, false, false,
null, chunks))
)
})
@@ -177,7 +176,8 @@ class NCSemanticEntityParserImpl(stemmer: NCStemmer, macros: Map[String, String]
override def getRequestId: String =
req.getRequestId
override def getId: String = elemId
- for (piece <- getPieces(toks.asScala.toSeq); extra <- piece.extra)
+ val value = getPieces(toks.asScala.toSeq)
+ for (piece <- value; extra <- Seq(piece.main) ++ piece.extra)
tryMatch(piece.main, extra)
entities.toSeq.asJava
diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index 526d650..609479c 100644
--- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -80,6 +80,6 @@ object NCTestUtils:
val t = make
val started = now()
- t.start(null) // TODO: fix it.
+ t.start(NCTestConfig.EN_MDL_CFG)
println(s"'${t.getClass.getSimpleName}' created in ${started -
start}ms and started in ${now() - started}ms.")
t
\ No newline at end of file