This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d16b07  Stems lowercase usage fixed.
9d16b07 is described below

commit 9d16b07d1dfded1895992e8eb555a4c9d61b1f78
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Mar 17 12:11:57 2022 +0300

    Stems lowercase usage fixed.
---
 .../src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java    | 2 +-
 .../nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala  | 4 ++--
 .../nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala | 4 ++--
 .../src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala     | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index 991ddb9..2efb746 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -58,7 +58,7 @@ public class NCModelPipelineBuilder {
 
             @Override
             public synchronized String stem(String txt) {
-                return ps.stem(txt.toLowerCase()); // TODO:
+                return ps.stem(txt);
             }
         };
     }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
index 3bbdc48..f960c9d 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
@@ -211,7 +211,7 @@ class NCSemanticEntityParserImpl(
         val toks = toksList.asScala.toSeq
         if toks.exists(_.get[String]("stopword") == null) then 
warnMissedProperty("stopword")
 
-        val stems = toks.map(p => p -> stemmer.stem(p.getText)).toMap
+        val stems = toks.map(p => p -> 
stemmer.stem(p.getText.toLowerCase)).toMap
         val stems4Lemms =
             var ok = true
             val seq =
@@ -221,7 +221,7 @@ class NCSemanticEntityParserImpl(
                         t -> lemma
 
             if ok then
-                seq.toMap.map { (tok, lemma) => tok -> stemmer.stem(lemma) }
+                seq.toMap.map { (tok, lemma) => tok -> 
stemmer.stem(lemma.toLowerCase) }
             else
                 warnMissedProperty("lemma")
                 Map.empty
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
index 3f8bdc5..f047b42 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
@@ -197,7 +197,7 @@ private[impl] object NCSemanticSynonymsProcessor extends 
LazyLogging:
                             else
                                 regex.used = true
                                 Some(regex.mkChunk())
-                        case None => Option(NCSemanticSynonymChunk(TEXT, 
tok.getText, stemmer.stem(tok.getText)))
+                        case None => Option(NCSemanticSynonymChunk(TEXT, 
tok.getText, stemmer.stem(tok.getText.toLowerCase)))
                 ).toSeq
             }).toSeq
 
@@ -236,7 +236,7 @@ private[impl] object NCSemanticSynonymsProcessor extends 
LazyLogging:
 
             def add(syns: Seq[NCSemanticSynonym]): Unit = buf ++= 
syns.map(Holder(_, elemId))
             def addSpec(txt: String, value: String = null): Unit =
-                buf += 
Holder(NCSemanticSynonym(Seq(NCSemanticSynonymChunk(TEXT, txt, 
stemmer.stem(txt))), value), elemId)
+                buf += 
Holder(NCSemanticSynonym(Seq(NCSemanticSynonymChunk(TEXT, txt, 
stemmer.stem(txt.toLowerCase))), value), elemId)
 
             addSpec(elemId)
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index b73af1d..ec74cc2 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -129,7 +129,7 @@ object NCTestUtils:
     private def mkSemanticStemmer: NCSemanticStemmer =
         new NCSemanticStemmer():
             private val ps = new PorterStemmer
-            override def stem(txt: String): String = ps.synchronized { 
ps.stem(txt.toLowerCase) }
+            override def stem(txt: String): String = ps.synchronized { 
ps.stem(txt) }
 
 
     /**

Reply via email to