This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch opennlp-2.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/opennlp-2.x by this push:
     new a70273ce [2.x] OPENNLP-1781: SentenceDetectorME throws StringIndexOutOfBoundsException when sentence starts with an abbreviation (#882)
a70273ce is described below

commit a70273ce3f1343a88f6face3992627785b480e8e
Author: Richard Zowalla <[email protected]>
AuthorDate: Tue Oct 14 13:20:55 2025 +0200

    [2.x] OPENNLP-1781: SentenceDetectorME throws StringIndexOutOfBoundsException when sentence starts with an abbreviation (#882)
---
 .../opennlp/tools/sentdetect/SentenceDetectorME.java  |  3 ++-
 .../sentdetect/SentenceDetectorMEGermanTest.java      | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
index 26146c67..f64768df 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
@@ -345,7 +345,8 @@ public class SentenceDetectorME implements SentenceDetector, Probabilistic {
       if (tokenPosition == -1) {
         continue; // skip fast
       }
-      final char prevChar = s.charAt(tokenPosition - 1);
+
+      final char prevChar = s.charAt(tokenPosition == 0 ? tokenPosition : tokenPosition - 1);
       int tokenLength = token.length();
       if (tokenPosition + tokenLength < candidateIndex || tokenPosition > candidateIndex ||
         /*
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java
index b34285dc..33560133 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java
@@ -151,6 +151,24 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest {
             () -> assertEquals(2, probs.length));
   }
 
+  /*
+    * A reproducer and test for OPENNLP-1781.
+   */
+  @Test
+  void testSentDetectWithAbbreviationsAtSentenceStart() {
+    prepareResources(true);
+
+    final String sent1 = "S. Träume sind eine Verbindung von Gedanken.";
+
+    final String[] sents = sentenceDetector.sentDetect(sent1);
+    final double[] probs = sentenceDetector.probs();
+
+    assertAll(
+        () -> assertEquals(1, sents.length),
+        () -> assertEquals(sent1, sents[0]),
+        () -> assertEquals(1, probs.length));
+  }
+
   /*
    * A reproducer and test for OPENNLP-1767.
    * It checks that sentence detection with common abbreviations works correctly,
@@ -163,6 +181,7 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest {
       "Der Auto stand schief. Wer hat es dort geparkt?",
       "Es lag am DBMS. Die Performance muss verbessert werden.",
       "Siehe Buch S. 17f. Dort ist es zu finden.",
+      "S. Buch S. 17f. Dort ist es zu finden.", // OPENNLP-1781
       "Sie trank einen Mocca. Er schmeckte ihr!",
       "Der Anker hängt zu Beginn des Bugs. Es ist vertaut.",
       "Das Verfahren testet auf HIV. Es ist präzise."

Reply via email to