This is an automated email from the ASF dual-hosted git repository.

janhoy pushed a commit to branch branch_10_0
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_10_0 by this push:
     new 066a230dcb2 SOLR-17958 Deprecate TikaLanguageIdentifierUpdateProcessor (#3776)
066a230dcb2 is described below

commit 066a230dcb2d7b822b58d5e0f604492db42bcfe2
Author: Jan Høydahl <[email protected]>
AuthorDate: Fri Oct 17 00:27:09 2025 +0200

    SOLR-17958 Deprecate TikaLanguageIdentifierUpdateProcessor (#3776)
    
    (cherry picked from commit 6b33f387423ac00350828339c4ba9bf2d07ced73)
---
 solr/CHANGES.txt                                                     | 2 ++
 .../solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java | 2 ++
 .../processor/TikaLanguageIdentifierUpdateProcessorFactory.java      | 2 ++
 .../processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java  | 1 +
 .../modules/indexing-guide/pages/language-detection.adoc             | 5 +++++
 .../modules/upgrade-notes/pages/major-changes-in-solr-9.adoc         | 2 ++
 6 files changed, 14 insertions(+)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 56e4ef8748f..1e9a31b5784 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -335,6 +335,8 @@ Other Changes
 
 * SOLR-17956: XLSXResponseWriter has been deprecated and will be removed in a 
future release. (Jan Høydahl)
 
+* SOLR-17958: The Tika Language Identifier is deprecated. Use one of the other detectors instead. (Jan Høydahl)
+
 * SOLR-17952: Stream decorator test refactoring - use underscore rather than 
dot in aliases (Andy Webb)
 
 ==================  9.9.1 ==================
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
index 97fb7917fd6..bde2f62437c 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
@@ -34,7 +34,9 @@ import org.slf4j.LoggerFactory;
  * href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection</a>
  *
  * @since 3.5
+ * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessor} instead.
  */
+@Deprecated(since = "9.10")
 public class TikaLanguageIdentifierUpdateProcessor extends 
LanguageIdentifierUpdateProcessor {
 
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
index 66cb977ed50..1728390c065 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
@@ -43,7 +43,9 @@ import org.apache.solr.util.plugin.SolrCoreAware;
  * href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection</a>
  *
  * @since 3.5
+ * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessorFactory} instead.
  */
+@Deprecated(since = "9.10")
 public class TikaLanguageIdentifierUpdateProcessorFactory extends 
UpdateRequestProcessorFactory
     implements SolrCoreAware, LangIdParams {
 
diff --git 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
index b110b397b05..3c47c2a9de0 100644
--- 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
+++ 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
@@ -20,6 +20,7 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.junit.Test;
 
+@SuppressWarnings("deprecation")
 public class TikaLanguageIdentifierUpdateProcessorFactoryTest
     extends LanguageIdentifierUpdateProcessorFactoryTestCase {
   @Override
diff --git 
a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc 
b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
index f4f5ab158f3..a2ab17638fa 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
@@ -55,6 +55,11 @@ Here is an example of a minimal Tika `langid` configuration 
in `solrconfig.xml`:
 </processor>
 ----
 
+[IMPORTANT]
+====
+This detector is deprecated and may be removed in a future version.
+====
+
 === Configuring LangDetect Language Detection
 
 Here is an example of a minimal LangDetect `langid` configuration in 
`solrconfig.xml`:
diff --git 
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc 
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
index a99ad2b4e4f..d07e8d8081d 100644
--- 
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
+++ 
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
@@ -82,6 +82,8 @@ Java has removed support for the Security Manager starting 
with Java 24; therefo
 
 The `XLSXResponseWriter` is now deprecated.
 
+The Tika Language Identifier is deprecated. Use one of the other detectors instead.
+
 The Extraction module can now extract documents using an external Tika Server.
 The local in-process Tika 1.x extractor backend is deprecated and will go away 
in 10.0.
 

Reply via email to