This is an automated email from the ASF dual-hosted git repository.

janhoy pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 0a951f2c3f9 SOLR-17958 Deprecate TikaLanguageIdentifierUpdateProcessor 
(#3776) (#3783)
0a951f2c3f9 is described below

commit 0a951f2c3f9815c33068a6fc2b44e3cc00739a21
Author: Jan Høydahl <[email protected]>
AuthorDate: Fri Oct 17 01:46:30 2025 +0200

    SOLR-17958 Deprecate TikaLanguageIdentifierUpdateProcessor (#3776) (#3783)
    
    (cherry picked from commit 6b33f387423ac00350828339c4ba9bf2d07ced73)
---
 solr/CHANGES.txt                                                     | 2 ++
 .../solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java | 2 ++
 .../processor/TikaLanguageIdentifierUpdateProcessorFactory.java      | 2 ++
 .../processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java  | 1 +
 .../modules/indexing-guide/pages/language-detection.adoc             | 5 +++++
 .../modules/upgrade-notes/pages/major-changes-in-solr-9.adoc         | 2 ++
 6 files changed, 14 insertions(+)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c1cdd645982..72fb2b0f147 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -64,6 +64,8 @@ Other Changes
 * SOLR-17541: Deprecate `CloudHttp2SolrClient.Builder#withHttpClient` in favor 
of `CloudHttp2SolrClient.Builder#withInternalClientBuilder`.
   Deprecate `LBHttp2SolrClient.Builder#withListenerFactory` in favor of 
`LBHttp2SolrClient.Builder#withListenerFactories`. (James Dyer)
 
+* SOLR-17958: The Tika Language Identifier is deprecated. Use one of the other 
detectors instead. (Jan Høydahl)
+
 * SOLR-17952: Stream decorator test refactoring - use underscore rather than 
dot in aliases (Andy Webb)
 
 * SOLR-17956: XLSXResponseWriter has been deprecated and will be removed in a 
future release. (Jan Høydahl)
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
index 97fb7917fd6..bde2f62437c 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
@@ -34,7 +34,9 @@ import org.slf4j.LoggerFactory;
  * 
href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection</a>
  *
  * @since 3.5
+ * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessor} 
instead.
  */
+@Deprecated(since = "9.10")
 public class TikaLanguageIdentifierUpdateProcessor extends 
LanguageIdentifierUpdateProcessor {
 
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
index 66cb977ed50..1728390c065 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
@@ -43,7 +43,9 @@ import org.apache.solr.util.plugin.SolrCoreAware;
  * 
href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection</a>
  *
  * @since 3.5
+ * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessorFactory} 
instead.
  */
+@Deprecated(since = "9.10")
 public class TikaLanguageIdentifierUpdateProcessorFactory extends 
UpdateRequestProcessorFactory
     implements SolrCoreAware, LangIdParams {
 
diff --git 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
index b110b397b05..3c47c2a9de0 100644
--- 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
+++ 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
@@ -20,6 +20,7 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.junit.Test;
 
+@SuppressWarnings("deprecation")
 public class TikaLanguageIdentifierUpdateProcessorFactoryTest
     extends LanguageIdentifierUpdateProcessorFactoryTestCase {
   @Override
diff --git 
a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc 
b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
index f4f5ab158f3..a2ab17638fa 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
@@ -55,6 +55,11 @@ Here is an example of a minimal Tika `langid` configuration 
in `solrconfig.xml`:
 </processor>
 ----
 
+[IMPORTANT]
+====
+This detector is deprecated and may be removed in a future version.
+====
+
 === Configuring LangDetect Language Detection
 
 Here is an example of a minimal LangDetect `langid` configuration in 
`solrconfig.xml`:
diff --git 
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc 
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
index 8ed7762c360..17baa85a47c 100644
--- 
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
+++ 
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
@@ -90,6 +90,8 @@ Java has removed support for the Security Manager starting 
with Java 24; therefo
 
 The `XLSXResponseWriter` is now deprecated.
 
+The Tika Language Identifier is deprecated. Use one of the other detectors 
instead.
+
 The Extraction module can now extract documents using an external Tika Server.
 The local in-process Tika 1.x extractor backend is deprecated and will go away 
in 10.0.
 

Reply via email to