This is an automated email from the ASF dual-hosted git repository.
janhoy pushed a commit to branch branch_10_0
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_10_0 by this push:
new 066a230dcb2 SOLR-17958 Deprecate TikaLanguageIdentifierUpdateProcessor (#3776)
066a230dcb2 is described below
commit 066a230dcb2d7b822b58d5e0f604492db42bcfe2
Author: Jan Høydahl <[email protected]>
AuthorDate: Fri Oct 17 00:27:09 2025 +0200
SOLR-17958 Deprecate TikaLanguageIdentifierUpdateProcessor (#3776)
(cherry picked from commit 6b33f387423ac00350828339c4ba9bf2d07ced73)
---
solr/CHANGES.txt | 2 ++
.../solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java | 2 ++
.../processor/TikaLanguageIdentifierUpdateProcessorFactory.java | 2 ++
.../processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java | 1 +
.../modules/indexing-guide/pages/language-detection.adoc | 5 +++++
.../modules/upgrade-notes/pages/major-changes-in-solr-9.adoc | 2 ++
6 files changed, 14 insertions(+)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 56e4ef8748f..1e9a31b5784 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -335,6 +335,8 @@ Other Changes
* SOLR-17956: XLSXResponseWriter has been deprecated and will be removed in a
future release. (Jan Høydahl)
+* SOLR-17958: The Tika Language Identifier is deprecated. Use one of the other detectors instead. (Jan Høydahl)
+
* SOLR-17952: Stream decorator test refactoring - use underscore rather than
dot in aliases (Andy Webb)
================== 9.9.1 ==================
diff --git
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
index 97fb7917fd6..bde2f62437c 100644
---
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
+++
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
@@ -34,7 +34,9 @@ import org.slf4j.LoggerFactory;
*
href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection</a>
*
* @since 3.5
+ * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessor} instead.
*/
+@Deprecated(since = "9.10")
public class TikaLanguageIdentifierUpdateProcessor extends
LanguageIdentifierUpdateProcessor {
private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
index 66cb977ed50..1728390c065 100644
---
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
+++
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
@@ -43,7 +43,9 @@ import org.apache.solr.util.plugin.SolrCoreAware;
*
href="https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/solr/latest/indexing-guide/language-detection.html#configuring-tika-language-detection</a>
*
* @since 3.5
+ * @deprecated Since 9.10, use {@link OpenNLPLangDetectUpdateProcessorFactory} instead.
*/
+@Deprecated(since = "9.10")
public class TikaLanguageIdentifierUpdateProcessorFactory extends
UpdateRequestProcessorFactory
implements SolrCoreAware, LangIdParams {
diff --git
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
index b110b397b05..3c47c2a9de0 100644
---
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
+++
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
@@ -20,6 +20,7 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.junit.Test;
+@SuppressWarnings("deprecation")
public class TikaLanguageIdentifierUpdateProcessorFactoryTest
extends LanguageIdentifierUpdateProcessorFactoryTestCase {
@Override
diff --git
a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
index f4f5ab158f3..a2ab17638fa 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/language-detection.adoc
@@ -55,6 +55,11 @@ Here is an example of a minimal Tika `langid` configuration
in `solrconfig.xml`:
</processor>
----
+[IMPORTANT]
+====
+This detector is deprecated and may be removed in a future version.
+====
+
=== Configuring LangDetect Language Detection
Here is an example of a minimal LangDetect `langid` configuration in
`solrconfig.xml`:
diff --git
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
index a99ad2b4e4f..d07e8d8081d 100644
---
a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
+++
b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
@@ -82,6 +82,8 @@ Java has removed support for the Security Manager starting
with Java 24; therefo
The `XLSXResponseWriter` is now deprecated.
+The Tika Language Identifier is deprecated. Use one of the other detectors instead.
+
The Extraction module can now extract documents using an external Tika Server.
The local in-process Tika 1.x extractor backend is deprecated and will go away
in 10.0.