This is an automated email from the ASF dual-hosted git repository.
janhoy pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 8ad8b59a88a SOLR-17575: Fixed broken backwards compatibility with the
legacy "langid.whitelist" config in Solr Langid (#2886)
8ad8b59a88a is described below
commit 8ad8b59a88ab35f745148952e3d45f72188d514c
Author: Alexander Zagniotov <[email protected]>
AuthorDate: Thu Nov 28 01:01:45 2024 -0800
SOLR-17575: Fixed broken backwards compatibility with the legacy
"langid.whitelist" config in Solr Langid (#2886)
Co-authored-by: Alexander Zagniotov <[email protected]>
Co-authored-by: Jan Høydahl <[email protected]>
(cherry picked from commit cebdb2d790a656b949e0f1fcb6893333c63602e1)
---
solr/CHANGES.txt | 2 ++
.../LanguageIdentifierUpdateProcessor.java | 13 +++++------
...geIdentifierUpdateProcessorFactoryTestCase.java | 26 ++++++++++++++++++++++
3 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e9da72a86b6..9c99c063be9 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -103,6 +103,8 @@ Bug Fixes
* SOLR-16976: Remove log4j-jul jar and use slf4j bridge for JUL to prevent
exception from being logged when remote JMX
is enabled (Shawn Heisey, Stephen Zhou, Eric Pugh, Christine Poerschke,
David Smiley)
+* SOLR-17575: Fixed broken backwards compatibility with the legacy "langid.whitelist" config in Solr Langid. (Jan Høydahl, Alexander Zagniotov)
+
Dependency Upgrades
---------------------
(No changes)
diff --git a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
index 21921440cae..f4f1b9cc83c 100644
--- a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
+++ b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
@@ -111,8 +111,8 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
overwrite = params.getBool(OVERWRITE, false);
langAllowlist = new HashSet<>();
threshold = params.getDouble(THRESHOLD, DOCID_THRESHOLD_DEFAULT);
- String legacyAllowList = params.get(LANG_WHITELIST, "");
- if (legacyAllowList.length() > 0) {
+ final String legacyAllowList = params.get(LANG_WHITELIST, "").trim();
+ if (!legacyAllowList.isEmpty()) {
// nowarn compile time string concatenation
log.warn(
LANG_WHITELIST
@@ -120,11 +120,10 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
+ LANG_ALLOWLIST
+ " instead."); // nowarn
}
- if (params.get(LANG_ALLOWLIST, legacyAllowList).length() > 0) {
- for (String lang : params.get(LANG_ALLOWLIST, "").split(",")) {
- langAllowlist.add(lang);
- }
- }
+ Arrays.stream(params.get(LANG_ALLOWLIST, legacyAllowList).split(","))
+ .map(String::trim)
+ .filter(lang -> !lang.isEmpty())
+ .forEach(langAllowlist::add);
// Mapping params (field centric)
enableMapping = params.getBool(MAP_ENABLE, false);
diff --git a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
index 4d8d398a25c..15e62d11a50 100644
--- a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
+++ b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
@@ -18,6 +18,7 @@ package org.apache.solr.update.processor;
import java.util.ArrayList;
import java.util.List;
+import java.util.Set;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
@@ -464,6 +465,31 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
assertTrue(mappedIndividual.containsKey("text2_ru"));
}
+ @Test
+ public void testAllowlist() throws Exception {
+ ModifiableSolrParams parameters = new ModifiableSolrParams();
+ parameters.add("langid.fl", "name,subject");
+ parameters.add("langid.langField", "language_s");
+ parameters.add("langid.allowlist", "no,en ,, ,sv, sv");
+ liProcessor = createLangIdProcessor(parameters);
+
+ // Make sure that empty language codes have been filtered out and others trimmed.
+ assertEquals(Set.of("no", "en", "sv"), liProcessor.langAllowlist);
+ }
+
+ @Test
+ public void testAllowlistBackwardsCompatabilityWithLegacyAllowlist() throws Exception {
+ // The "legacy allowlist" is "langid.whitelist"
+ ModifiableSolrParams parameters = new ModifiableSolrParams();
+ parameters.add("langid.fl", "name,subject");
+ parameters.add("langid.langField", "language_s");
+ parameters.add("langid.whitelist", "no,en ,, ,sv, sv");
+ liProcessor = createLangIdProcessor(parameters);
+
+ // Make sure that empty language codes have been filtered out and others trimmed.
+ assertEquals(Set.of("no", "en", "sv"), liProcessor.langAllowlist);
+ }
+
// Various utility methods
private SolrInputDocument englishDoc() {