This is an automated email from the ASF dual-hosted git repository.

janhoy pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 8ad8b59a88a SOLR-17575: Fixed broken backwards compatibility with the legacy "langid.whitelist" config in Solr Langid (#2886)
8ad8b59a88a is described below

commit 8ad8b59a88ab35f745148952e3d45f72188d514c
Author: Alexander Zagniotov <azagnio...@gmail.com>
AuthorDate: Thu Nov 28 01:01:45 2024 -0800

    SOLR-17575: Fixed broken backwards compatibility with the legacy "langid.whitelist" config in Solr Langid (#2886)
    
    Co-authored-by: Alexander Zagniotov <azagnio...@box.com>
    Co-authored-by: Jan Høydahl <jan...@apache.org>
    (cherry picked from commit cebdb2d790a656b949e0f1fcb6893333c63602e1)
---
 solr/CHANGES.txt                                   |  2 ++
 .../LanguageIdentifierUpdateProcessor.java         | 13 +++++------
 ...geIdentifierUpdateProcessorFactoryTestCase.java | 26 ++++++++++++++++++++++
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e9da72a86b6..9c99c063be9 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -103,6 +103,8 @@ Bug Fixes
 * SOLR-16976: Remove log4j-jul jar and use slf4j bridge for JUL to prevent exception from being logged when remote JMX
   is enabled (Shawn Heisey, Stephen Zhou, Eric Pugh, Christine Poerschke, David Smiley)
 
+* SOLR-17575: Fixed broken backwards compatibility with the legacy "langid.whitelist" config in Solr Langid. (Jan Høydahl, Alexander Zagniotov)
+
 Dependency Upgrades
 ---------------------
 (No changes)
diff --git a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
index 21921440cae..f4f1b9cc83c 100644
--- a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
+++ b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
@@ -111,8 +111,8 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
       overwrite = params.getBool(OVERWRITE, false);
       langAllowlist = new HashSet<>();
       threshold = params.getDouble(THRESHOLD, DOCID_THRESHOLD_DEFAULT);
-      String legacyAllowList = params.get(LANG_WHITELIST, "");
-      if (legacyAllowList.length() > 0) {
+      final String legacyAllowList = params.get(LANG_WHITELIST, "").trim();
+      if (!legacyAllowList.isEmpty()) {
         // nowarn compile time string concatenation
         log.warn(
             LANG_WHITELIST
@@ -120,11 +120,10 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
                 + LANG_ALLOWLIST
                 + " instead."); // nowarn
       }
-      if (params.get(LANG_ALLOWLIST, legacyAllowList).length() > 0) {
-        for (String lang : params.get(LANG_ALLOWLIST, "").split(",")) {
-          langAllowlist.add(lang);
-        }
-      }
+      Arrays.stream(params.get(LANG_ALLOWLIST, legacyAllowList).split(","))
+          .map(String::trim)
+          .filter(lang -> !lang.isEmpty())
+          .forEach(langAllowlist::add);
 
       // Mapping params (field centric)
       enableMapping = params.getBool(MAP_ENABLE, false);
diff --git a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
index 4d8d398a25c..15e62d11a50 100644
--- a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
+++ b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
@@ -18,6 +18,7 @@ package org.apache.solr.update.processor;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
@@ -464,6 +465,31 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     assertTrue(mappedIndividual.containsKey("text2_ru"));
   }
 
+  @Test
+  public void testAllowlist() throws Exception {
+    ModifiableSolrParams parameters = new ModifiableSolrParams();
+    parameters.add("langid.fl", "name,subject");
+    parameters.add("langid.langField", "language_s");
+    parameters.add("langid.allowlist", "no,en ,, ,sv, sv");
+    liProcessor = createLangIdProcessor(parameters);
+
+    // Make sure that empty language codes have been filtered out and others trimmed.
+    assertEquals(Set.of("no", "en", "sv"), liProcessor.langAllowlist);
+  }
+
+  @Test
+  public void testAllowlistBackwardsCompatabilityWithLegacyAllowlist() throws Exception {
+    // The "legacy allowlist" is "langid.whitelist"
+    ModifiableSolrParams parameters = new ModifiableSolrParams();
+    parameters.add("langid.fl", "name,subject");
+    parameters.add("langid.langField", "language_s");
+    parameters.add("langid.whitelist", "no,en ,, ,sv, sv");
+    liProcessor = createLangIdProcessor(parameters);
+
+    // Make sure that empty language codes have been filtered out and others trimmed.
+    assertEquals(Set.of("no", "en", "sv"), liProcessor.langAllowlist);
+  }
+
   // Various utility methods
 
   private SolrInputDocument englishDoc() {

Reply via email to