This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git


The following commit(s) were added to refs/heads/master by this push:
     new 2cdfac1a [CODEC-317] ColognePhonetic can create duplicate consecutive 
codes in some cases.
2cdfac1a is described below

commit 2cdfac1a8e34ffba32603d97d81173158b16ba04
Author: Gary Gregory <[email protected]>
AuthorDate: Mon Feb 16 18:49:17 2026 -0500

    [CODEC-317] ColognePhonetic can create duplicate consecutive codes in
    some cases.
---
 src/changes/changes.xml                            |  1 +
 .../commons/codec/language/ColognePhonetic.java    | 35 ++++++++++++----------
 .../codec/language/ColognePhoneticTest.java        | 10 +++----
 3 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2bdba713..886426a4 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -46,6 +46,7 @@ The <action> type attribute can be add,update,fix,remove.
     <release version="1.22.0" date="YYYY-MM-DD" description="This is a feature and maintenance release. Java 8 or later is required.">
       <!-- FIX -->
       <action type="fix" dev="ggregory" due-to="Shalu Jha, Andrey, Gary Gregory" issue="CODEC-249">Fix Incorrect transform of CH digraph according Metaphone basic rules #423.</action>
+      <action type="fix" dev="ggregory" due-to="DRUser123, Shalu Jha, Gary Gregory" issue="CODEC-317">ColognePhonetic can create duplicate consecutive codes in some cases.</action>
       <!-- ADD -->
       <action type="add" dev="ggregory" due-to="Inkeet, Gary Gregory, Wolff Bock von Wuelfingen" issue="CODEC-326">Add Base58 support.</action>
       <action type="add" dev="ggregory" due-to="Gary Gregory">Add BaseNCodecInputStream.AbstracBuilder.setByteArray(byte[]).</action>
diff --git a/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java b/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
index debcb219..b5708ffa 100644
--- a/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
+++ b/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
@@ -270,11 +270,15 @@ public class ColognePhonetic implements StringEncoder {
          * @param code the code to store.
          */
         public void put(final char code) {
-            if (code != CHAR_IGNORE && lastCode != code && (code != '0' || length == 0)) {
+            final boolean accept = code != CHAR_IGNORE;
+            final boolean nonZ = code != '0';
+            if (accept && lastCode != code && (nonZ || length == 0)) {
                 data[length] = code;
                 length++;
             }
-            lastCode = code;
+            if (nonZ && accept) {
+                lastCode = code;
+            }
         }
     }
     // Predefined char arrays for better performance and less GC load
@@ -398,8 +402,8 @@ public class ColognePhonetic implements StringEncoder {
     @Override
     public Object encode(final Object object) throws EncoderException {
         if (!(object instanceof String)) {
-            throw new EncoderException("This method's parameter was expected to be of the type " + String.class.getName() + ". But actually it was of the type "
-                    + object.getClass().getName() + ".");
+            throw new EncoderException(String.format("This method's parameter was expected to be of the type %s. But actually it was of the type %s.",
+                    String.class.getName(), object.getClass().getName()));
         }
         return encode((String) object);
     }
@@ -434,20 +438,19 @@ public class ColognePhonetic implements StringEncoder {
     private char[] preprocess(final String text) {
         // This converts German small sharp s (Eszett) to SS
         final char[] chrs = text.toUpperCase(Locale.GERMAN).toCharArray();
-
         for (int index = 0; index < chrs.length; index++) {
             switch (chrs[index]) {
-                case '\u00C4': // capital A, umlaut mark
-                    chrs[index] = 'A';
-                    break;
-                case '\u00DC': // capital U, umlaut mark
-                    chrs[index] = 'U';
-                    break;
-                case '\u00D6': // capital O, umlaut mark
-                    chrs[index] = 'O';
-                    break;
-                default:
-                    break;
+            case '\u00C4': // capital A, umlaut mark
+                chrs[index] = 'A';
+                break;
+            case '\u00DC': // capital U, umlaut mark
+                chrs[index] = 'U';
+                break;
+            case '\u00D6': // capital O, umlaut mark
+                chrs[index] = 'O';
+                break;
+            default:
+                break;
             }
         }
         return chrs;
diff --git a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
index f04f7406..bab6d6a9 100644
--- a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
+++ b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
@@ -162,14 +162,14 @@ class ColognePhoneticTest extends AbstractStringEncoderTest<ColognePhonetic> {
             Arguments.arguments("weber", "317"),
             Arguments.arguments("wagner", "3467"),
             Arguments.arguments("becker", "147"),
-            Arguments.arguments("hoffmann", "0366"),
+            Arguments.arguments("hoffmann", "036"),
             Arguments.arguments("sch\u00C4fer", "837"), // schÄfer - why upper case A-umlaut ?
             Arguments.arguments("sch\u00e4fer", "837"), // schäfer - add equivalent lower-case
             Arguments.arguments("Breschnew", "17863"),
             Arguments.arguments("Wikipedia", "3412"),
             Arguments.arguments("peter", "127"),
             Arguments.arguments("pharma", "376"),
-            Arguments.arguments("m\u00f6nchengladbach", "664645214"), // mönchengladbach
+            Arguments.arguments("m\u00f6nchengladbach", "64645214"), // mönchengladbach
             Arguments.arguments("deutsch", "28"),
             Arguments.arguments("deutz", "28"),
             Arguments.arguments("hamburg", "06174"),
@@ -181,9 +181,9 @@ class ColognePhoneticTest extends AbstractStringEncoderTest<ColognePhonetic> {
             Arguments.arguments("matsch", "68"),
             Arguments.arguments("matz", "68"),
             Arguments.arguments("Arbeitsamt", "071862"),
-            Arguments.arguments("Eberhard", "01772"),
-            Arguments.arguments("Eberhardt", "01772"),
-            Arguments.arguments("Celsius", "8588"),
+            Arguments.arguments("Eberhard", "0172"),
+            Arguments.arguments("Eberhardt", "0172"),
+            Arguments.arguments("Celsius", "858"),
             Arguments.arguments("Ace", "08"),
             Arguments.arguments("shch", "84"), // CODEC-254
             Arguments.arguments("xch", "484"), // CODEC-255

Reply via email to