This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git


The following commit(s) were added to refs/heads/master by this push:
     new 7972242  CODEC-255 ColognePhonetic handles x incorrectly
7972242 is described below

commit 79722429dad5256ac0d3871d4ef2977b4e7a383d
Author: Sebb <[email protected]>
AuthorDate: Tue Jun 18 20:09:14 2019 +0100

    CODEC-255 ColognePhonetic handles x incorrectly
---
 src/changes/changes.xml                            |  3 +-
 .../commons/codec/language/ColognePhonetic.java    | 75 +++++++++++-----------
 .../codec/language/ColognePhoneticTest.java        |  1 +
 3 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index fcdbd5b..d759821 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -44,7 +44,8 @@ The <action> type attribute can be add,update,fix,remove.
   <body>
 
     <release version="1.13" date="YYYY-MM-DD" description="TBD">
-      <action issue="CODEC-254" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic does not treat the letter H correct</action>      
+      <action issue="CODEC-255" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic handles x incorrectly</action>      
+      <action issue="CODEC-254" dev="sebb" due-to="Holger Grote" type="fix">ColognePhonetic does not treat the letter H correctly</action>      
       <action issue="CODEC-257" dev="ggregory" type="update">Update from Java 7 to Java 8</action>      
       <action issue="CODEC-134" dev="tmousaw-ptc" type="fix">Reject any decode request for a value that is impossible to encode to for Base32/Base64 rather than blindly decoding.</action>      
     </release>
diff --git a/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java b/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
index d1ebadb..329d9bb 100644
--- a/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
+++ b/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
@@ -192,6 +192,8 @@ public class ColognePhonetic implements StringEncoder {
     private static final char[] AHKOQUX = new char[] { 'A', 'H', 'K', 'O', 'Q', 'U', 'X' };
     private static final char[] DTX = new char[] { 'D', 'T', 'X' };
 
+    private static final char CHAR_IGNORE = '-';    // is this character to be ignored?
+
     /**
      * This class is not thread-safe; the field {@link #length} is mutable.
      * However, it is not shared between threads, as it is constructed on demand
@@ -227,13 +229,25 @@ public class ColognePhonetic implements StringEncoder {
 
     private class CologneOutputBuffer extends CologneBuffer {
 
+        private char lastCode;
+
         public CologneOutputBuffer(final int buffSize) {
             super(buffSize);
+            lastCode = '/'; // impossible value
         }
 
-        public void addRight(final char chr) {
-            data[length] = chr;
-            length++;
+        /**
+         * Store the next code in the output buffer, keeping track of the previous code.
+         * '0' is only stored if it is the first entry.
+         * Ignored chars are never stored.
+         * If the code is the same as the last code (whether stored or not) it is not stored.
+         */
+        public void put(final char code) {
+            if (code != CHAR_IGNORE && lastCode != code && (code != '0' || length == 0)) {
+                data[length] = code;
+                length++;
+            }
+            lastCode = code;
         }
 
         @Override
@@ -250,11 +264,6 @@ public class ColognePhonetic implements StringEncoder {
             super(data);
         }
 
-        public void addLeft(final char ch) {
-            length++;
-            data[getNextPos()] = ch;
-        }
-
         @Override
         protected char[] copyData(final int start, final int length) {
             final char[] newData = new char[length];
@@ -310,13 +319,7 @@ public class ColognePhonetic implements StringEncoder {
 
         char nextChar;
 
-        final char CHAR_FIRST_POS = '/'; // are we processing the first character?
-        final char CHAR_IGNORE = '-';    // is this character to be ignored?
-
         char lastChar = CHAR_IGNORE;
-        char lastCode = CHAR_FIRST_POS;
-        boolean firstChar = true; // are we generating the first digit?
-        char code;
         char chr;
 
         while (input.length() > 0) {
@@ -333,55 +336,49 @@ public class ColognePhonetic implements StringEncoder {
             }
 
             if (arrayContains(AEIJOUY, chr)) {
-                code = '0';
+                output.put('0');
             } else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
-                code = '1';
+                output.put('1');
             } else if ((chr == 'D' || chr == 'T') && !arrayContains(CSZ, nextChar)) {
-                code = '2';
+                output.put('2');
             } else if (arrayContains(FPVW, chr)) {
-                code = '3';
+                output.put('3');
             } else if (arrayContains(GKQ, chr)) {
-                code = '4';
+                output.put('4');
             } else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
-                code = '4';
-                input.addLeft('S');
+                output.put('4');
+                output.put('8');
             } else if (chr == 'S' || chr == 'Z') {
-                code = '8';
+                output.put('8');
             } else if (chr == 'C') {
-                if (firstChar) {
+                if (output.length() == 0) {
                     if (arrayContains(AHKLOQRUX, nextChar)) {
-                        code = '4';
+                        output.put('4');
                     } else {
-                        code = '8';
+                        output.put('8');
                     }
                 } else {
                     if (arrayContains(SZ, lastChar) || !arrayContains(AHKOQUX, nextChar)) {
-                        code = '8';
+                        output.put('8');
                     } else {
-                        code = '4';
+                        output.put('4');
                     }
                 }
             } else if (arrayContains(DTX, chr)) {
-                code = '8';
+                output.put('8');
             } else if (chr == 'R') {
-                code = '7';
+                output.put('7');
             } else if (chr == 'L') {
-                code = '5';
+                output.put('5');
             } else if (chr == 'M' || chr == 'N') {
-                code = '6';
+                output.put('6');
             } else if (chr == 'H') {
-                code = CHAR_IGNORE;
+                output.put(CHAR_IGNORE); // needed by put
             } else {
-                code = chr; // should not happen?
-            }
-
-            if (code != CHAR_IGNORE && lastCode != code && (code != '0' || firstChar)) {
-                output.addRight(code);
-                firstChar = false; // no longer processing first output digit
+                // ignored; should not happen
             }
 
             lastChar = chr;
-            lastCode = code;
         }
         return output.toString();
     }
diff --git a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
index e6ddc90..e6f8777 100644
--- a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
+++ b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
@@ -194,6 +194,7 @@ public class ColognePhoneticTest extends StringEncoderAbstractTest<ColognePhonet
             {"Celsius", "8588"},
             {"Ace", "08"},
             {"shch", "84"}, // CODEC-254
+            {"xch", "484"}, // CODEC-255
             {"heithabu", "021"}};
         this.checkEncodings(data);
     }

Reply via email to