This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 90d854faa TIKA-4349 -- allow uppercasing of hex encoded digests in the 
CommonsDigester (#2044)
90d854faa is described below

commit 90d854faa2a711e0c467ec851399a0c928d037de
Author: Tim Allison <[email protected]>
AuthorDate: Wed Nov 13 13:57:36 2024 -0500

    TIKA-4349 -- allow uppercasing of hex encoded digests in the 
CommonsDigester (#2044)
    
    * TIKA-4349 -- allow uppercasing of hex encoded digests in the 
CommonsDigester
---
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  8 +--
 .../tika/parser/digestutils/CommonsDigester.java   | 62 ++++++++++++++++------
 .../apache/tika/parser/DigestingParserTest.java    |  4 +-
 3 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 0e55ac7db..ec6e7df1a 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -139,7 +139,7 @@ public class TikaCLITest {
         String content = getParamOutContent("-x", resourcePrefix + 
"alice.cli.test");
         assertTrue(content.contains("?xml version=\"1.0\" 
encoding=\"UTF-8\"?"));
 
-        content = getParamOutContent("-x", "--digest=SHA256", resourcePrefix + 
"alice.cli.test");
+        content = getParamOutContent("-x", "--digest=sha256", resourcePrefix + 
"alice.cli.test");
         assertTrue(content.contains("<meta name=\"X-TIKA:digest:SHA256\" 
content=\"e90779adbac09c4ee"));
 
     }
@@ -155,7 +155,7 @@ public class TikaCLITest {
         assertTrue(content.contains("html 
xmlns=\"http://www.w3.org/1999/xhtml\""));
         assertTrue(content.contains("<title></title>"), "Expanded 
<title></title> element should be present");
 
-        content = getParamOutContent("-h", "--digest=SHA384", resourcePrefix + 
"alice.cli.test");
+        content = getParamOutContent("-h", "--digest=sha384", resourcePrefix + 
"alice.cli.test");
         assertTrue(content.contains("<meta name=\"X-TIKA:digest:SHA384\" 
content=\"c69ea023f5da95a026"));
     }
 
@@ -207,7 +207,7 @@ public class TikaCLITest {
 
         content = getParamOutContent("-m", "--digest=SHA512", resourcePrefix + 
"alice.cli.test");
         assertTrue(content.contains("text/plain"));
-        assertTrue(content.contains("X-TIKA:digest:SHA512: 
dd459d99bc19ff78fd31fbae46e0"));
+        assertTrue(content.contains("X-TIKA:digest:SHA512: 
DD459D99BC19FF78FD31FBAE46E0"));
     }
 
     /**
@@ -459,7 +459,7 @@ public class TikaCLITest {
 
     @Test
     public void testDigestInJson() throws Exception {
-        String content = getParamOutContent("-J", "-r", "-t", "--digest=MD5", 
resourcePrefix + "test_recursive_embedded.docx");
+        String content = getParamOutContent("-J", "-r", "-t", "--digest=md5", 
resourcePrefix + "test_recursive_embedded.docx");
         assertTrue(content.contains("\"X-TIKA:digest:MD5\" : 
\"59f626e09a8c16ab6dbc2800c685f772\","));
         assertTrue(content.contains("\"X-TIKA:digest:MD5\" : 
\"f9627095ef86c482e61d99f0cc1cf87d\""));
     }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java
index 7da68e7af..44617ab34 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java
@@ -43,7 +43,8 @@ public class CommonsDigester extends CompositeDigester {
     /**
      * Include a string representing the comma-separated algorithms to run: 
e.g. "md5,sha1".
      * If you want base 32 encoding instead of hexadecimal, add ":32" to  the 
algorithm, e.g.
-     * "md5,sha1:32"
+     * "md5,sha1:32". If you want uppercase digests for the hexadecimal 
encoder,
+     * use uppercase in the algorithm name, e.g. "MD5".
      * <p/>
      * Will throw an IllegalArgumentException if an algorithm isn't supported
      *
@@ -70,7 +71,7 @@ public class CommonsDigester extends CompositeDigester {
         for (DigestAlgorithm algorithm : algorithms) {
             digesters[i++] =
                     new InputStreamDigester(markLimit, 
algorithm.getJavaName(), algorithm.name(),
-                            new HexEncoder());
+                            new HexEncoder(false));
         }
         return digesters;
     }
@@ -129,20 +130,7 @@ public class CommonsDigester extends CompositeDigester {
         int i = 0;
         for (String digest : digests) {
             String[] parts = digest.split(":");
-            DigestingParser.Encoder encoder = null;
-            if (parts.length > 1) {
-                if (parts[1].equals("16")) {
-                    encoder = new HexEncoder();
-                } else if (parts[1].equals("32")) {
-                    encoder = new Base32Encoder();
-                } else if (parts[1].equals("64")) {
-                    encoder = new Base64Encoder();
-                } else {
-                    throw new IllegalArgumentException("Value must be '16', 
'32' or '64'");
-                }
-            } else {
-                encoder = new HexEncoder();
-            }
+            DigestingParser.Encoder encoder = getEncoder(parts);
             DigestAlgorithm digestAlgorithm = getDigestAlgorithm(parts[0]);
             digesters[i++] = new InputStreamDigester(markLimit, 
digestAlgorithm.getJavaName(),
                     digestAlgorithm.name(), encoder);
@@ -150,6 +138,25 @@ public class CommonsDigester extends CompositeDigester {
         return digesters;
     }
 
+    private static DigestingParser.Encoder getEncoder(String[] parts) {
+        DigestingParser.Encoder encoder = null;
+        boolean uc = parts[0].matches("[A-Z0-9]{1,20}");
+        if (parts.length > 1) {
+            if (parts[1].equals("16")) {
+                encoder = new HexEncoder(uc);
+            } else if (parts[1].equals("32")) {
+                encoder = new Base32Encoder();
+            } else if (parts[1].equals("64")) {
+                encoder = new Base64Encoder();
+            } else {
+                throw new IllegalArgumentException("Value must be '16', '32' 
or '64'");
+            }
+        } else {
+            encoder = new HexEncoder(uc);
+        }
+        return encoder;
+    }
+
     public enum DigestAlgorithm {
         //those currently available in commons.digest
         MD2("MD2"), MD5("MD5"), SHA1("SHA-1"), SHA256("SHA-256"), 
SHA384("SHA-384"),
@@ -171,10 +178,31 @@ public class CommonsDigester extends CompositeDigester {
         }
     }
 
+    private static abstract class CasingEncoderBase implements 
DigestingParser.Encoder {
+        private final boolean upperCase;
+        private CasingEncoderBase(boolean upperCase) {
+            this.upperCase = upperCase;
+        }
+
+    }
     private static class HexEncoder implements DigestingParser.Encoder {
+        private final boolean upperCase;
+        private HexEncoder(boolean upperCase) {
+            this.upperCase = upperCase;
+        }
+
         @Override
         public String encode(byte[] bytes) {
-            return Hex.encodeHexString(bytes);
+            return toCase(Hex.encodeHexString(bytes));
+        }
+
+        String toCase(String digest) {
+            if (upperCase) {
+                return digest.toUpperCase(Locale.ROOT);
+            } else {
+                //this is redundant, but useful for future proofing?
+                return digest.toLowerCase(Locale.ROOT);
+            }
         }
     }
 
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/DigestingParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/DigestingParserTest.java
index 7ed78e055..44922ecf7 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/DigestingParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/DigestingParserTest.java
@@ -90,7 +90,7 @@ public class DigestingParserTest extends TikaTest {
 
 
         expected.put(CommonsDigester.DigestAlgorithm.MD2, 
"d768c8e27b0b52c6eaabfaa7122d1d4f");
-        expected.put(CommonsDigester.DigestAlgorithm.MD5, 
"59f626e09a8c16ab6dbc2800c685f772");
+        expected.put(CommonsDigester.DigestAlgorithm.MD5, 
"59F626E09A8C16AB6DBC2800C685F772");
         expected.put(CommonsDigester.DigestAlgorithm.SHA1, 
"PIPQAHIWHLEQ3DVFJQCQ7L22HADZPCFG");
         expected.put(CommonsDigester.DigestAlgorithm.SHA256,
                 "c4b7fab030a8b6a9d6691f6699ac8e6f" + 
"82bc53764a0f1430d134ae3b70c32654");
@@ -105,7 +105,7 @@ public class DigestingParserTest extends TikaTest {
         Metadata m = new Metadata();
         XMLResult xml = getXML("test_recursive_embedded.docx",
                 new DigestingParser(AUTO_DETECT_PARSER,
-                        new CommonsDigester(UNLIMITED, 
"md5,sha256,sha384,sha512,sha1:32"), false)
+                        new CommonsDigester(UNLIMITED, 
"MD5,sha256,sha384,sha512,sha1:32"), false)
                 , m);
         for (CommonsDigester.DigestAlgorithm algo : new 
CommonsDigester.DigestAlgorithm[]{
                 CommonsDigester.DigestAlgorithm.MD5, 
CommonsDigester.DigestAlgorithm.SHA1,

Reply via email to