This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch branch_2x in repository https://gitbox.apache.org/repos/asf/tika.git
commit ac123173d434dc0ea2261a5aaf0adc8331a407d9 Author: Tilman Hausherr <til...@apache.org> AuthorDate: Sat Aug 10 19:40:11 2024 +0200 TIKA-4290: replace deprecated --- .../detect/gzip/GZipSpecializationDetector.java | 8 +++-- .../org/apache/tika/parser/txt/CharsetMatch.java | 34 ++++++++++++++-------- .../detect/zip/DefaultZipContainerDetector.java | 11 +++---- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/detect/gzip/GZipSpecializationDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/detect/gzip/GZipSpecializationDetector.java index 8618df45e..a5cedbffe 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/detect/gzip/GZipSpecializationDetector.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/detect/gzip/GZipSpecializationDetector.java @@ -22,7 +22,7 @@ import java.nio.charset.StandardCharsets; import java.util.zip.GZIPInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; -import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.io.IOUtils; import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; @@ -46,7 +46,9 @@ public class GZipSpecializationDetector implements Detector { input.mark(2); byte[] firstTwo = new byte[2]; try { - IOUtils.readFully(input, firstTwo); + // do not change this to commons-io IOUtils.readFully because + // org.apache.tika.parser.AutoDetectParserConfigTest tests will fail + org.apache.commons.compress.utils.IOUtils.readFully(input, firstTwo); } finally { input.reset(); } @@ -62,7 +64,7 @@ public class GZipSpecializationDetector 
implements Detector { int buffSize = 1024; UnsynchronizedByteArrayOutputStream gzippedBytes = UnsynchronizedByteArrayOutputStream.builder().get(); try { - IOUtils.copyRange(input, buffSize, gzippedBytes); + IOUtils.copyLarge(input, gzippedBytes, 0, buffSize); } catch (IOException e) { //swallow } finally { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/txt/CharsetMatch.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/txt/CharsetMatch.java index 2bab9037f..beb56f394 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/txt/CharsetMatch.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/txt/CharsetMatch.java @@ -99,7 +99,12 @@ public class CharsetMatch implements Comparable<CharsetMatch> { InputStream inputStream = fInputStream; if (inputStream == null) { - inputStream = new UnsynchronizedByteArrayInputStream(fRawInput, 0, fRawLength); + try { + inputStream = UnsynchronizedByteArrayInputStream.builder().setByteArray(fRawInput).setLength(fRawLength).get(); + } + catch (IOException ex) { + return null; + } } try { @@ -115,9 +120,10 @@ public class CharsetMatch implements Comparable<CharsetMatch> { * to the original byte data supplied to the Charset detect operation. * * @return a String created from the converted input data. + * @throws IOException * @stable ICU 3.4 */ - public String getString() throws java.io.IOException { + public String getString() throws IOException { return getString(-1); } @@ -133,24 +139,24 @@ public class CharsetMatch implements Comparable<CharsetMatch> { * source of the data is an input stream, or -1 for * unlimited length. * @return a String created from the converted input data. 
+ * @throws IOException * @stable ICU 3.4 */ - public String getString(int maxLength) throws java.io.IOException { + public String getString(int maxLength) throws IOException { String result = null; if (fInputStream != null) { StringBuilder sb = new StringBuilder(); char[] buffer = new char[1024]; - Reader reader = getReader(); - int max = maxLength < 0 ? Integer.MAX_VALUE : maxLength; - int bytesRead = 0; - - while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) > 0) { - sb.append(buffer, 0, bytesRead); - max -= bytesRead; + try (Reader reader = getReader()) { + int max = maxLength < 0 ? Integer.MAX_VALUE : maxLength; + int bytesRead; + + while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) > 0) { + sb.append(buffer, 0, bytesRead); + max -= bytesRead; + } } - reader.close(); - return sb.toString(); } else { String name = getNormalizedName(); @@ -235,6 +241,7 @@ public class CharsetMatch implements Comparable<CharsetMatch> { * @throws ClassCastException if the argument is not a CharsetMatch. * @stable ICU 4.4 */ + @Override public int compareTo(CharsetMatch other) { int compareResult = 0; if (this.fConfidence > other.fConfidence) { @@ -251,6 +258,7 @@ public class CharsetMatch implements Comparable<CharsetMatch> { * @param o the CharsetMatch object to compare against * @return true if equal */ + @Override public boolean equals(Object o) { if (o instanceof CharsetMatch) { CharsetMatch that = (CharsetMatch) o; @@ -265,11 +273,13 @@ public class CharsetMatch implements Comparable<CharsetMatch> { * * @return the hashCode */ + @Override public int hashCode() { return fConfidence; } // gave us a byte array. 
+ @Override public String toString() { String s = "Match of " + fCharsetName; if (getLanguage() != null) { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java index 1dd10041d..824e03917 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-zip-commons/src/main/java/org/apache/tika/detect/zip/DefaultZipContainerDetector.java @@ -121,10 +121,10 @@ public class DefaultZipContainerDetector implements Detector { return TIFF; } try { - String name = ArchiveStreamFactory.detect(new UnsynchronizedByteArrayInputStream(prefix, - 0, length)); + String name = ArchiveStreamFactory.detect( + UnsynchronizedByteArrayInputStream.builder().setByteArray(prefix).setLength(length).get()); return PackageConstants.getMediaType(name); - } catch (ArchiveException e) { + } catch (IOException | ArchiveException e) { return MediaType.OCTET_STREAM; } } @@ -132,9 +132,10 @@ public class DefaultZipContainerDetector implements Detector { static MediaType detectCompressorFormat(byte[] prefix, int length) { try { String type = - CompressorStreamFactory.detect(new UnsynchronizedByteArrayInputStream(prefix, 0, length)); + CompressorStreamFactory.detect( + UnsynchronizedByteArrayInputStream.builder().setByteArray(prefix).setLength(length).get()); return CompressorConstants.getMediaType(type); - } catch (CompressorException e) { + } catch (IOException | CompressorException e) { return MediaType.OCTET_STREAM; } }