This is an automated email from the ASF dual-hosted git repository. ndipiazza pushed a commit to branch TIKA-4272-docker in repository https://gitbox.apache.org/repos/asf/tika.git
commit 843a26b970ac86bfb92e5cf2787fa9bc0b17bbc6 Merge: 9fbaadfad 9543d034c Author: Nicholas DiPiazza <ndipia...@apache.org> AuthorDate: Sun Aug 18 10:08:59 2024 -0500 Merge branch 'main' of github.com:apache/tika into TIKA-4272-docker .../src/main/java/org/apache/tika/gui/TikaGUI.java | 9 +- .../test/java/org/apache/tika/bundle/BundleIT.java | 2 +- .../main/java/org/apache/tika/config/Param.java | 7 +- .../java/org/apache/tika/detect/MagicDetector.java | 3 +- .../AbstractEmbeddedDocumentBytesHandler.java | 18 ++- .../ParsingEmbeddedDocumentExtractor.java | 4 +- .../java/org/apache/tika/io/TikaInputStream.java | 2 +- .../java/org/apache/tika/metadata/Metadata.java | 19 ++- .../main/java/org/apache/tika/mime/MagicMatch.java | 2 +- .../main/java/org/apache/tika/mime/MimeTypes.java | 4 +- .../tika/parser/external/ExternalParser.java | 4 +- .../external/ExternalParsersConfigReader.java | 2 +- .../java/org/apache/tika/pipes/PipesClient.java | 4 +- .../org/apache/tika/pipes/PipesConfigBase.java | 3 +- .../java/org/apache/tika/pipes/PipesServer.java | 8 +- .../extractor/EmbeddedDocumentBytesConfig.java | 64 +++++----- .../EmittingEmbeddedDocumentBytesHandler.java | 4 +- .../pipesiterator/fs/FileSystemPipesIterator.java | 2 +- .../org/apache/tika/parser/mock/MockParser.java | 2 +- .../org/apache/tika/pipes/PipesServerTest.java | 6 +- .../org/apache/tika/utils/ConcurrentUtilsTest.java | 20 +-- .../org/apache/tika/eval/app/reports/Report.java | 6 +- .../eval/core/textstats/TextProfileSignature.java | 2 +- .../java/org/apache/tika/example/MyFirstTika.java | 7 +- .../java/org/apache/tika/example/RecentFiles.java | 2 +- .../org/apache/tika/example/TIAParsingExample.java | 4 +- .../apache/tika/example/TextStatsFromTikaEval.java | 1 - .../org/apache/tika/fuzzing/cli/FuzzingCLI.java | 6 +- tika-grpc/pom.xml | 6 +- .../tika/langdetect/tika/LanguageIdentifier.java | 13 +- .../langdetect/tika/LanguageProfilerBuilder.java | 11 +- tika-parent/pom.xml | 31 ++--- .../geoinfo/GeographicInformationParser.java | 10 +- .../apache/tika/parser/isatab/ISArchiveParser.java | 10 +- .../tika/parser/sqlite3/SQLite3ParserTest.java | 3 +- .../tika-parsers-ml/tika-age-recogniser/pom.xml | 138 ++++++++++++++++++++- .../tika/dl/imagerec/DL4JInceptionV3Net.java | 17 +-- .../tika/parser/pot/PooledTimeSeriesParser.java | 5 +- .../apache/tika/parser/ctakes/CTAKESConfig.java | 16 ++- .../geo/topic/gazetteer/GeoGazetteerClient.java | 4 +- .../apache/tika/parser/journal/TEIDOMParser.java | 22 +++- .../tika/parser/ner/grobid/GrobidNERecogniser.java | 137 ++++++++++---------- .../tika/parser/iwork/AutoPageNumberUtils.java | 6 +- .../parser/iwork/iwana/IWork13PackageParser.java | 2 +- .../parser/iwork/iwana/IWork18PackageParser.java | 2 +- .../tika/detect/apple/IWorkDetectorTest.java | 4 +- .../java/org/apache/tika/parser/mp3/Mp3Parser.java | 2 +- .../org/apache/tika/parser/video/FLVParser.java | 3 +- .../tika/parser/dwg/DWGReadFormatRemover.java | 2 +- .../apache/tika/parser/code/SourceCodeParser.java | 14 +-- .../org/apache/tika/parser/html/HtmlHandler.java | 8 +- .../org/apache/tika/parser/html/JSoupParser.java | 2 +- .../StandardHtmlEncodingDetector.java | 2 +- .../tika/parser/image/ImageMetadataExtractor.java | 2 +- .../org/apache/tika/parser/image/PSDParser.java | 4 +- .../apache/tika/parser/jdbc/JDBCTableReader.java | 2 +- .../tika/parser/mail/MailContentHandler.java | 2 +- .../org/apache/tika/parser/mbox/MboxParser.java | 4 +- .../detect/microsoft/ooxml/OPCPackageDetector.java | 5 +- .../microsoft/MSEmbeddedStreamTranslator.java | 2 +- .../tika/parser/microsoft/JackcessExtractor.java | 2 +- .../apache/tika/parser/microsoft/OfficeParser.java | 4 +- .../tika/parser/microsoft/OutlookExtractor.java | 6 +- .../tika/parser/microsoft/chm/ChmItsfHeader.java | 5 +- .../tika/parser/microsoft/chm/ChmItspHeader.java | 6 +- .../tika/parser/microsoft/chm/ChmParser.java | 2 +- .../tika/parser/microsoft/libpst/EmailVisitor.java | 3 +- .../apache/tika/parser/microsoft/onenote/GUID.java | 2 +- .../microsoft/onenote/OneNoteTreeWalker.java | 8 +- .../parser/microsoft/onenote/PropertyValue.java | 2 +- .../streamobj/RevisionManifestDataElementData.java | 3 +- .../streamobj/StorageManifestDataElementData.java | 3 +- .../streamobj/StreamObjectHeaderEnd16bit.java | 2 +- .../onenote/fsshttpb/streamobj/basic/CellID.java | 5 +- .../fsshttpb/streamobj/basic/CellIDArray.java | 3 +- .../fsshttpb/streamobj/basic/ExGUIDArray.java | 3 +- .../space/ObjectSpaceObjectStreamOfContextIDs.java | 3 +- .../space/ObjectSpaceObjectStreamOfOIDs.java | 3 +- .../space/ObjectSpaceObjectStreamOfOSIDs.java | 3 +- .../ooxml/xwpf/ml2006/BinaryDataHandler.java | 4 +- .../parser/microsoft/pst/PSTMailItemParser.java | 2 +- .../parser/microsoft/rtf/RTFObjDataParser.java | 17 +-- .../parser/microsoft/ooxml/OOXMLParserTest.java | 15 +-- .../parser/microsoft/ooxml/TruncatedOOXMLTest.java | 6 +- .../tika/parser/microsoft/rtf/RTFParserTest.java | 6 +- .../java/org/apache/tika/parser/dbf/DBFCell.java | 3 +- .../apache/tika/parser/dbf/DBFColumnHeader.java | 3 +- .../org/apache/tika/parser/epub/EpubParser.java | 10 +- .../org/apache/tika/parser/epub/OPFParser.java | 2 +- .../apache/tika/parser/ocr/ImagePreprocessor.java | 2 +- .../apache/tika/parser/ocr/TesseractOCRParser.java | 2 +- .../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 6 +- .../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 7 +- .../java/org/apache/tika/parser/pdf/PDFParser.java | 4 +- .../apache/tika/parser/pdf/PDFParserConfig.java | 2 +- .../renderer/pdf/pdfbox/NoTextPDFRenderer.java | 2 +- .../renderer/pdf/pdfbox/TextOnlyPDFRenderer.java | 2 +- .../pdf/pdfbox/VectorGraphicsOnlyPDFRenderer.java | 2 +- .../detect/gzip/GZipSpecializationDetector.java | 10 +- .../apache/tika/parser/pkg/CompressorParser.java | 4 +- .../org/apache/tika/parser/pkg/PackageParser.java | 15 ++- .../org/apache/tika/parser/pkg/ZipParserTest.java | 3 +- .../org/apache/tika/parser/txt/CharsetMatch.java | 34 +++-- .../tika/parser/txt/Icu4jEncodingDetector.java | 3 +- .../apache/tika/parser/txt/BOMDetectorTest.java | 6 +- .../org/apache/tika/parser/wacz/WACZParser.java | 6 +- .../apache/tika/parser/xml/FictionBookParser.java | 2 +- .../apache/tika/parser/xmp/JempboxExtractor.java | 7 +- .../detect/zip/DefaultZipContainerDetector.java | 19 +-- .../tika/detect/zip/StreamingDetectContext.java | 2 +- .../org/apache/tika/zip/utils/ZipSalvager.java | 6 +- .../apache/tika/config/TikaDetectorConfigTest.java | 2 +- .../java/org/apache/tika/mime/OneOffMimeTest.java | 2 +- .../tika/parser/ocr/TesseractOCRParserTest.java | 2 +- .../pkg/CompositeZipContainerDetectorTest.java | 6 +- .../apache/tika/parser/pkg/PackageParserTest.java | 7 ++ .../testZipEntryNameCharsetShiftSJIS.zip | Bin 0 -> 330 bytes .../tika/pipes/emitter/azblob/AZBlobEmitter.java | 17 +-- .../tika/pipes/emitter/jdbc/JDBCEmitter.java | 4 - .../pipes/emitter/opensearch/OpenSearchClient.java | 2 +- .../tika/pipes/fetcher/azblob/AZBlobFetcher.java | 6 +- .../tika/pipes/fetcher/http/HttpFetcher.java | 70 +++++++++-- .../pipes/fetcher/http/config/HttpHeaders.java | 4 + .../tika/pipes/fetcher/http/HttpFetcherTest.java | 9 ++ .../src/test/resources/tika-config-http.xml | 5 +- .../tika-fetcher-microsoft-graph/pom.xml | 13 +- .../pipes/pipesiterator/csv/CSVPipesIterator.java | 2 +- .../pipes/reporters/jdbc/JDBCPipesReporter.java | 4 +- .../reporters/opensearch/OpenSearchClient.java | 2 +- .../apache/tika/serialization/JsonMetadata.java | 9 +- .../tika/serialization/JsonMetadataList.java | 4 +- .../serialization/ParseContextDeserializer.java | 78 ++++++++++++ .../tika/serialization/ParseContextSerializer.java | 5 +- .../tika/serialization/TikaJsonDeserializer.java | 35 ++++-- .../tika/serialization/TikaJsonSerializer.java | 30 ++--- .../serialization/pipes/JsonFetchEmitTuple.java | 37 +----- .../TestParseContextSerialization.java | 67 ++++++++++ .../pipes/JsonFetchEmitTupleTest.java | 1 - .../apache/tika/server/client/TikaClientCLI.java | 2 +- tika-server/tika-server-core/pom.xml | 2 +- .../tika/server/core/FetcherStreamFactory.java | 2 +- .../apache/tika/server/core/TikaServerConfig.java | 2 +- .../tika/server/core/resource/TikaMimeTypes.java | 2 +- .../tika/server/core/resource/TikaWelcome.java | 2 +- .../server/core/resource/UnpackerResource.java | 4 +- .../org/apache/tika/server/core/CXFTestBase.java | 71 ++++++----- .../apache/tika/server/standard/TikaPipesTest.java | 54 ++++++-- 147 files changed, 985 insertions(+), 569 deletions(-) diff --cc tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml index 66951cf2c,8ea120051..9d5d28bf0 --- a/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-microsoft-graph/pom.xml @@@ -56,6 -58,16 +58,11 @@@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>com.azure</groupId> + <artifactId>azure-json</artifactId> + <version>${azure.json.version}</version> + </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-core</artifactId> - <version>${project.version}</version> - </dependency> <dependency> <groupId>com.microsoft.graph</groupId> <artifactId>microsoft-graph</artifactId>