nddipiazza commented on code in PR #2655: URL: https://github.com/apache/tika/pull/2655#discussion_r2874737816
########## tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ignite/IgniteConfigStoreTest.java: ########## @@ -0,0 +1,622 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.ignite; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.time.Duration; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import com.fasterxml.jackson.databind.ObjectMapper; +import io.grpc.ManagedChannel; +import io.grpc.ManagedChannelBuilder; +import io.grpc.stub.StreamObserver; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; +import org.testcontainers.containers.DockerComposeContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.junit.jupiter.Testcontainers; + +import org.apache.tika.FetchAndParseReply; +import org.apache.tika.FetchAndParseRequest; +import org.apache.tika.SaveFetcherReply; +import org.apache.tika.SaveFetcherRequest; +import org.apache.tika.TikaGrpc; +import org.apache.tika.pipes.fetcher.fs.FileSystemFetcherConfig; + +/** + * End-to-end test for Ignite ConfigStore. + * Tests that fetchers saved via gRPC are persisted in Ignite + * and available in the forked PipesServer process. + */ +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@Testcontainers +@Slf4j +@Tag("E2ETest") +@DisabledOnOs(value = OS.WINDOWS, disabledReason = "Maven not on PATH and Docker/Testcontainers not supported on Windows CI") +class IgniteConfigStoreTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final int MAX_STARTUP_TIMEOUT = 120; + private static final File TEST_FOLDER = new File("target", "govdocs1"); + private static final int GOV_DOCS_FROM_IDX = Integer.parseInt(System.getProperty("govdocs1.fromIndex", "1")); + private static final int GOV_DOCS_TO_IDX = Integer.parseInt(System.getProperty("govdocs1.toIndex", "1")); + private static final String DIGITAL_CORPORA_ZIP_FILES_URL = "https://corp.digitalcorpora.org/corpora/files/govdocs1/zipfiles"; + private static final boolean USE_LOCAL_SERVER = Boolean.parseBoolean(System.getProperty("tika.e2e.useLocalServer", "false")); + private static final int GRPC_PORT = Integer.parseInt(System.getProperty("tika.e2e.grpcPort", "50052")); + + private static DockerComposeContainer<?> igniteComposeContainer; + private static Process localGrpcProcess; + + @BeforeAll + static void setupIgnite() throws Exception { + // Clean up any orphaned processes from previous runs + if (USE_LOCAL_SERVER) { + log.info("Cleaning up any orphaned processes from previous runs"); + try { + killProcessOnPort(GRPC_PORT); + killProcessOnPort(3344); + killProcessOnPort(10800); + } catch (Exception e) { + log.debug("No orphaned processes to clean up"); + } + } + + // Load govdocs1 if not already loaded + if (!TEST_FOLDER.exists() || TEST_FOLDER.listFiles().length == 0) { + downloadAndUnzipGovdocs1(GOV_DOCS_FROM_IDX, GOV_DOCS_TO_IDX); + } + + if (USE_LOCAL_SERVER) { + startLocalGrpcServer(); + } else { + startDockerGrpcServer(); + } + } + + private static void startLocalGrpcServer() throws Exception { + log.info("Starting local tika-grpc server using Maven"); + + // Find the tika root directory - it should contain both tika-grpc and tika-e2e-tests + Path currentDir = Path.of("").toAbsolutePath(); + Path tikaRootDir = currentDir; + + // Navigate up to find the directory that contains both tika-grpc and tika-e2e-tests + while (tikaRootDir != null && + !(Files.exists(tikaRootDir.resolve("tika-grpc")) && + Files.exists(tikaRootDir.resolve("tika-e2e-tests")))) { + tikaRootDir = tikaRootDir.getParent(); + } + + if (tikaRootDir == null) { + throw new IllegalStateException("Cannot find tika root directory. " + + "Current dir: " + currentDir + ". " + + "Please run from within the tika project."); + } + + Path tikaGrpcDir = tikaRootDir.resolve("tika-grpc"); + if (!Files.exists(tikaGrpcDir)) { + throw new IllegalStateException("Cannot find tika-grpc directory at: " + tikaGrpcDir); + } + + // Use different config for local vs Docker + String configFileName = "tika-config-ignite-local.json"; + Path configFile = Path.of("src/test/resources/" + configFileName).toAbsolutePath(); + + if (!Files.exists(configFile)) { + throw new IllegalStateException("Config file not found: " + configFile); + } + + log.info("Tika root: {}", tikaRootDir); + log.info("Using tika-grpc from: {}", tikaGrpcDir); + log.info("Using config file: {}", configFile); + + // Use mvn exec:exec to run as external process (not exec:java which breaks ServiceLoader) + String javaHome = System.getProperty("java.home"); + String javaCmd = javaHome + "/bin/java"; + + ProcessBuilder pb = new ProcessBuilder( + "mvn", + "exec:exec", + "-Dexec.executable=" + javaCmd, + "-Dexec.args=" + + "--add-opens=java.base/java.lang=ALL-UNNAMED " + + "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED " + + "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED " + Review Comment: test -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
