This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 84166bafb9 TIKA-4679: Add HTTP/2 support to tika-server via Jetty http2-server (#2672)
84166bafb9 is described below

commit 84166bafb9bffb8ff166ee5f3394de6456555e3d
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Wed Mar 25 15:43:59 2026 +0000

    TIKA-4679: Add HTTP/2 support to tika-server via Jetty http2-server (#2672)
    
    * Adding jetty http2 dependency allows cxf to support http2 requests.
    
    * TIKA-4679: Add e2e test module for HTTP/2 tika-server
    
    - Add tika-e2e-tests/tika-server module with TikaServerHttp2Test
    - Test starts the real fat-jar and verifies HTTP/2 (h2c) responses via
      Java HttpClient configured with Version.HTTP_2
    - Wire module into tika-e2e-tests/pom.xml modules list
    - Module is skipped by default; enable with -Pe2e profile
    
    Co-authored-by: Copilot <[email protected]>
    
    * TIKA-4679: Fix e2e test - skip when fat-jar absent, use /status health-check
    
    - Add Assumptions.assumeTrue(jar.exists()) so tests skip gracefully when
      tika-server-standard fat-jar hasn't been built (CI without prior install)
    - Change startup health-check from / to /status (more reliable 200 response)
    - Increase startup timeout to 90s for slower CI environments
    
    Co-authored-by: Copilot <[email protected]>
    
    * TIKA-4679: Fix e2e HTTP/2 test server startup and health-check
    
    - Use tika-server-standard assembly zip (unpacked via dependency plugin)
      instead of thin jar, so the required lib/ dependencies are available
    - Health-check endpoint changed from /status to / (root always returns 200;
      /status requires explicit endpoint config to be enabled)
    - Pre-negotiate h2c before PUT /tika parse test: h2c Upgrade requires a
      no-body request first; GET / establishes the HTTP/2 connection so the
      subsequent PUT reuses it correctly
    - Drop --noFork flag (TikaServerCli does not recognize it; server runs
      its own fork management independently)
    
    Co-authored-by: Copilot <[email protected]>
    
    * TIKA-4679: Address Copilot review comments on TikaServerHttp2Test
    
    - Remove unused moduleDir variable; initialize repoRoot directly
    - stopServer() now uses waitFor(5s) + destroyForcibly() + waitFor(30s)
      to avoid indefinite blocking if SIGTERM doesn't terminate the process
    
    Co-authored-by: Copilot <[email protected]>
    
    ---------
    
    Co-authored-by: Lawrence Moorehead <[email protected]>
    Co-authored-by: Copilot <[email protected]>
---
 tika-e2e-tests/pom.xml                             |   1 +
 tika-e2e-tests/tika-server/pom.xml                 | 139 +++++++++++++
 .../tika/server/e2e/TikaServerHttp2Test.java       | 217 +++++++++++++++++++++
 tika-parent/pom.xml                                |   5 +
 tika-server/tika-server-core/pom.xml               |   6 +-
 .../server/core/TikaServerIntegrationTest.java     |  22 +++
 6 files changed, 389 insertions(+), 1 deletion(-)

diff --git a/tika-e2e-tests/pom.xml b/tika-e2e-tests/pom.xml
index 35f152d1b5..1880505979 100644
--- a/tika-e2e-tests/pom.xml
+++ b/tika-e2e-tests/pom.xml
@@ -59,6 +59,7 @@
 
     <modules>
         <module>tika-grpc</module>
+        <module>tika-server</module>
     </modules>
 
     <dependencyManagement>
diff --git a/tika-e2e-tests/tika-server/pom.xml b/tika-e2e-tests/tika-server/pom.xml
new file mode 100644
index 0000000000..9689026bfd
--- /dev/null
+++ b/tika-e2e-tests/tika-server/pom.xml
@@ -0,0 +1,139 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-e2e-tests</artifactId>
+        <version>${revision}</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+
+    <artifactId>tika-e2e-tests-server</artifactId>
+    <packaging>jar</packaging>
+    <name>Apache Tika E2E Tests: REST Server</name>
+    <description>End-to-end tests for tika-server-standard, including HTTP/2 
support verification</description>
+
+    <properties>
+        <!-- Path to the tika-server-standard binary assembly zip built in the same reactor -->
+        <tika.server.zip>${project.basedir}/../../tika-server/tika-server-standard/target/tika-server-standard-${revision}-bin.zip</tika.server.zip>
+        <!-- Directory where the assembly is unpacked before tests run -->
+        <tika.server.home>${project.build.directory}/tika-server-dist</tika.server.home>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j2-impl</artifactId>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <!-- Skip by default; run with -Pe2e -->
+                    <skipTests>true</skipTests>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.rat</groupId>
+                <artifactId>apache-rat-plugin</artifactId>
+                <configuration>
+                    <inputExcludes>
+                        <inputExclude>**/README*.md</inputExclude>
+                        <inputExclude>src/test/resources/**</inputExclude>
+                    </inputExcludes>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <profiles>
+        <profile>
+            <id>e2e</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-dependency-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>unpack-tika-server</id>
+                                <phase>process-test-resources</phase>
+                                <goals>
+                                    <goal>unpack</goal>
+                                </goals>
+                                <configuration>
+                                    <artifactItems>
+                                        <artifactItem>
+                                            <groupId>org.apache.tika</groupId>
+                                            
<artifactId>tika-server-standard</artifactId>
+                                            <version>${revision}</version>
+                                            <classifier>bin</classifier>
+                                            <type>zip</type>
+                                            <overWrite>false</overWrite>
+                                            
<outputDirectory>${tika.server.home}</outputDirectory>
+                                        </artifactItem>
+                                    </artifactItems>
+                                </configuration>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-surefire-plugin</artifactId>
+                        <configuration>
+                            <skipTests>false</skipTests>
+                            <systemPropertyVariables>
+                                
<tika.server.home>${tika.server.home}</tika.server.home>
+                            </systemPropertyVariables>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+</project>
diff --git a/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java b/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java
new file mode 100644
index 0000000000..3e642cc46b
--- /dev/null
+++ b/tika-e2e-tests/tika-server/src/test/java/org/apache/tika/server/e2e/TikaServerHttp2Test.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.e2e;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.ServerSocket;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Duration;
+import java.time.Instant;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * End-to-end test verifying that tika-server-standard supports HTTP/2 (h2c 
cleartext).
+ *
+ * Starts the real fat-jar, sends a request using Java's HttpClient configured 
for HTTP/2,
+ * and asserts the response was served over HTTP/2. This validates the runtime 
classpath
+ * has the Jetty http2-server jar and CXF negotiates h2c correctly.
+ *
+ * Run with: mvn test -pl tika-e2e-tests/tika-server -Pe2e
+ *
+ * Inspired by Lawrence Moorehead's original contribution (elemdisc/tika PR#1, 
TIKA-4679).
+ */
+@Tag("E2ETest")
+public class TikaServerHttp2Test {
+
+    private static final Logger log = 
LoggerFactory.getLogger(TikaServerHttp2Test.class);
+    private static final long SERVER_STARTUP_TIMEOUT_MS = 90_000;
+    /** Health-check polls root (/), which always returns 200 without 
requiring endpoint config. */
+    private static final String HEALTH_PATH = "/";
+
+    private Process serverProcess;
+    private int port;
+    private String endPoint;
+
+    @BeforeEach
+    void startServer() throws Exception {
+        port = findFreePort();
+        endPoint = "http://localhost:" + port;
+
+        String serverHome = System.getProperty("tika.server.home");
+        if (serverHome == null) {
+            // fall back to conventional location relative to this module
+            Path repoRoot = Paths.get("").toAbsolutePath();
+            while (repoRoot != null && 
!repoRoot.resolve("tika-server").toFile().isDirectory()) {
+                repoRoot = repoRoot.getParent();
+            }
+            if (repoRoot == null) {
+                throw new IllegalStateException("Cannot locate tika root. Pass 
-Dtika.server.home=/path/to/extracted-assembly");
+            }
+            serverHome = 
repoRoot.resolve("tika-e2e-tests/tika-server/target/tika-server-dist").toAbsolutePath().toString();
+        }
+
+        Path serverJar = Paths.get(serverHome, "tika-server.jar");
+        Assumptions.assumeTrue(Files.exists(serverJar),
+                "tika-server.jar not found at " + serverJar + "; skipping 
HTTP/2 e2e test. " +
+                "Build with: mvn package -pl tika-server/tika-server-standard 
&& " +
+                "mvn test -pl tika-e2e-tests/tika-server -Pe2e");
+
+        log.info("Starting tika-server from: {}", serverJar);
+        ProcessBuilder pb = new ProcessBuilder(
+                "java", "-jar", "tika-server.jar",
+                "-p", String.valueOf(port),
+                "-h", "localhost"
+        );
+        pb.directory(Paths.get(serverHome).toFile());
+        pb.redirectErrorStream(true);
+        serverProcess = pb.start();
+
+        // Drain output in background so the process doesn't block
+        Thread drainThread = new Thread(() -> {
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(serverProcess.getInputStream(), 
UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    log.info("tika-server: {}", line);
+                }
+            } catch (Exception e) {
+                log.debug("Server output stream closed", e);
+            }
+        });
+        drainThread.setDaemon(true);
+        drainThread.start();
+
+        awaitServerStartup();
+    }
+
+    @AfterEach
+    void stopServer() throws Exception {
+        if (serverProcess != null && serverProcess.isAlive()) {
+            serverProcess.destroy();
+            if (!serverProcess.waitFor(5, 
java.util.concurrent.TimeUnit.SECONDS)) {
+                serverProcess.destroyForcibly();
+                serverProcess.waitFor(30, 
java.util.concurrent.TimeUnit.SECONDS);
+            }
+        }
+    }
+
+    @Test
+    void testH2cTikaEndpoint() throws Exception {
+        HttpClient httpClient = HttpClient.newBuilder()
+                .version(HttpClient.Version.HTTP_2)
+                .build();
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(endPoint + "/tika"))
+                .header("Accept", "text/plain")
+                .GET()
+                .build();
+
+        HttpResponse<String> response = httpClient.send(request, 
HttpResponse.BodyHandlers.ofString(UTF_8));
+
+        assertEquals(200, response.statusCode(), "Expected 200 from /tika");
+        assertEquals(HttpClient.Version.HTTP_2, response.version(),
+                "Expected HTTP/2 protocol; server may be missing http2-server 
on classpath");
+        log.info("HTTP/2 h2c verified: {} {}", response.statusCode(), 
response.version());
+    }
+
+    @Test
+    void testH2cParseEndpoint() throws Exception {
+        HttpClient httpClient = HttpClient.newBuilder()
+                .version(HttpClient.Version.HTTP_2)
+                .build();
+
+        // First: GET / to negotiate h2c upgrade, establishing an HTTP/2 
connection
+        HttpRequest warmup = HttpRequest.newBuilder()
+                .uri(URI.create(endPoint + "/"))
+                .GET()
+                .build();
+        httpClient.send(warmup, HttpResponse.BodyHandlers.discarding());
+
+        // Now PUT /tika — the existing HTTP/2 connection is reused
+        byte[] body = "Hello, HTTP/2 world!".getBytes(UTF_8);
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(endPoint + "/tika"))
+                .header("Content-Type", "text/plain")
+                .PUT(HttpRequest.BodyPublishers.ofByteArray(body))
+                .build();
+
+        HttpResponse<String> response = httpClient.send(request, 
HttpResponse.BodyHandlers.ofString(UTF_8));
+
+        assertEquals(200, response.statusCode(), "Expected 200 from /tika");
+        assertEquals(HttpClient.Version.HTTP_2, response.version(),
+                "Expected HTTP/2 protocol on /tika endpoint");
+        log.info("HTTP/2 parse endpoint verified: {} bytes returned over {}", 
response.body().length(), response.version());
+    }
+
+    private void awaitServerStartup() throws Exception {
+        // Use HTTP/1.1 for the health-check poll so we don't depend on HTTP/2 
during startup.
+        // Both connectTimeout and request timeout are set to avoid hanging 
when Jetty has bound
+        // the port but CXF has not yet finished initializing (accepts TCP but 
doesn't respond).
+        HttpClient pollClient = HttpClient.newBuilder()
+                .version(HttpClient.Version.HTTP_1_1)
+                .connectTimeout(Duration.ofSeconds(5))
+                .build();
+
+        Instant deadline = Instant.now().plusMillis(SERVER_STARTUP_TIMEOUT_MS);
+        while (Instant.now().isBefore(deadline)) {
+            if (!serverProcess.isAlive()) {
+                throw new IllegalStateException(
+                        "tika-server process exited unexpectedly with code " + 
serverProcess.exitValue());
+            }
+            try {
+                HttpRequest pollRequest = HttpRequest.newBuilder()
+                        .uri(URI.create(endPoint + HEALTH_PATH))
+                        .timeout(Duration.ofSeconds(5))
+                        .GET()
+                        .build();
+                HttpResponse<Void> resp = pollClient.send(pollRequest, 
HttpResponse.BodyHandlers.discarding());
+                if (resp.statusCode() == 200) {
+                    log.info("tika-server ready on port {}", port);
+                    return;
+                }
+                log.debug("Server returned {} on {}; still waiting...", 
resp.statusCode(), HEALTH_PATH);
+            } catch (Exception e) {
+                log.debug("Waiting for server on port {}: {}", port, 
e.getMessage());
+            }
+            Thread.sleep(1000);
+        }
+        throw new IllegalStateException("tika-server did not start within " + 
SERVER_STARTUP_TIMEOUT_MS + " ms");
+    }
+
+    private static int findFreePort() throws Exception {
+        try (ServerSocket s = new ServerSocket(0)) {
+            return s.getLocalPort();
+        }
+    }
+}
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 5ca149aa3b..52b7cd7ce1 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -593,6 +593,11 @@
         <artifactId>http2-common</artifactId>
         <version>${jetty.http2.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty.http2</groupId>
+        <artifactId>http2-server</artifactId>
+        <version>${jetty.http2.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.jsoup</groupId>
         <artifactId>jsoup</artifactId>
diff --git a/tika-server/tika-server-core/pom.xml b/tika-server/tika-server-core/pom.xml
index d6e1aa125c..0d4873759e 100644
--- a/tika-server/tika-server-core/pom.xml
+++ b/tika-server/tika-server-core/pom.xml
@@ -89,6 +89,10 @@
       <groupId>org.apache.cxf</groupId>
       <artifactId>cxf-rt-transports-http-jetty</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty.http2</groupId>
+      <artifactId>http2-server</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.cxf</groupId>
       <artifactId>cxf-rt-rs-security-cors</artifactId>
@@ -261,4 +265,4 @@
   <scm>
     <tag>3.0.0-rc1</tag>
   </scm>
-</project>
\ No newline at end of file
+</project>
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
index 943c85ddee..accbe372d9 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
@@ -24,7 +24,11 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
+import java.net.URI;
 import java.net.URISyntaxException;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -100,6 +104,24 @@ public class TikaServerIntegrationTest extends IntegrationTestBase {
         testBaseline();
     }
 
+    @Test
+    public void testH2c() throws Exception {
+        startProcess(new String[]{"-config", 
getConfig("tika-config-server-basic.json")});
+        awaitServerStartup();
+        // Using HttpClient in order to check Http2 Version
+        HttpClient httpClient = HttpClient.newBuilder()
+                .version(HttpClient.Version.HTTP_2)
+                .build();
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(endPoint + STATUS_PATH))
+                .header("Accept", "application/json")
+                .GET()
+                .build();
+        HttpResponse<String> response = httpClient.send(request, 
HttpResponse.BodyHandlers.ofString(UTF_8));
+        assertEquals(200, response.statusCode());
+        assertEquals(HttpClient.Version.HTTP_2, response.version());
+    }
+
     @Test
     public void testOOM() throws Exception {
         // With pipes-based parsing, OOM in a child process should NOT crash 
the server

Reply via email to