This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit b2141dc2f15887dcbcdb802bbae397b2d32da7f8 Author: Mark Thomas <ma...@apache.org> AuthorDate: Tue Jul 22 15:39:35 2025 +0100 Fix bloom filter index for JARs in packed WARs --- .../webresources/AbstractArchiveResourceSet.java | 8 +- .../apache/catalina/webresources/JarContents.java | 96 +++++++++++++--------- .../catalina/webresources/JarWarResourceSet.java | 24 ++++++ webapps/docs/changelog.xml | 4 + 4 files changed, 89 insertions(+), 43 deletions(-) diff --git a/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java b/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java index eace3665a7..ff0ca34b33 100644 --- a/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java +++ b/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java @@ -36,12 +36,12 @@ public abstract class AbstractArchiveResourceSet extends AbstractResourceSet { private URL baseUrl; private String baseUrlString; - private JarFile archive = null; + protected JarFile archive = null; protected Map<String,JarEntry> archiveEntries = null; protected final Object archiveLock = new Object(); - private long archiveUseCount = 0; - private JarContents jarContents; - private boolean retainBloomFilterForArchives = false; + protected long archiveUseCount = 0; + protected JarContents jarContents; + protected boolean retainBloomFilterForArchives = false; protected final void setBaseUrl(URL baseUrl) { this.baseUrl = baseUrl; diff --git a/java/org/apache/catalina/webresources/JarContents.java b/java/org/apache/catalina/webresources/JarContents.java index 58e0bff993..31438bd88d 100644 --- a/java/org/apache/catalina/webresources/JarContents.java +++ b/java/org/apache/catalina/webresources/JarContents.java @@ -17,6 +17,7 @@ package org.apache.catalina.webresources; import java.util.BitSet; +import java.util.Collection; import java.util.Enumeration; import java.util.jar.JarEntry; import java.util.jar.JarFile; @@ -27,8 +28,7 @@ import java.util.jar.JarFile; * 
from the beginning of the key. The hash methods are simple but good enough for this purpose. */ public final class JarContents { - private final BitSet bits1; - private final BitSet bits2; + /** * Constant used by a typical hashing method. */ @@ -44,6 +44,10 @@ public final class JarContents { */ private static final int TABLE_SIZE = 2048; + private final BitSet bits1 = new BitSet(TABLE_SIZE); + private final BitSet bits2 = new BitSet(TABLE_SIZE); + + /** * Parses the passed-in jar and populates the bit array. * @@ -51,52 +55,66 @@ public final class JarContents { */ public JarContents(JarFile jar) { Enumeration<JarEntry> entries = jar.entries(); - bits1 = new BitSet(TABLE_SIZE); - bits2 = new BitSet(TABLE_SIZE); - while (entries.hasMoreElements()) { JarEntry entry = entries.nextElement(); - String name = entry.getName(); - int startPos = 0; - - // If the path starts with a slash, that's not useful information. - // Skipping it increases the significance of our key by - // removing an insignificant character. - boolean precedingSlash = name.charAt(0) == '/'; - if (precedingSlash) { - startPos = 1; - } + processEntry(entry); + } + } - // Versioned entries should be added to the table according to their real name - if (name.startsWith("META-INF/versions/", startPos)) { - int i = name.indexOf('/', 18 + startPos); - if (i > 0) { - int version = Integer.parseInt(name.substring(18 + startPos, i)); - if (version <= Runtime.version().feature()) { - startPos = i + 1; - } - } - if (startPos == name.length()) { - continue; + + /** + * Populates the bit array from the provided set of JAR entries. + * + * @param entries The set of entries for the JAR file being processed + */ + public JarContents(Collection<JarEntry> entries) { + for (JarEntry entry : entries) { + processEntry(entry); + } + } + + + private void processEntry(JarEntry entry) { + String name = entry.getName(); + int startPos = 0; + + // If the path starts with a slash, that's not useful information. 
+ // Skipping it increases the significance of our key by + // removing an insignificant character. + boolean precedingSlash = name.charAt(0) == '/'; + if (precedingSlash) { + startPos = 1; + } + + // Versioned entries should be added to the table according to their real name + if (name.startsWith("META-INF/versions/", startPos)) { + int i = name.indexOf('/', 18 + startPos); + if (i > 0) { + int version = Integer.parseInt(name.substring(18 + startPos, i)); + if (version <= Runtime.version().feature()) { + startPos = i + 1; } } + if (startPos == name.length()) { + return; + } + } - // Find the correct table slot - int pathHash1 = hashcode(name, startPos, HASH_PRIME_1); - int pathHash2 = hashcode(name, startPos, HASH_PRIME_2); + // Find the correct table slot + int pathHash1 = hashcode(name, startPos, HASH_PRIME_1); + int pathHash2 = hashcode(name, startPos, HASH_PRIME_2); - bits1.set(pathHash1 % TABLE_SIZE); - bits2.set(pathHash2 % TABLE_SIZE); + bits1.set(pathHash1 % TABLE_SIZE); + bits2.set(pathHash2 % TABLE_SIZE); - // While directory entry names always end in "/", application code - // may look them up without the trailing "/". Add this second form. - if (entry.isDirectory()) { - pathHash1 = hashcode(name, startPos, name.length() - 1, HASH_PRIME_1); - pathHash2 = hashcode(name, startPos, name.length() - 1, HASH_PRIME_2); + // While directory entry names always end in "/", application code + // may look them up without the trailing "/". Add this second form. 
+ if (entry.isDirectory()) { + pathHash1 = hashcode(name, startPos, name.length() - 1, HASH_PRIME_1); + pathHash2 = hashcode(name, startPos, name.length() - 1, HASH_PRIME_2); - bits1.set(pathHash1 % TABLE_SIZE); - bits2.set(pathHash2 % TABLE_SIZE); - } + bits1.set(pathHash1 % TABLE_SIZE); + bits2.set(pathHash2 % TABLE_SIZE); } } diff --git a/java/org/apache/catalina/webresources/JarWarResourceSet.java b/java/org/apache/catalina/webresources/JarWarResourceSet.java index 8ab6418499..f557c6e714 100644 --- a/java/org/apache/catalina/webresources/JarWarResourceSet.java +++ b/java/org/apache/catalina/webresources/JarWarResourceSet.java @@ -27,6 +27,7 @@ import java.util.jar.JarEntry; import java.util.jar.JarFile; import java.util.jar.JarInputStream; import java.util.jar.Manifest; +import java.util.zip.ZipFile; import org.apache.catalina.LifecycleException; import org.apache.catalina.WebResource; @@ -146,11 +147,34 @@ public class JarWarResourceSet extends AbstractArchiveResourceSet { } } } + WebResourceRoot root = getRoot(); + if (root.getArchiveIndexStrategyEnum().getUsesBloom()) { + jarContents = new JarContents(archiveEntries.values()); + retainBloomFilterForArchives = root.getArchiveIndexStrategyEnum().getRetain(); + } return archiveEntries; } } + /** + * {@inheritDoc} + * <p> + * JarWar needs to generate jarContents for the inner JAR, not the outer WAR. + */ + @Override + protected JarFile openJarFile() throws IOException { + synchronized (archiveLock) { + if (archive == null) { + archive = new JarFile(new File(getBase()), true, ZipFile.OPEN_READ, Runtime.version()); + // Don't populate JarContents here. 
Populate at the end of getArchiveEntries() + } + archiveUseCount++; + return archive; + } + } + + protected void processArchivesEntriesForMultiRelease() { int targetVersion = Runtime.version().feature(); diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index 1ca1fb7c6a..4c415e3a5d 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -172,6 +172,10 @@ performance. (markt) </update> <!-- Entries for backport and removal before 12.0.0-M1 below this line --> + <fix> + Fix bloom filter population for archive indexing when using a packed + WAR containing one or more JAR files. (markt) + </fix> </changelog> </subsection> <subsection name="Coyote"> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org