Hi Eirik,

I think this is shaping up nicely.

If we ensure the array returned from getMetaInfVersions is already
sorted (by e.g. using a TreeMap during parse), we could remove the
stream expression. And if we check bounds in getVersionedEntry we don't
need the field in JarFile at all.

In initCEN I think you need to subtract 9 from nlen:

getMetaVersion(cen, pos + CENHDR + 9, nlen - 9)

I suspect this meant that most entries that would have been a
valid entry were filtered out, so your patch might have ended
up with an empty versions array in your tests.

And in getMetaVersion I think we can avoid creating Strings and parse
the version inline. By using 0 as the "not a version" value we can
simplify it further:

            int version = 0;
            for (int i = vstart; i < nend; i++) {
                final byte c = name[i];
                if (c == '/') {
                    return version;
                }
                if (c < '0' || c > '9') {
                    return 0;
                }
                version = version * 10 + c - '0';
                // Check for overflow
                if (version < 0) {
                    return 0;
                }
            }

(Maybe we need to reject leading zeros..)

I've uploaded a version here for your convenience - let me know if
you're happy with the suggestions/edits:

http://cr.openjdk.java.net/~redestad/scratch/eirik_mr_versions.00/

Thanks!

/Claes

On 2020-04-12 13:26, Eirik Bjørsnøs wrote:

Claes,

Since the ZIP index is already scanned for META-INF/ names in ZipFile.Source.initCEN, it might make sense to move version scanning there. This would also allow identifying version strings at the byte array level.

Here's a patch which implements this:

diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java b/src/java.base/share/classes/java/util/jar/JarFile.java
index 1ec0f5bdae..988cc837f0 100644
--- a/src/java.base/share/classes/java/util/jar/JarFile.java
+++ b/src/java.base/share/classes/java/util/jar/JarFile.java
@@ -49,6 +49,7 @@ import java.util.Locale;
  import java.util.NoSuchElementException;
  import java.util.Objects;
  import java.util.function.Function;
+import java.util.stream.IntStream;
  import java.util.stream.Stream;
  import java.util.zip.ZipEntry;
  import java.util.zip.ZipException;
@@ -161,7 +162,7 @@ public class JarFile extends ZipFile {
      private final Runtime.Version version;  // current version
      private final int versionFeature;       // version.feature()
      private boolean isMultiRelease;         // is jar multi-release?
-
+    private int[] versions;                 // which versions does the jar contain
      // indicates if Class-Path attribute present
      private boolean hasClassPathAttribute;
      // true if manifest checked for special attributes
@@ -599,12 +600,13 @@ public class JarFile extends ZipFile {
      }

      private JarEntry getVersionedEntry(String name, JarEntry je) {
-        if (BASE_VERSION_FEATURE < versionFeature) {
+        int[] versions = this.versions;
+        if (BASE_VERSION_FEATURE < versionFeature && versions != null && versions.length > 0) {
              if (!name.startsWith(META_INF)) {
                  // search for versioned entry
-                int v = versionFeature;
-                while (v > BASE_VERSION_FEATURE) {
-                    JarFileEntry vje = getEntry0(META_INF_VERSIONS + v + "/" + name);
+                int v = versions.length - 1;
+                while (v >= 0) {
+                    JarFileEntry vje = getEntry0(META_INF_VERSIONS + versions[v] + "/" + name);
                      if (vje != null) {
                          return vje.withBasename(name);
                      }
@@ -1016,9 +1018,16 @@ public class JarFile extends ZipFile {
                          byte[] lbuf = new byte[512];
                          Attributes attr = new Attributes();
                          attr.read(new Manifest.FastInputStream(
-                            new ByteArrayInputStream(b)), lbuf);
-                        isMultiRelease = Boolean.parseBoolean(
-                            attr.getValue(Attributes.Name.MULTI_RELEASE));
+                                new ByteArrayInputStream(b)), lbuf);
+                        if(Boolean.parseBoolean(
+  attr.getValue(Attributes.Name.MULTI_RELEASE))) {
+                            isMultiRelease = true;
+                            versions = IntStream.of(JUZFA.getMetaInfVersions(this)) +                                    .filter(v -> v >= BASE_VERSION_FEATURE && v <= versionFeature)
+                                    .sorted()
+                                    .toArray();
+                        }
+
                      }
                  }
              }
diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java
index 274e8788d1..25eeb57555 100644
--- a/src/java.base/share/classes/java/util/zip/ZipFile.java
+++ b/src/java.base/share/classes/java/util/zip/ZipFile.java
@@ -48,6 +48,7 @@ import java.util.Iterator;
  import java.util.Objects;
  import java.util.NoSuchElementException;
  import java.util.Set;
+import java.util.HashSet;
  import java.util.Spliterator;
  import java.util.Spliterators;
  import java.util.WeakHashMap;
@@ -1063,6 +1064,11 @@ public class ZipFile implements ZipConstants, Closeable {
                      return zip.getMetaInfEntryNames();
                  }
                  @Override
+                public int[] getMetaInfVersions(ZipFile zip) {
+                    final int[] metaVersions = zip.res.zsrc.metaVersions;
+                    return metaVersions == null ? new int[0] : metaVersions;
+                }
+                @Override
                  public JarEntry getEntry(ZipFile zip, String name,
                      Function<String, JarEntry> func) {
                      return (JarEntry)zip.getEntry(name, func);
@@ -1097,6 +1103,7 @@ public class ZipFile implements ZipConstants, Closeable {
          private byte[] comment;              // zip file comment
                                              // list of meta entries in META-INF dir
          private int[] metanames;
+        private int[] metaVersions;          // list of unique versions found in META-INF/versions/          private final boolean startsWithLoc; // true, if zip file starts with LOCSIG (usually true)

          // A Hashmap for all entries.
@@ -1424,6 +1431,8 @@ public class ZipFile implements ZipConstants, Closeable {

              // list for all meta entries
              ArrayList<Integer> metanamesList = null;
+            // Set of all version numbers seen in META-INF/versions/
+            Set<Integer> metaVersionsSet = null;

              // Iterate through the entries in the central directory
              int i = 0;
@@ -1461,6 +1470,12 @@ public class ZipFile implements ZipConstants, Closeable {
                      if (metanamesList == null)
                          metanamesList = new ArrayList<>(4);
                      metanamesList.add(pos);
+                    int version = getMetaVersion(cen, pos + CENHDR + 9, nlen) > +                    if(version != -1) {
+                        if(metaVersionsSet == null)
+                            metaVersionsSet = new HashSet<>();
+                        metaVersionsSet.add(version);
+                    }
                  }
                  // skip ext and comment
                  pos += (CENHDR + nlen + elen + clen);
@@ -1473,6 +1488,13 @@ public class ZipFile implements ZipConstants, Closeable {
                      metanames[j] = metanamesList.get(j);
                  }
              }
+            if(metaVersionsSet != null) {
+                metaVersions = new int[metaVersionsSet.size()];
+                int c = 0;
+                for (Integer version : metaVersionsSet) {
+                    metaVersions[c++] = version;
+                }
+            }
              if (pos + ENDHDR != cen.length) {
                  zerror("invalid CEN header (bad header size)");
              }
@@ -1556,6 +1578,49 @@ public class ZipFile implements ZipConstants, Closeable {
                  && (name[off]         ) == '/';
          }

+        /*
+         * If bytes represents a non-directory name beginning
+         * with "versions/", and continuing with an integer (version)
+         * followed by a '/', then return the parsed version integer.
+         * Otherwise, return -1
+         */
+        private static int getMetaVersion(byte[] name, int off, int len) {
+            int nend = off+len;
+            if(! (len > 9                        // "versions/".length()
+                    && name[off + len - 1] != '/'  // non-directory
+                    && (name[off++] | 0x20) == 'v'
+                    && (name[off++] | 0x20) == 'e'
+                    && (name[off++] | 0x20) == 'r'
+                    && (name[off++] | 0x20) == 's'
+                    && (name[off++] | 0x20) == 'i'
+                    && (name[off++] | 0x20) == 'o'
+                    && (name[off++] | 0x20) == 'n'
+                    && (name[off++] | 0x20) == 's'
+                    && (name[off++]         ) == '/')) {
+                return -1;
+            }
+            int vstart = off;
+            for(int i = vstart; i < nend; i++) {
+                final byte c = name[i];
+                if(c != '/' && (c < '0' || c > '9')) {
+                    return -1;
+                }
+                if(c == '/') {
+                    int vlen = i - vstart;
+
+                    if(vlen < 1) {
+                        return -1;
+                    }
+                    try {
+                        return Integer.parseInt(new String(name, vstart, vlen, UTF_8.INSTANCE));
+                    } catch (NumberFormatException e) {
+                        return -1;
+                    }
+                }
+            }
+            return -1;
+        }
+
          /**
           * Returns the number of CEN headers in a central directory.
           * Will not throw, even if the zip file is corrupt.
diff --git a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
index 10187e2f50..0d808b9286 100644
--- a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java
@@ -35,6 +35,7 @@ import java.util.zip.ZipFile;
  public interface JavaUtilZipFileAccess {
      public boolean startsWithLocHeader(ZipFile zip);
      public String[] getMetaInfEntryNames(ZipFile zip);
+    public int[] getMetaInfVersions(ZipFile zip);
     public JarEntry getEntry(ZipFile zip, String name, Function<String, JarEntry> func);      public Enumeration<JarEntry> entries(ZipFile zip, Function<String, JarEntry> func);      public Stream<JarEntry> stream(ZipFile zip, Function<String, JarEntry> func);




On Sun, Apr 12, 2020 at 10:50 AM Eirik Bjørsnøs <eir...@gmail.com <mailto:eir...@gmail.com>> wrote:

        I think you could tune away a significant part of that up front
        cost by
        using JUZFA.entryNameStream(this) instead of
        this.stream().map(ZipEntry::getName). This will avoid expanding each
        entry into a JarEntry internally. Perhaps this gets the up-front
        overhead down to more acceptable levels..?


    I found JUZFA.getMetaInfEntryNames which made the up front scanning
    cost evaporate.

    Should be fast for other jars as well, at least when META-INF/ is
    sparsely populated.

    Here's an updated patch:

    diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java
    b/src/java.base/share/classes/java/util/jar/JarFile.java
    index 1ec0f5bdae..95472604c0 100644
    --- a/src/java.base/share/classes/java/util/jar/JarFile.java
    +++ b/src/java.base/share/classes/java/util/jar/JarFile.java
    @@ -161,7 +161,7 @@ public class JarFile extends ZipFile {
          private final Runtime.Version version;  // current version
          private final int versionFeature;       // version.feature()
          private boolean isMultiRelease;         // is jar multi-release?
    -
    +    private int[] versions;                 // which versions does
    the jar contain
          // indicates if Class-Path attribute present
          private boolean hasClassPathAttribute;
          // true if manifest checked for special attributes
    @@ -599,12 +599,13 @@ public class JarFile extends ZipFile {
          }

          private JarEntry getVersionedEntry(String name, JarEntry je) {
    -        if (BASE_VERSION_FEATURE < versionFeature) {
    +        int[] versions = this.versions;
    +        if (BASE_VERSION_FEATURE < versionFeature && versions !=
    null && versions.length > 0) {
                  if (!name.startsWith(META_INF)) {
                      // search for versioned entry
    -                int v = versionFeature;
    -                while (v > BASE_VERSION_FEATURE) {
    -                    JarFileEntry vje = getEntry0(META_INF_VERSIONS
    + v + "/" + name);
    +                int v = versions.length - 1;
    +                while (v >= 0) {
    +                    JarFileEntry vje = getEntry0(META_INF_VERSIONS
    + versions[v] + "/" + name);
                          if (vje != null) {
                              return vje.withBasename(name);
                          }
    @@ -1016,9 +1017,18 @@ public class JarFile extends ZipFile {
                              byte[] lbuf = new byte[512];
                              Attributes attr = new Attributes();
                              attr.read(new Manifest.FastInputStream(
    -                            new ByteArrayInputStream(b)), lbuf);
    -                        isMultiRelease = Boolean.parseBoolean(
-  attr.getValue(Attributes.Name.MULTI_RELEASE));
    +                                new ByteArrayInputStream(b)), lbuf);
    +                        if(Boolean.parseBoolean(
+  attr.getValue(Attributes.Name.MULTI_RELEASE))) {
    +                            isMultiRelease = true;
    +                            versions =
    Stream.of(JUZFA.getMetaInfEntryNames(this))
    +                                    .mapToInt(this::parseVersion)
    +                                    .filter(v -> v != -1 && v >=
    BASE_VERSION_FEATURE && v <= versionFeature)
    +                                    .distinct()
    +                                    .sorted()
    +                                    .toArray();
    +                        }
    +
                          }
                      }
                  }
    @@ -1026,6 +1036,27 @@ public class JarFile extends ZipFile {
              }
          }

    +    /**
    +     * If {@code entryName} is a a versioned entry, parse and
    return the version as an integer, otherwise return -1
    +     */
    +    private int parseVersion(String entryName) {
    +        if(!entryName.startsWith(META_INF_VERSIONS)) {
    +            return -1;
    +        }
    +
    +        int separator = entryName.indexOf("/",
    META_INF_VERSIONS.length());
    +
    +        if(separator == -1) {
    +            return -1;
    +        }
    +
    +        try {
    +            return Integer.parseInt(entryName,
    META_INF_VERSIONS.length(), separator, 10);
    +        } catch (NumberFormatException e) {
    +            return -1;
    +        }
    +    }
    +
          synchronized void ensureInitialization() {
              try {
                  maybeInstantiateVerifier();

    Eirik.

Reply via email to