karlpauls commented on a change in pull request #74: URL: https://github.com/apache/sling-org-apache-sling-feature-cpconverter/pull/74#discussion_r620191019
########## File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/BundleEntryHandler.java ########## @@ -109,111 +154,309 @@ public void handle(@NotNull String path, @NotNull Archive archive, @NotNull Entr logger.debug("Start level {} was extracted from path {}", startLevel, path); } - try (JarInputStream jarInput = new JarInputStream(Objects.requireNonNull(archive.openInputStream(entry)))) { - Properties properties = readGav(entry.getName(), jarInput); - manifest = jarInput.getManifest(); - - if (!properties.isEmpty()) { - groupId = getCheckedProperty(properties, NAME_GROUP_ID); - artifactId = getCheckedProperty(properties, NAME_ARTIFACT_ID); - version = getCheckedProperty(properties, NAME_VERSION); - classifier = properties.getProperty(NAME_CLASSIFIER); - } else { // maybe the included jar is just an OSGi bundle but not a valid Maven artifact - groupId = getCheckedProperty(manifest, BUNDLE_SYMBOLIC_NAME); - // Make sure there are not spaces in the name to adhere to the Maven Group Id specification - groupId = groupId.replace(' ', '_').replace(':', '_').replace('/', '_').replace('\\', '_'); - if (groupId.indexOf('.') != -1) { - artifactId = groupId.substring(groupId.lastIndexOf('.') + 1); - groupId = groupId.substring(0, groupId.lastIndexOf('.')); + String bundleName = entry.getName(); + // Remove the leading path + int idx = bundleName.lastIndexOf('/'); + if (idx >= 0) { + bundleName = bundleName.substring(idx + 1); + } + // Remove the extension + int edx = bundleName.lastIndexOf('.'); + if (edx > 0) { + bundleName = bundleName.substring(0, edx); + } + + // create a temporary JAR file (extracted from archive) + Path tmpBundleJar = Files.createTempFile(converter.getTempDirectory().toPath(), "extracted", bundleName + ".jar"); + try (OutputStream output = Files.newOutputStream(tmpBundleJar); + InputStream input = Objects.requireNonNull(archive.openInputStream(entry))) { + IOUtils.copy(input, output); + } + + try (JarFile jarFile = new 
JarFile(tmpBundleJar.toFile())) { + // first extract bundle metadata from JAR input stream + ArtifactId id = extractArtifactId(bundleName, jarFile); + + try (InputStream strippedBundleInput = extractSlingInitialContent(id, jarFile, converter, runMode)) { + Objects.requireNonNull(converter.getArtifactsDeployer()).deploy(new InputStreamArtifactWriter(strippedBundleInput), id); + Objects.requireNonNull(converter.getFeaturesManager()).addArtifact(runMode, id, startLevel); + + String exportHeader = Objects.requireNonNull(jarFile.getManifest()).getMainAttributes().getValue(Constants.EXPORT_PACKAGE); + if (exportHeader != null) { + for (Clause clause : Parser.parseHeader(exportHeader)) { + converter.getFeaturesManager().addAPIRegionExport(runMode, clause.getName()); + } } - if (artifactId == null || artifactId.isEmpty()) { - artifactId = groupId; + } + } finally { + Files.delete(tmpBundleJar); + } + } + + @NotNull InputStream extractSlingInitialContent(@NotNull ArtifactId bundleArtifactId, @NotNull JarFile jarFile, @NotNull ContentPackage2FeatureModelConverter converter, @Nullable String runMode) throws Exception { + if (!extractSlingInitialContent) { + return new FileInputStream(jarFile.getName()); + } + // parse "Sling-Initial-Content" header + Manifest manifest = Objects.requireNonNull(jarFile.getManifest()); + Iterator<PathEntry> pathEntries = PathEntry.getContentPaths(manifest, -1); + if (pathEntries == null) { + return new FileInputStream(jarFile.getName()); + } + logger.info("Extracting Sling-Initial-Content from '{}'", bundleArtifactId); + Collection<PathEntry> pathEntryList = new ArrayList<>(); + pathEntries.forEachRemaining(pathEntryList::add); + + // remove header + manifest.getMainAttributes().remove(new Attributes.Name(PathEntry.CONTENT_HEADER)); + Path newBundleFile = Files.createTempFile(converter.getTempDirectory().toPath(), "newBundle", ".jar"); + + // create JAR file to prevent extracting it twice and for random access + JcrNamespaceRegistry 
namespaceRegistry = createNamespaceRegistry(manifest, jarFile, converter.getFeaturesManager().getNamespaceUriByPrefix()); + + Map<PackageType, VaultPackageAssembler> packageAssemblers = new EnumMap<>(PackageType.class); + try (OutputStream fileOutput = Files.newOutputStream(newBundleFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING); + JarOutputStream bundleOutput = new JarOutputStream(fileOutput, manifest)) { + + for (Enumeration<JarEntry> e = jarFile.entries(); e.hasMoreElements();) { + JarEntry jarEntry = e.nextElement(); + if (!jarEntry.isDirectory()) { + try (InputStream input = jarFile.getInputStream(jarEntry)) { + if (!extractSlingInitialContent(jarEntry, input, bundleArtifactId, pathEntryList, packageAssemblers, namespaceRegistry, converter)) { + // skip manifest, as already written in the constructor (as first entry) + if (jarEntry.getName().equals(JarFile.MANIFEST_NAME)) { + continue; + } + // copy entry as is to the stripped bundle + ZipEntry ze = new ZipEntry(jarEntry.getName()); + bundleOutput.putNextEntry(ze); + IOUtils.copy(input, bundleOutput); + bundleOutput.closeEntry(); + } + } } - Version osgiVersion = Version.parseVersion(getCheckedProperty(manifest, BUNDLE_VERSION)); - version = osgiVersion.getMajor() + "." + osgiVersion.getMinor() + "." + osgiVersion.getMicro() + (osgiVersion.getQualifier().isEmpty() ? 
"" : "-" + osgiVersion.getQualifier()); } } + // add additional content packages to feature model + finalizePackageAssembly(packageAssemblers, converter, runMode); + + // return stripped bundle's inputstream which must be deleted on close + return Files.newInputStream(newBundleFile, StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE); + } - try (InputStream input = archive.openInputStream(entry)) { - if (input != null) { - ArtifactId id = new ArtifactId(groupId, artifactId, version, classifier, JAR_TYPE); + /** + * + * @param jarEntry + * @param bundleFileInputStream + * @param pathEntriesStream + * @param packageAssemblers + * @param converter + * @return {@code true} in case the given entry was part of the initial content otherwise {@code false} + * @throws Exception + */ + boolean extractSlingInitialContent(@NotNull JarEntry jarEntry, @NotNull InputStream bundleFileInputStream, @NotNull ArtifactId bundleArtifactId, @NotNull Collection<PathEntry> pathEntries, @NotNull Map<PackageType, VaultPackageAssembler> packageAssemblers, @NotNull JcrNamespaceRegistry nsRegistry, @NotNull ContentPackage2FeatureModelConverter converter) throws Exception { + final String entryName = jarEntry.getName(); + // check if current JAR entry is initial content + Optional<PathEntry> pathEntry = pathEntries.stream().filter(p -> entryName.startsWith(p.getPath())).findFirst(); + if (!pathEntry.isPresent()) { + return false; + } + Map.Entry<ContentParser, ParserOptions> contentParserAndOptions = getContentParserForEntry(jarEntry, pathEntry.get()); + + // https://sling.apache.org/documentation/bundles/content-loading-jcr-contentloader.html#file-name-escaping + String repositoryPath = (pathEntry.get().getTarget() != null ? 
pathEntry.get().getTarget() : "/") + URLDecoder.decode(entryName.substring(pathEntry.get().getPath().length()), "UTF-8"); + String contentPackagePath = org.apache.jackrabbit.vault.util.Constants.ROOT_DIR + PlatformNameFormat.getPlatformPath(repositoryPath); + + // in which content package should this end up? + VaultPackageAssembler packageAssembler = initPackageAssemblerForPath(bundleArtifactId, repositoryPath, pathEntry.get(), packageAssemblers, converter); + Path tmpInputFile = null; + if (contentParserAndOptions != null) { + // convert to docview xml + tmpInputFile = Files.createTempFile(converter.getTempDirectory().toPath(), "docview", ".xml"); + try (OutputStream docViewOutput = Files.newOutputStream(tmpInputFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING); + DocViewSerializerContentHandler contentHandler = new DocViewSerializerContentHandler(docViewOutput, nsRegistry)) { + contentParserAndOptions.getKey().parse(contentHandler, bundleFileInputStream, contentParserAndOptions.getValue()); + contentPackagePath = FilenameUtils.removeExtension(contentPackagePath) + ".xml"; + } catch (IOException e) { + throw new IOException("Can not parse " + jarEntry, e); + } catch (DocViewSerializerContentHandlerException e) { + throw new IOException("Can not convert " + jarEntry + " to enhanced DocView format", e); + } + } - Objects.requireNonNull(converter.getArtifactsDeployer()).deploy(new InputStreamArtifactWriter(input), id); + // does entry in initial content need to be extracted into feature model (e.g. for OSGi configurations)? 
+ EntryHandler entryHandler = converter.getHandlersManager().getEntryHandlerByEntryPath(contentPackagePath); Review comment: There is one in Composum if you use this version: https://search.maven.org/remotecontent?filepath=com/composum/nodes/composum-nodes-aem-package/2.3.0/composum-nodes-aem-package-2.3.0.zip ``` [INFO] Created package com.composum.nodes:composum-nodes-config-apps:2.3.0 out of Sling-Initial-Content from 'com.composum.nodes:composum-nodes-config:2.3.0' [INFO] Processing configuration 'org.apache.sling.jcr.base.internal.LoginAdminWhitelist.fragment~composum_core_v2'. ``` (Obviously, that was after I fixed the two issues.) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org