damccorm commented on code in PR #31796: URL: https://github.com/apache/beam/pull/31796#discussion_r1670574378
########## runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java: ########## @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.prism; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Iterator; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import org.apache.beam.sdk.util.ReleaseInfo; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; +import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.HashCode; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.Hashing; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.ByteStreams; + +/** + * Locates a Prism executable based on a user's default operating system and architecture + * environment or a {@link PrismPipelineOptions#getPrismLocation()} override. Handles the download, + * unzip, {@link PosixFilePermissions}, as needed. For {@link #GITHUB_DOWNLOAD_PREFIX} sources, + * additionally performs a SHA512 verification. + */ +class PrismLocator { + static final String OS_NAME_PROPERTY = "os.name"; + static final String ARCH_PROPERTY = "os.arch"; + static final String USER_HOME_PROPERTY = "user.home"; + + private static final String ZIP_EXT = "zip"; + private static final String SHA512_EXT = "sha512"; + private static final ReleaseInfo RELEASE_INFO = ReleaseInfo.getReleaseInfo(); + private static final String PRISM_BIN_PATH = ".apache_beam/cache/prism/bin"; + private static final Set<PosixFilePermission> PERMS = + PosixFilePermissions.fromString("rwxr-xr-x"); + private static final String GITHUB_DOWNLOAD_PREFIX = + "https://github.com/apache/beam/releases/download"; + private static final String GITHUB_TAG_PREFIX = "https://github.com/apache/beam/releases/tag"; + + private final PrismPipelineOptions options; + + PrismLocator(PrismPipelineOptions options) { + this.options = options; + } + + /** + * Downloads and prepares a Prism executable for use with the {@link PrismRunner}, executed by the + * {@link PrismExecutor}. The returned {@link String} is the absolute path to the Prism + * executable. 
+ */ + String resolve() throws IOException { + + String from = + String.format("%s/v%s/%s.zip", GITHUB_DOWNLOAD_PREFIX, getSDKVersion(), buildFileName()); + + if (!Strings.isNullOrEmpty(options.getPrismLocation())) { + checkArgument( + !options.getPrismLocation().startsWith(GITHUB_TAG_PREFIX), + "Provided --prismLocation URL is not an Apache Beam Github " + + "Release page URL or download URL: ", + from); + + from = options.getPrismLocation(); + } + + String fromFileName = getNameWithoutExtension(from); + Path to = Paths.get(userHome(), PRISM_BIN_PATH, fromFileName); + + if (Files.exists(to)) { + return to.toString(); + } + + createDirectoryIfNeeded(to); + + if (from.startsWith("http")) { Review Comment: Is this possible? Don't above checks preclude this condition? ########## runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java: ########## @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.runners.prism; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Iterator; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import org.apache.beam.sdk.util.ReleaseInfo; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.HashCode; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.Hashing; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.ByteStreams; + +/** + * Locates a Prism executable based on a user's default operating system and architecture + * environment or a {@link PrismPipelineOptions#getPrismLocation()} override. Handles the download, + * unzip, {@link PosixFilePermissions}, as needed. For {@link #GITHUB_DOWNLOAD_PREFIX} sources, + * additionally performs a SHA512 verification. 
+ */ +class PrismLocator { + static final String OS_NAME_PROPERTY = "os.name"; + static final String ARCH_PROPERTY = "os.arch"; + static final String USER_HOME_PROPERTY = "user.home"; + + private static final String ZIP_EXT = "zip"; + private static final String SHA512_EXT = "sha512"; + private static final ReleaseInfo RELEASE_INFO = ReleaseInfo.getReleaseInfo(); + private static final String PRISM_BIN_PATH = ".apache_beam/cache/prism/bin"; + private static final Set<PosixFilePermission> PERMS = + PosixFilePermissions.fromString("rwxr-xr-x"); + private static final String GITHUB_DOWNLOAD_PREFIX = + "https://github.com/apache/beam/releases/download"; + private static final String GITHUB_TAG_PREFIX = "https://github.com/apache/beam/releases/tag"; + + private final PrismPipelineOptions options; + + PrismLocator(PrismPipelineOptions options) { + this.options = options; + } + + /** + * Downloads and prepares a Prism executable for use with the {@link PrismRunner}, executed by the + * {@link PrismExecutor}. The returned {@link String} is the absolute path to the Prism + * executable. 
+ */ + String resolve() throws IOException { + + String from = + String.format("%s/v%s/%s.zip", GITHUB_DOWNLOAD_PREFIX, getSDKVersion(), buildFileName()); + + if (!Strings.isNullOrEmpty(options.getPrismLocation())) { + checkArgument( + !options.getPrismLocation().startsWith(GITHUB_TAG_PREFIX), + "Provided --prismLocation URL is not an Apache Beam Github " + + "Release page URL or download URL: ", + from); + + from = options.getPrismLocation(); + } + + String fromFileName = getNameWithoutExtension(from); + Path to = Paths.get(userHome(), PRISM_BIN_PATH, fromFileName); + + if (Files.exists(to)) { + return to.toString(); + } + + createDirectoryIfNeeded(to); + + if (from.startsWith("http")) { + String result = resolve(new URL(from), to); + checkState(Files.exists(to), "Resolved location does not exist: %s", result); + return result; + } + + String result = resolve(Paths.get(from), to); + checkState(Files.exists(to), "Resolved location does not exist: %s", result); + return result; + } + + private String resolve(URL from, Path to) throws IOException { + if (from.toString().startsWith(GITHUB_DOWNLOAD_PREFIX)) { Review Comment: Will this always be true? Should we just assert this instead? ########## runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java: ########## @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.prism; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Iterator; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import org.apache.beam.sdk.util.ReleaseInfo; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.HashCode; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.Hashing; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.ByteStreams; + +/** + * Locates a Prism executable based on a user's default operating system and architecture + * environment or a {@link PrismPipelineOptions#getPrismLocation()} override. Handles the download, + * unzip, {@link PosixFilePermissions}, as needed. 
For {@link #GITHUB_DOWNLOAD_PREFIX} sources, + * additionally performs a SHA512 verification. + */ +class PrismLocator { + static final String OS_NAME_PROPERTY = "os.name"; + static final String ARCH_PROPERTY = "os.arch"; + static final String USER_HOME_PROPERTY = "user.home"; + + private static final String ZIP_EXT = "zip"; + private static final String SHA512_EXT = "sha512"; + private static final ReleaseInfo RELEASE_INFO = ReleaseInfo.getReleaseInfo(); + private static final String PRISM_BIN_PATH = ".apache_beam/cache/prism/bin"; + private static final Set<PosixFilePermission> PERMS = + PosixFilePermissions.fromString("rwxr-xr-x"); + private static final String GITHUB_DOWNLOAD_PREFIX = + "https://github.com/apache/beam/releases/download"; + private static final String GITHUB_TAG_PREFIX = "https://github.com/apache/beam/releases/tag"; + + private final PrismPipelineOptions options; + + PrismLocator(PrismPipelineOptions options) { + this.options = options; + } + + /** + * Downloads and prepares a Prism executable for use with the {@link PrismRunner}, executed by the + * {@link PrismExecutor}. The returned {@link String} is the absolute path to the Prism + * executable. 
+ */ + String resolve() throws IOException { + + String from = + String.format("%s/v%s/%s.zip", GITHUB_DOWNLOAD_PREFIX, getSDKVersion(), buildFileName()); + + if (!Strings.isNullOrEmpty(options.getPrismLocation())) { + checkArgument( + !options.getPrismLocation().startsWith(GITHUB_TAG_PREFIX), + "Provided --prismLocation URL is not an Apache Beam Github " + + "Release page URL or download URL: ", + from); + + from = options.getPrismLocation(); + } + + String fromFileName = getNameWithoutExtension(from); + Path to = Paths.get(userHome(), PRISM_BIN_PATH, fromFileName); + + if (Files.exists(to)) { + return to.toString(); + } + + createDirectoryIfNeeded(to); + + if (from.startsWith("http")) { + String result = resolve(new URL(from), to); + checkState(Files.exists(to), "Resolved location does not exist: %s", result); + return result; + } + + String result = resolve(Paths.get(from), to); + checkState(Files.exists(to), "Resolved location does not exist: %s", result); + return result; + } + + private String resolve(URL from, Path to) throws IOException { + if (from.toString().startsWith(GITHUB_DOWNLOAD_PREFIX)) { + URL shaSumReference = new URL(from + "." + SHA512_EXT); + validateShaSum512(shaSumReference, from); Review Comment: What does this validation get us actually? What vector are we protecting against here? ########## runners/prism/java/src/main/java/org/apache/beam/runners/prism/PrismLocator.java: ########## @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.prism; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Iterator; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import org.apache.beam.sdk.util.ReleaseInfo; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.HashCode; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.hash.Hashing; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.ByteStreams; + +/** + * Locates a Prism executable based on a user's default operating system and architecture + * environment or a {@link PrismPipelineOptions#getPrismLocation()} override. Handles the download, + * unzip, {@link PosixFilePermissions}, as needed. 
For {@link #GITHUB_DOWNLOAD_PREFIX} sources, + * additionally performs a SHA512 verification. + */ +class PrismLocator { + static final String OS_NAME_PROPERTY = "os.name"; + static final String ARCH_PROPERTY = "os.arch"; + static final String USER_HOME_PROPERTY = "user.home"; + + private static final String ZIP_EXT = "zip"; + private static final String SHA512_EXT = "sha512"; + private static final ReleaseInfo RELEASE_INFO = ReleaseInfo.getReleaseInfo(); + private static final String PRISM_BIN_PATH = ".apache_beam/cache/prism/bin"; + private static final Set<PosixFilePermission> PERMS = + PosixFilePermissions.fromString("rwxr-xr-x"); + private static final String GITHUB_DOWNLOAD_PREFIX = + "https://github.com/apache/beam/releases/download"; + private static final String GITHUB_TAG_PREFIX = "https://github.com/apache/beam/releases/tag"; + + private final PrismPipelineOptions options; + + PrismLocator(PrismPipelineOptions options) { + this.options = options; + } + + /** + * Downloads and prepares a Prism executable for use with the {@link PrismRunner}, executed by the + * {@link PrismExecutor}. The returned {@link String} is the absolute path to the Prism + * executable. 
+ */ + String resolve() throws IOException { + + String from = + String.format("%s/v%s/%s.zip", GITHUB_DOWNLOAD_PREFIX, getSDKVersion(), buildFileName()); + + if (!Strings.isNullOrEmpty(options.getPrismLocation())) { + checkArgument( + !options.getPrismLocation().startsWith(GITHUB_TAG_PREFIX), + "Provided --prismLocation URL is not an Apache Beam Github " + + "Release page URL or download URL: ", + from); + + from = options.getPrismLocation(); + } + + String fromFileName = getNameWithoutExtension(from); + Path to = Paths.get(userHome(), PRISM_BIN_PATH, fromFileName); + + if (Files.exists(to)) { + return to.toString(); + } + + createDirectoryIfNeeded(to); + + if (from.startsWith("http")) { + String result = resolve(new URL(from), to); + checkState(Files.exists(to), "Resolved location does not exist: %s", result); + return result; + } + + String result = resolve(Paths.get(from), to); + checkState(Files.exists(to), "Resolved location does not exist: %s", result); + return result; + } + + private String resolve(URL from, Path to) throws IOException { + if (from.toString().startsWith(GITHUB_DOWNLOAD_PREFIX)) { + URL shaSumReference = new URL(from + "." 
+ SHA512_EXT); + validateShaSum512(shaSumReference, from); + } + + BiConsumer<URL, Path> downloadFn = PrismLocator::download; + if (from.getPath().endsWith(ZIP_EXT)) { + downloadFn = PrismLocator::unzip; + } + downloadFn.accept(from, to); + + Files.setPosixFilePermissions(to, PERMS); + + return to.toString(); + } + + private String resolve(Path from, Path to) throws IOException { + + BiConsumer<InputStream, Path> copyFn = PrismLocator::copy; + if (from.endsWith(ZIP_EXT)) { + copyFn = PrismLocator::unzip; + } + + copyFn.accept(from.toUri().toURL().openStream(), to); + ByteStreams.copy(from.toUri().toURL().openStream(), Files.newOutputStream(to)); + Files.setPosixFilePermissions(to, PERMS); + + return to.toString(); + } + + String buildFileName() { + String version = getSDKVersion(); + return String.format("apache_beam-v%s-prism-%s-%s", version, os(), arch()); + } + + private static void unzip(URL from, Path to) { + try { + unzip(from.openStream(), to); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void unzip(InputStream from, Path to) { + try (OutputStream out = Files.newOutputStream(to)) { + ZipInputStream zis = new ZipInputStream(from); + for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) { + InputStream in = ByteStreams.limit(zis, entry.getSize()); + ByteStreams.copy(in, out); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void copy(InputStream from, Path to) { + try { + ByteStreams.copy(from, Files.newOutputStream(to)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void download(URL from, Path to) { + try { + ByteStreams.copy(from.openStream(), Files.newOutputStream(to)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void validateShaSum512(URL shaSumReference, URL source) throws IOException { + try (InputStream in = shaSumReference.openStream()) { + String 
rawContent = new String(ByteStreams.toByteArray(in), StandardCharsets.UTF_8); + checkState(!Strings.isNullOrEmpty(rawContent)); + String reference = ""; + Iterator<String> split = Splitter.onPattern("\\s+").split(rawContent).iterator(); + if (split.hasNext()) { + reference = split.next(); + } + checkState(!Strings.isNullOrEmpty(reference)); + + HashCode toVerify = Hashing.sha512().hashBytes(ByteStreams.toByteArray(source.openStream())); + checkState( + reference.equals(toVerify.toString()), + "Expected sha512 derived from: %s does not equal expected: %s, got: %s", + source, + reference, + toVerify.toString()); + } + } + + private static String getNameWithoutExtension(String path) { + return org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.Files + .getNameWithoutExtension(path); + } + + private String getSDKVersion() { + if (Strings.isNullOrEmpty(options.getPrismVersionOverride())) { + return RELEASE_INFO.getSdkVersion(); + } + return options.getPrismVersionOverride(); + } + + private static String os() { + String result = mustGetPropertyAsLowerCase(OS_NAME_PROPERTY); + if (result.contains("mac")) { + return "darwin"; + } + return result; + } + + private static String arch() { + String result = mustGetPropertyAsLowerCase(ARCH_PROPERTY); + if (result.contains("aarch")) { + return "arm64"; + } + return result; + } + + private static String userHome() { + return mustGetPropertyAsLowerCase(USER_HOME_PROPERTY); + } + + private static String mustGetPropertyAsLowerCase(String name) { + return checkStateNotNull(System.getProperty(name), "System property: " + name + " not set") + .toLowerCase(); + } + + private static void createDirectoryIfNeeded(Path path) throws IOException { + Path parent = path.getParent(); + if (parent == null) { + return; + } + if (parent.toFile().exists()) { + return; + } Review Comment: I think all of this is handled by `Files.createDirectories`. 
The second check, at least, is definitely handled by it; see https://docs.oracle.com/javase/8/docs/api/java/nio/file/Files.html#createDirectories-java.nio.file.Path-java.nio.file.attribute.FileAttribute...- -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
