zentol commented on a change in pull request #16286: URL: https://github.com/apache/flink/pull/16286#discussion_r662086061
########## File path: flink-clients/src/test/java/org/apache/flink/client/program/PackagedProgramRetrieverImplTest.java ########## @@ -0,0 +1,573 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.client.program; + +import org.apache.flink.api.common.JobID; +import org.apache.flink.api.dag.Pipeline; +import org.apache.flink.client.deployment.application.EntryClassInformationProvider; +import org.apache.flink.client.deployment.executors.PipelineExecutorUtils; +import org.apache.flink.client.testjar.ClasspathProvider; +import org.apache.flink.configuration.ConfigConstants; +import org.apache.flink.configuration.ConfigUtils; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.CoreOptions; +import org.apache.flink.configuration.PipelineOptions; +import org.apache.flink.configuration.PipelineOptionsInternal; +import org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoaders; +import org.apache.flink.runtime.jobgraph.JobGraph; +import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings; +import org.apache.flink.util.ChildFirstClassLoader; +import org.apache.flink.util.ExceptionUtils; +import org.apache.flink.util.FileUtils; 
+import org.apache.flink.util.FlinkException; +import org.apache.flink.util.TestLogger; + +import org.hamcrest.collection.IsIterableContainingInAnyOrder; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assume; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsNot.not; +import static org.junit.Assert.fail; + +/** {@code PackagedProgramRetrieverImplTest} tests {@link PackagedProgramRetrieverImpl}. */ +public class PackagedProgramRetrieverImplTest extends TestLogger { + + @Rule public ClasspathProvider classpathProvider = new ClasspathProvider(); + + @Test + public void testDeriveEntryClassInformationForPythonBasedOnJobName() throws FlinkException { + // TODO: see FLINK-23154 + Assume.assumeTrue( + "This tests only succeeds if " + ConfigConstants.ENV_FLINK_OPT_DIR + " is set.", + System.getProperty(ConfigConstants.ENV_FLINK_OPT_DIR) != null); + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + null, + Collections.emptyList(), + null, + PackagedProgramUtils.getPythonDriverClassName(), + new String[0]); + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat( + informationProvider.getJobClassName().get(), + is(PackagedProgramUtils.getPythonDriverClassName())); + assertThat(informationProvider.getJarFile().isPresent(), is(true)); + assertThat( + informationProvider.getJarFile().get(), + is(PackagedProgramUtils.getPythonJar().getFile())); + } + + @Test + public void 
testDeriveEntryClassInformationForPythonBasedOnParameter() throws FlinkException { + // TODO: see FLINK-23154 + Assume.assumeTrue( + "This tests only succeeds if " + ConfigConstants.ENV_FLINK_OPT_DIR + " is set.", + System.getProperty(ConfigConstants.ENV_FLINK_OPT_DIR) != null); + + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + null, Collections.emptyList(), null, null, new String[] {"--python"}); + + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat( + informationProvider.getJobClassName().get(), + is(PackagedProgramUtils.getPythonDriverClassName())); + assertThat(informationProvider.getJarFile().isPresent(), is(true)); + assertThat( + informationProvider.getJarFile().get(), + is(PackagedProgramUtils.getPythonJar().getFile())); + } + + @Test + public void testDeriveEntryClassInformationForCustomJar() + throws FlinkException, MalformedURLException { + // make loading from system classpath fail to make sure that it's not triggered + classpathProvider.setSystemClasspathWithTwoEntryClasses(); + + final String jobClassName = "SomeJobClassName"; + final File jarFile = new File("some/jar/file.jar"); + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + null, null, jarFile, jobClassName, new String[0]); + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat(informationProvider.getJobClassName().get(), is(jobClassName)); + assertThat(informationProvider.getJarFile().isPresent(), is(true)); + assertThat(informationProvider.getJarFile().get(), is(jarFile)); + } + + @Test + // TODO: we might want to change this behavior triggering a failure here + public void testDeriveEntryClassInformationFromSystemClasspathWithNonExistingJobClassName() + throws IOException, FlinkException { + classpathProvider.setSystemClasspathWithEntryClass(); + + final String 
jobClassName = "SomeJobClassNotBeingOnTheSystemClasspath"; + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + null, null, null, jobClassName, new String[0]); + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat(informationProvider.getJobClassName().get(), is(jobClassName)); + assertThat(informationProvider.getJarFile().isPresent(), is(false)); + } + + @Test + public void testDeriveEntryClassInformationFromSystemClasspathWithExistingJobClassName() + throws IOException, FlinkException { + classpathProvider.setSystemClasspathWithEntryClass(); + + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + null, null, null, classpathProvider.getJobClassName(), new String[0]); + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat( + informationProvider.getJobClassName().get(), + is(classpathProvider.getJobClassName())); + assertThat(informationProvider.getJarFile().isPresent(), is(false)); + } + + @Test + public void testDeriveEntryClassInformationFromSystemClasspathExtractingTheJobClassFromThere() + throws IOException, FlinkException { + classpathProvider.setSystemClasspathWithEntryClass(); + + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + null, null, null, null, new String[0]); + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat( + informationProvider.getJobClassName().get(), + is(classpathProvider.getJobClassName())); + assertThat(informationProvider.getJarFile().isPresent(), is(false)); + } + + @Test + public void testDeriveEntryClassInformationFromClasspathWithJobClass() + throws IOException, FlinkException { + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + 
// the user directory must be specified + classpathProvider.getUserDirectoryWithTwoEntryClasses(), + // the user classpath is derived from the user directory outside of the + // method + classpathProvider.getURLUserClasspathWithTwoEntryClasses(), + null, + // we have to specify the job class - otherwise the call would fail due to + // two main method being present + classpathProvider.getJobClassName(), + new String[0]); + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat( + informationProvider.getJobClassName().get(), + is(classpathProvider.getJobClassName())); + assertThat(informationProvider.getJarFile().isPresent(), is(false)); + } + + @Test + public void testDeriveEntryClassInformationFromClasspathWithNoJobClass() + throws IOException, FlinkException { + final EntryClassInformationProvider informationProvider = + PackagedProgramRetrieverImpl.createEntryClassInformationProvider( + // the user directory must be specified + classpathProvider.getUserDirectoryWithOneEntryClass(), + // the user classpath is derived from the user directory outside of the + // method + classpathProvider.getURLUserClasspathWithEntryClass(), + null, + // no job class name is specified which enables looking for the entry class + // on the user classpath + null, + new String[0]); + assertThat(informationProvider.getJobClassName().isPresent(), is(true)); + assertThat( + informationProvider.getJobClassName().get(), + is(classpathProvider.getJobClassName())); + assertThat(informationProvider.getJarFile().isPresent(), is(false)); + } + + @Test + public void testCreateWithUserLibDir() throws FlinkException { + final PackagedProgramRetriever retriever = + PackagedProgramRetrieverImpl.create( + classpathProvider.getUserDirectoryWithOneEntryClass(), + null, + classpathProvider.getJobClassName(), + new String[0], + new Configuration()); + + // the right information is picked up without any error + assertThat( + retriever.getPackagedProgram().getMainClassName(), + 
is(classpathProvider.getJobClassName())); + } + + @Test + public void testJobGraphRetrieval() + throws IOException, FlinkException, ProgramInvocationException { + final int parallelism = 42; + final JobID jobId = new JobID(); + + final Configuration configuration = new Configuration(); + configuration.setInteger(CoreOptions.DEFAULT_PARALLELISM, parallelism); + configuration.set(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID, jobId.toHexString()); + + final String expectedSuffix = "suffix"; + final PackagedProgramRetriever retriever = + PackagedProgramRetrieverImpl.create( + null, + classpathProvider.getTestJobClassName(), + classpathProvider.getTestJobArgs(expectedSuffix), + new Configuration()); + + final JobGraph jobGraph = retrieveJobGraph(retriever, configuration); + + assertThat( + jobGraph.getName(), + is(classpathProvider.getTestJobClassName() + "-" + expectedSuffix)); + assertThat(jobGraph.getSavepointRestoreSettings(), is(SavepointRestoreSettings.none())); + assertThat(jobGraph.getMaximumParallelism(), is(parallelism)); + assertThat(jobGraph.getJobID(), is(jobId)); + } + + @Test + public void testJobGraphRetrievalFromJar() + throws IOException, FlinkException, ProgramInvocationException { + final String expectedSuffix = "suffix"; + final PackagedProgramRetriever retrieverUnderTest = + PackagedProgramRetrieverImpl.create( + classpathProvider.getUserDirectoryWithTestJob(), + null, + null, + classpathProvider.getTestJobArgs(expectedSuffix), + new Configuration()); + + final JobGraph jobGraph = retrieveJobGraph(retrieverUnderTest, new Configuration()); + + assertThat( + jobGraph.getName(), + is(classpathProvider.getTestJobClassName() + "-" + expectedSuffix)); + } + + @Test + public void testJobGraphRetrievalJobClassNameHasPrecedenceOverClasspath() + throws IOException, FlinkException, ProgramInvocationException { + // TODO: I don't get this test: It feels like it does not add any extra value - it does the + // same like testTooManyEntryClassesOnSystemClasspath 
+ final String expectedSuffix = "suffix"; + final PackagedProgramRetriever retrieverUnderTest = + // Both a class name is specified and a JAR "is" on the class path + // The class name should have precedence. + PackagedProgramRetrieverImpl.create( + null, + classpathProvider.getTestJobClassName(), + classpathProvider.getTestJobArgs(expectedSuffix), + new Configuration()); + + final JobGraph jobGraph = retrieveJobGraph(retrieverUnderTest, new Configuration()); + + assertThat(jobGraph.getName(), is(classpathProvider.getTestJobClassName() + "-suffix")); + } + + @Test + public void testSavepointRestoreSettings() + throws FlinkException, IOException, ProgramInvocationException { + final Configuration configuration = new Configuration(); + final SavepointRestoreSettings savepointRestoreSettings = + SavepointRestoreSettings.forPath("foobar", true); + final JobID jobId = new JobID(); + + configuration.setString(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID, jobId.toHexString()); + SavepointRestoreSettings.toConfiguration(savepointRestoreSettings, configuration); + + final String expectedSuffix = "suffix"; + final PackagedProgramRetriever retrieverUnderTest = + PackagedProgramRetrieverImpl.create( + null, + classpathProvider.getTestJobClassName(), + classpathProvider.getTestJobArgs(expectedSuffix), + new Configuration()); + + final JobGraph jobGraph = retrieveJobGraph(retrieverUnderTest, configuration); + + assertThat(jobGraph.getSavepointRestoreSettings(), is(savepointRestoreSettings)); + assertThat(jobGraph.getJobID(), is(jobId)); + } + + @Test + public void testFailIfJobDirDoesNotHaveEntryClass() { + try { + PackagedProgramRetrieverImpl.create( + classpathProvider.getUserDirectoryWithNoEntryClass(), + classpathProvider.getTestJobClassName(), + classpathProvider.getTestJobArgs("suffix"), + new Configuration()); + fail("This case should throw exception !"); + } catch (FlinkException e) { + assertThat( + ExceptionUtils.findThrowableWithMessage( + e, + String.format( + 
"Could not find the provided job class (%s) in the user lib directory.", + classpathProvider.getTestJobClassName())) + .isPresent(), + is(true)); + } + } + + // TODO: this test checks the same code path as + // testDeriveEntryClassInformationFromSystemClasspathWithNonExistingJobClassName + // We should make it fail early if the class is not present on the system classpath + // Right now, the test is failing because no error is thrown + @Ignore + @Test(expected = FlinkException.class) + public void testEntryClassNotFoundOnSystemClasspath() throws FlinkException { + PackagedProgramRetrieverImpl.create( + null, "NotExistingClass", new String[0], new Configuration()); + } + + @Test(expected = FlinkException.class) + public void testEntryClassNotFoundOnUserClasspath() throws FlinkException { + PackagedProgramRetrieverImpl.create( + classpathProvider.getUserDirectoryWithNoEntryClass(), + classpathProvider.getTestJobClassName(), + new String[0], + new Configuration()); + } + + @Test(expected = FlinkException.class) + public void testTooManyEntryClassesOnSystemClasspath() throws FlinkException { + PackagedProgramRetrieverImpl.create(null, null, new String[0], new Configuration()); + } + + @Test + public void testTooManyEntryClassesOnUserClasspath() throws FlinkException, IOException { + classpathProvider.setSystemClasspathWithTwoEntryClasses(); + final PackagedProgramRetrieverImpl retriever = + PackagedProgramRetrieverImpl.create( + classpathProvider.getUserDirectoryWithTwoEntryClasses(), + classpathProvider.getTestJobClassName(), + new String[0], + new Configuration()); + assertThat( + retriever.getPackagedProgram().getMainClassName(), + is(classpathProvider.getTestJobClassName())); + } + + @Test + public void testRetrieveCorrectUserClasspathsWithoutSpecifiedEntryClass() + throws IOException, FlinkException, ProgramInvocationException { + final PackagedProgramRetriever retrieverUnderTest = + PackagedProgramRetrieverImpl.create( + 
classpathProvider.getUserDirectoryWithOneEntryClass(), + null, + classpathProvider.getTestJobArgs("suffix"), + new Configuration()); + final JobGraph jobGraph = retrieveJobGraph(retrieverUnderTest, new Configuration()); + final List<String> actualClasspath = + jobGraph.getClasspaths().stream().map(URL::toString).collect(Collectors.toList()); + + final Path workingDirectory = FileUtils.getCurrentWorkingDirectory(); + final List<String> expectedClasspath = new ArrayList<>(); + for (File file : classpathProvider.getUserDirectoryWithOneEntryClass().listFiles()) { + if (!file.getName().endsWith("jar")) { + // only jars are expected + continue; + } + + Path relativePath = FileUtils.relativizePath(workingDirectory, file.toPath()); + expectedClasspath.add(FileUtils.toURL(relativePath).toString()); + } + + assertThat( + actualClasspath, + IsIterableContainingInAnyOrder.containsInAnyOrder(expectedClasspath.toArray())); + } + + @Test + public void testRetrieveCorrectUserClasspathsWithSpecifiedEntryClass() + throws IOException, FlinkException, ProgramInvocationException { + final PackagedProgramRetriever retrieverUnderTest = + PackagedProgramRetrieverImpl.create( + classpathProvider.getUserDirectoryWithOneEntryClass(), + classpathProvider.getJobClassName(), + classpathProvider.getTestJobArgs("suffix"), + new Configuration()); + final JobGraph jobGraph = retrieveJobGraph(retrieverUnderTest, new Configuration()); + final List<String> actualClasspath = + jobGraph.getClasspaths().stream().map(URL::toString).collect(Collectors.toList()); + + final Path workingDirectory = FileUtils.getCurrentWorkingDirectory(); + final List<String> expectedClasspath = new ArrayList<>(); + for (File file : classpathProvider.getUserDirectoryWithOneEntryClass().listFiles()) { + if (!file.getName().endsWith("jar")) { + // only jars are expected + continue; + } + + Path relativePath = FileUtils.relativizePath(workingDirectory, file.toPath()); + 
expectedClasspath.add(FileUtils.toURL(relativePath).toString()); + } + + assertThat( + actualClasspath, + IsIterableContainingInAnyOrder.containsInAnyOrder(expectedClasspath.toArray())); + } + + @Test + public void testRetrieveFromJarFileWithoutUserLib() + throws IOException, FlinkException, ProgramInvocationException { + final PackagedProgramRetriever retrieverUnderTest = + PackagedProgramRetrieverImpl.create( + null, + classpathProvider.getTestJobJar(), + null, + classpathProvider.getTestJobArgs("suffix"), + new Configuration()); + final JobGraph jobGraph = retrieveJobGraph(retrieverUnderTest, new Configuration()); + + assertThat( + jobGraph.getUserJars(), + containsInAnyOrder( + new org.apache.flink.core.fs.Path( + classpathProvider.getTestJobJar().toURI()))); + assertThat(jobGraph.getClasspaths().isEmpty(), is(true)); + } + + @Test + public void testRetrieveFromJarFileWithUserLib() + throws IOException, FlinkException, ProgramInvocationException { + final PackagedProgramRetriever retrieverUnderTest = + PackagedProgramRetrieverImpl.create( + classpathProvider.getUserDirectoryWithOneEntryClass(), + classpathProvider.getTestJobJar(), + null, + classpathProvider.getTestJobArgs("suffix"), + new Configuration()); + final JobGraph jobGraph = retrieveJobGraph(retrieverUnderTest, new Configuration()); + + assertThat( + jobGraph.getUserJars(), + containsInAnyOrder( + new org.apache.flink.core.fs.Path( + classpathProvider.getTestJobJar().toURI()))); + final List<String> actualClasspath = + jobGraph.getClasspaths().stream().map(URL::toString).collect(Collectors.toList()); + final Path workingDirectory = FileUtils.getCurrentWorkingDirectory(); + final List<String> expectedClasspath = new ArrayList<>(); + for (File file : classpathProvider.getUserDirectoryWithOneEntryClass().listFiles()) { + if (!file.getName().endsWith("jar")) { + // only jars are expected + continue; + } + + Path relativePath = FileUtils.relativizePath(workingDirectory, file.toPath()); + 
expectedClasspath.add(FileUtils.toURL(relativePath).toString()); + } + + assertThat( + actualClasspath, + IsIterableContainingInAnyOrder.containsInAnyOrder(expectedClasspath.toArray())); + } + + @Test + public void testChildFirstDefaultConfiguration() throws FlinkException { Review comment: Shouldn't this then belong in a `PackagedProgramTest`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org