Repository: mesos Updated Branches: refs/heads/master e17be1925 -> 53a79b229
Added a test verifying that DefaultExecutor tasks can use nvidia GPUs.

Review: https://reviews.apache.org/r/61282/

Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/dd06684d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/dd06684d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/dd06684d

Branch: refs/heads/master
Commit: dd06684dbcefe98a6ba2b46b8bdf4eda718cee44
Parents: e17be19
Author: Gastón Kleiman <gas...@mesosphere.io>
Authored: Fri Aug 25 11:40:21 2017 -0700
Committer: Vinod Kone <vinodk...@gmail.com>
Committed: Fri Aug 25 11:40:21 2017 -0700

----------------------------------------------------------------------
 .../containerizer/nvidia_gpu_isolator_tests.cpp | 92 ++++++++++++++++++++
 1 file changed, 92 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/mesos/blob/dd06684d/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp b/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
index 9a78ae6..f9b26bc 100644
--- a/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
+++ b/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
@@ -649,6 +649,98 @@ TEST_F(NvidiaGpuTest, ROOT_NVIDIA_GPU_VolumeShouldInject)
   ASSERT_FALSE(volume->shouldInject(manifest.get()));
 }
 
+
+// This test verifies that the DefaultExecutor is able to launch tasks
+// with restricted access to GPUs.
+// It launches a task with 1 GPU and verifies that a call to
+// `nvidia-smi` both succeeds and reports exactly 1 GPU available.
+TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_DefaultExecutorVerifyDeviceAccess)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Turn on Nvidia GPU isolation.
+  // Assume at least one GPU is available for isolation.
+  slave::Flags flags = CreateSlaveFlags();
+  flags.isolation = "filesystem/linux,cgroups/devices,gpu/nvidia";
+  flags.resources = "cpus:1"; // To override the default with gpus:0.
+
+#ifndef USE_SSL_SOCKET
+  // Disable operator API authentication for the default executor. Executor
+  // authentication currently has SSL as a dependency, so we cannot require
+  // executors to authenticate with the agent operator API if Mesos was not
+  // built with SSL support.
+  flags.authenticate_http_readwrite = false;
+#endif // USE_SSL_SOCKET
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.add_capabilities()->set_type(
+      FrameworkInfo::Capability::GPU_RESOURCES);
+
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  Future<FrameworkID> frameworkId;
+  EXPECT_CALL(sched, registered(&driver, _, _))
+    .WillOnce(FutureArg<1>(&frameworkId));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(frameworkId);
+
+  Resources resources = Resources::parse("cpus:0.1;mem:32;disk:32").get();
+
+  ExecutorInfo executorInfo;
+  executorInfo.set_type(ExecutorInfo::DEFAULT);
+  executorInfo.mutable_executor_id()->CopyFrom(DEFAULT_EXECUTOR_ID);
+  executorInfo.mutable_framework_id()->CopyFrom(frameworkId.get());
+  executorInfo.mutable_resources()->CopyFrom(resources);
+
+  AWAIT_READY(offers);
+  ASSERT_FALSE(offers->empty()); // Fatal: `front()` below needs an offer.
+
+  const Offer& offer = offers->front();
+  const SlaveID& slaveId = offer.slave_id();
+
+  TaskInfo taskInfo = createTask(
+      slaveId,
+      Resources::parse("cpus:0.1;mem:128;gpus:1").get(),
+      "NUM_GPUS=`nvidia-smi --list-gpus | wc -l`;\n"
+      "if [ \"$NUM_GPUS\" != \"1\" ]; then\n"
+      " exit 1;\n"
+      "fi");
+
+  TaskGroupInfo taskGroup = createTaskGroupInfo({taskInfo});
+
+  Future<TaskStatus> statusRunning, statusFinished;
+
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillOnce(FutureArg<1>(&statusRunning))
+    .WillOnce(FutureArg<1>(&statusFinished));
+
+  driver.acceptOffers({offer.id()}, {LAUNCH_GROUP(executorInfo, taskGroup)});
+
+  AWAIT_READY(statusRunning);
+  ASSERT_EQ(TASK_RUNNING, statusRunning->state());
+
+  AWAIT_READY(statusFinished);
+  ASSERT_EQ(TASK_FINISHED, statusFinished->state());
+
+  driver.stop();
+  driver.join();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {