[ 
https://issues.apache.org/jira/browse/MESOS-1365?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14007482#comment-14007482
 ] 

Jie Yu commented on MESOS-1365:
-------------------------------

It looks like the second offer, which sched2 is expected to receive, can 
instead be sent to sched1. If sched1 does not decline that offer, sched2 may 
never receive it.

> SlaveRecoveryTest/0.MultipleFrameworks is flaky
> -----------------------------------------------
>
>                 Key: MESOS-1365
>                 URL: https://issues.apache.org/jira/browse/MESOS-1365
>             Project: Mesos
>          Issue Type: Bug
>          Components: test
>            Reporter: Dominic Hamon
>            Assignee: Jie Yu
>            Priority: Minor
>             Fix For: 0.19.0
>
>
> --gtest_repeat=-1 --gtest_shuffle --gtest_break_on_failure
> {noformat}
> [ RUN      ] SlaveRecoveryTest/0.MultipleFrameworks
> WARNING: Logging before InitGoogleLogging() is written to STDERR
> I0513 15:42:05.931761  4320 exec.cpp:131] Version: 0.19.0
> I0513 15:42:05.936698  4340 exec.cpp:205] Executor registered on slave 
> 20140513-154204-16842879-51872-13062-0
> Registered executor on artoo
> Starting task 51991f97-f5fd-4905-ad0f-02668083af7c
> Forked command at 4367
> sh -c 'sleep 1000'
> WARNING: Logging before InitGoogleLogging() is written to STDERR
> I0513 15:42:06.915061  4408 exec.cpp:131] Version: 0.19.0
> I0513 15:42:06.931149  4435 exec.cpp:205] Executor registered on slave 
> 20140513-154204-16842879-51872-13062-0
> Registered executor on artoo
> Starting task eaf5d8d6-3a6c-4ee1-84c1-fae20fb1df83
> sh -c 'sleep 1000'
> Forked command at 4439
> I0513 15:42:06.998332  4340 exec.cpp:251] Received reconnect request from 
> slave 20140513-154204-16842879-51872-13062-0
> I0513 15:42:06.998414  4436 exec.cpp:251] Received reconnect request from 
> slave 20140513-154204-16842879-51872-13062-0
> I0513 15:42:07.006350  4437 exec.cpp:228] Executor re-registered on slave 
> 20140513-154204-16842879-51872-13062-0
> Re-registered executor on artoo
> I0513 15:42:07.027039  4337 exec.cpp:378] Executor asked to shutdown
> Shutting down
> Sending SIGTERM to process tree at pid 4367
> Killing the following process trees:
> [ 
> -+- 4367 sh -c sleep 1000 
>  \--- 4368 sleep 1000 
> ]
> ../../src/tests/slave_recovery_tests.cpp:2807: Failure
> Value of: status1.get().state()
>   Actual: TASK_FAILED
> Expected: TASK_KILLED
> Program received signal SIGSEGV, Segmentation fault.
> testing::UnitTest::AddTestPartResult (this=0x154dac0 
> <testing::UnitTest::GetInstance()::instance>, 
> result_type=testing::TestPartResult::kFatalFailure, file_name=0xeb6b6c 
> "../../src/tests/slave_recovery_tests.cpp", line_number=2807, message=..., 
> os_stack_trace=...) at gmock-1.6.0/gtest/src/gtest.cc:3795
> 3795          *static_cast<volatile int*>(NULL) = 1;
> (gdb) bt
> #0  testing::UnitTest::AddTestPartResult (this=0x154dac0 
> <testing::UnitTest::GetInstance()::instance>, 
> result_type=testing::TestPartResult::kFatalFailure, file_name=0xeb6b6c 
> "../../src/tests/slave_recovery_tests.cpp", line_number=2807, message=..., 
> os_stack_trace=...) at gmock-1.6.0/gtest/src/gtest.cc:3795
> #1  0x0000000000df98b9 in testing::internal::AssertHelper::operator= 
> (this=0x7fffffffb860, message=...) at gmock-1.6.0/gtest/src/gtest.cc:356
> #2  0x0000000000cdfa57 in 
> SlaveRecoveryTest_MultipleFrameworks_Test<mesos::internal::slave::MesosContainerizer>::TestBody
>  (this=0x1954db0) at ../../src/tests/slave_recovery_tests.cpp:2807
> #3  0x0000000000e22583 in 
> testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, 
> void> (object=0x1954db0, method=&virtual testing::Test::TestBody(), 
> location=0xed0af0 "the test body") at gmock-1.6.0/gtest/src/gtest.cc:2090
> #4  0x0000000000e12467 in 
> testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void> 
> (object=0x1954db0, method=&virtual testing::Test::TestBody(), 
> location=0xed0af0 "the test body") at gmock-1.6.0/gtest/src/gtest.cc:2126
> #5  0x0000000000e010d5 in testing::Test::Run (this=0x1954db0) at 
> gmock-1.6.0/gtest/src/gtest.cc:2161
> #6  0x0000000000e01ceb in testing::TestInfo::Run (this=0x158cf80) at 
> gmock-1.6.0/gtest/src/gtest.cc:2338
> #7  0x0000000000e02387 in testing::TestCase::Run (this=0x158a880) at 
> gmock-1.6.0/gtest/src/gtest.cc:2445
> #8  0x0000000000e079ed in testing::internal::UnitTestImpl::RunAllTests 
> (this=0x1558b40) at gmock-1.6.0/gtest/src/gtest.cc:4237
> #9  0x0000000000e1ec83 in 
> testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl,
>  bool> (object=0x1558b40, method=(bool 
> (testing::internal::UnitTestImpl::*)(testing::internal::UnitTestImpl * 
> const)) 0xe07700 <testing::internal::UnitTestImpl::RunAllTests()>, 
>     location=0xed1219 "auxiliary test code (environments or event 
> listeners)") at gmock-1.6.0/gtest/src/gtest.cc:2090
> #10 0x0000000000e14217 in 
> testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl,
>  bool> (object=0x1558b40, method=(bool 
> (testing::internal::UnitTestImpl::*)(testing::internal::UnitTestImpl * 
> const)) 0xe07700 <testing::internal::UnitTestImpl::RunAllTests()>, 
>     location=0xed1219 "auxiliary test code (environments or event 
> listeners)") at gmock-1.6.0/gtest/src/gtest.cc:2126
> #11 0x0000000000e076d7 in testing::UnitTest::Run (this=0x154dac0 
> <testing::UnitTest::GetInstance()::instance>) at 
> gmock-1.6.0/gtest/src/gtest.cc:3872
> #12 0x0000000000b99887 in main (argc=1, argv=0x7fffffffd9f8) at 
> ../../src/tests/main.cpp:107
> (gdb) frame 2
> #2  0x0000000000cdfa57 in 
> SlaveRecoveryTest_MultipleFrameworks_Test<mesos::internal::slave::MesosContainerizer>::TestBody
>  (this=0x1954db0) at ../../src/tests/slave_recovery_tests.cpp:2807
> 2807      ASSERT_EQ(TASK_KILLED, status1.get().state());
> (gdb) p status1
> $1 = {data = {<std::__shared_ptr<process::Future<mesos::TaskStatus>::Data, 
> 2>> = {_M_ptr = 0x1963140, _M_refcount = {_M_pi = 0x198a620}}, <No data 
> fields>}}
> (gdb) p status1.get()
> $2 = (const mesos::TaskStatus &) @0x7fffdc5bf5f0: 
> {<google::protobuf::Message> = {<google::protobuf::MessageLite> = 
> {_vptr$MessageLite = 0x7ffff74bc940 <vtable for mesos::TaskStatus+16>}, <No 
> data fields>}, static kTaskIdFieldNumber = 1, static kStateFieldNumber = 2, 
> static kMessageFieldNumber = 4, 
>   static kDataFieldNumber = 3, static kSlaveIdFieldNumber = 5, static 
> kTimestampFieldNumber = 6, _unknown_fields_ = {fields_ = 0x0}, task_id_ = 
> 0x7fffdc5ce9a0, message_ = 0x7fffdc5f5880, data_ = 0x154b4b0 
> <google::protobuf::internal::kEmptyString>, slave_id_ = 0x7fffdc59c4f0, 
> timestamp_ = 1429688582.046252, 
>   state_ = 3, _cached_size_ = 0, _has_bits_ = {55}, static default_instance_ 
> = 0x0}
> (gdb) p status1.get().state()
> $3 = mesos::TASK_FAILED
> (gdb) list
> 2802      // Kill task 1.
> 2803      driver1.killTask(task1.task_id());
> 2804
> 2805      // Wait for TASK_KILLED update.
> 2806      AWAIT_READY(status1);
> 2807      ASSERT_EQ(TASK_KILLED, status1.get().state());
> 2808
> 2809      // Kill task 2.
> 2810      driver2.killTask(task2.task_id());
> 2811
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to