From: Kevin Wolf <kw...@redhat.com>

For the block job drain test, don't only test draining the source and
the target node, but create a backing chain for the source
(source_backing <- source <- source_overlay) and test draining each of
the nodes in it.

When using iothreads, the source node (and therefore the job) is in a
different AioContext than the drain, which happens from the main
thread. This way, the main thread waits in AIO_WAIT_WHILE() for the
iothread to make progress and aio_wait_kick() is required to notify it.
The test validates that calling bdrv_wakeup() for a child or a parent
node will actually notify AIO_WAIT_WHILE() instead of letting it hang.

Increase the sleep time a bit (to 1 ms) because the test case is racy
and with the shorter sleep, it didn't reproduce the bug it is supposed
to test for me under 'rr record -n'.

This was because bdrv_drain_invoke_entry() (in the main thread) was only
called after the job had already reached the pause point, so we got a
bdrv_dec_in_flight() from the main thread and the additional
aio_wait_kick() when the job becomes idle (that we really wanted to test
here) wasn't even necessary any more to make progress.

Signed-off-by: Kevin Wolf <kw...@redhat.com>
Reviewed-by: Eric Blake <ebl...@redhat.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 tests/test-bdrv-drain.c | 77 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 8 deletions(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index f367e6cdc1..c9f29c8b10 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -786,6 +786,7 @@ typedef struct TestBlockJob {
     BlockJob common;
     int run_ret;
     int prepare_ret;
+    bool running;
     bool should_complete;
 } TestBlockJob;
 
@@ -818,12 +819,17 @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
 {
     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
 
+    /* We are running the actual job code past the pause point in
+     * job_co_entry(). */
+    s->running = true;
+
     job_transition_to_ready(&s->common.job);
     while (!s->should_complete) {
         /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
          * emulate some actual activity (probably some I/O) here so that drain
          * has to wait for this activity to stop. */
-        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
+
         job_pause_point(&s->common.job);
     }
 
@@ -856,11 +862,19 @@ enum test_job_result {
     TEST_JOB_FAIL_PREPARE,
 };
 
-static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
-                                 enum test_job_result result)
+enum test_job_drain_node {
+    TEST_JOB_DRAIN_SRC,
+    TEST_JOB_DRAIN_SRC_CHILD,
+    TEST_JOB_DRAIN_SRC_PARENT,
+};
+
+static void test_blockjob_common_drain_node(enum drain_type drain_type,
+                                            bool use_iothread,
+                                            enum test_job_result result,
+                                            enum test_job_drain_node drain_node)
 {
     BlockBackend *blk_src, *blk_target;
-    BlockDriverState *src, *target;
+    BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs;
     BlockJob *job;
     TestBlockJob *tjob;
     IOThread *iothread = NULL;
@@ -869,8 +883,32 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
 
     src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
                                &error_abort);
+    src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing",
+                                       BDRV_O_RDWR, &error_abort);
+    src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay",
+                                       BDRV_O_RDWR, &error_abort);
+
+    bdrv_set_backing_hd(src_overlay, src, &error_abort);
+    bdrv_unref(src);
+    bdrv_set_backing_hd(src, src_backing, &error_abort);
+    bdrv_unref(src_backing);
+
     blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
-    blk_insert_bs(blk_src, src, &error_abort);
+    blk_insert_bs(blk_src, src_overlay, &error_abort);
+
+    switch (drain_node) {
+    case TEST_JOB_DRAIN_SRC:
+        drain_bs = src;
+        break;
+    case TEST_JOB_DRAIN_SRC_CHILD:
+        drain_bs = src_backing;
+        break;
+    case TEST_JOB_DRAIN_SRC_PARENT:
+        drain_bs = src_overlay;
+        break;
+    default:
+        g_assert_not_reached();
+    }
 
     if (use_iothread) {
         iothread = iothread_new();
@@ -906,11 +944,21 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
     job_start(&job->job);
     aio_context_release(ctx);
 
+    if (use_iothread) {
+        /* job_co_entry() is run in the I/O thread, wait for the actual job
+         * code to start (we don't want to catch the job in the pause point in
+         * job_co_entry()). */
+        while (!tjob->running) {
+            aio_poll(qemu_get_aio_context(), false);
+        }
+    }
+
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
+    g_assert_true(tjob->running);
     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
-    do_drain_begin_unlocked(drain_type, src);
+    do_drain_begin_unlocked(drain_type, drain_bs);
 
     if (drain_type == BDRV_DRAIN_ALL) {
         /* bdrv_drain_all() drains both src and target */
@@ -921,7 +969,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
     g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
-    do_drain_end_unlocked(drain_type, src);
+    do_drain_end_unlocked(drain_type, drain_bs);
 
     if (use_iothread) {
         /* paused is reset in the I/O thread, wait for it */
@@ -969,7 +1017,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
 
     blk_unref(blk_src);
     blk_unref(blk_target);
-    bdrv_unref(src);
+    bdrv_unref(src_overlay);
     bdrv_unref(target);
 
     if (iothread) {
@@ -977,6 +1025,19 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
     }
 }
 
+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
+                                 enum test_job_result result)
+{
+    test_blockjob_common_drain_node(drain_type, use_iothread, result,
+                                    TEST_JOB_DRAIN_SRC);
+    test_blockjob_common_drain_node(drain_type, use_iothread, result,
+                                    TEST_JOB_DRAIN_SRC_CHILD);
+    if (drain_type == BDRV_SUBTREE_DRAIN) {
+        test_blockjob_common_drain_node(drain_type, use_iothread, result,
+                                        TEST_JOB_DRAIN_SRC_PARENT);
+    }
+}
+
 static void test_blockjob_drain_all(void)
 {
     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
-- 
2.17.1


Reply via email to