[PATCH v2 3/5] throttle: support read-only and write-only

2023-06-27 Thread zhenwei pi
Only one direction is necessary in several scenarios:
- a read-only disk
- operations on a device are considered as *write* only. For example,
  encrypt/decrypt/sign/verify operations on a cryptodev use a single
  *write* timer (the read timer callback is defined, but never invoked).

Allow a single direction in throttle; this reduces memory, and the upper
layer does not need a dummy callback any more.

Signed-off-by: zhenwei pi 
---
 util/throttle.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/util/throttle.c b/util/throttle.c
index 5642e61763..c0bd0c26c3 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -199,12 +199,17 @@ static bool throttle_compute_timer(ThrottleState *ts,
 void throttle_timers_attach_aio_context(ThrottleTimers *tt,
 AioContext *new_context)
 {
-tt->timers[THROTTLE_READ] =
-aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
-tt->timers[THROTTLE_WRITE] =
-aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
+if (tt->timer_cb[THROTTLE_READ]) {
+tt->timers[THROTTLE_READ] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
+}
+
+if (tt->timer_cb[THROTTLE_WRITE]) {
+tt->timers[THROTTLE_WRITE] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
+}
 }
 
 /*
@@ -235,6 +240,7 @@ void throttle_timers_init(ThrottleTimers *tt,
   QEMUTimerCB *write_timer_cb,
   void *timer_opaque)
 {
+assert(read_timer_cb || write_timer_cb);
 memset(tt, 0, sizeof(ThrottleTimers));
 
 tt->clock_type = clock_type;
@@ -247,7 +253,9 @@ void throttle_timers_init(ThrottleTimers *tt,
 /* destroy a timer */
 static void throttle_timer_destroy(QEMUTimer **timer)
 {
-assert(*timer != NULL);
+if (*timer == NULL) {
+return;
+}
 
 timer_free(*timer);
 *timer = NULL;
@@ -272,7 +280,7 @@ void throttle_timers_destroy(ThrottleTimers *tt)
 /* is any throttling timer configured */
 bool throttle_timers_are_initialized(ThrottleTimers *tt)
 {
-if (tt->timers[0]) {
+if (tt->timers[THROTTLE_READ] || tt->timers[THROTTLE_WRITE]) {
 return true;
 }
 
@@ -424,8 +432,12 @@ bool throttle_schedule_timer(ThrottleState *ts,
 {
 int64_t now = qemu_clock_get_ns(tt->clock_type);
 int64_t next_timestamp;
+QEMUTimer *timer;
 bool must_wait;
 
+timer = is_write ? tt->timers[THROTTLE_WRITE] : tt->timers[THROTTLE_READ];
+assert(timer);
+
 must_wait = throttle_compute_timer(ts,
is_write,
now,
@@ -437,12 +449,12 @@ bool throttle_schedule_timer(ThrottleState *ts,
 }
 
 /* request throttled and timer pending -> do nothing */
-if (timer_pending(tt->timers[is_write])) {
+if (timer_pending(timer)) {
 return true;
 }
 
 /* request throttled and timer not pending -> arm timer */
-timer_mod(tt->timers[is_write], next_timestamp);
+timer_mod(timer, next_timestamp);
 return true;
 }
 
-- 
2.34.1




[PATCH v2 4/5] test-throttle: test read only and write only

2023-06-27 Thread zhenwei pi
Signed-off-by: zhenwei pi 
---
 tests/unit/test-throttle.c | 66 ++
 1 file changed, 66 insertions(+)

diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
index a60b5fe22e..5547837a58 100644
--- a/tests/unit/test-throttle.c
+++ b/tests/unit/test-throttle.c
@@ -184,6 +184,70 @@ static void test_init(void)
 throttle_timers_destroy(tt);
 }
 
+static void test_init_readonly(void)
+{
+int i;
+
+tt = &tgm.throttle_timers;
+
+/* fill the structures with crap */
+memset(&ts, 1, sizeof(ts));
+memset(tt, 1, sizeof(*tt));
+
+/* init structures */
+throttle_init(&ts);
+throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, NULL, &ts);
+
+/* check initialized fields */
+g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
+g_assert(tt->timers[THROTTLE_READ]);
+g_assert(!tt->timers[THROTTLE_WRITE]);
+
+/* check other fields where cleared */
+g_assert(!ts.previous_leak);
+g_assert(!ts.cfg.op_size);
+for (i = 0; i < BUCKETS_COUNT; i++) {
+g_assert(!ts.cfg.buckets[i].avg);
+g_assert(!ts.cfg.buckets[i].max);
+g_assert(!ts.cfg.buckets[i].level);
+}
+
+throttle_timers_destroy(tt);
+}
+
+static void test_init_writeonly(void)
+{
+int i;
+
+tt = &tgm.throttle_timers;
+
+/* fill the structures with crap */
+memset(&ts, 1, sizeof(ts));
+memset(tt, 1, sizeof(*tt));
+
+/* init structures */
+throttle_init(&ts);
+throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
+ NULL, write_timer_cb, &ts);
+
+/* check initialized fields */
+g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
+g_assert(!tt->timers[THROTTLE_READ]);
+g_assert(tt->timers[THROTTLE_WRITE]);
+
+/* check other fields where cleared */
+g_assert(!ts.previous_leak);
+g_assert(!ts.cfg.op_size);
+for (i = 0; i < BUCKETS_COUNT; i++) {
+g_assert(!ts.cfg.buckets[i].avg);
+g_assert(!ts.cfg.buckets[i].max);
+g_assert(!ts.cfg.buckets[i].level);
+}
+
+throttle_timers_destroy(tt);
+}
+
 static void test_destroy(void)
 {
 int i;
@@ -752,6 +816,8 @@ int main(int argc, char **argv)
 g_test_add_func("/throttle/leak_bucket",test_leak_bucket);
 g_test_add_func("/throttle/compute_wait",   test_compute_wait);
 g_test_add_func("/throttle/init",   test_init);
+g_test_add_func("/throttle/init_readonly",  test_init_readonly);
+g_test_add_func("/throttle/init_writeonly", test_init_writeonly);
 g_test_add_func("/throttle/destroy",test_destroy);
 g_test_add_func("/throttle/have_timer", test_have_timer);
 g_test_add_func("/throttle/detach_attach",  test_detach_attach);
-- 
2.34.1




[PATCH v2 5/5] cryptodev: use NULL throttle timer cb for read direction

2023-06-27 Thread zhenwei pi
Operations on a cryptodev are considered as *write* only, the callback
of the read direction is never invoked. Use NULL instead of an unreachable
path (cryptodev_backend_throttle_timer_cb on the read direction).

Signed-off-by: zhenwei pi 
---
 backends/cryptodev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index 7d29517843..5cfa25c61c 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -331,8 +331,7 @@ static void cryptodev_backend_set_throttle(CryptoDevBackend 
*backend, int field,
 if (!enabled) {
 throttle_init(&backend->ts);
 throttle_timers_init(&backend->tt, qemu_get_aio_context(),
- QEMU_CLOCK_REALTIME,
- cryptodev_backend_throttle_timer_cb, /* FIXME */
+ QEMU_CLOCK_REALTIME, NULL,
  cryptodev_backend_throttle_timer_cb, backend);
 }
 
-- 
2.34.1




[PATCH v2 0/5] Misc fixes for throttle

2023-06-27 Thread zhenwei pi
v1 -> v2:
- rename 'ThrottleTimerType' to 'ThrottleType'
- add assertion to throttle_schedule_timer

Something remained:
- 'bool is_write' is no longer appropriate, the related functions
  need to use 'ThrottleType throttle' instead. To avoid changes from
  other subsystems in this series, do this work in a followup series
  after these patches apply.


v1:
- introduce enum ThrottleTimerType instead of timers[0], timer[1]...
- support read-only and write-only for throttle
- adapt related test codes
- cryptodev uses a write-only throttle timer

Zhenwei Pi (5):
  throttle: introduce enum ThrottleType
  test-throttle: use enum ThrottleType
  throttle: support read-only and write-only
  test-throttle: test read only and write only
  cryptodev: use NULL throttle timer cb for read direction

 backends/cryptodev.c   |  3 +-
 include/qemu/throttle.h| 11 --
 tests/unit/test-throttle.c | 72 --
 util/throttle.c| 36 +--
 4 files changed, 103 insertions(+), 19 deletions(-)

-- 
2.34.1




[PATCH v2 2/5] test-throttle: use enum ThrottleType

2023-06-27 Thread zhenwei pi
Use enum ThrottleType instead in the throttle test codes.

Signed-off-by: zhenwei pi 
---
 tests/unit/test-throttle.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
index 7adb5e6652..a60b5fe22e 100644
--- a/tests/unit/test-throttle.c
+++ b/tests/unit/test-throttle.c
@@ -169,8 +169,8 @@ static void test_init(void)
 
 /* check initialized fields */
 g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
-g_assert(tt->timers[0]);
-g_assert(tt->timers[1]);
+g_assert(tt->timers[THROTTLE_READ]);
+g_assert(tt->timers[THROTTLE_WRITE]);
 
 /* check other fields where cleared */
 g_assert(!ts.previous_leak);
@@ -191,7 +191,7 @@ static void test_destroy(void)
 throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
  read_timer_cb, write_timer_cb, &ts);
 throttle_timers_destroy(tt);
-for (i = 0; i < 2; i++) {
+for (i = 0; i < THROTTLE_MAX; i++) {
 g_assert(!tt->timers[i]);
 }
 }
-- 
2.34.1




[PATCH v2 1/5] throttle: introduce enum ThrottleType

2023-06-27 Thread zhenwei pi
Use enum ThrottleType instead of number index.

Signed-off-by: zhenwei pi 
---
 include/qemu/throttle.h | 11 ---
 util/throttle.c | 16 +---
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index 05f6346137..ba6293eeef 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -99,13 +99,18 @@ typedef struct ThrottleState {
 int64_t previous_leak;/* timestamp of the last leak done */
 } ThrottleState;
 
+typedef enum {
+THROTTLE_READ = 0,
+THROTTLE_WRITE,
+THROTTLE_MAX
+} ThrottleType;
+
 typedef struct ThrottleTimers {
-QEMUTimer *timers[2]; /* timers used to do the throttling */
+QEMUTimer *timers[THROTTLE_MAX];/* timers used to do the throttling */
 QEMUClockType clock_type; /* the clock used */
 
 /* Callbacks */
-QEMUTimerCB *read_timer_cb;
-QEMUTimerCB *write_timer_cb;
+QEMUTimerCB *timer_cb[THROTTLE_MAX];
 void *timer_opaque;
 } ThrottleTimers;
 
diff --git a/util/throttle.c b/util/throttle.c
index 81f247a8d1..5642e61763 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -199,10 +199,12 @@ static bool throttle_compute_timer(ThrottleState *ts,
 void throttle_timers_attach_aio_context(ThrottleTimers *tt,
 AioContext *new_context)
 {
-tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->read_timer_cb, tt->timer_opaque);
-tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->write_timer_cb, tt->timer_opaque);
+tt->timers[THROTTLE_READ] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
+tt->timers[THROTTLE_WRITE] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
 }
 
 /*
@@ -236,8 +238,8 @@ void throttle_timers_init(ThrottleTimers *tt,
 memset(tt, 0, sizeof(ThrottleTimers));
 
 tt->clock_type = clock_type;
-tt->read_timer_cb = read_timer_cb;
-tt->write_timer_cb = write_timer_cb;
+tt->timer_cb[THROTTLE_READ] = read_timer_cb;
+tt->timer_cb[THROTTLE_WRITE] = write_timer_cb;
 tt->timer_opaque = timer_opaque;
 throttle_timers_attach_aio_context(tt, aio_context);
 }
@@ -256,7 +258,7 @@ void throttle_timers_detach_aio_context(ThrottleTimers *tt)
 {
 int i;
 
-for (i = 0; i < 2; i++) {
+for (i = 0; i < THROTTLE_MAX; i++) {
 throttle_timer_destroy(&tt->timers[i]);
 }
 }
-- 
2.34.1




Re: [PULL 00/30] Next patches

2023-06-27 Thread Juan Quintela
Richard Henderson  wrote:
> On 6/22/23 18:54, Juan Quintela wrote:
>> The following changes since commit b455ce4c2f300c8ba47cba7232dd03261368a4cb:
>>Merge tag 'q800-for-8.1-pull-request'
>> ofhttps://github.com/vivier/qemu-m68k  into staging (2023-06-22
>> 10:18:32 +0200)
>> are available in the Git repository at:
>>https://gitlab.com/juan.quintela/qemu.git  tags/next-pull-request
>> for you to fetch changes up to
>> 23e4307eadc1497bd0a11ca91041768f15963b68:
>>migration/rdma: Split qemu_fopen_rdma() into input/output
>> functions (2023-06-22 18:11:58 +0200)
>> 
>> Migration Pull request (20230621) take 2
>> In this pull request the only change is fixing a 32-bit compilation
>> issue.
>> Please apply.
>> [take 1]
>> - fix for multifd thread creation (fabiano)
>> - dirtylimity (hyman)
>>* migration-test will go on next PULL request, as it has failures.
>> - Improve error description (tejus)
>> - improve -incoming and set parameters before calling incoming (wei)
>> - migration atomic counters reviewed patches (quintela)
>> - migration-test refactoring reviewed (quintela)
>
> New failure with check-cfi-x86_64:
>
> https://gitlab.com/qemu-project/qemu/-/jobs/4527202764#L188
>
> /builds/qemu-project/qemu/build/pyvenv/bin/meson test  --no-rebuild -t
> 0  --num-processes 1 --print-errorlogs
>   1/350 qemu:qtest+qtest-x86_64 / qtest-x86_64/qom-test
>   OK 6.55s   8 subtests passed
> ▶   2/350 ERROR:../tests/qtest/migration-test.c:320:check_guests_ram:
> assertion failed: (bad == 0) ERROR
>   2/350 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test
>   ERROR 151.99s   killed by signal 6 SIGABRT

> G_TEST_DBUS_DAEMON=/builds/qemu-project/qemu/tests/dbus-vmstate-daemon.sh
> MALLOC_PERTURB_=3 QTEST_QEMU_IMG=./qemu-img
> QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon
> QTEST_QEMU_BINARY=./qemu-system-x86_64
> /builds/qemu-project/qemu/build/tests/qtest/migration-test --tap
>-k
> ― ✀  ―
> stderr:
> qemu-system-x86_64: Unable to read from socket: Connection reset by peer
> Memory content inconsistency at 4f65000 first_byte = 30 last_byte = 2f
> current = 88 hit_edge = 1
> **
> ERROR:../tests/qtest/migration-test.c:320:check_guests_ram: assertion failed: 
> (bad == 0)
>
> (test program exited with status code -6)
> ――

Still running in bisect mode (this takes forever).

[cc'ing stefan and kevin]

And now I get this problem with gcov:

https://gitlab.com/juan.quintela/qemu/-/jobs/4546094720

357/423 qemu:block / io-qcow2-copy-before-write
ERROR   6.23s   exit status 1
>>> PYTHON=/builds/juan.quintela/qemu/build/pyvenv/bin/python3 
>>> MALLOC_PERTURB_=154 /builds/juan.quintela/qemu/build/pyvenv/bin/python3 
>>> /builds/juan.quintela/qemu/build/../tests/qemu-iotests/check -tap -qcow2 
>>> copy-before-write --source-dir 
>>> /builds/juan.quintela/qemu/tests/qemu-iotests --build-dir 
>>> /builds/juan.quintela/qemu/build/tests/qemu-iotests
― ✀  ―
stderr:
--- /builds/juan.quintela/qemu/tests/qemu-iotests/tests/copy-before-write.out
+++ 
/builds/juan.quintela/qemu/build/scratch/qcow2-file-copy-before-write/copy-before-write.out.bad
@@ -1,5 +1,21 @@
-
+...F
+==
+FAIL: test_timeout_break_snapshot (__main__.TestCbwError)
+--
+Traceback (most recent call last):
+  File 
"/builds/juan.quintela/qemu/tests/qemu-iotests/tests/copy-before-write", line 
210, in test_timeout_break_snapshot
+self.assertEqual(log, """\
+AssertionError: 'wrot[195 chars]read 1048576/1048576 bytes at offset 0\n1 
MiB,[46 chars]c)\n' != 'wrot[195 chars]read failed: Permission denied\n'
+  wrote 524288/524288 bytes at offset 0
+  512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+  wrote 524288/524288 bytes at offset 524288
+  512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++ read failed: Permission denied
+- read 1048576/1048576 bytes at offset 0
+- 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+
 --
 Ran 4 tests
-OK
+FAILED (failures=1)
(test program exited with status code 1)
――

I have no clue how I can make qtests to fail with my changes.

Specially with a read permission.

Any clue?

Later, Juan.

PD. Yeap, continuing the bisect.




Re: [PATCH v4 16/24] nbd/server: Support 64-bit block status

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

The NBD spec states that if the client negotiates extended headers,
the server must avoid NBD_REPLY_TYPE_BLOCK_STATUS and instead use
NBD_REPLY_TYPE_BLOCK_STATUS_EXT which supports 64-bit lengths, even if
the reply does not need more than 32 bits.  As of this patch,
client->mode is still never NBD_MODE_EXTENDED, so the code added here
does not take effect until the next patch enables negotiation.

For now, all metacontexts that we know how to export never populate
more than 32 bits of information, so we don't have to worry about
NBD_REP_ERR_EXT_HEADER_REQD or filtering during handshake, and we
always send all zeroes for the upper 32 bits of status during
NBD_CMD_BLOCK_STATUS.

Note that we previously had some interesting size-juggling on call
chains, such as:

nbd_co_send_block_status(uint32_t length)
-> blockstatus_to_extents(uint32_t bytes)
   -> bdrv_block_status_above(bytes, &uint64_t num)
   -> nbd_extent_array_add(uint64_t num)
 -> store num in 32-bit length

But we were lucky that it never overflowed: bdrv_block_status_above
never sets num larger than bytes, and we had previously been capping
'bytes' at 32 bits (since the protocol does not allow sending a larger
request without extended headers).  This patch adds some assertions
that ensure we continue to avoid overflowing 32 bits for a narrow



[..]


@@ -2162,19 +2187,23 @@ static void 
nbd_extent_array_convert_to_be(NBDExtentArray *ea)
   * would result in an incorrect range reported to the client)
   */
  static int nbd_extent_array_add(NBDExtentArray *ea,
-uint32_t length, uint32_t flags)
+uint64_t length, uint32_t flags)
  {
  assert(ea->can_add);

  if (!length) {
  return 0;
  }
+if (!ea->extended) {
+assert(length <= UINT32_MAX);
+}

  /* Extend previous extent if flags are the same */
  if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
-uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
+uint64_t sum = length + ea->extents[ea->count - 1].length;

-if (sum <= UINT32_MAX) {
+assert(sum >= length);
+if (sum <= UINT32_MAX || ea->extended) {


that "if" and uint64_t sum was to avoid overflow. I think, we can't just 
assert, instead include the check into if:

if (sum >= length && (sum <= UINT32_MAX || ea->extended) {

 ...

}

with this:
Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v4 17/24] nbd/server: Enable initial support for extended headers

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

Time to start supporting clients that request extended headers.  Now
we can finally reach the code added across several previous patches.

Even though the NBD spec has been altered to allow us to accept
NBD_CMD_READ larger than the max payload size (provided our response
is a hole or broken up over more than one data chunk), we are not
planning to take advantage of that, and continue to cap NBD_CMD_READ
to 32M regardless of header size.

For NBD_CMD_WRITE_ZEROES and NBD_CMD_TRIM, the block layer already
supports 64-bit operations without any effort on our part.  For
NBD_CMD_BLOCK_STATUS, the client's length is a hint, and the previous
patch took care of implementing the required
NBD_REPLY_TYPE_BLOCK_STATUS_EXT.

We do not yet support clients that want to do request payload
filtering of NBD_CMD_BLOCK_STATUS; that will be added in later
patches, but is not essential for qemu as a client since qemu only
requests the single context base:allocation.

Signed-off-by: Eric Blake


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v4 18/24] nbd/client: Plumb errp through nbd_receive_replies

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

Instead of ignoring the low-level error just to refabricate our own
message to pass to the caller, we can just plumb the caller's errp
down to the low level.

Signed-off-by: Eric Blake 
---

v4: new patch [Vladimir]
---
  block/nbd.c | 16 ++--
  1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 57123c17f94..c17ce935f17 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -417,7 +417,8 @@ static void coroutine_fn GRAPH_RDLOCK 
nbd_reconnect_attempt(BDRVNBDState *s)
  reconnect_delay_timer_del(s);
  }

-static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie)
+static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie,
+Error **errp)
  {
  int ret;
  uint64_t ind = COOKIE_TO_INDEX(cookie), ind2;
@@ -458,9 +459,12 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState 
*s, uint64_t cookie)

  /* We are under mutex and cookie is 0. We have to do the dirty work. 
*/
  assert(s->reply.cookie == 0);
-ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL);
-if (ret <= 0) {
-ret = ret ? ret : -EIO;
+ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, errp);
+if (ret == 0) {
+ret = -EIO;
+error_setg(errp, "server dropped connection");


we also need to set errp for further negative returns in the function


+}
+if (ret < 0) {
  nbd_channel_error(s, ret);
  return ret;
  }
@@ -843,9 +847,9 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
  }
  *request_ret = 0;

-ret = nbd_receive_replies(s, cookie);
+ret = nbd_receive_replies(s, cookie, errp);
  if (ret < 0) {
-error_setg(errp, "Connection closed");
+error_prepend(errp, "Connection closed: ");
  return -EIO;
  }
  assert(s->ioc);


--
Best regards,
Vladimir




Re: [PATCH v4 19/24] nbd/client: Initial support for extended headers

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

Update the client code to be able to send an extended request, and
parse an extended header from the server.  Note that since we reject
any structured reply with a too-large payload, we can always normalize
a valid header back into the compact form, so that the caller need not
deal with two branches of a union.  Still, until a later patch lets
the client negotiate extended headers, the code added here should not
be reached.  Note that because of the different magic numbers, it is
just as easy to trace and then tolerate a non-compliant server sending
the wrong header reply as it would be to insist that the server is
compliant.

Signed-off-by: Eric Blake 
---

v4: split off errp handling to separate patch [Vladimir], better
function naming [Vladimir]
---
  include/block/nbd.h |   3 +-
  block/nbd.c |   2 +-
  nbd/client.c| 100 +---
  nbd/trace-events|   3 +-
  4 files changed, 72 insertions(+), 36 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index dc05f5981fb..af80087e2cd 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -389,7 +389,8 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo 
*info,
   Error **errp);
  int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
  int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
-   NBDReply *reply, Error **errp);
+   NBDReply *reply, NBDMode mode,
+   Error **errp);
  int nbd_client(int fd);
  int nbd_disconnect(int fd);
  int nbd_errno_to_system_errno(int err);
diff --git a/block/nbd.c b/block/nbd.c
index c17ce935f17..e281fac43d1 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -459,7 +459,7 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState 
*s, uint64_t cookie,

  /* We are under mutex and cookie is 0. We have to do the dirty work. 
*/
  assert(s->reply.cookie == 0);
-ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, errp);
+ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, s->info.mode, errp);
  if (ret == 0) {
  ret = -EIO;
  error_setg(errp, "server dropped connection");
diff --git a/nbd/client.c b/nbd/client.c
index 1495a9b0ab1..a4598a95427 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -1352,22 +1352,29 @@ int nbd_disconnect(int fd)

  int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
  {
-uint8_t buf[NBD_REQUEST_SIZE];
+uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
+size_t len;

-assert(request->mode <= NBD_MODE_STRUCTURED); /* TODO handle extended */
-assert(request->len <= UINT32_MAX);
  trace_nbd_send_request(request->from, request->len, request->cookie,
 request->flags, request->type,
 nbd_cmd_lookup(request->type));

-stl_be_p(buf, NBD_REQUEST_MAGIC);
  stw_be_p(buf + 4, request->flags);
  stw_be_p(buf + 6, request->type);
  stq_be_p(buf + 8, request->cookie);
  stq_be_p(buf + 16, request->from);
-stl_be_p(buf + 24, request->len);
+if (request->mode >= NBD_MODE_EXTENDED) {
+stl_be_p(buf, NBD_EXTENDED_REQUEST_MAGIC);
+stq_be_p(buf + 24, request->len);
+len = NBD_EXTENDED_REQUEST_SIZE;
+} else {
+assert(request->len <= UINT32_MAX);
+stl_be_p(buf, NBD_REQUEST_MAGIC);
+stl_be_p(buf + 24, request->len);
+len = NBD_REQUEST_SIZE;
+}

-return nbd_write(ioc, buf, sizeof(buf), NULL);
+return nbd_write(ioc, buf, len, NULL);
  }

  /* nbd_receive_simple_reply
@@ -1394,30 +1401,36 @@ static int nbd_receive_simple_reply(QIOChannel *ioc, 
NBDSimpleReply *reply,
  return 0;
  }

-/* nbd_receive_structured_reply_chunk
+/* nbd_receive_reply_chunk_header
   * Read structured reply chunk except magic field (which should be already
- * read).
+ * read).  Normalize into the compact form.
   * Payload is not read.
   */
-static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
-  NBDStructuredReplyChunk *chunk,
-  Error **errp)
+static int nbd_receive_reply_chunk_header(QIOChannel *ioc, NBDReply *chunk,
+  Error **errp)
  {
  int ret;
+size_t len;
+uint64_t payload_len;

-assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
+len = sizeof(chunk->structured);
+} else {
+assert(chunk->magic == NBD_EXTENDED_REPLY_MAGIC);
+len = sizeof(chunk->extended);
+}

  ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
-   sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
+   len - sizeof(chunk->magic), "structured chunk",
 errp);
  if (ret < 0) {
 

Re: [PATCH v4 20/24] nbd/client: Accept 64-bit block status chunks

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

Once extended mode is enabled, we need to accept 64-bit status replies
(even for replies that don't exceed a 32-bit length).  It is easier to
normalize narrow replies into wide format so that the rest of our code
only has to handle one width.  Although a server is non-compliant if
it sends a 64-bit reply in compact mode, or a 32-bit reply in extended
mode, it is still easy enough to tolerate these mismatches.

In normal execution, we are only requesting "base:allocation" which
never exceeds 32 bits for flag values. But during testing with
x-dirty-bitmap, we can force qemu to connect to some other context
that might have 64-bit status bit; however, we ignore those upper bits
(other than mapping qemu:allocation-depth into something that
'qemu-img map --output=json' can expose), and since that only affects
testing, we really don't bother with checking whether more than the
two least-significant bits are set.

Signed-off-by: Eric Blake 


Reviewed-by: Vladimir Sementsov-Ogievskiy 

[..]


+ * count at 0 for a narrow server).  However, it's easy enough to
+ * ignore the server's noncompliance without killing the
   * connection; just ignore trailing extents, and clamp things to
   * the length of our request.
   */
-if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
-trace_nbd_parse_blockstatus_compliance("more than one extent");
+if (count != wide ||
+chunk->length > sizeof(context_id) + wide * sizeof(count) + len) {


this calculation is done twice. I'd put it into expected_chunk_length variable.


+trace_nbd_parse_blockstatus_compliance("unexpected extent count");
  }
  if (extent->length > orig_length) {
  extent->length = orig_length;
@@ -1123,7 +1136,7 @@ nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t 
cookie,

  static int coroutine_fn
  nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie,


[..]

--
Best regards,
Vladimir




Re: [PATCH v4 21/24] nbd/client: Request extended headers during negotiation

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

All the pieces are in place for a client to finally request extended
headers.  Note that we must not request extended headers when qemu-nbd
is used to connect to the kernel module (as nbd.ko does not expect
them, but expects us to do the negotiation in userspace before handing
the socket over to the kernel), but there is no harm in all other
clients requesting them.

Extended headers are not essential to the information collected during
'qemu-nbd --list', but probing for it gives us one more piece of
information in that output.  Update the iotests affected by the new
line of output.

Signed-off-by: Eric Blake


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v4 22/24] nbd/server: Refactor list of negotiated meta contexts

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

Perform several minor refactorings of how the list of negotiated meta
contexts is managed, to make upcoming patches easier: Promote the
internal type NBDExportMetaContexts to the public opaque type
NBDMetaContexts, and mark exp const.  Use a shorter member name in
NBDClient.  Hoist calls to nbd_check_meta_context() earlier in their
callers, as the number of negotiated contexts may impact the flags
exposed in regards to an export, which in turn requires a new
parameter.


Hmm, a bit of semantic change: we drop context in nbd_check_meta_export() even 
when we report error. Pre-patch we call nbd_check_meta_export() only on success 
path.. Still, it seems even more safe.

Reviewed-by: Vladimir Sementsov-Ogievskiy 

  Drop a redundant parameter to nbd_negotiate_meta_queries.

No semantic change intended.

Signed-off-by: Eric Blake 



--
Best regards,
Vladimir




Re: [Libguestfs] [PATCH v4 23/24] nbd/server: Prepare for per-request filtering of BLOCK_STATUS

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 22:15, Eric Blake wrote:

On Thu, Jun 08, 2023 at 08:56:52AM -0500, Eric Blake wrote:

The next commit will add support for the optional extension
NBD_CMD_FLAG_PAYLOAD during NBD_CMD_BLOCK_STATUS, where the client can
request that the server only return a subset of negotiated contexts,
rather than all contexts.  To make that task easier, this patch
populates the list of contexts to return on a per-command basis (for
now, identical to the full set of negotiated contexts).

Signed-off-by: Eric Blake 
---




+++ b/nbd/server.c
@@ -2491,6 +2491,8 @@ static int coroutine_fn 
nbd_co_receive_request(NBDRequestData *req, NBDRequest *
  error_setg(errp, "No memory");
  return -ENOMEM;
  }
+} else if (request->type == NBD_CMD_BLOCK_STATUS) {
+request->contexts = &client->contexts;
  }


THere are paths where request->contexts is left NULL but request->type
was set...


@@ -2841,6 +2848,11 @@ static coroutine_fn void nbd_trip(void *opaque)
  } else {
  ret = nbd_handle_request(client, &request, req->data, &local_err);
  }
+if (request.type == NBD_CMD_BLOCK_STATUS &&
+request.contexts != &client->contexts) {
+g_free(request.contexts->bitmaps);
+g_free(request.contexts);
+}


so I think this also needs to be tweaked to check that
request.contexts is non-NULL before dereferencing to free bitmaps.



agree (and I missed this during my review :)

suggest:

if (request.contexts && request.contexts != &client->contexts) {
  g_free(..)
  g_free(..)
}


--
Best regards,
Vladimir




Re: [PATCH v2 1/5] migration: Use proper indentation for migration.json

2023-06-27 Thread Peter Xu
On Thu, Jun 22, 2023 at 09:50:15PM +0200, Juan Quintela wrote:
> We broke it with dirtyrate limit patches.
> 
> Signed-off-by: Juan Quintela 

Acked-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH v2 5/5] migration: Deprecate old compression method

2023-06-27 Thread Peter Xu
On Thu, Jun 22, 2023 at 09:50:19PM +0200, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Acked-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH v4 24/24] nbd/server: Add FLAG_PAYLOAD support to CMD_BLOCK_STATUS

2023-06-27 Thread Vladimir Sementsov-Ogievskiy

On 08.06.23 16:56, Eric Blake wrote:

Allow a client to request a subset of negotiated meta contexts.  For
example, a client may ask to use a single connection to learn about
both block status and dirty bitmaps, but where the dirty bitmap
queries only need to be performed on a subset of the disk; forcing the
server to compute that information on block status queries in the rest
of the disk is wasted effort (both at the server, and on the amount of
traffic sent over the wire to be parsed and ignored by the client).

Qemu as an NBD client never requests to use more than one meta
context, so it has no need to use block status payloads.  Testing this
instead requires support from libnbd, which CAN access multiple meta
contexts in parallel from a single NBD connection; an interop test
submitted to the libnbd project at the same time as this patch
demonstrates the feature working, as well as testing some corner cases
(for example, when the payload length is longer than the export
length), although other corner cases (like passing the same id
duplicated) requires a protocol fuzzer because libnbd is not wired up
to break the protocol that badly.

This also includes tweaks to 'qemu-nbd --list' to show when a server
is advertising the capability, and to the testsuite to reflect the
addition to that output.

Signed-off-by: Eric Blake 
---


[..]


+static int
+nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
+ Error **errp)
+{
+int payload_len = request->len;
+g_autofree char *buf = NULL;
+size_t count, i, nr_bitmaps;
+uint32_t id;
+
+assert(request->len <= NBD_MAX_BUFFER_SIZE);
+assert(client->contexts.exp == client->exp);
+nr_bitmaps = client->exp->nr_export_bitmaps;
+request->contexts = g_new0(NBDMetaContexts, 1);
+request->contexts->exp = client->exp;
+
+if (payload_len % sizeof(uint32_t) ||
+payload_len < sizeof(NBDBlockStatusPayload) ||
+payload_len > (sizeof(NBDBlockStatusPayload) +
+   sizeof(id) * client->contexts.count)) {
+goto skip;
+}
+
+buf = g_malloc(payload_len);
+if (nbd_read(client->ioc, buf, payload_len,
+ "CMD_BLOCK_STATUS data", errp) < 0) {
+return -EIO;
+}
+trace_nbd_co_receive_request_payload_received(request->cookie,
+  payload_len);
+request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
+count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
+payload_len = 0;
+
+for (i = 0; i < count; i++) {
+id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
+if (id == NBD_META_ID_BASE_ALLOCATION) {
+if (request->contexts->base_allocation) {
+goto skip;
+}
+request->contexts->base_allocation = true;
+} else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
+if (request->contexts->allocation_depth) {
+goto skip;
+}
+request->contexts->allocation_depth = true;
+} else {


maybe,

int ind = id - NBD_META_ID_DIRTY_BITMAP;



+if (id - NBD_META_ID_DIRTY_BITMAP > nr_bitmaps ||



= ?



+request->contexts->bitmaps[id - NBD_META_ID_DIRTY_BITMAP]) {
+goto skip;
+}
+request->contexts->bitmaps[id - NBD_META_ID_DIRTY_BITMAP] = true;
+}
+}
+
+request->len = ldq_be_p(buf);
+request->contexts->count = count;
+return 0;
+
+ skip:
+trace_nbd_co_receive_block_status_payload_compliance(request->from,
+ request->len);
+request->len = request->contexts->count = 0;
+return nbd_drop(client->ioc, payload_len, errp);
+}
+
  /* nbd_co_receive_request
   * Collect a client request. Return 0 if request looks valid, -EIO to drop
   * connection right away, -EAGAIN to indicate we were interrupted and the
@@ -2470,7 +2552,13 @@ static int coroutine_fn 
nbd_co_receive_request(NBDRequestData *req, NBDRequest *


[..]


@@ -2712,7 +2804,8 @@ static coroutine_fn int nbd_handle_request(NBDClient 
*client,
"discard failed", errp);

  case NBD_CMD_BLOCK_STATUS:
-if (!request->len) {
+assert(request->contexts);
+if (!request->len && !(request->flags & NBD_CMD_FLAG_PAYLOAD_LEN)) {


why not error-out for len=0 in case of payload?


  return nbd_send_generic_reply(client, request, -EINVAL,
"need non-zero length", errp);
  }
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 1d155fc2c66..cbca062 100644



--
Best regards,
Vladimir