[Qemu-devel] [PATCH v8 10/11] block-backend: Add blk_co_copy_range

2018-05-31 Thread Fam Zheng
It's a BlockBackend wrapper of the BDS interface.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
---
 block/block-backend.c  | 18 ++
 include/sysemu/block-backend.h |  4 
 2 files changed, 22 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 89f47b00ea..d55c328736 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2211,3 +2211,21 @@ void blk_unregister_buf(BlockBackend *blk, void *host)
 {
 bdrv_unregister_buf(blk_bs(blk), host);
 }
+
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+   BlockBackend *blk_out, int64_t off_out,
+   int bytes, BdrvRequestFlags flags)
+{
+int r;
+r = blk_check_byte_request(blk_in, off_in, bytes);
+if (r) {
+return r;
+}
+r = blk_check_byte_request(blk_out, off_out, bytes);
+if (r) {
+return r;
+}
+return bdrv_co_copy_range(blk_in->root, off_in,
+  blk_out->root, off_out,
+  bytes, flags);
+}
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 92ab624fac..8d03d493c2 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -232,4 +232,8 @@ void blk_set_force_allow_inactivate(BlockBackend *blk);
 void blk_register_buf(BlockBackend *blk, void *host, size_t size);
 void blk_unregister_buf(BlockBackend *blk, void *host);
 
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+   BlockBackend *blk_out, int64_t off_out,
+   int bytes, BdrvRequestFlags flags);
+
 #endif
-- 
2.17.0




[Qemu-devel] [PATCH v8 11/11] qemu-img: Convert with copy offloading

2018-05-31 Thread Fam Zheng
The new blk_co_copy_range interface offers a more efficient way in the
case of network based storage. Make use of it to allow faster convert
operation.

Since copy offloading cannot do zero detection ('-S') and compression
(-c), only try it when these options are not used.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
---
 qemu-img.c | 50 --
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 976b437da0..75f1610aa0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1547,6 +1547,7 @@ typedef struct ImgConvertState {
 bool compressed;
 bool target_has_backing;
 bool wr_in_order;
+bool copy_range;
 int min_sparse;
 size_t cluster_sectors;
 size_t buf_sectors;
@@ -1740,6 +1741,37 @@ static int coroutine_fn convert_co_write(ImgConvertState 
*s, int64_t sector_num,
 return 0;
 }
 
+static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t 
sector_num,
+  int nb_sectors)
+{
+int n, ret;
+
+while (nb_sectors > 0) {
+BlockBackend *blk;
+int src_cur;
+int64_t bs_sectors, src_cur_offset;
+int64_t offset;
+
+convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
+offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
+blk = s->src[src_cur];
+bs_sectors = s->src_sectors[src_cur];
+
+n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
+
+ret = blk_co_copy_range(blk, offset, s->target,
+sector_num << BDRV_SECTOR_BITS,
+n << BDRV_SECTOR_BITS, 0);
+if (ret < 0) {
+return ret;
+}
+
+sector_num += n;
+nb_sectors -= n;
+}
+return 0;
+}
+
 static void coroutine_fn convert_co_do_copy(void *opaque)
 {
 ImgConvertState *s = opaque;
@@ -1762,6 +1794,7 @@ static void coroutine_fn convert_co_do_copy(void *opaque)
 int n;
 int64_t sector_num;
 enum ImgConvertBlockStatus status;
+bool copy_range;
 
 qemu_co_mutex_lock(&s->lock);
 if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
@@ -1791,7 +1824,9 @@ static void coroutine_fn convert_co_do_copy(void *opaque)
 s->allocated_sectors, 0);
 }
 
-if (status == BLK_DATA) {
+retry:
+copy_range = s->copy_range && s->status == BLK_DATA;
+if (status == BLK_DATA && !copy_range) {
 ret = convert_co_read(s, sector_num, n, buf);
 if (ret < 0) {
 error_report("error while reading sector %" PRId64
@@ -1813,7 +1848,15 @@ static void coroutine_fn convert_co_do_copy(void *opaque)
 }
 
 if (s->ret == -EINPROGRESS) {
-ret = convert_co_write(s, sector_num, n, buf, status);
+if (copy_range) {
+ret = convert_co_copy_range(s, sector_num, n);
+if (ret) {
+s->copy_range = false;
+goto retry;
+}
+} else {
+ret = convert_co_write(s, sector_num, n, buf, status);
+}
 if (ret < 0) {
 error_report("error while writing sector %" PRId64
  ": %s", sector_num, strerror(-ret));
@@ -1936,6 +1979,7 @@ static int img_convert(int argc, char **argv)
 ImgConvertState s = (ImgConvertState) {
 /* Need at least 4k of zeros for sparse detection */
 .min_sparse = 8,
+.copy_range = true,
 .buf_sectors= IO_BUF_SIZE / BDRV_SECTOR_SIZE,
 .wr_in_order= true,
 .num_coroutines = 8,
@@ -1976,6 +2020,7 @@ static int img_convert(int argc, char **argv)
 break;
 case 'c':
 s.compressed = true;
+s.copy_range = false;
 break;
 case 'o':
 if (!is_valid_option_list(optarg)) {
@@ -2017,6 +2062,7 @@ static int img_convert(int argc, char **argv)
 }
 
 s.min_sparse = sval / BDRV_SECTOR_SIZE;
+s.copy_range = false;
 break;
 }
 case 'p':
-- 
2.17.0




[Qemu-devel] [PATCH v8 04/11] raw: Implement copy offloading

2018-05-31 Thread Fam Zheng
Just pass down to ->file.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
---
 block/raw-format.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/block/raw-format.c b/block/raw-format.c
index b69a0674b3..f2e468df6f 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -497,6 +497,36 @@ static int raw_probe_geometry(BlockDriverState *bs, 
HDGeometry *geo)
 return bdrv_probe_geometry(bs->file->bs, geo);
 }
 
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
+   BdrvChild *src, uint64_t 
src_offset,
+   BdrvChild *dst, uint64_t 
dst_offset,
+   uint64_t bytes, 
BdrvRequestFlags flags)
+{
+int ret;
+
+ret = raw_adjust_offset(bs, &src_offset, bytes, false);
+if (ret) {
+return ret;
+}
+return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
+   bytes, flags);
+}
+
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
+ BdrvChild *src, uint64_t 
src_offset,
+ BdrvChild *dst, uint64_t 
dst_offset,
+ uint64_t bytes, BdrvRequestFlags 
flags)
+{
+int ret;
+
+ret = raw_adjust_offset(bs, &dst_offset, bytes, true);
+if (ret) {
+return ret;
+}
+return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes,
+ flags);
+}
+
 BlockDriver bdrv_raw = {
 .format_name  = "raw",
 .instance_size= sizeof(BDRVRawState),
@@ -513,6 +543,8 @@ BlockDriver bdrv_raw = {
 .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
 .bdrv_co_pdiscard = &raw_co_pdiscard,
 .bdrv_co_block_status = &raw_co_block_status,
+.bdrv_co_copy_range_from = &raw_co_copy_range_from,
+.bdrv_co_copy_range_to  = &raw_co_copy_range_to,
 .bdrv_truncate= &raw_truncate,
 .bdrv_getlength   = &raw_getlength,
 .has_variable_length  = true,
-- 
2.17.0




[Qemu-devel] [PATCH v8 06/11] file-posix: Implement bdrv_co_copy_range

2018-05-31 Thread Fam Zheng
With copy_file_range(2), we can implement the bdrv_co_copy_range
semantics.

Signed-off-by: Fam Zheng 
---
 block/file-posix.c  | 98 +++--
 configure   | 17 +++
 include/block/raw-aio.h | 10 -
 3 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 5a602cfe37..513d371bb1 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -59,6 +59,7 @@
 #ifdef __linux__
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -187,6 +188,8 @@ typedef struct RawPosixAIOData {
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
 off_t aio_offset;
 int aio_type;
+int aio_fd2;
+off_t aio_offset2;
 } RawPosixAIOData;
 
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1446,6 +1449,49 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData 
*aiocb)
 return -ENOTSUP;
 }
 
+#ifndef HAVE_COPY_FILE_RANGE
+static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
+ off_t *out_off, size_t len, unsigned int flags)
+{
+#ifdef __NR_copy_file_range
+return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
+   out_off, len, flags);
+#else
+errno = ENOSYS;
+return -1;
+#endif
+}
+#endif
+
+static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb)
+{
+uint64_t bytes = aiocb->aio_nbytes;
+off_t in_off = aiocb->aio_offset;
+off_t out_off = aiocb->aio_offset2;
+
+while (bytes) {
+ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off,
+  aiocb->aio_fd2, &out_off,
+  bytes, 0);
+if (ret == -EINTR) {
+continue;
+}
+if (ret < 0) {
+if (errno == ENOSYS) {
+return -ENOTSUP;
+} else {
+return -errno;
+}
+}
+if (!ret) {
+/* No progress (e.g. when beyond EOF), fall back to buffer I/O. */
+return -ENOTSUP;
+}
+bytes -= ret;
+}
+return 0;
+}
+
 static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
 {
 int ret = -EOPNOTSUPP;
@@ -1526,6 +1572,9 @@ static int aio_worker(void *arg)
 case QEMU_AIO_WRITE_ZEROES:
 ret = handle_aiocb_write_zeroes(aiocb);
 break;
+case QEMU_AIO_COPY_RANGE:
+ret = handle_aiocb_copy_range(aiocb);
+break;
 default:
 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
 ret = -EINVAL;
@@ -1536,9 +1585,10 @@ static int aio_worker(void *arg)
 return ret;
 }
 
-static int paio_submit_co(BlockDriverState *bs, int fd,
-  int64_t offset, QEMUIOVector *qiov,
-  int bytes, int type)
+static int paio_submit_co_full(BlockDriverState *bs, int fd,
+   int64_t offset, int fd2, int64_t offset2,
+   QEMUIOVector *qiov,
+   int bytes, int type)
 {
 RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
 ThreadPool *pool;
@@ -1546,6 +1596,8 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
 acb->bs = bs;
 acb->aio_type = type;
 acb->aio_fildes = fd;
+acb->aio_fd2 = fd2;
+acb->aio_offset2 = offset2;
 
 acb->aio_nbytes = bytes;
 acb->aio_offset = offset;
@@ -1561,6 +1613,13 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
 return thread_pool_submit_co(pool, aio_worker, acb);
 }
 
+static inline int paio_submit_co(BlockDriverState *bs, int fd,
+ int64_t offset, QEMUIOVector *qiov,
+ int bytes, int type)
+{
+return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type);
+}
+
 static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
 int64_t offset, QEMUIOVector *qiov, int bytes,
 BlockCompletionFunc *cb, void *opaque, int type)
@@ -2451,6 +2510,35 @@ static void raw_abort_perm_update(BlockDriverState *bs)
 raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
 }
 
+static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
+   BdrvChild *src, uint64_t 
src_offset,
+   BdrvChild *dst, uint64_t 
dst_offset,
+   uint64_t bytes, 
BdrvRequestFlags flags)
+{
+return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, 
flags);
+}
+
+static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
+ BdrvChild *src, uint64_t 
src_offset,
+ BdrvChild *dst, uint64_t 
dst_offset,
+ uint64_t bytes, BdrvRequestFlags 
flags)
+{
+BDRVRawState *s = bs->opaque;
+BDRVRawState *src_s;
+
+assert(dst->bs =

[Qemu-devel] [PATCH v8 03/11] raw: Check byte range uniformly

2018-05-31 Thread Fam Zheng
We don't verify the request range against s->size in the I/O callbacks
except for raw_co_pwritev. This is inconsistent (especially for
raw_co_pwrite_zeroes and raw_co_pdiscard), so fix them, in the meanwhile
make the helper reusable by the coming new callbacks.

Note that in most cases the block layer already verifies the request
byte range against our reported image length, before invoking the driver
callbacks.  The exception is during image creating, after
blk_set_allow_write_beyond_eof(blk, true) is called. But in that case,
the requests are not directly from the user or guest. So there is no
visible behavior change in adding the check code.

The int64_t -> uint64_t inconsistency, as shown by the type casting, is
pre-existing due to the interface.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Signed-off-by: Fam Zheng 
---
 block/raw-format.c | 64 --
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/block/raw-format.c b/block/raw-format.c
index fe33693a2d..b69a0674b3 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -167,16 +167,37 @@ static void raw_reopen_abort(BDRVReopenState *state)
 state->opaque = NULL;
 }
 
+/* Check and adjust the offset, against 'offset' and 'size' options. */
+static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
+uint64_t bytes, bool is_write)
+{
+BDRVRawState *s = bs->opaque;
+
+if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
+/* There's not enough space for the write, or the read request is
+ * out-of-range. Don't read/write anything to prevent leaking out of
+ * the size specified in options. */
+return is_write ? -ENOSPC : -EINVAL;;
+}
+
+if (*offset > INT64_MAX - s->offset) {
+return -EINVAL;
+}
+*offset += s->offset;
+
+return 0;
+}
+
 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
   uint64_t bytes, QEMUIOVector *qiov,
   int flags)
 {
-BDRVRawState *s = bs->opaque;
+int ret;
 
-if (offset > UINT64_MAX - s->offset) {
-return -EINVAL;
+ret = raw_adjust_offset(bs, &offset, bytes, false);
+if (ret) {
+return ret;
 }
-offset += s->offset;
 
 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
 return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -186,23 +207,11 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState 
*bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
 {
-BDRVRawState *s = bs->opaque;
 void *buf = NULL;
 BlockDriver *drv;
 QEMUIOVector local_qiov;
 int ret;
 
-if (s->has_size && (offset > s->size || bytes > (s->size - offset))) {
-/* There's not enough space for the data. Don't write anything and just
- * fail to prevent leaking out of the size specified in options. */
-return -ENOSPC;
-}
-
-if (offset > UINT64_MAX - s->offset) {
-ret = -EINVAL;
-goto fail;
-}
-
 if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
 /* Handling partial writes would be a pain - so we just
  * require that guests have 512-byte request alignment if
@@ -237,7 +246,10 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState 
*bs, uint64_t offset,
 qiov = &local_qiov;
 }
 
-offset += s->offset;
+ret = raw_adjust_offset(bs, &offset, bytes, true);
+if (ret) {
+goto fail;
+}
 
 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
 ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -267,22 +279,24 @@ static int coroutine_fn 
raw_co_pwrite_zeroes(BlockDriverState *bs,
  int64_t offset, int bytes,
  BdrvRequestFlags flags)
 {
-BDRVRawState *s = bs->opaque;
-if (offset > UINT64_MAX - s->offset) {
-return -EINVAL;
+int ret;
+
+ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+if (ret) {
+return ret;
 }
-offset += s->offset;
 return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
 int64_t offset, int bytes)
 {
-BDRVRawState *s = bs->opaque;
-if (offset > UINT64_MAX - s->offset) {
-return -EINVAL;
+int ret;
+
+ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+if (ret) {
+return ret;
 }
-offset += s->offset;
 return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
 }
 
-- 
2.17.0




[Qemu-devel] [PATCH v8 08/11] iscsi: Create and use iscsi_co_wait_for_task

2018-05-31 Thread Fam Zheng
This loop is repeated a growing number times. Make a helper.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
---
 block/iscsi.c | 54 ---
 1 file changed, 17 insertions(+), 37 deletions(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 6d0035d4b9..6a365cb07b 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -556,6 +556,17 @@ static inline bool iscsi_allocmap_is_valid(IscsiLun 
*iscsilun,
offset / iscsilun->cluster_size) == size);
 }
 
+static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
+IscsiLun *iscsilun)
+{
+while (!iTask->complete) {
+iscsi_set_events(iscsilun);
+qemu_mutex_unlock(&iscsilun->mutex);
+qemu_coroutine_yield();
+qemu_mutex_lock(&iscsilun->mutex);
+}
+}
+
 static int coroutine_fn
 iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 QEMUIOVector *iov, int flags)
@@ -617,12 +628,7 @@ retry:
 scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
   iov->niov);
 #endif
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_mutex_unlock(&iscsilun->mutex);
-qemu_coroutine_yield();
-qemu_mutex_lock(&iscsilun->mutex);
-}
+iscsi_co_wait_for_task(&iTask, iscsilun);
 
 if (iTask.task != NULL) {
 scsi_free_scsi_task(iTask.task);
@@ -693,13 +699,7 @@ retry:
 ret = -ENOMEM;
 goto out_unlock;
 }
-
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_mutex_unlock(&iscsilun->mutex);
-qemu_coroutine_yield();
-qemu_mutex_lock(&iscsilun->mutex);
-}
+iscsi_co_wait_for_task(&iTask, iscsilun);
 
 if (iTask.do_retry) {
 if (iTask.task != NULL) {
@@ -863,13 +863,8 @@ retry:
 #if LIBISCSI_API_VERSION < (20160603)
 scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, 
iov->niov);
 #endif
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_mutex_unlock(&iscsilun->mutex);
-qemu_coroutine_yield();
-qemu_mutex_lock(&iscsilun->mutex);
-}
 
+iscsi_co_wait_for_task(&iTask, iscsilun);
 if (iTask.task != NULL) {
 scsi_free_scsi_task(iTask.task);
 iTask.task = NULL;
@@ -906,12 +901,7 @@ retry:
 return -ENOMEM;
 }
 
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_mutex_unlock(&iscsilun->mutex);
-qemu_coroutine_yield();
-qemu_mutex_lock(&iscsilun->mutex);
-}
+iscsi_co_wait_for_task(&iTask, iscsilun);
 
 if (iTask.task != NULL) {
 scsi_free_scsi_task(iTask.task);
@@ -1143,12 +1133,7 @@ retry:
 goto out_unlock;
 }
 
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_mutex_unlock(&iscsilun->mutex);
-qemu_coroutine_yield();
-qemu_mutex_lock(&iscsilun->mutex);
-}
+iscsi_co_wait_for_task(&iTask, iscsilun);
 
 if (iTask.task != NULL) {
 scsi_free_scsi_task(iTask.task);
@@ -1244,12 +1229,7 @@ retry:
 return -ENOMEM;
 }
 
-while (!iTask.complete) {
-iscsi_set_events(iscsilun);
-qemu_mutex_unlock(&iscsilun->mutex);
-qemu_coroutine_yield();
-qemu_mutex_lock(&iscsilun->mutex);
-}
+iscsi_co_wait_for_task(&iTask, iscsilun);
 
 if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
 iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
-- 
2.17.0




[Qemu-devel] [PATCH v8 05/11] qcow2: Implement copy offloading

2018-05-31 Thread Fam Zheng
The two callbacks are implemented quite similarly to the read/write
functions: bdrv_co_copy_range_from maps for read and calls into bs->file
or bs->backing depending on the allocation status; bdrv_co_copy_range_to
maps for write and calls into bs->file.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Fam Zheng 
---
 block/qcow2.c | 229 +++---
 1 file changed, 199 insertions(+), 30 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 6d532470a8..8f89c4fe72 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1762,6 +1762,39 @@ static int coroutine_fn 
qcow2_co_block_status(BlockDriverState *bs,
 return status;
 }
 
+static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
+QCowL2Meta **pl2meta,
+bool link_l2)
+{
+int ret = 0;
+QCowL2Meta *l2meta = *pl2meta;
+
+while (l2meta != NULL) {
+QCowL2Meta *next;
+
+if (!ret && link_l2) {
+ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+if (ret) {
+goto out;
+}
+}
+
+/* Take the request off the list of running requests */
+if (l2meta->nb_clusters != 0) {
+QLIST_REMOVE(l2meta, next_in_flight);
+}
+
+qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+next = l2meta->next;
+g_free(l2meta);
+l2meta = next;
+}
+out:
+*pl2meta = l2meta;
+return ret;
+}
+
 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
 uint64_t bytes, QEMUIOVector *qiov,
 int flags)
@@ -2048,24 +2081,9 @@ static coroutine_fn int 
qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
 }
 }
 
-while (l2meta != NULL) {
-QCowL2Meta *next;
-
-ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
-if (ret < 0) {
-goto fail;
-}
-
-/* Take the request off the list of running requests */
-if (l2meta->nb_clusters != 0) {
-QLIST_REMOVE(l2meta, next_in_flight);
-}
-
-qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
-next = l2meta->next;
-g_free(l2meta);
-l2meta = next;
+ret = qcow2_handle_l2meta(bs, &l2meta, true);
+if (ret) {
+goto fail;
 }
 
 bytes -= cur_bytes;
@@ -2076,18 +2094,7 @@ static coroutine_fn int 
qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
 ret = 0;
 
 fail:
-while (l2meta != NULL) {
-QCowL2Meta *next;
-
-if (l2meta->nb_clusters != 0) {
-QLIST_REMOVE(l2meta, next_in_flight);
-}
-qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
-next = l2meta->next;
-g_free(l2meta);
-l2meta = next;
-}
+qcow2_handle_l2meta(bs, &l2meta, false);
 
 qemu_co_mutex_unlock(&s->lock);
 
@@ -3274,6 +3281,166 @@ static coroutine_fn int 
qcow2_co_pdiscard(BlockDriverState *bs,
 return ret;
 }
 
+static int coroutine_fn
+qcow2_co_copy_range_from(BlockDriverState *bs,
+ BdrvChild *src, uint64_t src_offset,
+ BdrvChild *dst, uint64_t dst_offset,
+ uint64_t bytes, BdrvRequestFlags flags)
+{
+BDRVQcow2State *s = bs->opaque;
+int ret;
+unsigned int cur_bytes; /* number of bytes in current iteration */
+BdrvChild *child = NULL;
+BdrvRequestFlags cur_flags;
+
+assert(!bs->encrypted);
+qemu_co_mutex_lock(&s->lock);
+
+while (bytes != 0) {
+uint64_t copy_offset = 0;
+/* prepare next request */
+cur_bytes = MIN(bytes, INT_MAX);
+cur_flags = flags;
+
+ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, 
©_offset);
+if (ret < 0) {
+goto out;
+}
+
+switch (ret) {
+case QCOW2_CLUSTER_UNALLOCATED:
+if (bs->backing && bs->backing->bs) {
+int64_t backing_length = bdrv_getlength(bs->backing->bs);
+if (src_offset >= backing_length) {
+cur_flags |= BDRV_REQ_ZERO_WRITE;
+} else {
+child = bs->backing;
+cur_bytes = MIN(cur_bytes, backing_length - src_offset);
+copy_offset = src_offset;
+}
+} else {
+cur_flags |= BDRV_REQ_ZERO_WRITE;
+}
+break;
+
+case QCOW2_CLUSTER_ZERO_PLAIN:
+case QCOW2_CLUSTER_ZERO_ALLOC:
+cur_flags |= BDRV_REQ_ZERO_WRITE;
+break;
+
+case QCOW2_CLUSTER_COMPRESSED:
+ret = -ENOTSUP;
+goto out;
+break;
+
+case QCOW2_CLUSTER_NORMAL:
+child = bs->file;
+c

[Qemu-devel] [PATCH v8 02/11] block: Introduce API for copy offloading

2018-05-31 Thread Fam Zheng
Introduce the bdrv_co_copy_range() API for copy offloading.  Block
drivers implementing this API support efficient copy operations that
avoid reading each block from the source device and writing it to the
destination devices.  Examples of copy offload primitives are SCSI
EXTENDED COPY and Linux copy_file_range(2).

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
---
 block/io.c| 97 +++
 include/block/block.h | 32 +
 include/block/block_int.h | 38 +++
 3 files changed, 167 insertions(+)

diff --git a/block/io.c b/block/io.c
index ca96b487eb..b7beaeeb9f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2835,3 +2835,100 @@ void bdrv_unregister_buf(BlockDriverState *bs, void 
*host)
 bdrv_unregister_buf(child->bs, host);
 }
 }
+
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
+uint64_t src_offset,
+BdrvChild *dst,
+uint64_t dst_offset,
+uint64_t bytes,
+BdrvRequestFlags flags,
+bool recurse_src)
+{
+int ret;
+
+if (!src || !dst || !src->bs || !dst->bs) {
+return -ENOMEDIUM;
+}
+ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
+if (ret) {
+return ret;
+}
+
+ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
+if (ret) {
+return ret;
+}
+if (flags & BDRV_REQ_ZERO_WRITE) {
+return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
+}
+
+if (!src->bs->drv->bdrv_co_copy_range_from
+|| !dst->bs->drv->bdrv_co_copy_range_to
+|| src->bs->encrypted || dst->bs->encrypted) {
+return -ENOTSUP;
+}
+if (recurse_src) {
+return src->bs->drv->bdrv_co_copy_range_from(src->bs,
+ src, src_offset,
+ dst, dst_offset,
+ bytes, flags);
+} else {
+return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
+   src, src_offset,
+   dst, dst_offset,
+   bytes, flags);
+}
+}
+
+/* Copy range from @src to @dst.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics. */
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
+ BdrvChild *dst, uint64_t dst_offset,
+ uint64_t bytes, BdrvRequestFlags 
flags)
+{
+return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+   bytes, flags, true);
+}
+
+/* Copy range from @src to @dst.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics. */
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
+   BdrvChild *dst, uint64_t dst_offset,
+   uint64_t bytes, BdrvRequestFlags flags)
+{
+return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
+   bytes, flags, false);
+}
+
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
+BdrvChild *dst, uint64_t dst_offset,
+uint64_t bytes, BdrvRequestFlags flags)
+{
+BdrvTrackedRequest src_req, dst_req;
+BlockDriverState *src_bs = src->bs;
+BlockDriverState *dst_bs = dst->bs;
+int ret;
+
+bdrv_inc_in_flight(src_bs);
+bdrv_inc_in_flight(dst_bs);
+tracked_request_begin(&src_req, src_bs, src_offset,
+  bytes, BDRV_TRACKED_READ);
+tracked_request_begin(&dst_req, dst_bs, dst_offset,
+  bytes, BDRV_TRACKED_WRITE);
+
+wait_serialising_requests(&src_req);
+wait_serialising_requests(&dst_req);
+ret = bdrv_co_copy_range_from(src, src_offset,
+  dst, dst_offset,
+  bytes, flags);
+
+tracked_request_end(&src_req);
+tracked_request_end(&dst_req);
+bdrv_dec_in_flight(src_bs);
+bdrv_dec_in_flight(dst_bs);
+return ret;
+}
diff --git a/include/block/block.h b/include/block/block.h
index 3894edda9d..6cc6c7e699 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -611,4 +611,36 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, 
const char *name,
  */
 void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
 void bdrv_unregister_buf(BlockDriver

[Qemu-devel] [PATCH v8 07/11] iscsi: Query and save device designator when opening

2018-05-31 Thread Fam Zheng
The device designator data returned in INQUIRY command will be useful to
fill in source/target fields during copy offloading. Do this when
connecting to the target and save the data for later use.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
---
 block/iscsi.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index 3fd7203916..6d0035d4b9 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -68,6 +68,7 @@ typedef struct IscsiLun {
 QemuMutex mutex;
 struct scsi_inquiry_logical_block_provisioning lbp;
 struct scsi_inquiry_block_limits bl;
+struct scsi_inquiry_device_designator *dd;
 unsigned char *zeroblock;
 /* The allocmap tracks which clusters (pages) on the iSCSI target are
  * allocated and which are not. In case a target returns zeros for
@@ -1740,6 +1741,30 @@ static QemuOptsList runtime_opts = {
 },
 };
 
+static void iscsi_save_designator(IscsiLun *lun,
+  struct scsi_inquiry_device_identification 
*inq_di)
+{
+struct scsi_inquiry_device_designator *desig, *copy = NULL;
+
+for (desig = inq_di->designators; desig; desig = desig->next) {
+if (desig->association ||
+desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) {
+continue;
+}
+/* NAA works better than T10 vendor ID based designator. */
+if (!copy || copy->designator_type < desig->designator_type) {
+copy = desig;
+}
+}
+if (copy) {
+lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
+*lun->dd = *copy;
+lun->dd->next = NULL;
+lun->dd->designator = g_malloc(copy->designator_length);
+memcpy(lun->dd->designator, copy->designator, copy->designator_length);
+}
+}
+
 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
   Error **errp)
 {
@@ -1922,6 +1947,7 @@ static int iscsi_open(BlockDriverState *bs, QDict 
*options, int flags,
 struct scsi_task *inq_task;
 struct scsi_inquiry_logical_block_provisioning *inq_lbp;
 struct scsi_inquiry_block_limits *inq_bl;
+struct scsi_inquiry_device_identification *inq_di;
 switch (inq_vpd->pages[i]) {
 case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
 inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
@@ -1947,6 +1973,17 @@ static int iscsi_open(BlockDriverState *bs, QDict 
*options, int flags,
sizeof(struct scsi_inquiry_block_limits));
 scsi_free_scsi_task(inq_task);
 break;
+case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
+inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
+
SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
+(void **) &inq_di, errp);
+if (inq_task == NULL) {
+ret = -EINVAL;
+goto out;
+}
+iscsi_save_designator(iscsilun, inq_di);
+scsi_free_scsi_task(inq_task);
+break;
 default:
 break;
 }
@@ -2003,6 +2040,10 @@ static void iscsi_close(BlockDriverState *bs)
 iscsi_logout_sync(iscsi);
 }
 iscsi_destroy_context(iscsi);
+if (iscsilun->dd) {
+g_free(iscsilun->dd->designator);
+g_free(iscsilun->dd);
+}
 g_free(iscsilun->zeroblock);
 iscsi_allocmap_free(iscsilun);
 qemu_mutex_destroy(&iscsilun->mutex);
-- 
2.17.0




[Qemu-devel] [PATCH v8 09/11] iscsi: Implement copy offloading

2018-05-31 Thread Fam Zheng
Issue EXTENDED COPY (LID1) command to implement the copy_range API.

The parameter data construction code is modified from libiscsi's
iscsi-dd.c.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
---
 block/iscsi.c| 219 +++
 include/scsi/constants.h |   4 +
 2 files changed, 223 insertions(+)

diff --git a/block/iscsi.c b/block/iscsi.c
index 6a365cb07b..5ea75646d9 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2205,6 +2205,221 @@ static void coroutine_fn 
iscsi_co_invalidate_cache(BlockDriverState *bs,
 iscsi_allocmap_invalidate(iscsilun);
 }
 
+static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
+ BdrvChild *src,
+ uint64_t src_offset,
+ BdrvChild *dst,
+ uint64_t dst_offset,
+ uint64_t bytes,
+ BdrvRequestFlags flags)
+{
+return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, 
flags);
+}
+
+static struct scsi_task *iscsi_xcopy_task(int param_len)
+{
+struct scsi_task *task;
+
+task = g_new0(struct scsi_task, 1);
+
+task->cdb[0] = EXTENDED_COPY;
+task->cdb[10]= (param_len >> 24) & 0xFF;
+task->cdb[11]= (param_len >> 16) & 0xFF;
+task->cdb[12]= (param_len >> 8) & 0xFF;
+task->cdb[13]= param_len & 0xFF;
+task->cdb_size   = 16;
+task->xfer_dir   = SCSI_XFER_WRITE;
+task->expxferlen = param_len;
+
+return task;
+}
+
+static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
+{
+struct scsi_inquiry_device_designator *dd = lun->dd;
+
+memset(desc, 0, 32);
+desc[0] = IDENT_DESCR_TGT_DESCR;
+desc[4] = dd->code_set;
+desc[5] = (dd->designator_type & 0xF)
+| ((dd->association & 3) << 4);
+desc[7] = dd->designator_length;
+memcpy(desc + 8, dd->designator, dd->designator_length);
+
+desc[28] = 0;
+desc[29] = (lun->block_size >> 16) & 0xFF;
+desc[30] = (lun->block_size >> 8) & 0xFF;
+desc[31] = lun->block_size & 0xFF;
+}
+
+static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
+ int dst_index)
+{
+hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
+hdr[1] = ((dc << 1) | cat) & 0xFF;
+hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF;
+/* don't account for the first 4 bytes in descriptor header*/
+hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) 
& 0xFF;
+hdr[4] = (src_index >> 8) & 0xFF;
+hdr[5] = src_index & 0xFF;
+hdr[6] = (dst_index >> 8) & 0xFF;
+hdr[7] = dst_index & 0xFF;
+}
+
+static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
+  int src_index, int dst_index, int 
num_blks,
+  uint64_t src_lba, uint64_t dst_lba)
+{
+iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
+
+/* The caller should verify the request size */
+assert(num_blks < 65536);
+desc[10] = (num_blks >> 8) & 0xFF;
+desc[11] = num_blks & 0xFF;
+desc[12] = (src_lba >> 56) & 0xFF;
+desc[13] = (src_lba >> 48) & 0xFF;
+desc[14] = (src_lba >> 40) & 0xFF;
+desc[15] = (src_lba >> 32) & 0xFF;
+desc[16] = (src_lba >> 24) & 0xFF;
+desc[17] = (src_lba >> 16) & 0xFF;
+desc[18] = (src_lba >> 8) & 0xFF;
+desc[19] = src_lba & 0xFF;
+desc[20] = (dst_lba >> 56) & 0xFF;
+desc[21] = (dst_lba >> 48) & 0xFF;
+desc[22] = (dst_lba >> 40) & 0xFF;
+desc[23] = (dst_lba >> 32) & 0xFF;
+desc[24] = (dst_lba >> 24) & 0xFF;
+desc[25] = (dst_lba >> 16) & 0xFF;
+desc[26] = (dst_lba >> 8) & 0xFF;
+desc[27] = dst_lba & 0xFF;
+}
+
+static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int 
str,
+int list_id_usage, int prio,
+int tgt_desc_len,
+int seg_desc_len, int inline_data_len)
+{
+buf[0] = list_id;
+buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
+buf[2] = (tgt_desc_len >> 8) & 0xFF;
+buf[3] = tgt_desc_len & 0xFF;
+buf[8] = (seg_desc_len >> 24) & 0xFF;
+buf[9] = (seg_desc_len >> 16) & 0xFF;
+buf[10] = (seg_desc_len >> 8) & 0xFF;
+buf[11] = seg_desc_len & 0xFF;
+buf[12] = (inline_data_len >> 24) & 0xFF;
+buf[13] = (inline_data_len >> 16) & 0xFF;
+buf[14] = (inline_data_len >> 8) & 0xFF;
+buf[15] = inline_data_len & 0xFF;
+}
+
+static void iscsi_xcopy_data(struct iscsi_data *data,
+ IscsiLun *src, int64_t src_lba,
+ IscsiLun *dst, int64_t dst_lba,
+ uint16_t num_blocks)
+{
+uint8_t *buf;
+  

[Qemu-devel] [PATCH v8 01/11] docker: Update fedora image to 28

2018-05-31 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 tests/docker/dockerfiles/fedora.docker | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/docker/dockerfiles/fedora.docker 
b/tests/docker/dockerfiles/fedora.docker
index b706f42405..65d7761cf5 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -1,4 +1,4 @@
-FROM fedora:27
+FROM fedora:28
 ENV PACKAGES \
 ccache gettext git tar PyYAML sparse flex bison python3 bzip2 hostname \
 gcc gcc-c++ llvm clang make perl which bc findutils glib2-devel \
-- 
2.17.0




[Qemu-devel] [PATCH v8 00/11] qemu-img convert with copy offloading

2018-05-31 Thread Fam Zheng
v8: Fix compiling against new glibc and libiscsi on Fedora 28 where v7 had
conflict definitions. [Stefan, myself]
- Add HAVE_COPY_FILE_RANGE in configure.
- Drop IDENT_DESCR_TGT_DESCR from scsi constants header.

v7: Fix qcow2.

v6: Pick up rev-by from Stefan and Eric.
Tweak patch 2 commit message.

v5: - Fix raw offset/bytes check for read. [Eric]
- Fix qcow2_handle_l2meta. [Stefan]
- Add coroutine_fn whereever appropriate. [Stefan]

v4: - Fix raw offset and size. [Eric]
- iscsi: Drop unnecessary return values and variables in favor of
  constants. [Stefan]
- qcow2: Handle small backing case. [Stefan]
- file-posix: Translate ENOSYS to ENOTSUP. [Stefan]
- API documentation and commit message. [Stefan]
- Add rev-by to patches 3, 5 - 10. [Stefan, Eric]

This series introduces block layer API for copy offloading and makes use of it
in qemu-img convert.

For now we implemented the operation in local file protocol with
copy_file_range(2).  Besides that it's possible to add similar to iscsi, nfs
and potentially more.

As far as its usage goes, in addition to qemu-img convert, we can emulate
offloading in scsi-disk (handle EXTENDED COPY command), and use the API in
block jobs too.

Fam Zheng (11):
  docker: Update fedora image to 28
  block: Introduce API for copy offloading
  raw: Check byte range uniformly
  raw: Implement copy offloading
  qcow2: Implement copy offloading
  file-posix: Implement bdrv_co_copy_range
  iscsi: Query and save device designator when opening
  iscsi: Create and use iscsi_co_wait_for_task
  iscsi: Implement copy offloading
  block-backend: Add blk_co_copy_range
  qemu-img: Convert with copy offloading

 block/block-backend.c  |  18 ++
 block/file-posix.c |  98 +++-
 block/io.c |  97 
 block/iscsi.c  | 314 ++---
 block/qcow2.c  | 229 +++---
 block/raw-format.c |  96 ++--
 configure  |  17 ++
 include/block/block.h  |  32 +++
 include/block/block_int.h  |  38 +++
 include/block/raw-aio.h|  10 +-
 include/scsi/constants.h   |   4 +
 include/sysemu/block-backend.h |   4 +
 qemu-img.c |  50 +++-
 tests/docker/dockerfiles/fedora.docker |   2 +-
 14 files changed, 909 insertions(+), 100 deletions(-)

-- 
2.17.0




Re: [Qemu-devel] Questions about the flow of interrupt simulation

2018-05-31 Thread Eva Chen
​​
Thanks deeply for your clear explanation!
Let me try to conclude what I learned from your description.

qemu_set_irq()   qemu_set_irq()

[ device ]   ->  [ GIC ]  -> [ CPU ]


Detailed:
[device]
=>qemu_set_irq()
[GIC]
=>gic_set_irq()
=>gic_update()
=>qemu_set_irq()
[CPU]
=> arm_cpu_set_irq()

GIC decides to send an irq to CPU or not depends on the level that device
sends or others reason like interrupt mask.
GIC turns the interrupt into an interrupt type:  ARM_CPU_HARD/FIQ/VIRQ/VFIQ.
In arm_cpu_set_irq(), it turns   ARM_CPU_HARD/FIQ/VIRQ/VFIQ into
CPU_INTERRUPT_HARD/FIQ/VIRQ/VFIQ.
cpu_interrupt() sets cpu->interrupt_request.

Then I have some following questions:
1. There are two kinds of interrupt:  edge triggered and level triggered.
I have seen two code segment related to the level: gic_set_irq() and
arm_cpu_set_irq().
In gic_set_irq(), if level == GIC_TEST_LEVEL(irq, cm), which means the
level is not changed, will return.
In arm_cpu_set_irq() said that if level ==1, call cpu_interrupt(). if
level==0, call cpu_reset_interrupt(), which will clean up that irq bits..
Does that mean all interrupt in arm are level triggered(high level)?
How to know the triggered type of interrupt?

2. interrupt signal will be passed through GIC from device to CPU. There
are four types of interrupt in CPU: CPU_INTERRUPT_HARD/FIQ/VIRQ/VFIQ.
Where exactly define the CPU_INTERRUPT_{type} that device's interrupt
corresponded?

3. I have seen others device's code under qemu/hw directory. Almost all
device will call qemu_set_irq() at the end of device's read/write. Is that
for the purpose of a device to tell CPU that it has done some works?
but the second parameter of qemu_set_irq(), level, will be set to a
different value(not always 1 or 0), which sometimes will cause the
interrupt return at gic_set_irq() instead of passing to CPU.
What does the interrupt at the end of device_read/write(device_update())
mean?

I think I might ask some basic questions of interrupt. If there is any
documents or tutorial about interrupt, please share with me,  I am willing
to study it.

Sincerely,
Eva


2018-05-24 21:40 GMT+08:00 Peter Maydell :

> On 23 May 2018 at 11:45, Eva Chen  wrote:
> > Hi,
> >
> > I have added a new virtual device under qemu/hw, and a device driver in
> the
> > source code of guest OS (I use arm linux).
> > Since I want to add an interrupt to the virtual device, I am tracing the
> > flow of interrupt simulation of QEMU.
> > However, I have some questions about the flow of interrupt, and wondering
> > if I could ask here for some help.
> >
> > I use $qemu-system-aarch64 -machine virt.
> > Take the flow of interrupt that generated from the UART pl011 device as
> > example.
> > First an interrupt, CPU_INTERRUPT_HARD, will be sent from device to GIC
> for
> > telling CPU that this device finished some job, then this interrupt wiil
> be
> > transformed into EXCP_IRQ in aarch64_cpu_do_interrupt() and interrupt
> > handled by CPU.
>
> This isn't quite the way it works.
>
> What happens is more like:
>  * pl011 device calls qemu_set_irq() on its outbound IRQ line
>  * the board model has connected that line up to one of the GIC's
>input lines
>  * the GIC function on the other end of the qemu_irq is called
>and does the necessary processing of the input line state change.
>(For the gicv2 this is the gic_set_irq() function.)
>  * if the GIC state is such that it should now signal an IRQ to
>the CPU it will do that. (This is not necessarily going to happen:
>for instance the interrupt might be disabled in the GIC, or
>another higher priority interrupt might already be being signaled.)
>The GIC does this by calling qemu_set_irq() on its outbound
>IRQ or FIQ line.
>  * the CPU object on the other end of that IRQ or FIQ line will have
>its arm_cpu_set_irq() function called. All this function does is
>make a note so that we stop executing code and start doing
>something else; we do that by calling cpu_interrupt(). We use
>CPU_INTERRUPT_HARD/FIQ/VIRQ/VFIQ here to distinguish which of our
>4 interrupt lines this is; nothing outside the CPU cares about
>those names.
>  * cpu_interrupt() itself doesn't do anything, it just sets a flag
>which the thread executing guest cpu code will check.
>  * in the guest-cpu-execution code path you have traced the relevant
>flow here:
>> 2. CPU handling : cpu_exec() => arm_cpu_exec_interrupt() =>
>> aarch64_cpu_do_interrupt()
>> Guest CPU's pc (env->pc) will be changed to the address of interrupt
>> service routine in aarch64_cpu_do_interrupt().
>
> Some notes:
>  (1) I've assumed GICv2 in the above; the GICv3 is a bit more complex
> but the basic idea is similar
>  (2) The GIC is a device model that is not "special" just because
> it happens to be an interrupt controller. It's just a model of a
> bit of hardware that has some input lines and some outp

[Qemu-devel] [PATCH v4] qga: add mountpoint usage to GuestFilesystemInfo

2018-05-31 Thread Chen Hanxiao
From: Chen Hanxiao 

This patch adds support for getting the usage of mounted
filesystem.
It's very useful when we try to monitor guest's filesystem.

Cc: Michael Roth 
Cc: Eric Blake 
Signed-off-by: Chen Hanxiao 

---
v2:
   add description in qapi-schema and version numbers
v3:
   use float for usage to get more precision.
v4:
   make usage as a best-effort query and mark it as optional.

 qga/commands-posix.c | 17 +
 qga/qapi-schema.json |  3 ++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 0dc219dbcf..4facc76953 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -46,6 +46,7 @@ extern char **environ;
 #include 
 #include 
 #include 
+#include 
 
 #ifdef FIFREEZE
 #define CONFIG_FSFREEZE
@@ -1072,6 +1073,9 @@ static GuestFilesystemInfo *build_guest_fsinfo(struct 
FsMount *mount,
Error **errp)
 {
 GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
+struct statvfs buf;
+unsigned long used, nonroot_total;
+double usage;
 char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
 mount->devmajor, mount->devminor);
 
@@ -1079,7 +1083,20 @@ static GuestFilesystemInfo *build_guest_fsinfo(struct 
FsMount *mount,
 fs->type = g_strdup(mount->devtype);
 build_guest_fsinfo_for_device(devpath, fs, errp);
 
+if (statvfs(fs->mountpoint, &buf)) {
+fs->has_usage = false;
+fs->usage = -1;
+} else {
+used = buf.f_blocks - buf.f_bfree;
+nonroot_total = used + buf.f_bavail;
+usage = (double) used / nonroot_total;
+
+fs->has_usage = true;
+fs->usage = usage;
+}
+
 g_free(devpath);
+
 return fs;
 }
 
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 17884c7c70..fcd427e86d 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -846,6 +846,7 @@
 # @name: disk name
 # @mountpoint: mount point path
 # @type: file system type string
+# @usage: file system usage, fraction between 0 and 1 (since 3.0)
 # @disk: an array of disk hardware information that the volume lies on,
 #which may be empty if the disk type is not supported
 #
@@ -853,7 +854,7 @@
 ##
 { 'struct': 'GuestFilesystemInfo',
   'data': {'name': 'str', 'mountpoint': 'str', 'type': 'str',
-   'disk': ['GuestDiskAddress']} }
+   '*usage': 'number', 'disk': ['GuestDiskAddress']} }
 
 ##
 # @guest-get-fsinfo:
-- 
2.17.0




Re: [Qemu-devel] [PATCH 2/3] pc-bios/s390-ccw/net: Add support for pxelinux-style config files

2018-05-31 Thread Thomas Huth
On 30.05.2018 13:07, Viktor VM Mihajlovski wrote:
> On 30.05.2018 11:16, Thomas Huth wrote:
>> Since it is quite cumbersome to manually create a combined kernel with
>> initrd image for network booting, we now support loading via pxelinux
>> configuration files, too. In these files, the kernel, initrd and command
>> line parameters can be specified seperately, and the firmware then takes
>> care of glueing everything together in memory after the files have been
>> downloaded. See this URL for details about the config file layout:
>> https://www.syslinux.org/wiki/index.php?title=PXELINUX
>>
>> The user can either specify a config file directly as bootfile via DHCP
>> (but in this case, the file has to start either with "default" or a "#"
>> comment so we can distinguish it from binary kernels), or a folder (i.e.
>> the bootfile name must end with "/") where the firmware should look for
>> the typical pxelinux.cfg file names, e.g. based on MAC or IP address.
>> We also support the pxelinux.cfg DHCP options 209 and 210 from RFC 5071.
>>
>> Signed-off-by: Thomas Huth 
>> ---
>>  pc-bios/s390-ccw/netboot.mak |  7 ++--
>>  pc-bios/s390-ccw/netmain.c   | 79 
>> +++-
>>  2 files changed, 82 insertions(+), 4 deletions(-)
> [...]
>> diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c
>> index 7533cf7..e84bb2b 100644
>> --- a/pc-bios/s390-ccw/netmain.c
>> +++ b/pc-bios/s390-ccw/netmain.c
> [...]
>> @@ -301,6 +363,18 @@ static int net_try_direct_tftp_load(filename_ip_t 
>> *fn_ip)
>>  if (!strncmp("* ", cfgbuf, 2)) {
>>  return handle_ins_cfg(fn_ip, cfgbuf, rc);
>>  }
>> +if (!strncasecmp("default", cfgbuf, 7) || !strncmp("# ", cfgbuf, 
>> 2)) {
> Minor, but I'm wondering whether this is not too cautious and could rule
> out valid config files. You might just unconditionally call
> pxelinux_parse_cfg and let it find out if this is as pxelinux config
> file or not.

I thought about this for a while, but I think I'd rather avoid this.
It's also possible that the user tried to load a small binary which
accidentally contained a string like "label" and "kernel", and then the
bios would try to interpret this as config file instead of just running
the binary.

If you feel really unhappy about the magic string matching with
"default" and "# " here, I think it would be better to remove this magic
completely instead. The original pxelinux loader and petitboot also do
not support this, but rely on the DHCP options 209 and 210 instead
(which we also support now). I only added this magic here for my own
convenience, since the built-in DHCP server of QEMU does not support the
options 209 and 210, and I still wanted to have a way to quickly change
from one config file to another... but now that the code is basically up
and running, it's not really required anymore, so removing this should
be fine. Alternatively, we could use a more sophisticated magic here,
like "# pxelinux" for example, just for the developers' convenience ...
what do you think?

 Thomas



Re: [Qemu-devel] Recording I/O activity after KVM does a VMEXIT

2018-05-31 Thread Pavel Dovgalyuk
Hi,

 

I’m not familiar with KVM, but I know successful attempts of replaying the 
execution by logging IO and MMIO in TCG mode.

The difference in CPU I/O and VM I/O is the following. In icount we record 
anything coming into the VM, but not into the CPU.

It means that the whole packet is recorded. Virtual hardware behaves 
deterministically and therefore CPU will get identical

input in case of replay, because the whole recorded packet is injected again by 
the filter.

 

Pavel Dovgalyuk

 

From: Arnabjyoti Kalita [mailto:akal...@cs.stonybrook.edu] 
Sent: Thursday, May 31, 2018 11:14 PM
To: Pavel Dovgalyuk
Cc: Stefan Hajnoczi; qemu-devel@nongnu.org; Pavel Dovgalyuk
Subject: Re: [Qemu-devel] Recording I/O activity after KVM does a VMEXIT

 

Dear Pavel,

 

Thank you for your answer. I am not being able to understand the difference 
between CPU I/Os and VM I/Os. Would any network packet that comes into the 
Guest OS from the outside be a part of VM I/O or CPU I/O ? I am only interested 
in "recording" and "replaying" those network packets that come from the outside 
into the networking backend and not the other way around. Say for example when 
I get a VMExit because of the arrival of a network packet, I will use the 
VMExit reason : "KVM_EXIT_MMIO"  to trace back to "e1000_mmio_write()" which I 
expect should be enough to record network packets that come from the outside 
and write to the guest address space for "e1000" devices. In such a case, I 
think I will not have to use the "network-filter" backend that you use to 
record VM I/O only. Let me know if you find errors in my approach.

 

I will try to see how I can record disk packets. If disk packets use other ways 
of writing to the guest memory apart from a normal VMExit, I will try to find 
it out. Eventually I hope that it will use one of the available disk front-end 
functions to write to the guest memory from the disk, just like e1000 does with 
an "e1000_mmio_write()" call. 

 

Thanks and best regards,

Arnab

 

 

 

 

 



 

On Thu, May 31, 2018 at 8:44 AM, Pavel Dovgalyuk  wrote:

> From: Stefan Hajnoczi [mailto:stefa...@gmail.com]
> On Wed, May 30, 2018 at 11:19:13PM -0400, Arnabjyoti Kalita wrote:
> > I am trying to implement a 'minimal' record-replay mechanism for KVM, which
> > is similar to the one existing for TCG via -icount. I am trying to record
> > I/O events only (specifically disk and network events) when KVM does a
> > VMEXIT. This has led me to the function kvm_cpu_exec where I can clearly
> > see the different ways of handling all of the possible VMExit cases (like
> > PIO, MMIO etc.). To record network packets, I am working with the e1000
> > hardware device.
> >
> > Can I make sure that all of the network I/O, atleast for the e1000 device
> > happens through the KVM_EXIT_MMIO case and subsequent use of the
> > address_space_rw() function ? Do I also need to look at other functions as
> > well ? Also for recording disk activity, can I make sure that looking out
> > for the KVM_EXIT_MMIO and/or KVM_EXIT_PIO cases in the vmexit mechanism,
> > will be enough ?
> >
> > Let me know if there are other details that I need to take care of. I am
> > using QEMU 2.11 on a x86-64 CPU and the guest runs a Linux Kernel 4.4 with
> > Ubuntu 16.04.

The main icount-based record/replay advantage is that we don't record
any CPU IO. We record only VM IO (e.g., by using the network filter).

Disk devices may transfer data to CPU using DMA, therefore intercepting
only VMExit cases will not be enough.

Pavel Dovgalyuk

 



Re: [Qemu-devel] [PATCH 2/9 V5] hostmem-file: add the 'pmem' option

2018-05-31 Thread Junyan He
Because we have disk backend imitation for NVDimm kind memory, I think it is 
more

flexible for user to specify the real or not real pmem rather than we check in 
qemu

using the pmem_is_pmem API


From: Stefan Hajnoczi 
Sent: Thursday, May 31, 2018 1:09:42 PM
To: junyan...@gmx.com
Cc: qemu-devel@nongnu.org; Haozhong Zhang; xiaoguangrong.e...@gmail.com; 
crosthwaite.pe...@gmail.com; m...@redhat.com; dgilb...@redhat.com; 
ehabk...@redhat.com; quint...@redhat.com; Junyan He; stefa...@redhat.com; 
pbonz...@redhat.com; imamm...@redhat.com; r...@twiddle.net
Subject: Re: [Qemu-devel] [PATCH 2/9 V5] hostmem-file: add the 'pmem' option

On Thu, May 10, 2018 at 10:08:51AM +0800, junyan...@gmx.com wrote:
> From: Junyan He 
>
> When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it
> needs to know whether the backend storage is a real persistent memory,
> in order to decide whether special operations should be performed to
> ensure the data persistence.
>
> This boolean option 'pmem' allows users to specify whether the backend
> storage of memory-backend-file is a real persistent memory. If
> 'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the
> corresponding memory region.

I'm still not sure if this option is necessary since pmem_is_pmem() is
available with the introduction of the libpmem dependency.  Why can't it
be used?

Stefan


Re: [Qemu-devel] [qemu PATCH v4 0/4] support NFIT platform capabilities

2018-05-31 Thread Elliott, Robert (Persistent Memory)
> Ping on this series.  Rob, I think I've addressed all your feedback.
> Can you please verify?

I haven't tested it, but it reads OK.  I'm OK with just extending
the valid count for bits set to one for now; we can add a new
argument later if a need arises for extending it to express new bits
set to zero.





Re: [Qemu-devel] [PATCH v7 0/5] virtio-balloon: free page hint reporting support

2018-05-31 Thread Peter Xu
On Fri, Jun 01, 2018 at 12:58:24PM +0800, Peter Xu wrote:
> On Tue, Apr 24, 2018 at 02:13:43PM +0800, Wei Wang wrote:
> > This is the deivce part implementation to add a new feature,
> > VIRTIO_BALLOON_F_FREE_PAGE_HINT to the virtio-balloon device. The device
> > receives the guest free page hints from the driver and clears the
> > corresponding bits in the dirty bitmap, so that those free pages are
> > not transferred by the migration thread to the destination.
> > 
> > - Test Environment
> > Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz
> > Guest: 8G RAM, 4 vCPU
> > Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second
> > 
> > - Test Results
> > - Idle Guest Live Migration Time (results are averaged over 10 runs):
> > - Optimization v.s. Legacy = 271ms vs 1769ms --> ~86% reduction
> > - Guest with Linux Compilation Workload (make bzImage -j4):
> > - Live Migration Time (average)
> >   Optimization v.s. Legacy = 1265ms v.s. 2634ms --> ~51% reduction
> > - Linux Compilation Time
> >   Optimization v.s. Legacy = 4min56s v.s. 5min3s
> >   --> no obvious difference
> > 
> > - Source Code
> > - QEMU:  https://github.com/wei-w-wang/qemu-free-page-lm.git
> > - Linux: https://github.com/wei-w-wang/linux-free-page-lm.git
> 
> Hi, Wei,
> 
> I have a very high-level question to the series.
> 
> IIUC the core idea for this series is that we can avoid sending some
> of the pages if we know that we don't need to send them.  I think this
> is based on the fact that on the destination side all the pages are by
> default zero after they are malloced.  While before this series, IIUC
> any migration will send every single page to destination, no matter
> whether it's zeroed or not.  So I'm uncertain about whether this will
> affect the received bitmap on the destination side.  Say, before this
> series, the received bitmap will directly cover the whole RAM bitmap
> after migration is finished, now it's won't.  Will there be any side
> effect?  I don't see obvious issue now, but just raise this question
> up.
> 
> Meanwhile, this reminds me about a more funny idea: whether we can
> just avoid sending the zero pages directly from QEMU's perspective.
> In other words, can we just do nothing if save_zero_page() detected
> that the page is zero (I guess the is_zero_range() can be fast too,
> but I don't know exactly how fast it is)?  And how that would be
> differed from this page hinting way in either performance and other
> aspects.

I noticed a problem (after I wrote the above paragraph 5 minutes
ago...): when a page was valid and sent to the destination (with
non-zero data), however after a while that page was zeroed.  Then if
we don't send zero pages at all, we won't send the page after it's
zeroed.  Then on the destination side we'll have a stale non-zero
page.  Is my understanding correct?  Will that be a problem to this
series too where a valid page can be possibly freed and hinted?

> 
> I haven't digged into the kernel patches yet so I have totally no idea
> on the detailed implementation of the page hinting.  Please feel free
> to correct me if there is obvious misunderstandings.
> 
> Regards,
> 
> -- 
> Peter Xu

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v7 0/5] virtio-balloon: free page hint reporting support

2018-05-31 Thread Peter Xu
On Tue, Apr 24, 2018 at 02:13:43PM +0800, Wei Wang wrote:
> This is the deivce part implementation to add a new feature,
> VIRTIO_BALLOON_F_FREE_PAGE_HINT to the virtio-balloon device. The device
> receives the guest free page hints from the driver and clears the
> corresponding bits in the dirty bitmap, so that those free pages are
> not transferred by the migration thread to the destination.
> 
> - Test Environment
> Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz
> Guest: 8G RAM, 4 vCPU
> Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second
> 
> - Test Results
> - Idle Guest Live Migration Time (results are averaged over 10 runs):
> - Optimization v.s. Legacy = 271ms vs 1769ms --> ~86% reduction
> - Guest with Linux Compilation Workload (make bzImage -j4):
> - Live Migration Time (average)
>   Optimization v.s. Legacy = 1265ms v.s. 2634ms --> ~51% reduction
> - Linux Compilation Time
>   Optimization v.s. Legacy = 4min56s v.s. 5min3s
>   --> no obvious difference
> 
> - Source Code
> - QEMU:  https://github.com/wei-w-wang/qemu-free-page-lm.git
> - Linux: https://github.com/wei-w-wang/linux-free-page-lm.git

Hi, Wei,

I have a very high-level question to the series.

IIUC the core idea for this series is that we can avoid sending some
of the pages if we know that we don't need to send them.  I think this
is based on the fact that on the destination side all the pages are by
default zero after they are malloced.  While before this series, IIUC
any migration will send every single page to destination, no matter
whether it's zeroed or not.  So I'm uncertain about whether this will
affect the received bitmap on the destination side.  Say, before this
series, the received bitmap will directly cover the whole RAM bitmap
after migration is finished, now it's won't.  Will there be any side
effect?  I don't see obvious issue now, but just raise this question
up.

Meanwhile, this reminds me about a more funny idea: whether we can
just avoid sending the zero pages directly from QEMU's perspective.
In other words, can we just do nothing if save_zero_page() detected
that the page is zero (I guess the is_zero_range() can be fast too,
but I don't know exactly how fast it is)?  And how that would be
differed from this page hinting way in either performance and other
aspects.

I haven't digged into the kernel patches yet so I have totally no idea
on the detailed implementation of the page hinting.  Please feel free
to correct me if there is obvious misunderstandings.

Regards,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v7 3/5] migration: API to clear bits of guest free pages from the dirty bitmap

2018-05-31 Thread Peter Xu
On Tue, Apr 24, 2018 at 02:13:46PM +0800, Wei Wang wrote:
> This patch adds an API to clear bits corresponding to guest free pages
> from the dirty bitmap. Spilt the free page block if it crosses the QEMU
> RAMBlock boundary.
> 
> Signed-off-by: Wei Wang 
> CC: Dr. David Alan Gilbert 
> CC: Juan Quintela 
> CC: Michael S. Tsirkin 
> ---
>  include/migration/misc.h |  2 ++
>  migration/ram.c  | 44 
>  2 files changed, 46 insertions(+)
> 
> diff --git a/include/migration/misc.h b/include/migration/misc.h
> index 4ebf24c..113320e 100644
> --- a/include/migration/misc.h
> +++ b/include/migration/misc.h
> @@ -14,11 +14,13 @@
>  #ifndef MIGRATION_MISC_H
>  #define MIGRATION_MISC_H
>  
> +#include "exec/cpu-common.h"
>  #include "qemu/notify.h"
>  
>  /* migration/ram.c */
>  
>  void ram_mig_init(void);
> +void qemu_guest_free_page_hint(void *addr, size_t len);
>  
>  /* migration/block.c */
>  
> diff --git a/migration/ram.c b/migration/ram.c
> index 9a72b1a..0147548 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2198,6 +2198,50 @@ static int ram_init_all(RAMState **rsp)
>  }
>  
>  /*
> + * This function clears bits of the free pages reported by the caller from 
> the
> + * migration dirty bitmap. @addr is the host address corresponding to the
> + * start of the continuous guest free pages, and @len is the total bytes of
> + * those pages.
> + */
> +void qemu_guest_free_page_hint(void *addr, size_t len)
> +{
> +RAMBlock *block;
> +ram_addr_t offset;
> +size_t used_len, start, npages;

Do we need to check here on whether a migration is in progress?  Since
if not I'm not sure whether this hint still makes any sense any more,
and more importantly it seems to me that block->bmap below at [1] is
only valid during a migration.  So I'm not sure whether QEMU will
crash if this function is called without a running migration.

> +
> +for (; len > 0; len -= used_len) {
> +block = qemu_ram_block_from_host(addr, false, &offset);
> +if (unlikely(!block)) {
> +return;

We should never reach here, should we?  Assuming the callers of this
function should always pass in a correct host address. If we are very
sure that the host addr should be valid, could we just assert?

> +}
> +
> +/*
> + * This handles the case that the RAMBlock is resized after the free
> + * page hint is reported.
> + */
> +if (unlikely(offset > block->used_length)) {
> +return;
> +}
> +
> +if (len <= block->used_length - offset) {
> +used_len = len;
> +} else {
> +used_len = block->used_length - offset;
> +addr += used_len;
> +}
> +
> +start = offset >> TARGET_PAGE_BITS;
> +npages = used_len >> TARGET_PAGE_BITS;
> +
> +qemu_mutex_lock(&ram_state->bitmap_mutex);

So now I think I understand the lock can still be meaningful since
this function now can be called outside the migration thread (e.g., in
vcpu thread).  But still it would be nice to mention it somewhere on
the truth of the lock.

Regards,

> +ram_state->migration_dirty_pages -=
> +  bitmap_count_one_with_offset(block->bmap, start, 
> npages);
> +bitmap_clear(block->bmap, start, npages);

[1]

> +qemu_mutex_unlock(&ram_state->bitmap_mutex);
> +}
> +}
> +
> +/*
>   * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
>   * long-running RCU critical section.  When rcu-reclaims in the code
>   * start to become numerous it will be necessary to reduce the
> -- 
> 1.8.3.1
> 
> 

-- 
Peter Xu



Re: [Qemu-devel] [Qemu-block] [PATCH v2 8/8] block: Remove unused sector-based vectored I/O

2018-05-31 Thread Jeff Cody
On Thu, May 31, 2018 at 03:50:46PM -0500, Eric Blake wrote:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Now that all callers of vectored I/O have been converted
> to use our preferred byte-based bdrv_co_p{read,write}v(), we can
> delete the unused bdrv_co_{read,write}v().
> 
> Furthermore, this gets rid of the signature difference between the
> public bdrv_co_writev() and the callback .bdrv_co_writev (the
> latter still exists, because some drivers still need more work
> before they are fully byte-based).
> 
> Signed-off-by: Eric Blake 
> Reviewed-by: Stefan Hajnoczi 

Reviewed-by: Jeff Cody 

> ---
> v2: commit typo fix [Kashyap]
> ---
>  include/block/block.h |  4 
>  block/io.c| 36 
>  2 files changed, 40 deletions(-)
> 
> diff --git a/include/block/block.h b/include/block/block.h
> index 3894edda9de..fe40d2929ac 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -285,10 +285,6 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const 
> void *buf, int bytes);
>  int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
>  int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
>   const void *buf, int count);
> -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
> -   int nb_sectors, QEMUIOVector *qiov);
> -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
> -   int nb_sectors, QEMUIOVector *qiov);
>  /*
>   * Efficiently zero a region of the disk image.  Note that this is a regular
>   * I/O request like read or write and should have a reasonable size.  This
> diff --git a/block/io.c b/block/io.c
> index ca96b487eb8..1d86bfc0072 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1341,24 +1341,6 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
>  return ret;
>  }
> 
> -static int coroutine_fn bdrv_co_do_readv(BdrvChild *child,
> -int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> -BdrvRequestFlags flags)
> -{
> -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
> -return -EINVAL;
> -}
> -
> -return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS,
> -  nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
> -}
> -
> -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
> -   int nb_sectors, QEMUIOVector *qiov)
> -{
> -return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
> -}
> -
>  static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
>  int64_t offset, int bytes, BdrvRequestFlags flags)
>  {
> @@ -1801,24 +1783,6 @@ out:
>  return ret;
>  }
> 
> -static int coroutine_fn bdrv_co_do_writev(BdrvChild *child,
> -int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> -BdrvRequestFlags flags)
> -{
> -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
> -return -EINVAL;
> -}
> -
> -return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS,
> -   nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
> -}
> -
> -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
> -int nb_sectors, QEMUIOVector *qiov)
> -{
> -return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0);
> -}
> -
>  int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
> int bytes, BdrvRequestFlags flags)
>  {
> -- 
> 2.14.3
> 
> 



Re: [Qemu-devel] [Qemu-block] [PATCH v2 6/8] replication: Switch to byte-based calls

2018-05-31 Thread Jeff Cody
On Thu, May 31, 2018 at 03:50:44PM -0500, Eric Blake wrote:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Make the change for the last few sector-based calls
> into the block layer from the replication driver.
> 
> Ideally, the replication driver should switch to doing everything
> byte-based, but that's a more invasive change that requires a
> bit more auditing.
> 
> Signed-off-by: Eric Blake 

Reviewed-by: Jeff Cody 

> ---
>  block/replication.c | 14 --
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/block/replication.c b/block/replication.c
> index 826db7b3049..6349d6958e4 100644
> --- a/block/replication.c
> +++ b/block/replication.c
> @@ -246,13 +246,14 @@ static coroutine_fn int 
> replication_co_readv(BlockDriverState *bs,
>  backup_cow_request_begin(&req, child->bs->job,
>   sector_num * BDRV_SECTOR_SIZE,
>   remaining_bytes);
> -ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors,
> -qiov);
> +ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
> + remaining_bytes, qiov, 0);
>  backup_cow_request_end(&req);
>  goto out;
>  }
> 
> -ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov);
> +ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE,
> + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
>  out:
>  return replication_return_value(s, ret);
>  }
> @@ -279,8 +280,8 @@ static coroutine_fn int 
> replication_co_writev(BlockDriverState *bs,
>  }
> 
>  if (ret == 0) {
> -ret = bdrv_co_writev(top, sector_num,
> - remaining_sectors, qiov);
> +ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE,
> +  remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0);
>  return replication_return_value(s, ret);
>  }
> 
> @@ -306,7 +307,8 @@ static coroutine_fn int 
> replication_co_writev(BlockDriverState *bs,
>  qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);
> 
>  target = ret ? top : base;
> -ret = bdrv_co_writev(target, sector_num, n, &hd_qiov);
> +ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE,
> +  n * BDRV_SECTOR_SIZE, &hd_qiov, 0);
>  if (ret < 0) {
>  goto out1;
>  }
> -- 
> 2.14.3
> 
> 



[Qemu-devel] vIOMMU Posted-interrupt implementation - atomic operation?

2018-05-31 Thread Jintack Lim
Hi,

I'm implementing Posted-interrupt functionality in vIOMMU. According
to Vt-d spec 5.2.3, IOMMU performs a coherent atomic read-modify-write
operation of the posted-interrupt descriptor. I wonder how can we
achieve this considering the guest can modify the same
posted-interrupt descriptor anytime. Is there any existing mechanism
that I can use in QEMU?

Thanks,
Jintack




Re: [Qemu-devel] [PATCH v7 2/5] migration: use bitmap_mutex in migration_bitmap_clear_dirty

2018-05-31 Thread Peter Xu
On Tue, Apr 24, 2018 at 02:13:45PM +0800, Wei Wang wrote:
> The bitmap mutex is used to synchronize threads to update the dirty
> bitmap and the migration_dirty_pages counter. This patch makes
> migration_bitmap_clear_dirty update the bitmap and counter under the
> mutex.
> 
> Signed-off-by: Wei Wang 
> CC: Dr. David Alan Gilbert 
> CC: Juan Quintela 
> CC: Michael S. Tsirkin 
> ---
>  migration/ram.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 0e90efa..9a72b1a 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -795,11 +795,14 @@ static inline bool 
> migration_bitmap_clear_dirty(RAMState *rs,
>  {
>  bool ret;
>  
> +qemu_mutex_lock(&rs->bitmap_mutex);
>  ret = test_and_clear_bit(page, rb->bmap);
>  
>  if (ret) {
>  rs->migration_dirty_pages--;
>  }
> +qemu_mutex_unlock(&rs->bitmap_mutex);
> +
>  return ret;
>  }
>  
> -- 
> 1.8.3.1
> 
> 

Do we need the lock after all?

I see that we introduced this lock due to device hotplug at dd63169766
("migration: extend migration_bitmap", 2015-07-07), however now we
actually don't allow that to happen any more after commit b06424de62
("migration: Disable hotplug/unplug during migration", 2017-04-21).
I'm not sure whether it means that the lock is not needed now.

If so, this patch seems to be unecessary.

Regards,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH 2/3] pc-bios/s390-ccw/net: Add support for pxelinux-style config files

2018-05-31 Thread Thomas Huth
On 31.05.2018 23:25, Farhan Ali wrote:
> 
> 
> On 05/30/2018 05:16 AM, Thomas Huth wrote:
>> Since it is quite cumbersome to manually create a combined kernel with
>> initrd image for network booting, we now support loading via pxelinux
>> configuration files, too. In these files, the kernel, initrd and command
>> line parameters can be specified seperately, and the firmware then takes
>> care of glueing everything together in memory after the files have been
>> downloaded. See this URL for details about the config file layout:
>> https://www.syslinux.org/wiki/index.php?title=PXELINUX
>>
>> The user can either specify a config file directly as bootfile via DHCP
>> (but in this case, the file has to start either with "default" or a "#"
>> comment so we can distinguish it from binary kernels), or a folder (i.e.
>> the bootfile name must end with "/") where the firmware should look for
>> the typical pxelinux.cfg file names, e.g. based on MAC or IP address.
>> We also support the pxelinux.cfg DHCP options 209 and 210 from RFC 5071.
>>
>> Signed-off-by: Thomas Huth 
>> ---
>>   pc-bios/s390-ccw/netboot.mak |  7 ++--
>>   pc-bios/s390-ccw/netmain.c   | 79
>> +++-
>>   2 files changed, 82 insertions(+), 4 deletions(-)
>>
>> diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak
>> index a73be36..8af0cfd 100644
>> --- a/pc-bios/s390-ccw/netboot.mak
>> +++ b/pc-bios/s390-ccw/netboot.mak
>> @@ -25,8 +25,9 @@ CTYPE_OBJS = isdigit.o isxdigit.o toupper.o
>>   %.o : $(SLOF_DIR)/lib/libc/ctype/%.c
>>   $(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@
>> $<,"CC","$(TARGET_DIR)$@")
>>   -STRING_OBJS = strcat.o strchr.o strcmp.o strcpy.o strlen.o
>> strncmp.o strncpy.o \
>> -  strstr.o memset.o memcpy.o memmove.o memcmp.o
>> +STRING_OBJS = strcat.o strchr.o strrchr.o strcpy.o strlen.o strncpy.o \
>> +  strcmp.o strncmp.o strcasecmp.o strncasecmp.o strstr.o \
>> +  memset.o memcpy.o memmove.o memcmp.o
>>   %.o : $(SLOF_DIR)/lib/libc/string/%.c
>>   $(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@
>> $<,"CC","$(TARGET_DIR)$@")
>>   @@ -50,7 +51,7 @@ libc.a: $(LIBCOBJS)
>>   # libnet files:
>>     LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o
>> bootp.o \
>> -  dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o
>> +  dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o
>>   LIBNETCFLAGS := $(QEMU_CFLAGS) -DDHCPARCH=0x1F $(LIBC_INC)
>> $(LIBNET_INC)
>>     %.o : $(SLOF_DIR)/lib/libnet/%.c
>> diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c
>> index 7533cf7..e84bb2b 100644
>> --- a/pc-bios/s390-ccw/netmain.c
>> +++ b/pc-bios/s390-ccw/netmain.c
>> @@ -30,6 +30,7 @@
>>   #include 
>>   #include 
>>   #include 
>> +#include 
>>     #include "s390-ccw.h"
>>   #include "virtio.h"
>> @@ -41,12 +42,14 @@ extern char _start[];
>>     #define KERNEL_ADDR ((void *)0L)
>>   #define KERNEL_MAX_SIZE ((long)_start)
>> +#define ARCH_COMMAND_LINE_SIZE  896  /* Taken from Linux
>> kernel */
>>     char stack[PAGE_SIZE * 8] __attribute__((aligned(PAGE_SIZE)));
>>   IplParameterBlock iplb __attribute__((aligned(PAGE_SIZE)));
>>   static char cfgbuf[2048];
>>     static SubChannelId net_schid = { .one = 1 };
>> +static uint8_t mac[6];
>>   static uint64_t dest_timer;
>>     static uint64_t get_timer_ms(void)
>> @@ -158,7 +161,6 @@ static int tftp_load(filename_ip_t *fnip, void
>> *buffer, int len)
>>     static int net_init(filename_ip_t *fn_ip)
>>   {
>> -    uint8_t mac[6];
>>   int rc;
>>     memset(fn_ip, 0, sizeof(filename_ip_t));
>> @@ -233,6 +235,66 @@ static void net_release(filename_ip_t *fn_ip)
>>   }
>>     /**
>> + * Load a kernel with initrd (i.e. with the information that we've
>> got from
>> + * a pxelinux.cfg config file)
>> + */
>> +static int load_kernel_with_initrd(filename_ip_t *fn_ip,
>> +   struct pl_cfg_entry *entry)
>> +{
>> +    int rc;
>> +
>> +    printf("Loading pxelinux.cfg entry '%s'\n", entry->label);
>> +
>> +    if (!entry->kernel) {
>> +    printf("Kernel entry is missing!\n");
>> +    return -1;
>> +    }
>> +
>> +    strncpy(fn_ip->filename, entry->kernel, sizeof(fn_ip->filename));
>> +    rc = tftp_load(fn_ip, KERNEL_ADDR, KERNEL_MAX_SIZE);
>> +    if (rc < 0) {
>> +    return rc;
>> +    }
>> +
>> +    if (entry->initrd) {
>> +    uint64_t iaddr = (rc + 0xfff) & ~0xfffUL;
>> +
>> +    strncpy(fn_ip->filename, entry->initrd,
>> sizeof(fn_ip->filename));
>> +    rc = tftp_load(fn_ip, (void *)iaddr, KERNEL_MAX_SIZE - iaddr);
>> +    if (rc < 0) {
>> +    return rc;
>> +    }
>> +    /* Patch location and size: */
>> +    *(uint64_t *)0x10408 = iaddr;
>> +    *(uint64_t *)0x10410 = rc;
>> +    rc += iaddr;
>> +    }
>> +
>> +    if (entry->append) {
>> +    strncpy((char *)0x10480, entry->append, ARCH_COMMAND_LINE_SIZE);
>> +    }
>> +
>> +    return rc;

Re: [Qemu-devel] [PATCH RFC] qemu-error: introduce error_report_once

2018-05-31 Thread Peter Xu
On Thu, May 31, 2018 at 12:03:17PM +0100, Stefan Hajnoczi wrote:
> On Tue, May 15, 2018 at 05:13:56PM +0800, Peter Xu wrote:
> > I stole the printk_once() macro.
> > 
> > I always wanted to be able to print some error directly if there is a
> > buffer to dump, however we can't use error_report() really quite often
> > when there can be any DDOS attack.  To avoid that, we can introduce a
> > print-once function for it.
> 
> I like the idea.  It solves the problem of guest-triggerable error
> messages that we should know about for troubleshooting but can't due to
> DoS.
> 
> Stefan

Thanks for the positive feedback.  Please feel free to have a look on
the latest version:

Subject: [PATCH v4 0/2] error-report: introduce {error|warn}_report_once
Message-Id: <20180524044454.11792-1-pet...@redhat.com>

Regards,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v7 4/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2018-05-31 Thread Wei Wang

On 06/01/2018 01:42 AM, Michael S. Tsirkin wrote:

On Thu, May 31, 2018 at 10:27:00AM +0800, Wei Wang wrote:

On 05/30/2018 08:47 PM, Michael S. Tsirkin wrote:

On Wed, May 30, 2018 at 05:12:09PM +0800, Wei Wang wrote:

On 05/29/2018 11:24 PM, Michael S. Tsirkin wrote:

On Tue, Apr 24, 2018 at 02:13:47PM +0800, Wei Wang wrote:

+/*
+ * Balloon will report pages which were free at the time of this call. As the
+ * reporting happens asynchronously, dirty bit logging must be enabled before
+ * this call is made.
+ */
+void balloon_free_page_start(void)
+{
+balloon_free_page_start_fn(balloon_opaque);
+}

Please create notifier support, not a single global.

OK. The start is called at the end of bitmap_sync, and the stop is called at
the beginning of bitmap_sync. In this case, we will need to add two
migration states, MIGRATION_STATUS_BEFORE_BITMAP_SYNC and
MIGRATION_STATUS_AFTER_BITMAP_SYNC, right?

If that's the way you do it, you need to ask migration guys, not me.

Yeah, I know.. thanks for the virtio part.


+
+static bool virtio_balloon_free_page_support(void *opaque)
+{
+VirtIOBalloon *s = opaque;
+VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
or if poison is negotiated.

Will make it
return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT) &&
!virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)

I mean the reverse:
virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT) ||
virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)


If poison has been negotiated you must migrate the
guest supplied value even if you don't use it for hints.


Just a little confused with the logic. Writing it that way means that we are
taking this possibility "virtio_vdev_has_feature(vdev,
VIRTIO_BALLOON_F_FREE_PAGE_HINT)=fasle, virtio_vdev_has_feature(vdev,
VIRTIO_BALLOON_F_PAGE_POISON)=true" into account, and let the support
function return true when F_FREE_PAGE_HINT isn't supported.

All I am saying is that in this configuration, you must migrate
the poison value programmed by guest even if you do not
yet use it without VIRTIO_BALLOON_F_FREE_PAGE_HINT.

Right now you have
a section:
+.needed = virtio_balloon_free_page_support,

which includes the poison value.

So if guest migrates after writing the poison value,
it's lost. Not nice.


If guest doesn't support F_FREE_PAGE_HINT, it doesn't support the free page
reporting (even the free page vq). I'm not sure why we tell the migration
thread that the free page reporting feature is supported via this support
function. If the support function simply returns false when F_FREE_PAGE_HINT
isn't negotiated, the legacy migration already migrates the poisoned pages
(not skipped, but may be compressed).

I think it would be better to simply use the original "return
virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)" here.


So maybe you should put the poison value in a separate section then.


Yes, that looks good to me, thanks.

Best,
Wei




Re: [Qemu-devel] [PATCH v2 7/8] vhdx: Switch to byte-based calls

2018-05-31 Thread Jeff Cody
On Thu, May 31, 2018 at 03:50:45PM -0500, Eric Blake wrote:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Make the change for the last few sector-based calls
> into the block layer from the vhdx driver.
> 
> Ideally, the vhdx driver should switch to doing everything
> byte-based, but that's a more invasive change that requires a
> bit more auditing.
> 
> Signed-off-by: Eric Blake 

Reviewed-by: Jeff Cody 

> ---
>  block/vhdx.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/block/vhdx.c b/block/vhdx.c
> index 0b1e21c7501..295d3276120 100644
> --- a/block/vhdx.c
> +++ b/block/vhdx.c
> @@ -1126,9 +1126,9 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState 
> *bs, int64_t sector_num,
>  break;
>  case PAYLOAD_BLOCK_FULLY_PRESENT:
>  qemu_co_mutex_unlock(&s->lock);
> -ret = bdrv_co_readv(bs->file,
> -sinfo.file_offset >> BDRV_SECTOR_BITS,
> -sinfo.sectors_avail, &hd_qiov);
> +ret = bdrv_co_preadv(bs->file, sinfo.file_offset,
> + sinfo.sectors_avail * BDRV_SECTOR_SIZE,
> + &hd_qiov, 0);
>  qemu_co_mutex_lock(&s->lock);
>  if (ret < 0) {
>  goto exit;
> @@ -1348,9 +1348,9 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState 
> *bs, int64_t sector_num,
>  }
>  /* block exists, so we can just overwrite it */
>  qemu_co_mutex_unlock(&s->lock);
> -ret = bdrv_co_writev(bs->file,
> -sinfo.file_offset >> BDRV_SECTOR_BITS,
> -sectors_to_write, &hd_qiov);
> +ret = bdrv_co_pwritev(bs->file, sinfo.file_offset,
> +  sectors_to_write * BDRV_SECTOR_SIZE,
> +  &hd_qiov, 0);
>  qemu_co_mutex_lock(&s->lock);
>  if (ret < 0) {
>  goto error_bat_restore;
> -- 
> 2.14.3
> 



[Qemu-devel] [PATCH] docker: Update fedora image to 28

2018-05-31 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 tests/docker/dockerfiles/fedora.docker | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/docker/dockerfiles/fedora.docker 
b/tests/docker/dockerfiles/fedora.docker
index b706f42405..65d7761cf5 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -1,4 +1,4 @@
-FROM fedora:27
+FROM fedora:28
 ENV PACKAGES \
 ccache gettext git tar PyYAML sparse flex bison python3 bzip2 hostname \
 gcc gcc-c++ llvm clang make perl which bc findutils glib2-devel \
-- 
2.14.3




Re: [Qemu-devel] [PATCH v7 00/10] qemu-img convert with copy offloading

2018-05-31 Thread Fam Zheng
On Wed, 05/30 17:06, Stefan Hajnoczi wrote:
> On Tue, May 29, 2018 at 01:59:49PM +0800, Fam Zheng wrote:
> > v7: Fix qcow2.
> > 
> > v6: Pick up rev-by from Stefan and Eric.
> > Tweak patch 2 commit message.
> > 
> > v5: - Fix raw offset/bytes check for read. [Eric]
> > - Fix qcow2_handle_l2meta. [Stefan]
> > - Add coroutine_fn whereever appropriate. [Stefan]
> > 
> > v4: - Fix raw offset and size. [Eric]
> > - iscsi: Drop unnecessary return values and variables in favor of
> >   constants. [Stefan]
> > - qcow2: Handle small backing case. [Stefan]
> > - file-posix: Translate ENOSYS to ENOTSUP. [Stefan]
> > - API documentation and commit message. [Stefan]
> > - Add rev-by to patches 3, 5 - 10. [Stefan, Eric]
> > 
> > This series introduces block layer API for copy offloading and makes use of 
> > it
> > in qemu-img convert.
> > 
> > For now we implemented the operation in local file protocol with
> > copy_file_range(2).  Besides that it's possible to add similar to iscsi, nfs
> > and potentially more.
> > 
> > As far as its usage goes, in addition to qemu-img convert, we can emulate
> > offloading in scsi-disk (handle EXTENDED COPY command), and use the API in
> > block jobs too.
> 
> Fails to compile on Fedora 28.
> 
> Oops, I guess my glibc is too new.  This type of bug doesn't happen very
> often :D :D :D.
> 
> block/file-posix.c:1452:14: error: static declaration of ‘copy_file_range’ 
> follows non-static declaration
>  static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
>   ^~~
> In file included from /home/stefanha/qemu/include/qemu/osdep.h:75,
>  from block/file-posix.c:25:
> /usr/include/unistd.h:1107:9: note: previous declaration of ‘copy_file_range’ 
> was here
>  ssize_t copy_file_range (int __infd, __off64_t *__pinoff,
>  ^~~

It means it's time to update our Fedora docker image! Will fix this by adding a
configure test.

Fam



Re: [Qemu-devel] [Qemu-block] [PATCH v2 1/8] parallels: Switch to byte-based calls

2018-05-31 Thread Jeff Cody
On Thu, May 31, 2018 at 03:50:39PM -0500, Eric Blake wrote:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Make the change for the last few sector-based calls
> into the block layer from the parallels driver.
> 
> Ideally, the parallels driver should switch to doing everything
> byte-based, but that's a more invasive change that requires a
> bit more auditing.
> 
> Signed-off-by: Eric Blake 
> Reviewed-by: Stefan Hajnoczi 
> Reviewed-by: Denis V. Lunev 

Reviewed-by: Jeff Cody 

> ---
>  block/parallels.c | 16 ++--
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/block/parallels.c b/block/parallels.c
> index 6e9c37f44e1..a761c896d35 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -226,14 +226,15 @@ static int64_t allocate_clusters(BlockDriverState *bs, 
> int64_t sector_num,
>  };
>  qemu_iovec_init_external(&qiov, &iov, 1);
> 
> -ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors,
> -&qiov);
> +ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE,
> + nb_cow_bytes, &qiov, 0);
>  if (ret < 0) {
>  qemu_vfree(iov.iov_base);
>  return ret;
>  }
> 
> -ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov);
> +ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE,
> +  nb_cow_bytes, &qiov, 0);
>  qemu_vfree(iov.iov_base);
>  if (ret < 0) {
>  return ret;
> @@ -339,7 +340,8 @@ static coroutine_fn int 
> parallels_co_writev(BlockDriverState *bs,
>  qemu_iovec_reset(&hd_qiov);
>  qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
> 
> -ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
> +ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes,
> +  &hd_qiov, 0);
>  if (ret < 0) {
>  break;
>  }
> @@ -378,7 +380,8 @@ static coroutine_fn int 
> parallels_co_readv(BlockDriverState *bs,
> 
>  if (position < 0) {
>  if (bs->backing) {
> -ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
> +ret = bdrv_co_preadv(bs->backing, sector_num * 
> BDRV_SECTOR_SIZE,
> + nbytes, &hd_qiov, 0);
>  if (ret < 0) {
>  break;
>  }
> @@ -386,7 +389,8 @@ static coroutine_fn int 
> parallels_co_readv(BlockDriverState *bs,
>  qemu_iovec_memset(&hd_qiov, 0, 0, nbytes);
>  }
>  } else {
> -ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
> +ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, 
> nbytes,
> + &hd_qiov, 0);
>  if (ret < 0) {
>  break;
>  }
> -- 
> 2.14.3
> 
> 



[Qemu-devel] [PATCH v2 16/20] 9p: darwin: Compatibility for f/l*xattr

2018-05-31 Thread Keno Fischer
On darwin `fgetxattr` takes two extra optional arguments,
and the l* variants are not defined (in favor of an extra
flag to the regular variants.

Signed-off-by: Keno Fischer 
---

Changes from v1: New patch, qemu_fgetxattr had previously been
 moved to 9p-util as fgetxattr_follow. The remaining functions
 are required by the proxy-helper.

 Makefile|  6 ++
 fsdev/virtfs-proxy-helper.c |  9 +
 hw/9pfs/9p-local.c  | 12 
 hw/9pfs/9p-util.h   | 17 +
 4 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 6d588d1..dac6efd 100644
--- a/Makefile
+++ b/Makefile
@@ -544,7 +544,13 @@ qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o 
$(COMMON_LDADDS)
 qemu-keymap$(EXESUF): qemu-keymap.o ui/input-keymap.o $(COMMON_LDADDS)
 
 fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o 
fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS)
+ifdef CONFIG_DARWIN
+fsdev/virtfs-proxy-helper$(EXESUF): hw/9pfs/9p-util-darwin.o
+endif
+ifdef CONFIG_LINUX
+fsdev/virtfs-proxy-helper$(EXESUF): hw/9pfs/9p-util-linux.o
 fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
+endif
 
 scsi/qemu-pr-helper$(EXESUF): scsi/qemu-pr-helper.o scsi/utils.o 
$(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS)
 ifdef CONFIG_MPATH
diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index 3bc1269..a26f8b8 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -28,6 +28,7 @@
 #include "qemu/statfs.h"
 #include "9p-iov-marshal.h"
 #include "hw/9pfs/9p-proxy.h"
+#include "hw/9pfs/9p-util.h"
 #include "fsdev/9p-iov-marshal.h"
 
 #define PROGNAME "virtfs-proxy-helper"
@@ -459,7 +460,7 @@ static int do_getxattr(int type, struct iovec *iovec, 
struct iovec *out_iovec)
 v9fs_string_init(&name);
 retval = proxy_unmarshal(iovec, offset, "s", &name);
 if (retval > 0) {
-retval = lgetxattr(path.data, name.data, xattr.data, size);
+retval = qemu_lgetxattr(path.data, name.data, xattr.data, size);
 if (retval < 0) {
 retval = -errno;
 } else {
@@ -469,7 +470,7 @@ static int do_getxattr(int type, struct iovec *iovec, 
struct iovec *out_iovec)
 v9fs_string_free(&name);
 break;
 case T_LLISTXATTR:
-retval = llistxattr(path.data, xattr.data, size);
+retval = qemu_llistxattr(path.data, xattr.data, size);
 if (retval < 0) {
 retval = -errno;
 } else {
@@ -1000,7 +1001,7 @@ static int process_requests(int sock)
 retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sssdd", &path,
  &name, &value, &size, &flags);
 if (retval > 0) {
-retval = lsetxattr(path.data,
+retval = qemu_lsetxattr(path.data,
name.data, value.data, size, flags);
 if (retval < 0) {
 retval = -errno;
@@ -1016,7 +1017,7 @@ static int process_requests(int sock)
 retval = proxy_unmarshal(&in_iovec,
  PROXY_HDR_SZ, "ss", &path, &name);
 if (retval > 0) {
-retval = lremovexattr(path.data, name.data);
+retval = qemu_lremovexattr(path.data, name.data);
 if (retval < 0) {
 retval = -errno;
 }
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 1f0b1b0..7830526 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -776,16 +776,20 @@ static int local_fstat(FsContext *fs_ctx, int fid_type,
 mode_t tmp_mode;
 dev_t tmp_dev;
 
-if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.uid",
+   &tmp_uid, sizeof(uid_t)) > 0) {
 stbuf->st_uid = le32_to_cpu(tmp_uid);
 }
-if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.gid",
+   &tmp_gid, sizeof(gid_t)) > 0) {
 stbuf->st_gid = le32_to_cpu(tmp_gid);
 }
-if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, sizeof(mode_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.mode",
+   &tmp_mode, sizeof(mode_t)) > 0) {
 stbuf->st_mode = le32_to_cpu(tmp_mode);
 }
-if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.rdev",
+   &tmp_dev, sizeof(dev_t)) > 0) {
 stbuf->st_rdev = le64_to_cpu(tmp_dev);
 }
 } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 79ed6b2..50a03c7 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -19,6 +19,23 @@
 #define O_PATH_9P_UTIL 0
 #endif
 
+#ifdef 

[Qemu-devel] [PATCH v2 20/20] 9p: darwin: configure: Allow VirtFS on Darwin

2018-05-31 Thread Keno Fischer
Signed-off-by: Keno Fischer 
---

Changes from v1: Now builds the proxy-helper on Darwin.

 Makefile.objs |  1 +
 configure | 22 +++---
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index c6c3554..a2245c9 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -104,6 +104,7 @@ common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
 
 common-obj-$(CONFIG_LINUX) += fsdev/
+common-obj-$(CONFIG_DARWIN) += fsdev/
 
 common-obj-y += migration/
 
diff --git a/configure b/configure
index a6a4616..4808459 100755
--- a/configure
+++ b/configure
@@ -5535,16 +5535,28 @@ if test "$want_tools" = "yes" ; then
   fi
 fi
 if test "$softmmu" = yes ; then
-  if test "$linux" = yes; then
-if test "$virtfs" != no && test "$cap" = yes && test "$attr" = yes ; then
+  if test "$virtfs" != no; then
+if test "$linux" = yes; then
+  if test "$cap" = yes && test "$attr" = yes ; then
+virtfs=yes
+tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)"
+  else
+if test "$virtfs" = yes; then
+  error_exit "VirtFS requires libcap devel and libattr devel under 
Linux"
+fi
+virtfs=no
+  fi
+elif test "$darwin" = yes; then
   virtfs=yes
   tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)"
 else
   if test "$virtfs" = yes; then
-error_exit "VirtFS requires libcap devel and libattr devel"
+error_exit "VirtFS is supported only on Linux and Darwin"
   fi
   virtfs=no
 fi
+  fi
+  if test "$linux" = yes; then
 if test "$mpath" != no && test "$mpathpersist" = yes ; then
   mpath=yes
 else
@@ -,10 +5567,6 @@ if test "$softmmu" = yes ; then
 fi
 tools="$tools scsi/qemu-pr-helper\$(EXESUF)"
   else
-if test "$virtfs" = yes; then
-  error_exit "VirtFS is supported only on Linux"
-fi
-virtfs=no
 if test "$mpath" = yes; then
   error_exit "Multipath is supported only on Linux"
 fi
-- 
2.8.1




[Qemu-devel] [PATCH v2 18/20] 9p: darwin: Implement compatibility for mknodat

2018-05-31 Thread Keno Fischer
Darwin does not support mknodat. However, to avoid race conditions
with later setting the permissions, we must avoid using mknod on
the full path instead. We could try to fchdir, but that would cause
problems if multiple threads try to call mknodat at the same time.
However, luckily there is a solution: Darwin as an (unexposed in the
C library) system call that sets the cwd for the current thread only.
This should suffice to use mknod safely.

Signed-off-by: Keno Fischer 
---

Changes from v1: New patch. The previous series marked mknodat unsupported.

 hw/9pfs/9p-local.c   |  5 +++--
 hw/9pfs/9p-util-darwin.c | 25 +
 hw/9pfs/9p-util-linux.c  |  5 +
 hw/9pfs/9p-util.h|  2 ++
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 47e8580..c7a2b08 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -668,7 +668,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 
 if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
 fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
+err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
 if (err == -1) {
 goto out;
 }
@@ -683,7 +683,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 }
 } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
fs_ctx->export_flags & V9FS_SM_NONE) {
-err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
+err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
 if (err == -1) {
 goto out;
 }
@@ -696,6 +696,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 
 err_end:
 unlinkat_preserve_errno(dirfd, name, 0);
+
 out:
 close_preserve_errno(dirfd);
 return err;
diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
index ac414bc..49fe7d3 100644
--- a/hw/9pfs/9p-util-darwin.c
+++ b/hw/9pfs/9p-util-darwin.c
@@ -158,3 +158,28 @@ done:
 close_preserve_errno(fd);
 return ret;
 }
+
+#ifndef SYS___pthread_fchdir
+# define SYS___pthread_fchdir 349
+#endif
+
+static int fchdir_thread_local(int fd)
+{
+return syscall(SYS___pthread_fchdir, fd);
+}
+
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+int preserved_errno, err;
+if (fchdir_thread_local(dirfd) < 0) {
+return -1;
+}
+err = mknod(filename, mode, dev);
+preserved_errno = errno;
+/* Stop using the thread-local cwd */
+fchdir_thread_local(-1);
+if (err < 0) {
+errno = preserved_errno;
+}
+return err;
+}
diff --git a/hw/9pfs/9p-util-linux.c b/hw/9pfs/9p-util-linux.c
index 3902378..06399c5 100644
--- a/hw/9pfs/9p-util-linux.c
+++ b/hw/9pfs/9p-util-linux.c
@@ -63,3 +63,8 @@ int utimensat_nofollow(int dirfd, const char *filename,
 {
 return utimensat(dirfd, filename, times, AT_SYMLINK_NOFOLLOW);
 }
+
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+return mknodat(dirfd, filename, mode, dev);
+}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index b1dc08a..127564d 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -90,4 +90,6 @@ ssize_t fremovexattrat_nofollow(int dirfd, const char 
*filename,
 int utimensat_nofollow(int dirfd, const char *filename,
const struct timespec times[2]);
 
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev);
+
 #endif
-- 
2.8.1




[Qemu-devel] [PATCH v6] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR

2018-05-31 Thread Shannon Zhao
While we skip the GIC_INTERNAL irqs, we don't change the register offset
accordingly. This will overlap the GICR registers value and leave the
last GIC_INTERNAL irq's registers out of update.

Fix this by skipping the registers banked by GICR.

Also for migration compatibility if the migration source (old version
qemu) doesn't send gicd_no_migration_shift_bug = 1 to destination, then
we shift the data of PPI to get the right data for SPI.

Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
Cc: qemu-sta...@nongnu.org
Reviewed-by: Eric Auger 
Reviewed-by: Peter Maydell 
Signed-off-by: Shannon Zhao 
---
V6: Fix typos and keep same with offset for clroffset 
---
 hw/intc/arm_gicv3_common.c | 79 ++
 hw/intc/arm_gicv3_kvm.c| 38 ++
 include/hw/intc/arm_gicv3_common.h |  1 +
 3 files changed, 118 insertions(+)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 7b54d52..864b7c6 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -27,6 +27,7 @@
 #include "hw/intc/arm_gicv3_common.h"
 #include "gicv3_internal.h"
 #include "hw/arm/linux-boot-if.h"
+#include "sysemu/kvm.h"
 
 static int gicv3_pre_save(void *opaque)
 {
@@ -141,6 +142,79 @@ static const VMStateDescription vmstate_gicv3_cpu = {
 }
 };
 
+static int gicv3_gicd_no_migration_shift_bug_pre_load(void *opaque)
+{
+GICv3State *cs = opaque;
+
+   /*
+* The gicd_no_migration_shift_bug flag is used for migration compatibility
+* for old version QEMU which may have the GICD bmp shift bug under KVM 
mode.
+* Strictly, what we want to know is whether the migration source is using
+* KVM. Since we don't have any way to determine that, we look at whether 
the
+* destination is using KVM; this is close enough because for the older QEMU
+* versions with this bug KVM -> TCG migration didn't work anyway. If the
+* source is a newer QEMU without this bug it will transmit the migration
+* subsection which sets the flag to true; otherwise it will remain set to
+* the value we select here.
+*/
+if (kvm_enabled()) {
+cs->gicd_no_migration_shift_bug = false;
+}
+
+return 0;
+}
+
+static int gicv3_gicd_no_migration_shift_bug_post_load(void *opaque,
+   int version_id)
+{
+GICv3State *cs = opaque;
+
+if (cs->gicd_no_migration_shift_bug) {
+return 0;
+}
+
+/* Older versions of QEMU had a bug in the handling of state save/restore
+ * to the KVM GICv3: they got the offset in the bitmap arrays wrong,
+ * so that instead of the data for external interrupts 32 and up
+ * starting at bit position 32 in the bitmap, it started at bit
+ * position 64. If we're receiving data from a QEMU with that bug,
+ * we must move the data down into the right place.
+ */
+memmove(cs->group, (uint8_t *)cs->group + GIC_INTERNAL / 8,
+sizeof(cs->group) - GIC_INTERNAL / 8);
+memmove(cs->grpmod, (uint8_t *)cs->grpmod + GIC_INTERNAL / 8,
+sizeof(cs->grpmod) - GIC_INTERNAL / 8);
+memmove(cs->enabled, (uint8_t *)cs->enabled + GIC_INTERNAL / 8,
+sizeof(cs->enabled) - GIC_INTERNAL / 8);
+memmove(cs->pending, (uint8_t *)cs->pending + GIC_INTERNAL / 8,
+sizeof(cs->pending) - GIC_INTERNAL / 8);
+memmove(cs->active, (uint8_t *)cs->active + GIC_INTERNAL / 8,
+sizeof(cs->active) - GIC_INTERNAL / 8);
+memmove(cs->edge_trigger, (uint8_t *)cs->edge_trigger + GIC_INTERNAL / 8,
+sizeof(cs->edge_trigger) - GIC_INTERNAL / 8);
+
+/*
+ * While this new version QEMU doesn't have this kind of bug as we fix it,
+ * so it needs to set the flag to true to indicate that and it's necessary
+ * for next migration to work from this new version QEMU.
+ */
+cs->gicd_no_migration_shift_bug = true;
+
+return 0;
+}
+
+const VMStateDescription vmstate_gicv3_gicd_no_migration_shift_bug = {
+.name = "arm_gicv3/gicd_no_migration_shift_bug",
+.version_id = 1,
+.minimum_version_id = 1,
+.pre_load = gicv3_gicd_no_migration_shift_bug_pre_load,
+.post_load = gicv3_gicd_no_migration_shift_bug_post_load,
+.fields = (VMStateField[]) {
+VMSTATE_BOOL(gicd_no_migration_shift_bug, GICv3State),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_gicv3 = {
 .name = "arm_gicv3",
 .version_id = 1,
@@ -165,6 +239,10 @@ static const VMStateDescription vmstate_gicv3 = {
 VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
  vmstate_gicv3_cpu, GICv3CPUState),
 VMSTATE_END_OF_LIST()
+},
+.subsections = (const VMStateDescription * []) {
+&vmstate_gicv3_gicd_no_migration_shift_bug,
+NULL
 }
 };
 
@@ -364,6 +442,7 @@ static void arm_gicv3_common_reset(DeviceState *dev)
 gicv3_gicd_group_

[Qemu-devel] [PATCH v2 19/20] 9p: darwin: virtfs-proxy: Implement setuid code for darwin

2018-05-31 Thread Keno Fischer
Darwin does not have linux capabilities, so make that code linux-only.
Darwin also does not have setresuid/gid. The correct way to temporarily
drop capabilities is to call seteuid/gid.

Also factor out the code that acquires acquire_dac_override into a separate
function in the linux implementation. I had originally done this when
I thought it made sense to have only one `setugid` function, but I retained
this because it seems clearer this way.

Signed-off-by: Keno Fischer 
---

Changes from v1: New patch.

 fsdev/virtfs-proxy-helper.c | 200 +++-
 1 file changed, 125 insertions(+), 75 deletions(-)

diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index d8dd3f5..6baf2a6 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -82,6 +82,7 @@ static void do_perror(const char *string)
 }
 }
 
+#ifdef CONFIG_LINUX
 static int do_cap_set(cap_value_t *cap_value, int size, int reset)
 {
 cap_t caps;
@@ -121,6 +122,85 @@ error:
 return -1;
 }
 
+static int acquire_dac_override(void)
+{
+cap_value_t cap_list[] = {
+CAP_DAC_OVERRIDE,
+};
+return do_cap_set(cap_list, ARRAY_SIZE(cap_list), 0);
+}
+
+/*
+ * from man 7 capabilities, section
+ * Effect of User ID Changes on Capabilities:
+ * If the effective user ID is changed from nonzero to 0, then the permitted
+ * set is copied to the effective set.  If the effective user ID is changed
+ * from 0 to nonzero, then all capabilities are are cleared from the effective
+ * set.
+ *
+ * The setfsuid/setfsgid man pages warn that changing the effective user ID may
+ * expose the program to unwanted signals, but this is not true anymore: for an
+ * unprivileged (without CAP_KILL) program to send a signal, the real or
+ * effective user ID of the sending process must equal the real or saved user
+ * ID of the target process.  Even when dropping privileges, it is enough to
+ * keep the saved UID to a "privileged" value and virtfs-proxy-helper won't
+ * be exposed to signals.  So just use setresuid/setresgid.
+ */
+static int setugid(int uid, int gid, int *suid, int *sgid)
+{
+int retval;
+
+*suid = geteuid();
+*sgid = getegid();
+
+if (setresgid(-1, gid, *sgid) == -1) {
+retval = -errno;
+goto err_out;
+}
+
+if (setresuid(-1, uid, *suid) == -1) {
+retval = -errno;
+goto err_sgid;
+}
+
+if (uid != 0 || gid != 0) {
+/*
+* We still need DAC_OVERRIDE because we don't change
+* supplementary group ids, and hence may be subjected DAC rules
+*/
+if (acquire_dac_override() < 0) {
+retval = -errno;
+goto err_suid;
+}
+}
+return 0;
+
+err_suid:
+if (setresuid(-1, *suid, *suid) == -1) {
+abort();
+}
+err_sgid:
+if (setresgid(-1, *sgid, *sgid) == -1) {
+abort();
+}
+err_out:
+return retval;
+}
+
+/*
+ * This is used to reset the ugid back with the saved values
+ * There is nothing much we can do checking error values here.
+ */
+static void resetugid(int suid, int sgid)
+{
+if (setresgid(-1, sgid, sgid) == -1) {
+abort();
+}
+if (setresuid(-1, suid, suid) == -1) {
+abort();
+}
+}
+
 static int init_capabilities(void)
 {
 /* helper needs following capabilities only */
@@ -135,6 +215,51 @@ static int init_capabilities(void)
 };
 return do_cap_set(cap_list, ARRAY_SIZE(cap_list), 1);
 }
+#else
+static int setugid(int uid, int gid, int *suid, int *sgid)
+{
+int retval;
+
+*suid = geteuid();
+*sgid = getegid();
+
+if (setegid(gid) == -1) {
+retval = -errno;
+goto err_out;
+}
+
+if (seteuid(uid) == -1) {
+retval = -errno;
+goto err_sgid;
+}
+
+err_sgid:
+if (setgid(*sgid) == -1) {
+abort();
+}
+err_out:
+return retval;
+}
+
+/*
+ * This is used to reset the ugid back with the saved values
+ * There is nothing much we can do checking error values here.
+ */
+static void resetugid(int suid, int sgid)
+{
+if (setegid(sgid) == -1) {
+abort();
+}
+if (seteuid(suid) == -1) {
+abort();
+}
+}
+
+static int init_capabilities(void)
+{
+return 0;
+}
+#endif
 
 static int socket_read(int sockfd, void *buff, ssize_t size)
 {
@@ -279,81 +404,6 @@ static int send_status(int sockfd, struct iovec *iovec, 
int status)
 }
 
 /*
- * from man 7 capabilities, section
- * Effect of User ID Changes on Capabilities:
- * If the effective user ID is changed from nonzero to 0, then the permitted
- * set is copied to the effective set.  If the effective user ID is changed
- * from 0 to nonzero, then all capabilities are are cleared from the effective
- * set.
- *
- * The setfsuid/setfsgid man pages warn that changing the effective user ID may
- * expose the program to unwanted signals, but this is not true anymore: for an
- * unprivileged (without CAP_KILL) program to send a signa

[Qemu-devel] [PATCH v2 12/20] 9p: darwin: Explicitly cast comparisons of mode_t with -1

2018-05-31 Thread Keno Fischer
Comparisons of mode_t with -1 require an explicit cast, since mode_t
is unsigned on Darwin.

Signed-off-by: Keno Fischer 
---

Changes from v1: Split from strchrnul change

 hw/9pfs/9p-local.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 6222891..1f0b1b0 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -310,7 +310,7 @@ update_map_file:
 if (credp->fc_gid != -1) {
 gid = credp->fc_gid;
 }
-if (credp->fc_mode != -1) {
+if (credp->fc_mode != (mode_t)-1) {
 mode = credp->fc_mode;
 }
 if (credp->fc_rdev != -1) {
@@ -416,7 +416,7 @@ static int local_set_xattrat(int dirfd, const char *path, 
FsCred *credp)
 return err;
 }
 }
-if (credp->fc_mode != -1) {
+if (credp->fc_mode != (mode_t)-1) {
 uint32_t tmp_mode = cpu_to_le32(credp->fc_mode);
 err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode,
sizeof(mode_t), 0);
-- 
2.8.1




[Qemu-devel] [PATCH v2 15/20] 9p: darwin: *xattr_nofollow implementations

2018-05-31 Thread Keno Fischer
This implements the darwin equivalent of the functions that were
moved to 9p-util(-linux) earlier in this series in the new
9p-util-darwin file.

Signed-off-by: Keno Fischer 
---

Changes from v1:
 * New 9p-util-darwin.c rather than ifdefs in 9p-util.c
 * Drop incorrect AT_NOFOLLOW from the actual call

 hw/9pfs/9p-util-darwin.c | 64 
 hw/9pfs/Makefile.objs|  1 +
 2 files changed, 65 insertions(+)
 create mode 100644 hw/9pfs/9p-util-darwin.c

diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
new file mode 100644
index 000..cdb4c9e
--- /dev/null
+++ b/hw/9pfs/9p-util-darwin.c
@@ -0,0 +1,64 @@
+/*
+ * 9p utilities (Darwin Implementation)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size)
+{
+int ret;
+int fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = fgetxattr(fd, name, value, size, 0, 0);
+close_preserve_errno(fd);
+return ret;
+}
+
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+  char *list, size_t size)
+{
+int ret;
+int fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = flistxattr(fd, list, size, 0);
+close_preserve_errno(fd);
+return ret;
+}
+
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+const char *name)
+{
+int ret;
+int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = fremovexattr(fd, name, 0);
+close_preserve_errno(fd);
+return ret;
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size, int flags)
+{
+int ret;
+int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = fsetxattr(fd, name, value, size, 0, flags);
+close_preserve_errno(fd);
+return ret;
+}
diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs
index 083508f..24a8695 100644
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,5 +1,6 @@
 common-obj-y  = 9p.o
 common-obj-$(CONFIG_LINUX) += 9p-util-linux.o
+common-obj-$(CONFIG_DARWIN) += 9p-util-darwin.o
 common-obj-y += 9p-local.o 9p-xattr.o
 common-obj-y += 9p-xattr-user.o 9p-posix-acl.o
 common-obj-y += coth.o cofs.o codir.o cofile.o
-- 
2.8.1




[Qemu-devel] [PATCH v2 17/20] 9p: darwin: Provide a fallback implementation for utimensat

2018-05-31 Thread Keno Fischer
This function is new in Mac OS 10.13. Provide a fallback implementation
when building against older SDKs. The complication in the definition comes
having to separately handle the used SDK version and the target OS version.

- If the SDK version is too low (__MAC_10_13 not defined), utimensat is not
  defined in the header, so we must not try to use it (doing so would error).
- Otherwise, if the targetted OS version is at least 10.13, we know this
  function is available, so we can unconditionally call it.
- Lastly, we check for the availability of the __builtin_available macro to
  potentially insert a dynamic check for this OS version. However, 
__builtin_available
  is only available with sufficiently recent versions of clang and while all
  Apple clang versions that ship with Xcode versions that support the 10.13
  SDK support with builtin, we want to allow building with compilers other
  than Apple clang that may not support this builtin.

Signed-off-by: Keno Fischer 
---

Changes from v1:
 * Correct calculation of tv_usec
 * Support UTIME_NOW/UTIME/OMIT
 * Now covers fsdev/virtfs-proxy-helper.c

 fsdev/virtfs-proxy-helper.c |  3 +-
 hw/9pfs/9p-local.c  |  2 +-
 hw/9pfs/9p-util-darwin.c| 96 +
 hw/9pfs/9p-util-linux.c |  6 +++
 hw/9pfs/9p-util.h   |  8 
 hw/9pfs/9p.c|  1 +
 6 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index a26f8b8..d8dd3f5 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -957,8 +957,7 @@ static int process_requests(int sock)
  &spec[0].tv_sec, &spec[0].tv_nsec,
  &spec[1].tv_sec, &spec[1].tv_nsec);
 if (retval > 0) {
-retval = utimensat(AT_FDCWD, path.data, spec,
-   AT_SYMLINK_NOFOLLOW);
+retval = utimensat_nofollow(AT_FDCWD, path.data, spec);
 if (retval < 0) {
 retval = -errno;
 }
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 7830526..47e8580 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -1071,7 +1071,7 @@ static int local_utimensat(FsContext *s, V9fsPath 
*fs_path,
 goto out;
 }
 
-ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW);
+ret = utimensat_nofollow(dirfd, name, buf);
 close_preserve_errno(dirfd);
 out:
 g_free(dirpath);
diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
index cdb4c9e..ac414bc 100644
--- a/hw/9pfs/9p-util-darwin.c
+++ b/hw/9pfs/9p-util-darwin.c
@@ -62,3 +62,99 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, 
const char *name,
 close_preserve_errno(fd);
 return ret;
 }
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+static int update_times_from_stat(int fd, struct timespec times[2],
+  int update0, int update1)
+{
+struct stat buf;
+int ret = fstat(fd, &buf);
+if (ret == -1) {
+return ret;
+}
+if (update0) {
+times[0] = buf.st_atimespec;
+}
+if (update1) {
+times[1] = buf.st_mtimespec;
+}
+return 0;
+}
+
+int utimensat_nofollow(int dirfd, const char *filename,
+   const struct timespec times_in[2])
+{
+int ret, fd;
+int special0, special1;
+struct timeval futimes_buf[2];
+struct timespec times[2];
+memcpy(times, times_in, 2 * sizeof(struct timespec));
+
+/* Check whether we have an SDK version that defines utimensat */
+#if defined(__MAC_10_13)
+# if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_13
+#  define UTIMENSAT_AVAILABLE 1
+# elif __has_builtin(__builtin_available)
+#  define UTIMENSAT_AVAILABLE __builtin_available(macos 10.13, *)
+# else
+#  define UTIMENSAT_AVAILABLE 0
+# endif
+if (UTIMENSAT_AVAILABLE) {
+return utimensat(dirfd, filename, times, AT_SYMLINK_NOFOLLOW);
+}
+#endif
+
+/* utimensat not available. Use futimes. */
+fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+
+special0 = times[0].tv_nsec == UTIME_OMIT;
+special1 = times[1].tv_nsec == UTIME_OMIT;
+if (special0 || special1) {
+/* If both are set, nothing to do */
+if (special0 && special1) {
+ret = 0;
+goto done;
+}
+
+ret = update_times_from_stat(fd, times, special0, special1);
+if (ret < 0) {
+goto done;
+}
+}
+
+special0 = times[0].tv_nsec == UTIME_NOW;
+special1 = times[1].tv_nsec == UTIME_NOW;
+if (special0 || special1) {
+ret = futimes(fd, NULL);
+if (ret < 0) {
+goto done;
+}
+
+/* If both are set, we are done */
+if (special0 && special1) {
+ret = 0;
+goto done;
+}
+
+   

[Qemu-devel] [PATCH v2 10/20] 9p: darwin: Handle struct stat(fs) differences

2018-05-31 Thread Keno Fischer
Signed-off-by: Keno Fischer 
---

Changes since v1:
 * Now also covers fsdev/virtfs-proxy-helper.c

 fsdev/virtfs-proxy-helper.c | 14 +++---
 hw/9pfs/9p-proxy.c  | 17 ++---
 hw/9pfs/9p-synth.c  |  2 ++
 hw/9pfs/9p.c| 16 ++--
 4 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index 94fb069..3bc1269 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -506,12 +506,15 @@ static void stat_to_prstat(ProxyStat *pr_stat, struct 
stat *stat)
 pr_stat->st_size = stat->st_size;
 pr_stat->st_blksize = stat->st_blksize;
 pr_stat->st_blocks = stat->st_blocks;
+#ifdef CONFIG_DARWIN
+pr_stat->st_atim_nsec = stat->st_atimespec.tv_nsec;
+pr_stat->st_mtim_nsec = stat->st_mtimespec.tv_nsec;
+pr_stat->st_ctim_nsec = stat->st_ctimespec.tv_nsec;
+#else
 pr_stat->st_atim_sec = stat->st_atim.tv_sec;
-pr_stat->st_atim_nsec = stat->st_atim.tv_nsec;
 pr_stat->st_mtim_sec = stat->st_mtim.tv_sec;
-pr_stat->st_mtim_nsec = stat->st_mtim.tv_nsec;
 pr_stat->st_ctim_sec = stat->st_ctim.tv_sec;
-pr_stat->st_ctim_nsec = stat->st_ctim.tv_nsec;
+#endif
 }
 
 static void statfs_to_prstatfs(ProxyStatFS *pr_stfs, struct statfs *stfs)
@@ -524,10 +527,15 @@ static void statfs_to_prstatfs(ProxyStatFS *pr_stfs, 
struct statfs *stfs)
 pr_stfs->f_bavail = stfs->f_bavail;
 pr_stfs->f_files = stfs->f_files;
 pr_stfs->f_ffree = stfs->f_ffree;
+#ifdef CONFIG_DARWIN
+pr_stfs->f_fsid[0] = stfs->f_fsid.val[0];
+pr_stfs->f_fsid[1] = stfs->f_fsid.val[1];
+#else
 pr_stfs->f_fsid[0] = stfs->f_fsid.__val[0];
 pr_stfs->f_fsid[1] = stfs->f_fsid.__val[1];
 pr_stfs->f_namelen = stfs->f_namelen;
 pr_stfs->f_frsize = stfs->f_frsize;
+#endif
 }
 
 /*
diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c
index 47a94e0..8a2c174 100644
--- a/hw/9pfs/9p-proxy.c
+++ b/hw/9pfs/9p-proxy.c
@@ -117,10 +117,15 @@ static void prstatfs_to_statfs(struct statfs *stfs, 
ProxyStatFS *prstfs)
 stfs->f_bavail = prstfs->f_bavail;
 stfs->f_files = prstfs->f_files;
 stfs->f_ffree = prstfs->f_ffree;
+#ifdef CONFIG_DARWIN
+stfs->f_fsid.val[0] = prstfs->f_fsid[0] & 0xU;
+stfs->f_fsid.val[1] = prstfs->f_fsid[1] >> 32 & 0xU;
+#else
 stfs->f_fsid.__val[0] = prstfs->f_fsid[0] & 0xU;
 stfs->f_fsid.__val[1] = prstfs->f_fsid[1] >> 32 & 0xU;
 stfs->f_namelen = prstfs->f_namelen;
 stfs->f_frsize = prstfs->f_frsize;
+#endif
 }
 
 /* Converts proxy_stat structure to VFS stat structure */
@@ -137,12 +142,18 @@ static void prstat_to_stat(struct stat *stbuf, ProxyStat 
*prstat)
stbuf->st_size = prstat->st_size;
stbuf->st_blksize = prstat->st_blksize;
stbuf->st_blocks = prstat->st_blocks;
-   stbuf->st_atim.tv_sec = prstat->st_atim_sec;
-   stbuf->st_atim.tv_nsec = prstat->st_atim_nsec;
+   stbuf->st_atime = prstat->st_atim_sec;
stbuf->st_mtime = prstat->st_mtim_sec;
-   stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec;
stbuf->st_ctime = prstat->st_ctim_sec;
+#ifdef CONFIG_DARWIN
+   stbuf->st_atimespec.tv_nsec = prstat->st_atim_nsec;
+   stbuf->st_mtimespec.tv_nsec = prstat->st_mtim_nsec;
+   stbuf->st_ctimespec.tv_nsec = prstat->st_ctim_nsec;
+#else
+   stbuf->st_atim.tv_nsec = prstat->st_atim_nsec;
+   stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec;
stbuf->st_ctim.tv_nsec = prstat->st_ctim_nsec;
+#endif
 }
 
 /*
diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index 54239c9..eb68b42 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -426,7 +426,9 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path,
 stbuf->f_bsize = 512;
 stbuf->f_blocks = 0;
 stbuf->f_files = synth_node_count;
+#ifndef CONFIG_DARWIN
 stbuf->f_namelen = NAME_MAX;
+#endif
 return 0;
 }
 
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index a757374..212f569 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -905,11 +905,17 @@ static void stat_to_v9stat_dotl(V9fsState *s, const 
struct stat *stbuf,
 v9lstat->st_blksize = stbuf->st_blksize;
 v9lstat->st_blocks = stbuf->st_blocks;
 v9lstat->st_atime_sec = stbuf->st_atime;
-v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
 v9lstat->st_mtime_sec = stbuf->st_mtime;
-v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
 v9lstat->st_ctime_sec = stbuf->st_ctime;
+#ifdef CONFIG_DARWIN
+v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec;
+v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec;
+v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec;
+#else
+v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
+v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
 v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
+#endif
 /* Currently we only support BASIC fields in stat */
 v9lstat->st_result_mask = P9_STATS_BASIC;
 
@@ -2959,9 +2965,15 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, 
struct

[Qemu-devel] [PATCH v2 09/20] 9p: Properly check/translate flags in unlinkat

2018-05-31 Thread Keno Fischer
This code previously relied on P9_DOTL_AT_REMOVEDIR and AT_REMOVEDIR
having the same numerical value and deferred any errorchecking to the
syscall itself. However, while the former assumption is true on Linux,
it is not true in general. Thus, add appropriate error checking and
translation to the 9p unlinkat server code.

Signed-off-by: Keno Fischer 
---

Changes since v1:
 * Code was moved from 9p-local.c to server entry point in 9p.c

 hw/9pfs/9p.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index b80db65..a757374 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -2522,7 +2522,7 @@ static void coroutine_fn v9fs_unlinkat(void *opaque)
 {
 int err = 0;
 V9fsString name;
-int32_t dfid, flags;
+int32_t dfid, flags, rflags = 0;
 size_t offset = 7;
 V9fsPath path;
 V9fsFidState *dfidp;
@@ -2549,6 +2549,15 @@ static void coroutine_fn v9fs_unlinkat(void *opaque)
 goto out_nofid;
 }
 
+if (flags & ~P9_DOTL_AT_REMOVEDIR) {
+err = -EINVAL;
+goto out_nofid;
+}
+
+if (flags & P9_DOTL_AT_REMOVEDIR) {
+rflags |= AT_REMOVEDIR;
+}
+
 dfidp = get_fid(pdu, dfid);
 if (dfidp == NULL) {
 err = -EINVAL;
@@ -2567,7 +2576,7 @@ static void coroutine_fn v9fs_unlinkat(void *opaque)
 if (err < 0) {
 goto out_err;
 }
-err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, flags);
+err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
 if (!err) {
 err = offset;
 }
-- 
2.8.1




[Qemu-devel] [PATCH v2 14/20] 9p: darwin: Provide a compatibility definition for XATTR_SIZE_MAX

2018-05-31 Thread Keno Fischer
Signed-off-by: Keno Fischer 
---

No change from v1.

 hw/9pfs/9p.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 70cfab9..24802b9 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -3374,6 +3374,13 @@ out_nofid:
 v9fs_string_free(&name);
 }
 
+#if defined(CONFIG_DARWIN) && !defined(XATTR_SIZE_MAX)
+/* Darwin doesn't seem to define a maximum xattr size in its user
+   user space header, but looking at the kernel source, HFS supports
+   up to INT32_MAX, so use that as the maximum.
+*/
+#define XATTR_SIZE_MAX INT32_MAX
+#endif
 static void coroutine_fn v9fs_xattrcreate(void *opaque)
 {
 int flags;
-- 
2.8.1




[Qemu-devel] [PATCH v2 08/20] 9p: Rename 9p-util -> 9p-util-linux

2018-05-31 Thread Keno Fischer
The current file only has the Linux versions of these functions.
Rename the file accordingly and update the Makefile to only build
it on Linux. A Darwin version of these will follow later in the
series.

Signed-off-by: Keno Fischer 
---

Changes since v1: New patch

 hw/9pfs/9p-util-linux.c | 59 +
 hw/9pfs/9p-util.c   | 59 -
 hw/9pfs/Makefile.objs   |  3 ++-
 3 files changed, 61 insertions(+), 60 deletions(-)
 create mode 100644 hw/9pfs/9p-util-linux.c
 delete mode 100644 hw/9pfs/9p-util.c

diff --git a/hw/9pfs/9p-util-linux.c b/hw/9pfs/9p-util-linux.c
new file mode 100644
index 000..defa3a4
--- /dev/null
+++ b/hw/9pfs/9p-util-linux.c
@@ -0,0 +1,59 @@
+/*
+ * 9p utilities (Linux Implementation)
+ *
+ * Copyright IBM, Corp. 2017
+ *
+ * Authors:
+ *  Greg Kurz 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size)
+{
+char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+int ret;
+
+ret = lgetxattr(proc_path, name, value, size);
+g_free(proc_path);
+return ret;
+}
+
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+  char *list, size_t size)
+{
+char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+int ret;
+
+ret = llistxattr(proc_path, list, size);
+g_free(proc_path);
+return ret;
+}
+
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+const char *name)
+{
+char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+int ret;
+
+ret = lremovexattr(proc_path, name);
+g_free(proc_path);
+return ret;
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size, int flags)
+{
+char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+int ret;
+
+ret = lsetxattr(proc_path, name, value, size, flags);
+g_free(proc_path);
+return ret;
+}
diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c
deleted file mode 100644
index 614b7fc..000
--- a/hw/9pfs/9p-util.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * 9p utilities
- *
- * Copyright IBM, Corp. 2017
- *
- * Authors:
- *  Greg Kurz 
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/xattr.h"
-#include "9p-util.h"
-
-ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
- void *value, size_t size)
-{
-char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
-int ret;
-
-ret = lgetxattr(proc_path, name, value, size);
-g_free(proc_path);
-return ret;
-}
-
-ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
-  char *list, size_t size)
-{
-char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
-int ret;
-
-ret = llistxattr(proc_path, list, size);
-g_free(proc_path);
-return ret;
-}
-
-ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
-const char *name)
-{
-char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
-int ret;
-
-ret = lremovexattr(proc_path, name);
-g_free(proc_path);
-return ret;
-}
-
-int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
- void *value, size_t size, int flags)
-{
-char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
-int ret;
-
-ret = lsetxattr(proc_path, name, value, size, flags);
-g_free(proc_path);
-return ret;
-}
diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs
index fd90b62..083508f 100644
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,4 +1,5 @@
-common-obj-y  = 9p.o 9p-util.o
+common-obj-y  = 9p.o
+common-obj-$(CONFIG_LINUX) += 9p-util-linux.o
 common-obj-y += 9p-local.o 9p-xattr.o
 common-obj-y += 9p-xattr-user.o 9p-posix-acl.o
 common-obj-y += coth.o cofs.o codir.o cofile.o
-- 
2.8.1




[Qemu-devel] [PATCH v2 13/20] 9p: darwin: Ignore O_{NOATIME, DIRECT}

2018-05-31 Thread Keno Fischer
Darwin doesn't have either of these flags. Darwin does have
F_NOCACHE, which is similar to O_DIRECT, but has different
enough semantics that other projects don't generally map
them automatically. In any case, we don't support O_DIRECT
on Linux at the moment either.

Signed-off-by: Keno Fischer 
---

Changes from v1: Undo accidental formatting change

 hw/9pfs/9p.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 9751246..70cfab9 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -123,11 +123,18 @@ static int dotl_to_open_flags(int flags)
 { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
 { P9_DOTL_DSYNC, O_DSYNC },
 { P9_DOTL_FASYNC, FASYNC },
+#ifndef CONFIG_DARWIN
+{ P9_DOTL_NOATIME, O_NOATIME },
+/* On Darwin, we could map to F_NOCACHE, which is
+   similar, but doesn't quite have the same
+   semantics. However, we don't support O_DIRECT
+   even on linux at the moment, so we just ignore
+   it here. */
 { P9_DOTL_DIRECT, O_DIRECT },
+#endif
 { P9_DOTL_LARGEFILE, O_LARGEFILE },
 { P9_DOTL_DIRECTORY, O_DIRECTORY },
 { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
-{ P9_DOTL_NOATIME, O_NOATIME },
 { P9_DOTL_SYNC, O_SYNC },
 };
 
@@ -156,10 +163,12 @@ static int get_dotl_openflags(V9fsState *s, int oflags)
  */
 flags = dotl_to_open_flags(oflags);
 flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
+#ifndef CONFIG_DARWIN
 /*
  * Ignore direct disk access hint until the server supports it.
  */
 flags &= ~O_DIRECT;
+#endif
 return flags;
 }
 
-- 
2.8.1




[Qemu-devel] [PATCH v2 07/20] 9p: Move a couple xattr functions to 9p-util

2018-05-31 Thread Keno Fischer
These functions will need custom implementations on Darwin. Since the
implementation is very similar among all of them, and 9p-util already
has the _nofollow version of fgetxattrat, let's move them all there.

Signed-off-by: Keno Fischer 
---

Changes since v1:
 * fgetxattr_follow is dropped in favor of a different approach
   later in the series.

 hw/9pfs/9p-util.c  | 33 +
 hw/9pfs/9p-util.h  |  4 
 hw/9pfs/9p-xattr.c | 33 -
 3 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c
index f709c27..614b7fc 100644
--- a/hw/9pfs/9p-util.c
+++ b/hw/9pfs/9p-util.c
@@ -24,3 +24,36 @@ ssize_t fgetxattrat_nofollow(int dirfd, const char 
*filename, const char *name,
 g_free(proc_path);
 return ret;
 }
+
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+  char *list, size_t size)
+{
+char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+int ret;
+
+ret = llistxattr(proc_path, list, size);
+g_free(proc_path);
+return ret;
+}
+
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+const char *name)
+{
+char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+int ret;
+
+ret = lremovexattr(proc_path, name);
+g_free(proc_path);
+return ret;
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size, int flags)
+{
+char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
+int ret;
+
+ret = lsetxattr(proc_path, name, value, size, flags);
+g_free(proc_path);
+return ret;
+}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index dc0d2e2..79ed6b2 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -60,5 +60,9 @@ ssize_t fgetxattrat_nofollow(int dirfd, const char *path, 
const char *name,
  void *value, size_t size);
 int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
  void *value, size_t size, int flags);
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+  char *list, size_t size);
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+const char *name);
 
 #endif
diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c
index d05c1a1..c696d8f 100644
--- a/hw/9pfs/9p-xattr.c
+++ b/hw/9pfs/9p-xattr.c
@@ -60,17 +60,6 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path,
 return name_size;
 }
 
-static ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
- char *list, size_t size)
-{
-char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
-int ret;
-
-ret = llistxattr(proc_path, list, size);
-g_free(proc_path);
-return ret;
-}
-
 /*
  * Get the list and pass to each layer to find out whether
  * to send the data or not
@@ -196,17 +185,6 @@ ssize_t pt_getxattr(FsContext *ctx, const char *path, 
const char *name,
 return local_getxattr_nofollow(ctx, path, name, value, size);
 }
 
-int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
- void *value, size_t size, int flags)
-{
-char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
-int ret;
-
-ret = lsetxattr(proc_path, name, value, size, flags);
-g_free(proc_path);
-return ret;
-}
-
 ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path,
 const char *name, void *value, size_t size,
 int flags)
@@ -235,17 +213,6 @@ int pt_setxattr(FsContext *ctx, const char *path, const 
char *name, void *value,
 return local_setxattr_nofollow(ctx, path, name, value, size, flags);
 }
 
-static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
-   const char *name)
-{
-char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
-int ret;
-
-ret = lremovexattr(proc_path, name);
-g_free(proc_path);
-return ret;
-}
-
 ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path,
const char *name)
 {
-- 
2.8.1




[Qemu-devel] [PATCH v2 05/20] 9p: Properly set errp in fstatfs error path

2018-05-31 Thread Keno Fischer
In the review of

9p: Avoid warning if FS_IOC_GETVERSION is not defined

Grep Kurz noted this error path was failing to set errp.
Fix that.

Signed-off-by: Keno Fischer 
---

Changes since v1: New patch

 hw/9pfs/9p-local.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index adc169a..576c8e3 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -1420,6 +1420,8 @@ static int local_init(FsContext *ctx, Error **errp)
  */
 if (fstatfs(data->mountfd, &stbuf) < 0) {
 close_preserve_errno(data->mountfd);
+error_setg_errno(errp, errno,
+"failed to stat file system at '%s'", ctx->fs_root);
 goto err;
 }
 switch (stbuf.f_type) {
-- 
2.8.1




[Qemu-devel] [PATCH v2 11/20] 9p: darwin: Handle struct dirent differences

2018-05-31 Thread Keno Fischer
On darwin d_seekoff exists, but is optional and does not seem to
be commonly used by file systems. Use `telldir` instead to obtain
the seek offset.

Signed-off-by: Keno Fischer 
---

Changes since v1:
 * Drop setting d_seekoff in synth_direntry
 * Factor telldir vs d_off logic into v9fs_dent_telldir
 * Error path for telldir failure

 hw/9pfs/9p-synth.c |  2 ++
 hw/9pfs/9p.c   | 36 
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index eb68b42..a312f8c 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -221,7 +221,9 @@ static void synth_direntry(V9fsSynthNode *node,
 {
 strcpy(entry->d_name, node->name);
 entry->d_ino = node->attr->inode;
+#ifndef CONFIG_DARWIN
 entry->d_off = off + 1;
+#endif
 }
 
 static struct dirent *synth_get_dentry(V9fsSynthNode *dir,
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 212f569..9751246 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1738,6 +1738,25 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, 
V9fsFidState *fidp,
 return offset;
 }
 
+/**
+ * Get the seek offset of a dirent. If not available from the structure itself,
+ * obtain it by calling telldir.
+ */
+static int v9fs_dent_telldir(V9fsPDU *pdu, V9fsFidState *fidp,
+ struct dirent *dent)
+{
+#ifdef CONFIG_DARWIN
+/*
+ * Darwin has d_seekoff, which appears to function similarly to d_off.
+ * However, it does not appear to be supported on all file systems,
+ * so use telldir for correctness.
+ */
+return v9fs_co_telldir(pdu, fidp);
+#else
+return dent->d_off;
+#endif
+}
+
 static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
   V9fsFidState *fidp,
   uint32_t max_count)
@@ -1801,7 +1820,11 @@ static int coroutine_fn 
v9fs_do_readdir_with_stat(V9fsPDU *pdu,
 count += len;
 v9fs_stat_free(&v9stat);
 v9fs_path_free(&path);
-saved_dir_pos = dent->d_off;
+saved_dir_pos = v9fs_dent_telldir(pdu, fidp, dent);
+if (saved_dir_pos < 0) {
+err = saved_dir_pos;
+break;
+}
 }
 
 v9fs_readdir_unlock(&fidp->fs.dir);
@@ -1915,7 +1938,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, 
V9fsFidState *fidp,
 V9fsString name;
 int len, err = 0;
 int32_t count = 0;
-off_t saved_dir_pos;
+off_t saved_dir_pos, off;
 struct dirent *dent;
 
 /* save the directory position */
@@ -1951,10 +1974,15 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, 
V9fsFidState *fidp,
 /* Fill the other fields with dummy values */
 qid.type = 0;
 qid.version = 0;
+off = v9fs_dent_telldir(pdu, fidp, dent);
+if (off < 0) {
+err = off;
+break;
+}
 
 /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
 len = pdu_marshal(pdu, 11 + count, "Qqbs",
-  &qid, dent->d_off,
+  &qid, off,
   dent->d_type, &name);
 
 v9fs_readdir_unlock(&fidp->fs.dir);
@@ -1966,7 +1994,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, 
V9fsFidState *fidp,
 }
 count += len;
 v9fs_string_free(&name);
-saved_dir_pos = dent->d_off;
+saved_dir_pos = off;
 }
 
 v9fs_readdir_unlock(&fidp->fs.dir);
-- 
2.8.1




[Qemu-devel] [PATCH v2 06/20] 9p: Avoid warning if FS_IOC_GETVERSION is not defined

2018-05-31 Thread Keno Fischer
Both `stbuf` and `local_ioc_getversion` where unused when
FS_IOC_GETVERSION was not defined, causing a compiler warning.

Reorgnaize the code to avoid this warning.

Signed-off-by: Keno Fischer 
---

Changes since v1:
 * As request in review, logic is factored into a
   local_ioc_getversion_init function.

 hw/9pfs/9p-local.c | 43 +--
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 576c8e3..6222891 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -1375,10 +1375,10 @@ static int local_unlinkat(FsContext *ctx, V9fsPath *dir,
 return ret;
 }
 
+#ifdef FS_IOC_GETVERSION
 static int local_ioc_getversion(FsContext *ctx, V9fsPath *path,
 mode_t st_mode, uint64_t *st_gen)
 {
-#ifdef FS_IOC_GETVERSION
 int err;
 V9fsFidOpenState fid_open;
 
@@ -1397,32 +1397,19 @@ static int local_ioc_getversion(FsContext *ctx, 
V9fsPath *path,
 err = ioctl(fid_open.fd, FS_IOC_GETVERSION, st_gen);
 local_close(ctx, &fid_open);
 return err;
-#else
-errno = ENOTTY;
-return -1;
-#endif
 }
+#endif
 
-static int local_init(FsContext *ctx, Error **errp)
+static int local_ioc_getversion_init(FsContext *ctx, LocalData *data)
 {
+#ifdef FS_IOC_GETVERSION
 struct statfs stbuf;
-LocalData *data = g_malloc(sizeof(*data));
 
-data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY);
-if (data->mountfd == -1) {
-error_setg_errno(errp, errno, "failed to open '%s'", ctx->fs_root);
-goto err;
-}
-
-#ifdef FS_IOC_GETVERSION
 /*
  * use ioc_getversion only if the ioctl is definied
  */
 if (fstatfs(data->mountfd, &stbuf) < 0) {
-close_preserve_errno(data->mountfd);
-error_setg_errno(errp, errno,
-"failed to stat file system at '%s'", ctx->fs_root);
-goto err;
+return -1;
 }
 switch (stbuf.f_type) {
 case EXT2_SUPER_MAGIC:
@@ -1433,6 +1420,26 @@ static int local_init(FsContext *ctx, Error **errp)
 break;
 }
 #endif
+return 0;
+}
+
+static int local_init(FsContext *ctx, Error **errp)
+{
+LocalData *data = g_malloc(sizeof(*data));
+
+data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY);
+if (data->mountfd == -1) {
+error_setg_errno(errp, errno, "failed to open '%s'", ctx->fs_root);
+goto err;
+}
+
+if (local_ioc_getversion_init(ctx, data) < 0) {
+close_preserve_errno(data->mountfd);
+error_setg_errno(errp, errno,
+"failed initialize ioc_getversion for file system at '%s'",
+ctx->fs_root);
+goto err;
+}
 
 if (ctx->export_flags & V9FS_SM_PASSTHROUGH) {
 ctx->xops = passthrough_xattr_ops;
-- 
2.8.1




[Qemu-devel] [PATCH v2 02/20] 9p: proxy: Fix size passed to `connect`

2018-05-31 Thread Keno Fischer
The size to pass to the `connect` call is the size of the entire
`struct sockaddr_un`. Passing anything shorter than this causes errors
on darwin.

Signed-off-by: Keno Fischer 
---

Changes since v1: New patch

 hw/9pfs/9p-proxy.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c
index e2e0329..47a94e0 100644
--- a/hw/9pfs/9p-proxy.c
+++ b/hw/9pfs/9p-proxy.c
@@ -1088,7 +1088,7 @@ static int proxy_ioc_getversion(FsContext *fs_ctx, 
V9fsPath *path,
 
 static int connect_namedsocket(const char *path, Error **errp)
 {
-int sockfd, size;
+int sockfd;
 struct sockaddr_un helper;
 
 if (strlen(path) >= sizeof(helper.sun_path)) {
@@ -1102,8 +1102,7 @@ static int connect_namedsocket(const char *path, Error 
**errp)
 }
 strcpy(helper.sun_path, path);
 helper.sun_family = AF_UNIX;
-size = strlen(helper.sun_path) + sizeof(helper.sun_family);
-if (connect(sockfd, (struct sockaddr *)&helper, size) < 0) {
+if (connect(sockfd, (struct sockaddr *)&helper, sizeof(helper)) < 0) {
 error_setg_errno(errp, errno, "failed to connect to '%s'", path);
 close(sockfd);
 return -1;
-- 
2.8.1




[Qemu-devel] [PATCH v2 04/20] 9p: linux: Fix a couple Linux assumptions

2018-05-31 Thread Keno Fischer
 - Guard Linux only headers.
 - Add qemu/statfs.h header to abstract over the which
   headers are needed for struct statfs
 - Define `ENOATTR` only if not only defined
   (it's defined in system headers on Darwin).

Signed-off-by: Keno Fischer 
---

Changes since v1:
 * New qemu/statfs.h header to factor out the header selection
   to a common place. I did not write a configure check,
   since the rest of 9p only supports linux/darwin anyway,
   so there didn't seem to be much point, but I can write
   one if requested.
 * Now also covers fsdev/virtfs-proxy-helper.c

 fsdev/file-op-9p.h  |  2 +-
 fsdev/virtfs-proxy-helper.c |  4 +++-
 hw/9pfs/9p-local.c  |  2 ++
 include/qemu/statfs.h   | 19 +++
 include/qemu/xattr.h|  4 +++-
 5 files changed, 28 insertions(+), 3 deletions(-)
 create mode 100644 include/qemu/statfs.h

diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 3fa062b..111f804 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -16,7 +16,7 @@
 
 #include 
 #include 
-#include 
+#include "qemu/statfs.h"
 #include "qemu-fsdev-throttle.h"
 
 #define SM_LOCAL_MODE_BITS0600
diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index 6f132c5..94fb069 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -13,17 +13,19 @@
 #include 
 #include 
 #include 
+#ifdef CONFIG_LINUX
 #include 
 #include 
-#include 
 #include 
 #include 
 #ifdef CONFIG_LINUX_MAGIC_H
 #include 
 #endif
+#endif
 #include "qemu-common.h"
 #include "qemu/sockets.h"
 #include "qemu/xattr.h"
+#include "qemu/statfs.h"
 #include "9p-iov-marshal.h"
 #include "hw/9pfs/9p-proxy.h"
 #include "fsdev/9p-iov-marshal.h"
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index bcf2798..adc169a 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -27,10 +27,12 @@
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include 
+#ifdef CONFIG_LINUX
 #include 
 #ifdef CONFIG_LINUX_MAGIC_H
 #include 
 #endif
+#endif
 #include 
 
 #ifndef XFS_SUPER_MAGIC
diff --git a/include/qemu/statfs.h b/include/qemu/statfs.h
new file mode 100644
index 000..dde289f
--- /dev/null
+++ b/include/qemu/statfs.h
@@ -0,0 +1,19 @@
+/*
+ * Host statfs header abstraction
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2, or any
+ * later version.  See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_STATFS_H
+#define QEMU_STATFS_H
+
+#ifdef CONFIG_LINUX
+# include 
+#endif
+#ifdef CONFIG_DARWIN
+# include 
+# include 
+#endif
+
+#endif
diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h
index a83fe8e..f1d0f7b 100644
--- a/include/qemu/xattr.h
+++ b/include/qemu/xattr.h
@@ -22,7 +22,9 @@
 #ifdef CONFIG_LIBATTR
 #  include 
 #else
-#  define ENOATTR ENODATA
+#  if !defined(ENOATTR)
+#define ENOATTR ENODATA
+#  endif
 #  include 
 #endif
 
-- 
2.8.1




[Qemu-devel] [PATCH v2 03/20] 9p: xattr: Fix crash due to free of uninitialized value

2018-05-31 Thread Keno Fischer
If the size returned from llistxattr is 0, we skipped the malloc
call, leaving xattr.value uninitialized. However, this value is
later passed to `g_free` without any further checks, causing an
error. Fix that by always calling g_malloc unconditionally. If
`size` is 0, it will return a pointer that is safe to pass to g_free,
likely NULL.

Signed-off-by: Keno Fischer 
---

Changes since v1: New patch

 hw/9pfs/9p.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index d74302d..b80db65 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -3256,8 +3256,8 @@ static void coroutine_fn v9fs_xattrwalk(void *opaque)
 xattr_fidp->fs.xattr.len = size;
 xattr_fidp->fid_type = P9_FID_XATTR;
 xattr_fidp->fs.xattr.xattrwalk_fid = true;
+xattr_fidp->fs.xattr.value = g_malloc0(size);
 if (size) {
-xattr_fidp->fs.xattr.value = g_malloc0(size);
 err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
  xattr_fidp->fs.xattr.value,
  xattr_fidp->fs.xattr.len);
-- 
2.8.1




[Qemu-devel] [PATCH v2 01/20] cutils: Provide strchrnul

2018-05-31 Thread Keno Fischer
strchrnul is a GNU extension and thus unavailable on a number of targets.
In the review for a commit removing strchrnul from 9p, I was asked to
create a qemu_strchrnul helper to factor out this functionality.
Do so, and use it in a number of other places in the code base that inlined
the replacement pattern in a place where strchrnul could be used.

Signed-off-by: Keno Fischer 
---

Changes since v1: New patch

 hw/9pfs/9p-local.c|  2 +-
 include/qemu/cutils.h |  1 +
 monitor.c |  8 ++--
 util/cutils.c | 13 +
 util/qemu-option.c|  6 +-
 util/uri.c|  6 ++
 6 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index b37b1db..bcf2798 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -65,7 +65,7 @@ int local_open_nofollow(FsContext *fs_ctx, const char *path, 
int flags,
 assert(*path != '/');
 
 head = g_strdup(path);
-c = strchrnul(path, '/');
+c = qemu_strchrnul(path, '/');
 if (*c) {
 /* Intermediate path element */
 head[c - path] = 0;
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index a663340..bc40c30 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -122,6 +122,7 @@ int qemu_strnlen(const char *s, int max_len);
  * Returns: the pointer originally in @input.
  */
 char *qemu_strsep(char **input, const char *delim);
+const char *qemu_strchrnul(const char *s, int c);
 time_t mktimegm(struct tm *tm);
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
diff --git a/monitor.c b/monitor.c
index 922cfc0..e1f01c4 100644
--- a/monitor.c
+++ b/monitor.c
@@ -798,9 +798,7 @@ static int compare_cmd(const char *name, const char *list)
 p = list;
 for(;;) {
 pstart = p;
-p = strchr(p, '|');
-if (!p)
-p = pstart + strlen(pstart);
+p = qemu_strchrnul(p, '|');
 if ((p - pstart) == len && !memcmp(pstart, name, len))
 return 1;
 if (*p == '\0')
@@ -3401,9 +3399,7 @@ static void cmd_completion(Monitor *mon, const char 
*name, const char *list)
 p = list;
 for(;;) {
 pstart = p;
-p = strchr(p, '|');
-if (!p)
-p = pstart + strlen(pstart);
+p = qemu_strchrnul(p, '|');
 len = p - pstart;
 if (len > sizeof(cmd) - 2)
 len = sizeof(cmd) - 2;
diff --git a/util/cutils.c b/util/cutils.c
index 0de69e6..6e078b0 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -545,6 +545,19 @@ int qemu_strtou64(const char *nptr, const char **endptr, 
int base,
 }
 
 /**
+ * Searches for the first occurrence of 'c' in 's', and returns a pointer
+ * to the trailing null byte if none was found.
+ */
+const char *qemu_strchrnul(const char *s, int c)
+{
+const char *e = strchr(s, c);
+if (!e) {
+e = s + strlen(s);
+}
+return e;
+}
+
+/**
  * parse_uint:
  *
  * @s: String to parse
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 58d1c23..54eca12 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -77,11 +77,7 @@ const char *get_opt_value(const char *p, char **value)
 
 *value = NULL;
 while (1) {
-offset = strchr(p, ',');
-if (!offset) {
-offset = p + strlen(p);
-}
-
+offset = qemu_strchrnul(p, ',');
 length = offset - p;
 if (*offset != '\0' && *(offset + 1) == ',') {
 length++;
diff --git a/util/uri.c b/util/uri.c
index 8624a7a..8bdef84 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -52,6 +52,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 
 #include "qemu/uri.h"
 
@@ -2266,10 +2267,7 @@ struct QueryParams *query_params_parse(const char *query)
 /* Find the next separator, or end of the string. */
 end = strchr(query, '&');
 if (!end) {
-end = strchr(query, ';');
-}
-if (!end) {
-end = query + strlen(query);
+end = qemu_strchrnul(query, ';');
 }
 
 /* Find the first '=' character between here and end. */
-- 
2.8.1




[Qemu-devel] [PATCH v2 00/20] 9p: Add support for Darwin

2018-05-31 Thread Keno Fischer
This is v2 of my patch series to provide 9p server
support for Darwin.

The patches in this series address review from v1,
now support building the virtfs proxy, as well
as fix bugs found since v1.

Keno Fischer (20):
  cutils: Provide strchrnul
  9p: proxy: Fix size passed to `connect`
  9p: xattr: Fix crash due to free of uninitialized value
  9p: linux: Fix a couple Linux assumptions
  9p: Properly set errp in fstatfs error path
  9p: Avoid warning if FS_IOC_GETVERSION is not defined
  9p: Move a couple xattr functions to 9p-util
  9p: Rename 9p-util -> 9p-util-linux
  9p: Properly error check and translate flags in unlinkat
  9p: darwin: Handle struct stat(fs) differences
  9p: darwin: Handle struct dirent differences
  9p: darwin: Explicitly cast comparisons of mode_t with -1
  9p: darwin: Ignore O_{NOATIME, DIRECT}
  9p: darwin: Provide a compatibility definition for XATTR_SIZE_MAX
  9p: darwin: *xattr_nofollow implementations
  9p: darwin: Compatibility for f/l*xattr
  9p: darwin: Provide a fallback implementation for utimensat
  9p: darwin: Implement compatibility for mknodat
  9p: darwin: virtfs-proxy: Implement setuid code for darwin
  9p: darwin: configure: Allow VirtFS on Darwin

 Makefile|   6 ++
 Makefile.objs   |   1 +
 configure   |  22 +++--
 fsdev/file-op-9p.h  |   2 +-
 fsdev/virtfs-proxy-helper.c | 230 
 hw/9pfs/9p-local.c  |  68 -
 hw/9pfs/9p-proxy.c  |  22 +++--
 hw/9pfs/9p-synth.c  |   4 +
 hw/9pfs/9p-util-darwin.c| 185 +++
 hw/9pfs/9p-util-linux.c |  70 ++
 hw/9pfs/9p-util.c   |  26 -
 hw/9pfs/9p-util.h   |  31 ++
 hw/9pfs/9p-xattr.c  |  33 ---
 hw/9pfs/9p.c|  86 +++--
 hw/9pfs/Makefile.objs   |   4 +-
 include/qemu/cutils.h   |   1 +
 include/qemu/statfs.h   |  19 
 include/qemu/xattr.h|   4 +-
 monitor.c   |   8 +-
 util/cutils.c   |  13 +++
 util/qemu-option.c  |   6 +-
 util/uri.c  |   6 +-
 22 files changed, 636 insertions(+), 211 deletions(-)
 create mode 100644 hw/9pfs/9p-util-darwin.c
 create mode 100644 hw/9pfs/9p-util-linux.c
 delete mode 100644 hw/9pfs/9p-util.c
 create mode 100644 include/qemu/statfs.h

-- 
2.8.1




Re: [Qemu-devel] [PATCH 1/6] gdbstub: Return the fd from gdbserver_start

2018-05-31 Thread Richard Henderson
On 05/31/2018 04:15 PM, Philippe Mathieu-Daudé wrote:
> Hi Richard,
> 
> On 05/31/2018 07:49 PM, Richard Henderson wrote:
>> This will allow us to protect gdbserver_fd from the guest.
>>
>> Signed-off-by: Richard Henderson 
>> ---
>>  gdbstub.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/gdbstub.c b/gdbstub.c
>> index 6081e719c5..057d0d65c5 100644
>> --- a/gdbstub.c
>> +++ b/gdbstub.c
>> @@ -1890,15 +1890,16 @@ static int gdbserver_open(int port)
>>  int gdbserver_start(int port)
>>  {
>>  gdbserver_fd = gdbserver_open(port);
>> -if (gdbserver_fd < 0)
>> +if (gdbserver_fd < 0) {
>>  return -1;
>> +}
>>  /* accept connections */
>>  if (!gdb_accept()) {
>>  close(gdbserver_fd);
>>  gdbserver_fd = -1;
>>  return -1;
>>  }
>> -return 0;
>> +return gdbserver_fd;
> 
> I agree with your change, but what about !CONFIG_USER_ONLY?

It's still a non-negative number, so the success value is still the same.


r~



Re: [Qemu-devel] [PATCH v3 0/4] Add new CD-ROM related qtests

2018-05-31 Thread John Snow



On 04/27/2018 05:40 AM, Thomas Huth wrote:
> With one of my clean-up patches (see commit 1454509726719e0933c800), I
> recently accidentially broke the "-cdrom" parameter (more precisely
> "-drive if=scsi") on a couple of boards, since there was no error
> detected during the "make check" regression testing. This is clearly an
> indication that we are lacking tests in this area.
> So this small patch series now introduces some tests for CD-ROM drives:
> The first two patches introduce the possibility to check that booting
> from CD-ROM drives still works fine for x86 and s390x, and the third
> patch adds a test that certain machines can at least still be started
> with the "-cdrom" parameter (i.e. that test would have catched the
> mistake that I did with my SCSI cleanup patch).
> 
> v3:
>  - Rebased to current master branch
>  - Add a final patch that adds an entry to the MAINTAINERS file
> 
> v2:
>  - Use g_spawn_sync() instead of execlp() to run genisoimage
>  - The "-cdrom" parameter test is now run on all architectures (with
>machine "none" for the machines that are not explicitly checked)
>  - Some rewordings and improved comments here and there
> 
> Thomas Huth (4):
>   tests/boot-sector: Add magic bytes to s390x boot code header
>   tests/cdrom-test: Test booting from CD-ROM ISO image file
>   tests/cdrom-test: Test that -cdrom parameter is working
>   MAINTAINERS: Add the cdrom-test to John's section
> 
>  MAINTAINERS|   1 +
>  tests/Makefile.include |   2 +
>  tests/boot-sector.c|   9 +-
>  tests/cdrom-test.c | 222 
> +
>  4 files changed, 231 insertions(+), 3 deletions(-)
>  create mode 100644 tests/cdrom-test.c
> 

*coughs*

Thanks, applied to my IDE tree:

https://github.com/jnsnow/qemu/commits/ide
https://github.com/jnsnow/qemu.git

--js



Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread John Snow



On 05/31/2018 06:28 PM, John Snow wrote:
> This set just adds register names so that the read/write traces make
> more sense on their own without having to memorize register offsets.
> It also splits read/write traces into supported/unsupported subsets,
> so you can just monitor for things that QEMU is likely doing wrong.
> 
> v2:
>  - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces
>for writes to unknown/unsupported registers. (Philippe)
> 
> John Snow (16):
>   ahci: add port register enumeration
>   ahci: modify ahci_port_read to use register numbers
>   ahci: make port read traces more descriptive
>   ahci: fix spacing damage on ahci_port_write
>   ahci: combine identical clauses in port write
>   ahci: modify ahci_port_write to use register numbers
>   ahci: make port write traces more descriptive
>   ahci: delete old port register address definitions
>   ahci: add host register enumeration
>   ahci: fix host register max address
>   ahci: modify ahci_mem_read_32 to work on register numbers
>   ahci: make mem_read_32 traces more descriptive
>   ahci: fix spacing damage on ahci_mem_write
>   ahci: adjust ahci_mem_write to work on registers
>   ahci: delete old host register address definitions
>   ahci: make ahci_mem_write traces more descriptive
> 
>  hw/ide/ahci.c  | 314 
> ++---
>  hw/ide/ahci_internal.h |  63 ++
>  hw/ide/trace-events|  13 +-
>  3 files changed, 241 insertions(+), 149 deletions(-)
> 

Touched up the format strings, and

Thanks, applied to my IDE tree:

https://github.com/jnsnow/qemu/commits/ide
https://github.com/jnsnow/qemu.git

--js



Re: [Qemu-devel] [PATCH 0/3] ahci: fix completion race condition

2018-05-31 Thread John Snow



On 05/30/2018 08:43 PM, John Snow wrote:
> Commit d759c951f changed the main thread lock release/reacquisition,
> and in so doing apparently jostled loose a race condition in the AHCI
> code.
> 
> Patch 2 should be sufficient to fix this, and patches 1 and 3 are just
> little trivial fixes.
> 
> This might be sufficient to fix the bug as reported at
> https://bugs.launchpad.net/qemu/+bug/1769189
> but the nature of the timing changes make it difficult to confirm,
> so I am posting this patchset for the reporters to help test.
> 
> John Snow (3):
>   ahci: trim signatures on raise/lower
>   ahci: fix PxCI register race
>   ahci: don't schedule unnecessary BH
> 
>  hw/ide/ahci.c | 24 +++-
>  1 file changed, 11 insertions(+), 13 deletions(-)
> 

Thanks for the testing and reviews, everyone!

Thanks, applied to my IDE tree:

https://github.com/jnsnow/qemu/commits/ide
https://github.com/jnsnow/qemu.git

--js



Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread John Snow



On 05/31/2018 06:49 PM, no-re...@patchew.org wrote:
> Hi,
> 
> This series failed docker-mingw@fedora build test. Please find the testing 
> commands and
> their output below. If you have Docker installed, you can probably reproduce 
> it
> locally.
> 

[ blah blah blah ]

> In file included from /tmp/qemu-test/src/hw/ide/ahci.c:30:0:
> /tmp/qemu-test/src/hw/ide/ahci.c: In function 'ahci_mem_write':
> /tmp/qemu-test/src/hw/ide/ahci.c:497:38: error: format '%lx' expects argument 
> of type 'long unsigned int', but argument 3 has type 'hwaddr {aka long long 
> unsigned int}' [-Werror=format=]
>  qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented 
> register:"
>   ^
> /tmp/qemu-test/src/include/qemu/log.h:85:22: note: in definition of macro 
> 'qemu_log_mask'
>  qemu_log(FMT, ## __VA_ARGS__);  \
>   ^~~
> /tmp/qemu-test/src/hw/ide/ahci.c:498:63: note: format string is defined here
>" AHCI host register %s, offset 0x%lx: 0x%"PRIu64,
>  ~~^
>  %llx
> In file included from /tmp/qemu-test/src/hw/ide/ahci.c:30:0:
> /tmp/qemu-test/src/hw/ide/ahci.c:511:34: error: format '%lx' expects argument 
> of type 'long unsigned int', but argument 2 has type 'hwaddr {aka long long 
> unsigned int}' [-Werror=format=]
>  qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register: 
> "
>   ^
> /tmp/qemu-test/src/include/qemu/log.h:85:22: note: in definition of macro 
> 'qemu_log_mask'
>  qemu_log(FMT, ## __VA_ARGS__);  \
>   ^~~
> /tmp/qemu-test/src/hw/ide/ahci.c:512:59: note: format string is defined here
>"AHCI global register at offset 0x%lx: 0x%"PRIu64,
>  ~~^
>  %llx

Rookie stuff; I forgot hwaddr was just uint64_t, which is implemented as
long on x86_64. Replaced address and value specifications both with
"0x%"PRIx64 in both cases.

--js



Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread John Snow



On 05/31/2018 06:47 PM, no-re...@patchew.org wrote:
> Hi,
> 
> This series seems to have some coding style problems. See output below for
> more information:
> 
> Type: series
> Message-id: 20180531222835.16558-1-js...@redhat.com
> Subject: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
> 
> === TEST SCRIPT BEGIN ===
> #!/bin/bash
> 
> BASE=base
> n=1
> total=$(git log --oneline $BASE.. | wc -l)
> failed=0
> 
> git config --local diff.renamelimit 0
> git config --local diff.renames True
> git config --local diff.algorithm histogram
> 
> commits="$(git log --format=%H --reverse $BASE..)"
> for c in $commits; do
> echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
> if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; 
> then
> failed=1
> echo
> fi
> n=$((n+1))
> done
> 
> exit $failed
> === TEST SCRIPT END ===
> 
> Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
> From https://github.com/patchew-project/qemu
>  t [tag update]patchew/20180531212435.165261-1-ebl...@redhat.com 
> -> patchew/20180531212435.165261-1-ebl...@redhat.com
>  * [new tag]   patchew/20180531222835.16558-1-js...@redhat.com -> 
> patchew/20180531222835.16558-1-js...@redhat.com
> Switched to a new branch 'test'
> 9932482af2 ahci: make ahci_mem_write traces more descriptive
> c89f4076bd ahci: delete old host register address definitions
> 70f11b4d95 ahci: adjust ahci_mem_write to work on registers
> c13dce7973 ahci: fix spacing damage on ahci_mem_write
> fab5307968 ahci: make mem_read_32 traces more descriptive
> ace1b4ffac ahci: modify ahci_mem_read_32 to work on register numbers
> 3021a82d31 ahci: fix host register max address
> 3c4808a257 ahci: add host register enumeration
> f57758869b ahci: delete old port register address definitions
> 52d04d9b2c ahci: make port write traces more descriptive
> 08f435fad3 ahci: modify ahci_port_write to use register numbers
> 634c79d46b ahci: combine identical clauses in port write
> 50b162ae60 ahci: fix spacing damage on ahci_port_write
> 5895ec8f28 ahci: make port read traces more descriptive
> 68fd051a9a ahci: modify ahci_port_read to use register numbers
> 0478088585 ahci: add port register enumeration
> 
> === OUTPUT BEGIN ===
> Checking PATCH 1/16: ahci: add port register enumeration...
> WARNING: line over 80 characters
> #71: FILE: hw/ide/ahci_internal.h:94:
> +AHCI_PORT_REG_SCR_NOTIF   = 15, /* PxSNTF: SATA phy register: 
> SNotification */
> 
> total: 0 errors, 1 warnings, 72 lines checked
> 
> Your patch has style problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
> Checking PATCH 2/16: ahci: modify ahci_port_read to use register numbers...
> Checking PATCH 3/16: ahci: make port read traces more descriptive...
> Checking PATCH 4/16: ahci: fix spacing damage on ahci_port_write...
> ERROR: spaces required around that '|' (ctx:VxV)
> #83: FILE: hw/ide/ahci.c:316:
> +(val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK));

I'll fix that in this patch.

>   ^
> 
> total: 1 errors, 0 warnings, 156 lines checked
> 
> Your patch has style problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
> 
> Checking PATCH 5/16: ahci: combine identical clauses in port write...
> Checking PATCH 6/16: ahci: modify ahci_port_write to use register numbers...
> Checking PATCH 7/16: ahci: make port write traces more descriptive...
> Checking PATCH 8/16: ahci: delete old port register address definitions...
> Checking PATCH 9/16: ahci: add host register enumeration...
> Checking PATCH 10/16: ahci: fix host register max address...
> Checking PATCH 11/16: ahci: modify ahci_mem_read_32 to work on register 
> numbers...
> Checking PATCH 12/16: ahci: make mem_read_32 traces more descriptive...
> Checking PATCH 13/16: ahci: fix spacing damage on ahci_mem_write...
> Checking PATCH 14/16: ahci: adjust ahci_mem_write to work on registers...
> Checking PATCH 15/16: ahci: delete old host register address definitions...
> Checking PATCH 16/16: ahci: make ahci_mem_write traces more descriptive...
> WARNING: line over 80 characters
> #18: FILE: hw/ide/ahci.c:497:
> +qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented 
> register:"
> 

TIL that my emacs configuration considers column 80 in-bounds.
Fixed.

> total: 0 errors, 1 warnings, 35 lines checked
> 
> Your patch has style problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
> === OUTPUT END ===
> 
> Test command exited with code: 1
> 
> 
> ---
> Email generated automatically by Patchew [http://patchew.org/].
> Please send your feedback to patchew-de...@redhat.com
> 

You're a harsh master, patchew.

--js



Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread John Snow



On 05/31/2018 07:24 PM, Philippe Mathieu-Daudé wrote:
> On 05/31/2018 07:28 PM, John Snow wrote:
>> This set just adds register names so that the read/write traces make
>> more sense on their own without having to memorize register offsets.
>> It also splits read/write traces into supported/unsupported subsets,
>> so you can just monitor for things that QEMU is likely doing wrong.
>>
>> v2:
>>  - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces
>>for writes to unknown/unsupported registers. (Philippe)
> 
> Thanks,
> 
> With format string and style issues reported by checkpatch fixed:
> Reviewed-by: Philippe Mathieu-Daudé 
> 
> (I recommend you to setup the scripts/git.orderfile to ease reviews).
> 

Ah, yeah, my environment is kind of broken at the moment, sorry :(

Just haven't had the time to figure out why git-publish is broken so I'm
missing a few of my usual pieces.

--js



Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread Philippe Mathieu-Daudé
On 05/31/2018 07:28 PM, John Snow wrote:
> This set just adds register names so that the read/write traces make
> more sense on their own without having to memorize register offsets.
> It also splits read/write traces into supported/unsupported subsets,
> so you can just monitor for things that QEMU is likely doing wrong.
> 
> v2:
>  - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces
>for writes to unknown/unsupported registers. (Philippe)

Thanks,

With format string and style issues reported by checkpatch fixed:
Reviewed-by: Philippe Mathieu-Daudé 

(I recommend you to setup the scripts/git.orderfile to ease reviews).

> 
> John Snow (16):
>   ahci: add port register enumeration
>   ahci: modify ahci_port_read to use register numbers
>   ahci: make port read traces more descriptive
>   ahci: fix spacing damage on ahci_port_write
>   ahci: combine identical clauses in port write
>   ahci: modify ahci_port_write to use register numbers
>   ahci: make port write traces more descriptive
>   ahci: delete old port register address definitions
>   ahci: add host register enumeration
>   ahci: fix host register max address
>   ahci: modify ahci_mem_read_32 to work on register numbers
>   ahci: make mem_read_32 traces more descriptive
>   ahci: fix spacing damage on ahci_mem_write
>   ahci: adjust ahci_mem_write to work on registers
>   ahci: delete old host register address definitions
>   ahci: make ahci_mem_write traces more descriptive
> 
>  hw/ide/ahci.c  | 314 
> ++---
>  hw/ide/ahci_internal.h |  63 ++
>  hw/ide/trace-events|  13 +-
>  3 files changed, 241 insertions(+), 149 deletions(-)
> 



Re: [Qemu-devel] [PATCH 11/13] 9p: darwin: Mark mknod as unsupported

2018-05-31 Thread Keno Fischer
On Thu, May 31, 2018 at 7:06 PM, Keno Fischer  wrote:
> On Thu, May 31, 2018 at 6:56 PM, Keno Fischer  wrote:
 My concern was that allowing this would cause unexpected
 behavior, since the device numbers will differ between OS X
 and Linux. Though maybe this isn't the place to worry about
 that.
>>>
>>> The numbers may differ indeed but we don't really care since the
>>> server never opens device files. This is just a directory entry.
>>
>> Ok, let me try to implement it. However, I don't think it is possible
>> to implement mknodat (or at least I can't think of how) on Darwin
>> directly. I could use regular mknod, but presumably, this is used
>> to avoid a race condition between creating the device and setting
>> the permissions. Can you think of a good way to resolve that?
>
> Would it work to fchdir in to the directory and use a cwd-relative
> mknod, then fchdir back? Or do we need to maintain the cwd
> while in this code?

Sorry for the triple-self-post here, but I took a look at the Darwin kernel
source and there's an unexposed (from the Darwin C library) syscall that
only changes the current thread's cwd. That seems like it should be safe,
so I'll go ahead and use that to implement mknodat. I'll also submit a
feature request to apple to implement mknodat.



Re: [Qemu-devel] [PATCH 1/6] gdbstub: Return the fd from gdbserver_start

2018-05-31 Thread Philippe Mathieu-Daudé
Hi Richard,

On 05/31/2018 07:49 PM, Richard Henderson wrote:
> This will allow us to protect gdbserver_fd from the guest.
> 
> Signed-off-by: Richard Henderson 
> ---
>  gdbstub.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/gdbstub.c b/gdbstub.c
> index 6081e719c5..057d0d65c5 100644
> --- a/gdbstub.c
> +++ b/gdbstub.c
> @@ -1890,15 +1890,16 @@ static int gdbserver_open(int port)
>  int gdbserver_start(int port)
>  {
>  gdbserver_fd = gdbserver_open(port);
> -if (gdbserver_fd < 0)
> +if (gdbserver_fd < 0) {
>  return -1;
> +}
>  /* accept connections */
>  if (!gdb_accept()) {
>  close(gdbserver_fd);
>  gdbserver_fd = -1;
>  return -1;
>  }
> -return 0;
> +return gdbserver_fd;

I agree with your change, but what about !CONFIG_USER_ONLY?

It should be safe enough documenting the different behaviors in
include/exec/gdbstub.h.

>  }
>  
>  /* Disable gdb stub for child processes.  */
> 



Re: [Qemu-devel] [PATCH v1 4/8] docker: update Travis docker image

2018-05-31 Thread Philippe Mathieu-Daudé
On 05/31/2018 05:14 PM, Alex Bennée wrote:
> 
> Philippe Mathieu-Daudé  writes:
> 
>> Hi Alex,
>>
>> On 05/30/2018 08:06 AM, Alex Bennée wrote:
>>> This is still poorly documented by Travis but according to:
>>>
>>>   
>>> https://docs.travis-ci.com/user/common-build-problems/#Running-a-Container-Based-Docker-Image-Locally
>>>
>>> their reference images are now hosted on Docker Hub. So we update the
>>> FROM line to refer to the new default image. We also need a few
>>> additional tweaks:
>>>
>>>   - re-enable deb-src lines for our build-dep install
>>>   - add explicit PATH definition for tools
>>
>> I don't understand how this is related to QEMU testing, isn't it rather
>> some Travis-ci bug? We don't need to use PhantomJS / Neo4j / Maven.
> 
> It's oddly constructed I'll grant you but I just set the path to what a
> running image has. The normal image is started up with a full systemd
> init whereas we drop directly into the shell.

OK, can you add a comment about it? (so we don't remove what seems
unrelated).

> 
>>
>>>   - force the build USER to be Travis
>>>   - add clang to FEATURES for our test-clang machinery
>>>
>>> Signed-off-by: Alex Bennée 
>>> ---
>>>  tests/docker/dockerfiles/travis.docker | 7 +--
>>>  1 file changed, 5 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tests/docker/dockerfiles/travis.docker 
>>> b/tests/docker/dockerfiles/travis.docker
>>> index 605b6e429b..6e90f033d5 100644
>>> --- a/tests/docker/dockerfiles/travis.docker
>>> +++ b/tests/docker/dockerfiles/travis.docker
>>> @@ -1,8 +1,11 @@
>>> -FROM quay.io/travisci/travis-ruby
>>> +FROM travisci/ci-garnet:packer-1512502276-986baf0
>>>  ENV DEBIAN_FRONTEND noninteractive
>>>  ENV LANG en_US.UTF-8
>>>  ENV LC_ALL en_US.UTF-8
>>> +RUN cat /etc/apt/sources.list | sed "s/# deb-src/deb-src/" >> 
>>> /etc/apt/sources.list
>>>  RUN apt-get update
>>>  RUN apt-get -y build-dep qemu
>>>  RUN apt-get -y install device-tree-compiler python2.7 python-yaml 
>>> dh-autoreconf gdb strace lsof net-tools
>>> -ENV FEATURES pyyaml

# Travis tools require PhantomJS / Neo4j / Maven accessible
# in their PATH (QEMU build won't access them).

Reviewed-by: Philippe Mathieu-Daudé 

>>> +ENV PATH 
>>> /usr/local/phantomjs/bin:/usr/local/phantomjs:/usr/local/neo4j-3.2.7/bin:/usr/local/maven-3.5.2/bin:/usr/local/cmake-3.9.2/bin:/usr/local/clang-5.0.0/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
>>> +ENV FEATURES clang pyyaml
>>> +USER travis
>>>
> 
> 
> --
> Alex Bennée
> 



Re: [Qemu-devel] [PATCH 11/13] 9p: darwin: Mark mknod as unsupported

2018-05-31 Thread Keno Fischer
On Thu, May 31, 2018 at 6:56 PM, Keno Fischer  wrote:
>>> My concern was that allowing this would cause unexpected
>>> behavior, since the device numbers will differ between OS X
>>> and Linux. Though maybe this isn't the place to worry about
>>> that.
>>
>> The numbers may differ indeed but we don't really care since the
>> server never opens device files. This is just a directory entry.
>
> Ok, let me try to implement it. However, I don't think it is possible
> to implement mknodat (or at least I can't think of how) on Darwin
> directly. I could use regular mknod, but presumably, this is used
> to avoid a race condition between creating the device and setting
> the permissions. Can you think of a good way to resolve that?

Would it work to fchdir in to the directory and use a cwd-relative
mknod, then fchdir back? Or do we need to maintain the cwd
while in this code?



Re: [Qemu-devel] [PATCH v5 0/6] linux-user: Reorg interp_prefix handling

2018-05-31 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180531224911.23725-1-richard.hender...@linaro.org
Subject: [Qemu-devel] [PATCH v5 0/6] linux-user: Reorg interp_prefix handling

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]   
patchew/20180531224911.23725-1-richard.hender...@linaro.org -> 
patchew/20180531224911.23725-1-richard.hender...@linaro.org
Switched to a new branch 'test'
2755fa1986 linux-user: Use *at functions to implement interp_prefix
8af5f75489 linux-user: Check is_hostfd in mmap syscalls
83374c532a linux-user: Check contains_hostfd in select syscalls
41b80ec144 linux-user: Check is_hostfd in do_syscall
fb3aa3a0d8 linux-user: Add host_fds and manipulators
11567891c5 gdbstub: Return the fd from gdbserver_start

=== OUTPUT BEGIN ===
Checking PATCH 1/6: gdbstub: Return the fd from gdbserver_start...
Checking PATCH 2/6: linux-user: Add host_fds and manipulators...
Checking PATCH 3/6: linux-user: Check is_hostfd in do_syscall...
Checking PATCH 4/6: linux-user: Check contains_hostfd in select syscalls...
Checking PATCH 5/6: linux-user: Check is_hostfd in mmap syscalls...
Checking PATCH 6/6: linux-user: Use *at functions to implement interp_prefix...
ERROR: do not use assignment in if condition
#200: FILE: linux-user/syscall.c:8541:
+if (!(fn = lock_user_string(arg2))) {

ERROR: do not use assignment in if condition
#218: FILE: linux-user/syscall.c:8562:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#236: FILE: linux-user/syscall.c:8577:
+if (!(fn = lock_user_string(arg2))) {

ERROR: do not use assignment in if condition
#254: FILE: linux-user/syscall.c:8735:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#333: FILE: linux-user/syscall.c:9769:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#349: FILE: linux-user/syscall.c:9809:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#365: FILE: linux-user/syscall.c:10099:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#380: FILE: linux-user/syscall.c:10111:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#398: FILE: linux-user/syscall.c:11303:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#415: FILE: linux-user/syscall.c:11317:
+if (!(fn = lock_user_string(arg1))) {

ERROR: do not use assignment in if condition
#433: FILE: linux-user/syscall.c:11349:
+if (!(fn = lock_user_string(arg2))) {

ERROR: do not use assignment in if condition
#452: FILE: linux-user/syscall.c:12394:
+if (!(fn = lock_user_string(arg2))) {

ERROR: do not use assignment in if condition
#472: FILE: linux-user/syscall.c:12433:
+if (!(fn = lock_user_string(arg2))) {

total: 13 errors, 0 warnings, 423 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-devel] [PATCHv2] slirp: Fix spurious error report when sending directly

2018-05-31 Thread Philippe Mathieu-Daudé
On 05/31/2018 05:31 PM, Samuel Thibault wrote:
> Move check to where it actually is useful, and reduce scope of 'len'
> variable along the way.
> 
> Signed-off-by: Samuel Thibault 

Thanks!

Reviewed-by: Philippe Mathieu-Daudé 

> ---
> 
> Difference from v1:
> - move check instead of initializing len.
> 
>  slirp/socket.c | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/slirp/socket.c b/slirp/socket.c
> index e2a71c9b04..08fe98907d 100644
> --- a/slirp/socket.c
> +++ b/slirp/socket.c
> @@ -340,7 +340,7 @@ sosendoob(struct socket *so)
>   struct sbuf *sb = &so->so_rcv;
>   char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
>  
> - int n, len;
> + int n;
>  
>   DEBUG_CALL("sosendoob");
>   DEBUG_ARG("so = %p", so);
> @@ -359,7 +359,7 @@ sosendoob(struct socket *so)
>* send it all
>*/
>   uint32_t urgc = so->so_urgc;
> - len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
> + int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
>   if (len > urgc) {
>   len = urgc;
>   }
> @@ -374,13 +374,13 @@ sosendoob(struct socket *so)
>   len += n;
>   }
>   n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
> - }
> -
>  #ifdef DEBUG
> - if (n != len) {
> - DEBUG_ERROR((dfd, "Didn't send all data urgently X\n"));
> - }
> + if (n != len) {
> + DEBUG_ERROR((dfd, "Didn't send all data urgently 
> X\n"));
> + }
>  #endif
> + }
> +
>   if (n < 0) {
>   return n;
>   }
> 



Re: [Qemu-devel] [PATCH 11/13] 9p: darwin: Mark mknod as unsupported

2018-05-31 Thread Keno Fischer
>> My concern was that allowing this would cause unexpected
>> behavior, since the device numbers will differ between OS X
>> and Linux. Though maybe this isn't the place to worry about
>> that.
>
> The numbers may differ indeed but we don't really care since the
> server never opens device files. This is just a directory entry.

Ok, let me try to implement it. However, I don't think it is possible
to implement mknodat (or at least I can't think of how) on Darwin
directly. I could use regular mknod, but presumably, this is used
to avoid a race condition between creating the device and setting
the permissions. Can you think of a good way to resolve that?



[Qemu-devel] [PATCH 3/6] linux-user: Check is_hostfd in do_syscall

2018-05-31 Thread Richard Henderson
This is the vast majority of all fd inputs, but not all.

Signed-off-by: Richard Henderson 
---
 linux-user/syscall.c | 303 +++
 1 file changed, 280 insertions(+), 23 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index d02c16bbc6..5339f0bc1c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8034,6 +8034,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 if (arg3 == 0)
 ret = 0;
 else {
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0)))
 goto efault;
 ret = get_errno(safe_read(arg1, p, arg3));
@@ -8045,6 +8048,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 }
 break;
 case TARGET_NR_write:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1)))
 goto efault;
 if (fd_trans_target_to_host_data(arg1)) {
@@ -8072,6 +8078,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 break;
 #endif
 case TARGET_NR_openat:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 if (!(p = lock_user_string(arg2)))
 goto efault;
 ret = get_errno(do_openat(cpu_env, arg1, p,
@@ -8082,16 +8091,25 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 break;
 #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE)
 case TARGET_NR_name_to_handle_at:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5);
 break;
 #endif
 #if defined(TARGET_NR_open_by_handle_at) && defined(CONFIG_OPEN_BY_HANDLE)
 case TARGET_NR_open_by_handle_at:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 ret = do_open_by_handle_at(arg1, arg2, arg3);
 fd_trans_unregister(ret);
 break;
 #endif
 case TARGET_NR_close:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 fd_trans_unregister(arg1);
 ret = get_errno(close(arg1));
 break;
@@ -8155,7 +8173,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_linkat)
 case TARGET_NR_linkat:
-{
+if (is_hostfd(arg1)) {
+goto ebadf;
+} else {
 void * p2 = NULL;
 if (!arg2 || !arg4)
 goto efault;
@@ -8180,6 +8200,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_unlinkat)
 case TARGET_NR_unlinkat:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 if (!(p = lock_user_string(arg2)))
 goto efault;
 ret = get_errno(unlinkat(arg1, p, arg3));
@@ -8311,6 +8334,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_mknodat)
 case TARGET_NR_mknodat:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 if (!(p = lock_user_string(arg2)))
 goto efault;
 ret = get_errno(mknodat(arg1, p, arg3, arg4));
@@ -8334,6 +8360,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 goto unimplemented;
 #endif
 case TARGET_NR_lseek:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 ret = get_errno(lseek(arg1, arg2, arg3));
 break;
 #if defined(TARGET_NR_getxpid) && defined(TARGET_ALPHA)
@@ -8484,7 +8513,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_futimesat)
 case TARGET_NR_futimesat:
-{
+if (is_hostfd(arg1)) {
+goto ebadf;
+} else {
 struct timeval *tvp, tv[2];
 if (arg3) {
 if (copy_from_user_timeval(&tv[0], arg3)
@@ -8520,6 +8551,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_faccessat) && defined(__NR_faccessat)
 case TARGET_NR_faccessat:
+if (is_hostfd(arg1)) {
+goto ebadf;
+}
 if (!(p = lock_user_string(arg2)))
 goto efault;
 ret = get_errno(faccessat(arg1, p, arg3, 0));
@@ -8564,7 +8598,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_renameat)
 case TARGET_NR_renameat:
-{
+if (is_hostfd(arg1)) {
+goto ebadf;
+} else {
 void *p2;
 p  = lock_user_string(arg2);
 p2 = lock_user_string(arg4);
@@ -8579,7 +8615,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_renameat2)
 case TARGET_NR_renameat2:
-{
+if (is_hostfd(arg1)) {
+goto ebadf;
+} else {
 void *p2;
 p  = lock_user_string(arg2);

Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread no-reply
Hi,

This series failed docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

Type: series
Message-id: 20180531222835.16558-1-js...@redhat.com
Subject: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

=== TEST SCRIPT BEGIN ===
#!/bin/bash
set -e
git submodule update --init dtc
# Let docker tests dump environment info
export SHOW_ENV=1
export J=8
time make docker-test-mingw@fedora
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
9932482af2 ahci: make ahci_mem_write traces more descriptive
c89f4076bd ahci: delete old host register address definitions
70f11b4d95 ahci: adjust ahci_mem_write to work on registers
c13dce7973 ahci: fix spacing damage on ahci_mem_write
fab5307968 ahci: make mem_read_32 traces more descriptive
ace1b4ffac ahci: modify ahci_mem_read_32 to work on register numbers
3021a82d31 ahci: fix host register max address
3c4808a257 ahci: add host register enumeration
f57758869b ahci: delete old port register address definitions
52d04d9b2c ahci: make port write traces more descriptive
08f435fad3 ahci: modify ahci_port_write to use register numbers
634c79d46b ahci: combine identical clauses in port write
50b162ae60 ahci: fix spacing damage on ahci_port_write
5895ec8f28 ahci: make port read traces more descriptive
68fd051a9a ahci: modify ahci_port_read to use register numbers
0478088585 ahci: add port register enumeration

=== OUTPUT BEGIN ===
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into '/var/tmp/patchew-tester-tmp-v_cvirkw/src/dtc'...
Submodule path 'dtc': checked out 'e54388015af1fb4bf04d0bca99caba1074d9cc42'
  BUILD   fedora
make[1]: Entering directory '/var/tmp/patchew-tester-tmp-v_cvirkw/src'
  GEN 
/var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar
Cloning into 
'/var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar.vroot'...
done.
Checking out files:  49% (3050/6180)   
Checking out files:  50% (3090/6180)   
Checking out files:  51% (3152/6180)   
Checking out files:  52% (3214/6180)   
Checking out files:  53% (3276/6180)   
Checking out files:  54% (3338/6180)   
Checking out files:  55% (3399/6180)   
Checking out files:  56% (3461/6180)   
Checking out files:  57% (3523/6180)   
Checking out files:  58% (3585/6180)   
Checking out files:  59% (3647/6180)   
Checking out files:  60% (3708/6180)   
Checking out files:  61% (3770/6180)   
Checking out files:  62% (3832/6180)   
Checking out files:  63% (3894/6180)   
Checking out files:  64% (3956/6180)   
Checking out files:  65% (4017/6180)   
Checking out files:  66% (4079/6180)   
Checking out files:  67% (4141/6180)   
Checking out files:  68% (4203/6180)   
Checking out files:  69% (4265/6180)   
Checking out files:  70% (4326/6180)   
Checking out files:  71% (4388/6180)   
Checking out files:  72% (4450/6180)   
Checking out files:  73% (4512/6180)   
Checking out files:  74% (4574/6180)   
Checking out files:  75% (4635/6180)   
Checking out files:  76% (4697/6180)   
Checking out files:  77% (4759/6180)   
Checking out files:  78% (4821/6180)   
Checking out files:  79% (4883/6180)   
Checking out files:  80% (4944/6180)   
Checking out files:  81% (5006/6180)   
Checking out files:  82% (5068/6180)   
Checking out files:  83% (5130/6180)   
Checking out files:  84% (5192/6180)   
Checking out files:  85% (5253/6180)   
Checking out files:  86% (5315/6180)   
Checking out files:  87% (5377/6180)   
Checking out files:  88% (5439/6180)   
Checking out files:  89% (5501/6180)   
Checking out files:  90% (5562/6180)   
Checking out files:  91% (5624/6180)   
Checking out files:  92% (5686/6180)   
Checking out files:  93% (5748/6180)   
Checking out files:  94% (5810/6180)   
Checking out files:  95% (5871/6180)   
Checking out files:  96% (5933/6180)   
Checking out files:  97% (5995/6180)   
Checking out files:  98% (6057/6180)   
Checking out files:  98% (6105/6180)   
Checking out files:  99% (6119/6180)   
Checking out files: 100% (6180/6180)   
Checking out files: 100% (6180/6180), done.
Your branch is up-to-date with 'origin/test'.
Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc'
Cloning into 
'/var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar.vroot/dtc'...
Submodule path 'dtc': checked out 'e54388015af1fb4bf04d0bca99caba1074d9cc42'
Submodule 'ui/keycodemapdb' (git://git.qemu.org/keycodemapdb.git) registered 
for path 'ui/keycodemapdb'
Cloning into 
'/var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar.vroot/ui/keycodemapdb'...
Submodule path 'ui/keycodemapdb': checked out 
'6b3d716e2b6472eb7189d3220552280ef3d832ce'
  COPYRUNNER
RUN test-mingw in qemu:fedora 
Packages installed:
PyYAML-3.12-5.fc27.x86_64
SDL2-devel-2.0.7-2.fc27.x86

[Qemu-devel] [PATCH 6/6] linux-user: Use *at functions to implement interp_prefix

2018-05-31 Thread Richard Henderson
If the interp_prefix is a complete chroot, it may have a *lot* of files.
Setting up the cache for this is quite expensive.

For the most part, we can use the *at versions of various syscalls to
attempt the operation in the prefix.  For the few cases that remain,
attempt the operation in the prefix via concatenation and then retry
if that fails.

Signed-off-by: Richard Henderson 
---
 linux-user/qemu.h|  37 ++
 linux-user/elfload.c |   5 +-
 linux-user/main.c|  26 ++-
 linux-user/syscall.c | 163 +--
 4 files changed, 174 insertions(+), 57 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 33dafbe0e4..05a82a3628 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -471,7 +471,44 @@ void mmap_fork_start(void);
 void mmap_fork_end(int child);
 
 /* main.c */
+extern int interp_dirfd;
 extern unsigned long guest_stack_size;
+char *interp_prefix_path(const char *path);
+
+/* If PATH is absolute, attempt an operation first within interp_dirfd,
+ * using OPENAT_EXPR.  If that fails with ENOENT, or if PATH is not
+ * absolute, only use NORMAL_EXPR.
+ */
+#define TRY_INTERP_FD(RET, PATH, OPENAT_EXPR, NORMAL_EXPR)  \
+do {\
+if (interp_dirfd >= 0 && (PATH)[0] == '/') {\
+RET = OPENAT_EXPR;  \
+if (RET != -1 || errno != ENOENT) { \
+break;  \
+}   \
+}   \
+RET = NORMAL_EXPR;  \
+} while (0)
+
+/* If PATH is absolute, attempt an operation first with interp_prefix
+ * prefixed.  If that fails with ENOENT, or if PATH is not absolute,
+ * only attempt with PATH.
+ */
+#define TRY_INTERP_PATH(RET, PATH, EXPR)\
+do {\
+char *new_##PATH = interp_prefix_path(PATH);\
+if (new_##PATH) {   \
+__typeof(PATH) save_##PATH = PATH;  \
+PATH = new_##PATH;  \
+RET = EXPR; \
+free(new_##PATH);   \
+PATH = save_##PATH; \
+if (RET != -1 || errno != ENOENT) { \
+break;  \
+}   \
+}   \
+RET = EXPR; \
+} while (0)
 
 /* user access */
 
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 13bc78d0c8..abdf5bbf01 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -6,7 +6,6 @@
 
 #include "qemu.h"
 #include "disas/disas.h"
-#include "qemu/path.h"
 
 #ifdef _ARCH_PPC64
 #undef ARCH_DLINFO
@@ -2375,7 +2374,9 @@ static void load_elf_interp(const char *filename, struct 
image_info *info,
 {
 int fd, retval;
 
-fd = open(path(filename), O_RDONLY);
+TRY_INTERP_FD(fd, filename,
+  openat(interp_dirfd, filename + 1, O_RDONLY),
+  open(filename, O_RDONLY));
 if (fd < 0) {
 goto exit_perror;
 }
diff --git a/linux-user/main.c b/linux-user/main.c
index ee3f323c08..4e956fc00c 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -23,7 +23,6 @@
 
 #include "qapi/error.h"
 #include "qemu.h"
-#include "qemu/path.h"
 #include "qemu/config-file.h"
 #include "qemu/cutils.h"
 #include "qemu/help_option.h"
@@ -89,9 +88,27 @@ unsigned long reserved_va;
 
 static void usage(int exitcode);
 
+int interp_dirfd;
 static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
 const char *qemu_uname_release;
 
+char *interp_prefix_path(const char *path)
+{
+size_t i_len, p_len;
+char *ret;
+
+if (interp_prefix == NULL || path[0] != '/') {
+return NULL;
+}
+i_len = strlen(interp_prefix);
+p_len = strlen(path) + 1;
+ret = g_malloc(i_len + p_len);
+
+memcpy(ret, interp_prefix, i_len);
+memcpy(ret + i_len, path, p_len);
+return ret;
+}
+
 /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
we allocate a bigger stack. Need a better solution, for example
by remapping the process stack directly at the right place */
@@ -671,7 +688,12 @@ int main(int argc, char **argv, char **envp)
 memset(&bprm, 0, sizeof (bprm));
 
 /* Scan interp_prefix dir for replacement files. */
-init_paths(interp_prefix);
+interp_dirfd = open(interp_prefix, O_CLOEXEC | O_DIRECTORY | O_PATH);
+if (interp_dirfd >= 0) {
+add_hostfd(interp_dirfd);
+} else {
+interp_prefix = NULL;
+}
 
 init_qemu_uname_release();
 
diff --git a/linu

[Qemu-devel] [PATCH 4/6] linux-user: Check contains_hostfd in select syscalls

2018-05-31 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 linux-user/syscall.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5339f0bc1c..b98125829b 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -1506,6 +1506,11 @@ static abi_long do_select(int n,
 if (ret) {
 return ret;
 }
+if (contains_hostfd(&rfds) ||
+contains_hostfd(&wfds) ||
+contains_hostfd(&efds)) {
+return -TARGET_EBADF;
+}
 
 if (target_tv_addr) {
 if (copy_from_user_timeval(&tv, target_tv_addr))
@@ -9392,6 +9397,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 if (ret) {
 goto fail;
 }
+if (contains_hostfd(&rfds) ||
+contains_hostfd(&wfds) ||
+contains_hostfd(&efds)) {
+goto ebadf;
+}
 
 /*
  * This takes a timespec, and not a timeval, so we cannot
-- 
2.17.0




[Qemu-devel] [PATCH 2/6] linux-user: Add host_fds and manipulators

2018-05-31 Thread Richard Henderson
This allows emulation of guest syscalls to reject
manipulations to fds used by the host.

Signed-off-by: Richard Henderson 
---
 linux-user/qemu.h | 30 ++
 linux-user/main.c | 27 ++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index c55c8e294b..33dafbe0e4 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -155,6 +155,36 @@ void task_settid(TaskState *);
 void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
+extern fd_set host_fds;
+
+/**
+ * is_hostfd:
+ * @fd: file descriptor to check
+ *
+ * Return true if @fd is being used by the host and therefore any
+ * guest system call referencing @fd should return EBADF.
+ */
+static inline bool is_hostfd(int fd)
+{
+return fd >= 0 && fd < FD_SETSIZE && FD_ISSET(fd, &host_fds);
+}
+
+/**
+ * contains_hostfd:
+ * @fds: fd_set of descriptors to check
+ *
+ * Return true if any descriptor in @fds are being used by the host
+ * and therefore the guest system call should return EBADF.
+ */
+bool contains_hostfd(const fd_set *fds);
+
+/**
+ * add_hostfd:
+ * @fd: file descriptor to reserve
+ *
+ * Add @fd to the set of file descriptors to reserve for the host.
+ */
+void add_hostfd(int fd);
 
 /* ??? See if we can avoid exposing so much of the loader internals.  */
 
diff --git a/linux-user/main.c b/linux-user/main.c
index 78d6d3e7eb..ee3f323c08 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -49,6 +49,7 @@ static const char *cpu_type;
 unsigned long mmap_min_addr;
 unsigned long guest_base;
 int have_guest_base;
+fd_set host_fds;
 
 /*
  * When running 32-on-64 we should make sure we can fit all of the possible
@@ -112,6 +113,23 @@ int cpu_get_pic_interrupt(CPUX86State *env)
 }
 #endif
 
+bool contains_hostfd(const fd_set *fds)
+{
+int i;
+for (i = 0; i < ARRAY_SIZE(__FDS_BITS(fds)); ++i) {
+if (__FDS_BITS(fds)[i] & __FDS_BITS(&host_fds)[i]) {
+return true;
+}
+}
+return true;
+}
+
+void add_hostfd(int fd)
+{
+g_assert(fd >= 0 && fd < FD_SETSIZE);
+FD_SET(fd, &host_fds);
+}
+
 /***/
 /* Helper routines for implementing atomic operations.  */
 
@@ -805,12 +823,19 @@ int main(int argc, char **argv, char **envp)
 
 target_cpu_copy_regs(env, regs);
 
+/* Prevent the guest from closing the log file.  */
+if (qemu_logfile && qemu_logfile != stderr) {
+add_hostfd(fileno(qemu_logfile));
+}
+
 if (gdbstub_port) {
-if (gdbserver_start(gdbstub_port) < 0) {
+int fd = gdbserver_start(gdbstub_port);
+if (fd < 0) {
 fprintf(stderr, "qemu: could not open gdbserver on port %d\n",
 gdbstub_port);
 exit(EXIT_FAILURE);
 }
+add_hostfd(fd);
 gdb_handlesig(cpu, 0);
 }
 cpu_loop(env);
-- 
2.17.0




[Qemu-devel] [PATCH v5 0/6] linux-user: Reorg interp_prefix handling

2018-05-31 Thread Richard Henderson
Changes since v4 (2018-01-28)
  * Protect host fds from guest manipulation.
  * Rename the interp_dirfd macro to TRY_INTERP_FD
  * Add TRY_INTERP_PATH for acct, statfs, inotify_add_watch

Changes since v3 (2017-12-29)
  * Use DO/WHILE as the control construct; wrap it in a macro.
  * Introduce linux_user_path to handle the cases *at syscalls
do not cover.

Changes since v2 (2017-12-04)
  * Use IF as the control construct instead of SWITCH.

Changes since v1 (2016-11-??)
  * Require interp_dirfd set before trying the *at path.


r~


Richard Henderson (6):
  gdbstub: Return the fd from gdbserver_start
  linux-user: Add host_fds and manipulators
  linux-user: Check is_hostfd in do_syscall
  linux-user: Check contains_hostfd in select syscalls
  linux-user: Check is_hostfd in mmap syscalls
  linux-user: Use *at functions to implement interp_prefix

 linux-user/qemu.h|  67 ++
 gdbstub.c|   5 +-
 linux-user/elfload.c |   5 +-
 linux-user/main.c|  53 -
 linux-user/syscall.c | 485 ---
 5 files changed, 532 insertions(+), 83 deletions(-)

-- 
2.17.0




[Qemu-devel] [PATCH 5/6] linux-user: Check is_hostfd in mmap syscalls

2018-05-31 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 linux-user/syscall.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b98125829b..d7513d5dac 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9605,11 +9605,17 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 v5 = tswapal(v[4]);
 v6 = tswapal(v[5]);
 unlock_user(v, arg1, 0);
+if (is_hostfd(v5)) {
+goto ebadf;
+}
 ret = get_errno(target_mmap(v1, v2, v3,
 target_to_host_bitmask(v4, 
mmap_flags_tbl),
 v5, v6));
 }
 #else
+if (is_hostfd(arg5)) {
+goto ebadf;
+}
 ret = get_errno(target_mmap(arg1, arg2, arg3,
 target_to_host_bitmask(arg4, 
mmap_flags_tbl),
 arg5,
@@ -9622,6 +9628,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #ifndef MMAP_SHIFT
 #define MMAP_SHIFT 12
 #endif
+if (is_hostfd(arg5)) {
+goto ebadf;
+}
 ret = get_errno(target_mmap(arg1, arg2, arg3,
 target_to_host_bitmask(arg4, 
mmap_flags_tbl),
 arg5,
-- 
2.17.0




[Qemu-devel] [PATCH 1/6] gdbstub: Return the fd from gdbserver_start

2018-05-31 Thread Richard Henderson
This will allow us to protect gdbserver_fd from the guest.

Signed-off-by: Richard Henderson 
---
 gdbstub.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 6081e719c5..057d0d65c5 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1890,15 +1890,16 @@ static int gdbserver_open(int port)
 int gdbserver_start(int port)
 {
 gdbserver_fd = gdbserver_open(port);
-if (gdbserver_fd < 0)
+if (gdbserver_fd < 0) {
 return -1;
+}
 /* accept connections */
 if (!gdb_accept()) {
 close(gdbserver_fd);
 gdbserver_fd = -1;
 return -1;
 }
-return 0;
+return gdbserver_fd;
 }
 
 /* Disable gdb stub for child processes.  */
-- 
2.17.0




Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180531222835.16558-1-js...@redhat.com
Subject: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 t [tag update]patchew/20180531212435.165261-1-ebl...@redhat.com -> 
patchew/20180531212435.165261-1-ebl...@redhat.com
 * [new tag]   patchew/20180531222835.16558-1-js...@redhat.com -> 
patchew/20180531222835.16558-1-js...@redhat.com
Switched to a new branch 'test'
9932482af2 ahci: make ahci_mem_write traces more descriptive
c89f4076bd ahci: delete old host register address definitions
70f11b4d95 ahci: adjust ahci_mem_write to work on registers
c13dce7973 ahci: fix spacing damage on ahci_mem_write
fab5307968 ahci: make mem_read_32 traces more descriptive
ace1b4ffac ahci: modify ahci_mem_read_32 to work on register numbers
3021a82d31 ahci: fix host register max address
3c4808a257 ahci: add host register enumeration
f57758869b ahci: delete old port register address definitions
52d04d9b2c ahci: make port write traces more descriptive
08f435fad3 ahci: modify ahci_port_write to use register numbers
634c79d46b ahci: combine identical clauses in port write
50b162ae60 ahci: fix spacing damage on ahci_port_write
5895ec8f28 ahci: make port read traces more descriptive
68fd051a9a ahci: modify ahci_port_read to use register numbers
0478088585 ahci: add port register enumeration

=== OUTPUT BEGIN ===
Checking PATCH 1/16: ahci: add port register enumeration...
WARNING: line over 80 characters
#71: FILE: hw/ide/ahci_internal.h:94:
+AHCI_PORT_REG_SCR_NOTIF   = 15, /* PxSNTF: SATA phy register: 
SNotification */

total: 0 errors, 1 warnings, 72 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 2/16: ahci: modify ahci_port_read to use register numbers...
Checking PATCH 3/16: ahci: make port read traces more descriptive...
Checking PATCH 4/16: ahci: fix spacing damage on ahci_port_write...
ERROR: spaces required around that '|' (ctx:VxV)
#83: FILE: hw/ide/ahci.c:316:
+(val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK));
  ^

total: 1 errors, 0 warnings, 156 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 5/16: ahci: combine identical clauses in port write...
Checking PATCH 6/16: ahci: modify ahci_port_write to use register numbers...
Checking PATCH 7/16: ahci: make port write traces more descriptive...
Checking PATCH 8/16: ahci: delete old port register address definitions...
Checking PATCH 9/16: ahci: add host register enumeration...
Checking PATCH 10/16: ahci: fix host register max address...
Checking PATCH 11/16: ahci: modify ahci_mem_read_32 to work on register 
numbers...
Checking PATCH 12/16: ahci: make mem_read_32 traces more descriptive...
Checking PATCH 13/16: ahci: fix spacing damage on ahci_mem_write...
Checking PATCH 14/16: ahci: adjust ahci_mem_write to work on registers...
Checking PATCH 15/16: ahci: delete old host register address definitions...
Checking PATCH 16/16: ahci: make ahci_mem_write traces more descriptive...
WARNING: line over 80 characters
#18: FILE: hw/ide/ahci.c:497:
+qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented 
register:"

total: 0 errors, 1 warnings, 35 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[Qemu-devel] [PATCH v2 16/16] ahci: make ahci_mem_write traces more descriptive

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c   | 13 -
 hw/ide/trace-events |  4 +++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 1309f80458..6cab853741 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -494,13 +494,24 @@ static void ahci_mem_write(void *opaque, hwaddr addr,
 /* FIXME report write? */
 break;
 default:
-trace_ahci_mem_write_unknown(s, size, addr, val);
+qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented 
register:"
+  " AHCI host register %s, offset 0x%lx: 0x%"PRIu64,
+  AHCIHostReg_lookup[regnum], addr, val);
+trace_ahci_mem_write_host_unimpl(s, size,
+ AHCIHostReg_lookup[regnum], addr);
 }
+trace_ahci_mem_write_host(s, size, AHCIHostReg_lookup[regnum],
+ addr, val);
 } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
(addr < (AHCI_PORT_REGS_START_ADDR +
 (s->ports * AHCI_PORT_ADDR_OFFSET_LEN {
 ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
 addr & AHCI_PORT_ADDR_OFFSET_MASK, val);
+} else {
+qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register: "
+  "AHCI global register at offset 0x%lx: 0x%"PRIu64,
+  addr, val);
+trace_ahci_mem_write_unimpl(s, size, addr, val);
 }
 }
 
diff --git a/hw/ide/trace-events b/hw/ide/trace-events
index 8149a54db8..e6bd95f52f 100644
--- a/hw/ide/trace-events
+++ b/hw/ide/trace-events
@@ -77,7 +77,9 @@ ahci_mem_read_32_host(void *s, const char *reg, uint64_t 
addr, uint32_t val) "ah
 ahci_mem_read_32_host_default(void *s, const char *reg, uint64_t addr) 
"ahci(%p): unimplemented mem read [reg:%s] @ 0x%"PRIx64
 ahci_mem_read(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): 
read%u @ 0x%"PRIx64": 0x%016"PRIx64
 ahci_mem_write(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): 
write%u @ 0x%"PRIx64": 0x%016"PRIx64
-ahci_mem_write_unknown(void *s, unsigned size, uint64_t addr, uint64_t val) 
"ahci(%p): write%u to unknown register 0x%"PRIx64": 0x%016"PRIx64
+ahci_mem_write_host_unimpl(void *s, unsigned size, const char *reg, uint64_t 
addr) "ahci(%p) unimplemented write%u [reg:%s] @ 0x%"PRIx64
+ahci_mem_write_host(void *s, unsigned size, const char *reg, uint64_t addr, 
uint64_t val) "ahci(%p) write%u [reg:%s] @ 0x%"PRIx64": 0x%016"PRIx64
+ahci_mem_write_unimpl(void *s, unsigned size, uint64_t addr, uint64_t val) 
"ahci(%p): write%u to unknown register 0x%"PRIx64": 0x%016"PRIx64
 ahci_set_signature(void *s, int port, uint8_t nsector, uint8_t sector, uint8_t 
lcyl, uint8_t hcyl, uint32_t sig) "ahci(%p)[%d]: set signature sector:0x%02x 
nsector:0x%02x lcyl:0x%02x hcyl:0x%02x (cumulatively: 0x%08x)"
 ahci_reset_port(void *s, int port) "ahci(%p)[%d]: reset port"
 ahci_unmap_fis_address_null(void *s, int port) "ahci(%p)[%d]: Attempt to unmap 
NULL FIS address"
-- 
2.14.3




[Qemu-devel] [PATCH v2 12/16] ahci: make mem_read_32 traces more descriptive

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c   | 7 +--
 hw/ide/trace-events | 3 +++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 7edbb30f60..5e902c6615 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -47,7 +47,6 @@ static bool ahci_map_fis_address(AHCIDevice *ad);
 static void ahci_unmap_clb_address(AHCIDevice *ad);
 static void ahci_unmap_fis_address(AHCIDevice *ad);
 
-__attribute__((__unused__)) /* TODO */
 static const char *AHCIHostReg_lookup[AHCI_HOST_REG__COUNT] = {
 [AHCI_HOST_REG_CAP]= "CAP",
 [AHCI_HOST_REG_CTL]= "GHC",
@@ -406,13 +405,17 @@ static uint64_t ahci_mem_read_32(void *opaque, hwaddr 
addr)
 val = s->control_regs.version;
 break;
 default:
-break;
+trace_ahci_mem_read_32_host_default(s, AHCIHostReg_lookup[regnum],
+addr);
 }
+trace_ahci_mem_read_32_host(s, AHCIHostReg_lookup[regnum], addr, val);
 } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
(addr < (AHCI_PORT_REGS_START_ADDR +
 (s->ports * AHCI_PORT_ADDR_OFFSET_LEN {
 val = ahci_port_read(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
  addr & AHCI_PORT_ADDR_OFFSET_MASK);
+} else {
+trace_ahci_mem_read_32_default(s, addr, val);
 }
 
 trace_ahci_mem_read_32(s, addr, val);
diff --git a/hw/ide/trace-events b/hw/ide/trace-events
index 1efbbb8114..8149a54db8 100644
--- a/hw/ide/trace-events
+++ b/hw/ide/trace-events
@@ -72,6 +72,9 @@ ahci_trigger_irq(void *s, int port, const char *name, 
uint32_t val, uint32_t old
 ahci_port_write(void *s, int port, const char *reg, int offset, uint32_t val) 
"ahci(%p)[%d]: port write [reg:%s] @ 0x%x: 0x%08x"
 ahci_port_write_unimpl(void *s, int port, const char *reg, int offset, 
uint32_t val) "ahci(%p)[%d]: unimplemented port write [reg:%s] @ 0x%x: 0x%08x"
 ahci_mem_read_32(void *s, uint64_t addr, uint32_t val) "ahci(%p): mem read @ 
0x%"PRIx64": 0x%08x"
+ahci_mem_read_32_default(void *s, uint64_t addr, uint32_t val) "ahci(%p): mem 
read @ 0x%"PRIx64": 0x%08x"
+ahci_mem_read_32_host(void *s, const char *reg, uint64_t addr, uint32_t val) 
"ahci(%p): mem read [reg:%s] @ 0x%"PRIx64": 0x%08x"
+ahci_mem_read_32_host_default(void *s, const char *reg, uint64_t addr) 
"ahci(%p): unimplemented mem read [reg:%s] @ 0x%"PRIx64
 ahci_mem_read(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): 
read%u @ 0x%"PRIx64": 0x%016"PRIx64
 ahci_mem_write(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): 
write%u @ 0x%"PRIx64": 0x%016"PRIx64
 ahci_mem_write_unknown(void *s, unsigned size, uint64_t addr, uint64_t val) 
"ahci(%p): write%u to unknown register 0x%"PRIx64": 0x%016"PRIx64
-- 
2.14.3




[Qemu-devel] [PATCH v2 11/16] ahci: modify ahci_mem_read_32 to work on register numbers

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index f2e8bce797..7edbb30f60 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -386,22 +386,27 @@ static uint64_t ahci_mem_read_32(void *opaque, hwaddr 
addr)
 uint32_t val = 0;
 
 if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
-switch (addr) {
-case HOST_CAP:
+enum AHCIHostReg regnum = addr / 4;
+assert(regnum < AHCI_HOST_REG__COUNT);
+
+switch (regnum) {
+case AHCI_HOST_REG_CAP:
 val = s->control_regs.cap;
 break;
-case HOST_CTL:
+case AHCI_HOST_REG_CTL:
 val = s->control_regs.ghc;
 break;
-case HOST_IRQ_STAT:
+case AHCI_HOST_REG_IRQ_STAT:
 val = s->control_regs.irqstatus;
 break;
-case HOST_PORTS_IMPL:
+case AHCI_HOST_REG_PORTS_IMPL:
 val = s->control_regs.impl;
 break;
-case HOST_VERSION:
+case AHCI_HOST_REG_VERSION:
 val = s->control_regs.version;
 break;
+default:
+break;
 }
 } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
(addr < (AHCI_PORT_REGS_START_ADDR +
-- 
2.14.3




[Qemu-devel] [PATCH v2 01/16] ahci: add port register enumeration

2018-05-31 Thread John Snow
Instead of tracking offsets, lets count the registers.

Signed-off-by: John Snow 
---
 hw/ide/ahci.c  | 25 +
 hw/ide/ahci_internal.h | 29 +
 2 files changed, 54 insertions(+)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index e22d7be05f..48130c6439 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -46,6 +46,31 @@ static bool ahci_map_fis_address(AHCIDevice *ad);
 static void ahci_unmap_clb_address(AHCIDevice *ad);
 static void ahci_unmap_fis_address(AHCIDevice *ad);
 
+__attribute__((__unused__)) /* TODO */
+static const char *AHCIPortReg_lookup[AHCI_PORT_REG__COUNT] = {
+[AHCI_PORT_REG_LST_ADDR]= "PxCLB",
+[AHCI_PORT_REG_LST_ADDR_HI] = "PxCLBU",
+[AHCI_PORT_REG_FIS_ADDR]= "PxFB",
+[AHCI_PORT_REG_FIS_ADDR_HI] = "PxFBU",
+[AHCI_PORT_REG_IRQ_STAT]= "PxIS",
+[AHCI_PORT_REG_IRQ_MASK]= "PXIE",
+[AHCI_PORT_REG_CMD] = "PxCMD",
+[7] = "Reserved",
+[AHCI_PORT_REG_TFDATA]  = "PxTFD",
+[AHCI_PORT_REG_SIG] = "PxSIG",
+[AHCI_PORT_REG_SCR_STAT]= "PxSSTS",
+[AHCI_PORT_REG_SCR_CTL] = "PxSCTL",
+[AHCI_PORT_REG_SCR_ERR] = "PxSERR",
+[AHCI_PORT_REG_SCR_ACT] = "PxSACT",
+[AHCI_PORT_REG_CMD_ISSUE]   = "PxCI",
+[AHCI_PORT_REG_SCR_NOTIF]   = "PxSNTF",
+[AHCI_PORT_REG_FIS_CTL] = "PxFBS",
+[AHCI_PORT_REG_DEV_SLEEP]   = "PxDEVSLP",
+[18 ... 27] = "Reserved",
+[AHCI_PORT_REG_VENDOR_1 ...
+ AHCI_PORT_REG_VENDOR_4]= "PxVS",
+};
+
 static const char *AHCIPortIRQ_lookup[AHCI_PORT_IRQ__COUNT] = {
 [AHCI_PORT_IRQ_BIT_DHRS] = "DHRS",
 [AHCI_PORT_IRQ_BIT_PSS]  = "PSS",
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index 1a25d6c039..eb7e1eefc0 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -74,6 +74,34 @@
 #define HOST_CAP_NCQ  (1 << 30) /* Native Command Queueing */
 #define HOST_CAP_64   (1U << 31) /* PCI DAC (64-bit DMA) support */
 
+/* registers for each SATA port */
+enum AHCIPortReg {
+AHCI_PORT_REG_LST_ADDR= 0, /* PxCLB: command list DMA addr */
+AHCI_PORT_REG_LST_ADDR_HI = 1, /* PxCLBU: command list DMA addr hi */
+AHCI_PORT_REG_FIS_ADDR= 2, /* PxFB: FIS rx buf addr */
+AHCI_PORT_REG_FIS_ADDR_HI = 3, /* PxFBU: FIX rx buf addr hi */
+AHCI_PORT_REG_IRQ_STAT= 4, /* PxIS: interrupt status */
+AHCI_PORT_REG_IRQ_MASK= 5, /* PxIE: interrupt enable/disable mask */
+AHCI_PORT_REG_CMD = 6, /* PxCMD: port command */
+/* RESERVED */
+AHCI_PORT_REG_TFDATA  = 8, /* PxTFD: taskfile data */
+AHCI_PORT_REG_SIG = 9, /* PxSIG: device TF signature */
+AHCI_PORT_REG_SCR_STAT= 10, /* PxSSTS: SATA phy register: SStatus */
+AHCI_PORT_REG_SCR_CTL = 11, /* PxSCTL: SATA phy register: SControl */
+AHCI_PORT_REG_SCR_ERR = 12, /* PxSERR: SATA phy register: SError */
+AHCI_PORT_REG_SCR_ACT = 13, /* PxSACT: SATA phy register: SActive */
+AHCI_PORT_REG_CMD_ISSUE   = 14, /* PxCI: command issue */
+AHCI_PORT_REG_SCR_NOTIF   = 15, /* PxSNTF: SATA phy register: 
SNotification */
+AHCI_PORT_REG_FIS_CTL = 16, /* PxFBS: Port multiplier switching ctl */
+AHCI_PORT_REG_DEV_SLEEP   = 17, /* PxDEVSLP: device sleep control */
+/* RESERVED */
+AHCI_PORT_REG_VENDOR_1= 28, /* PxVS: Vendor Specific */
+AHCI_PORT_REG_VENDOR_2= 29,
+AHCI_PORT_REG_VENDOR_3= 30,
+AHCI_PORT_REG_VENDOR_4= 31,
+AHCI_PORT_REG__COUNT  = 32
+};
+
 /* registers for each SATA port */
 #define PORT_LST_ADDR 0x00 /* command list DMA addr */
 #define PORT_LST_ADDR_HI  0x04 /* command list DMA addr hi */
@@ -82,6 +110,7 @@
 #define PORT_IRQ_STAT 0x10 /* interrupt status */
 #define PORT_IRQ_MASK 0x14 /* interrupt enable/disable mask */
 #define PORT_CMD  0x18 /* port command */
+
 #define PORT_TFDATA   0x20 /* taskfile data */
 #define PORT_SIG  0x24 /* device TF signature */
 #define PORT_SCR_STAT 0x28 /* SATA phy register: SStatus */
-- 
2.14.3




[Qemu-devel] [PATCH v2 14/16] ahci: adjust ahci_mem_write to work on registers

2018-05-31 Thread John Snow
Actually, this function looks pretty broken, but for now, let's finish
up what this series of commits came here to do.

Signed-off-by: John Snow 
---
 hw/ide/ahci.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 568e6b848a..1309f80458 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -468,11 +468,14 @@ static void ahci_mem_write(void *opaque, hwaddr addr,
 }
 
 if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
-switch (addr) {
-case HOST_CAP: /* R/WO, RO */
+enum AHCIHostReg regnum = addr / 4;
+assert(regnum < AHCI_HOST_REG__COUNT);
+
+switch (regnum) {
+case AHCI_HOST_REG_CAP: /* R/WO, RO */
 /* FIXME handle R/WO */
 break;
-case HOST_CTL: /* R/W */
+case AHCI_HOST_REG_CTL: /* R/W */
 if (val & HOST_CTL_RESET) {
 ahci_reset(s);
 } else {
@@ -480,14 +483,14 @@ static void ahci_mem_write(void *opaque, hwaddr addr,
 ahci_check_irq(s);
 }
 break;
-case HOST_IRQ_STAT: /* R/WC, RO */
+case AHCI_HOST_REG_IRQ_STAT: /* R/WC, RO */
 s->control_regs.irqstatus &= ~val;
 ahci_check_irq(s);
 break;
-case HOST_PORTS_IMPL: /* R/WO, RO */
+case AHCI_HOST_REG_PORTS_IMPL: /* R/WO, RO */
 /* FIXME handle R/WO */
 break;
-case HOST_VERSION: /* RO */
+case AHCI_HOST_REG_VERSION: /* RO */
 /* FIXME report write? */
 break;
 default:
-- 
2.14.3




[Qemu-devel] [PATCH v2 03/16] ahci: make port read traces more descriptive

2018-05-31 Thread John Snow
A trace is added to let us watch unimplemented registers specifically,
as these are more likely to cause us trouble. Otherwise, the port read
traces now tell us what register is getting hit, which is nicer.

Signed-off-by: John Snow 
---
 hw/ide/ahci.c   | 5 +++--
 hw/ide/trace-events | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 5187bf34ad..5b5ab823f4 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -46,7 +46,6 @@ static bool ahci_map_fis_address(AHCIDevice *ad);
 static void ahci_unmap_clb_address(AHCIDevice *ad);
 static void ahci_unmap_fis_address(AHCIDevice *ad);
 
-__attribute__((__unused__)) /* TODO */
 static const char *AHCIPortReg_lookup[AHCI_PORT_REG__COUNT] = {
 [AHCI_PORT_REG_LST_ADDR]= "PxCLB",
 [AHCI_PORT_REG_LST_ADDR_HI] = "PxCLBU",
@@ -149,10 +148,12 @@ static uint32_t ahci_port_read(AHCIState *s, int port, 
int offset)
 val = pr->cmd_issue;
 break;
 default:
+trace_ahci_port_read_default(s, port, AHCIPortReg_lookup[regnum],
+ offset);
 val = 0;
 }
 
-trace_ahci_port_read(s, port, offset, val);
+trace_ahci_port_read(s, port, AHCIPortReg_lookup[regnum], offset, val);
 return val;
 }
 
diff --git a/hw/ide/trace-events b/hw/ide/trace-events
index 5c0e59ec42..0db18d8271 100644
--- a/hw/ide/trace-events
+++ b/hw/ide/trace-events
@@ -63,7 +63,8 @@ ide_atapi_cmd_read_dma_cb_aio(void *s, int lba, int n) 
"IDEState: %p; aio read:
 ide_atapi_cmd_packet(void *s, uint16_t limit, const char *packet) "IDEState: 
%p; limit=0x%x packet: %s"
 
 # hw/ide/ahci.c
-ahci_port_read(void *s, int port, int offset, uint32_t ret) "ahci(%p)[%d]: 
port read @ 0x%x: 0x%08x"
+ahci_port_read(void *s, int port, const char *reg, int offset, uint32_t ret) 
"ahci(%p)[%d]: port read [reg:%s] @ 0x%x: 0x%08x"
+ahci_port_read_default(void *s, int port, const char *reg, int offset) 
"ahci(%p)[%d]: unimplemented port read [reg:%s] @ 0x%x"
 ahci_irq_raise(void *s) "ahci(%p): raise irq"
 ahci_irq_lower(void *s) "ahci(%p): lower irq"
 ahci_check_irq(void *s, uint32_t old, uint32_t new) "ahci(%p): check irq 
0x%08x --> 0x%08x"
-- 
2.14.3




[Qemu-devel] [PATCH v2 15/16] ahci: delete old host register address definitions

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci_internal.h | 6 --
 1 file changed, 6 deletions(-)

diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index f9dcf8b6e6..2953243929 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -55,12 +55,6 @@
 #define RX_FIS_UNK0x60 /* offset of Unknown FIS data */
 
 /* global controller registers */
-#define HOST_CAP  0x00 /* host capabilities */
-#define HOST_CTL  0x04 /* global host control */
-#define HOST_IRQ_STAT 0x08 /* interrupt status */
-#define HOST_PORTS_IMPL   0x0c /* bitmap of implemented ports */
-#define HOST_VERSION  0x10 /* AHCI spec. version compliancy */
-
 enum AHCIHostReg {
 AHCI_HOST_REG_CAP= 0,  /* CAP: host capabilities */
 AHCI_HOST_REG_CTL= 1,  /* GHC: global host control */
-- 
2.14.3




[Qemu-devel] [PATCH v2 09/16] ahci: add host register enumeration

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c  | 15 +++
 hw/ide/ahci_internal.h | 15 +++
 2 files changed, 30 insertions(+)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 01463f0f51..f2e8bce797 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -47,6 +47,21 @@ static bool ahci_map_fis_address(AHCIDevice *ad);
 static void ahci_unmap_clb_address(AHCIDevice *ad);
 static void ahci_unmap_fis_address(AHCIDevice *ad);
 
+__attribute__((__unused__)) /* TODO */
+static const char *AHCIHostReg_lookup[AHCI_HOST_REG__COUNT] = {
+[AHCI_HOST_REG_CAP]= "CAP",
+[AHCI_HOST_REG_CTL]= "GHC",
+[AHCI_HOST_REG_IRQ_STAT]   = "IS",
+[AHCI_HOST_REG_PORTS_IMPL] = "PI",
+[AHCI_HOST_REG_VERSION]= "VS",
+[AHCI_HOST_REG_CCC_CTL]= "CCC_CTL",
+[AHCI_HOST_REG_CCC_PORTS]  = "CCC_PORTS",
+[AHCI_HOST_REG_EM_LOC] = "EM_LOC",
+[AHCI_HOST_REG_EM_CTL] = "EM_CTL",
+[AHCI_HOST_REG_CAP2]   = "CAP2",
+[AHCI_HOST_REG_BOHC]   = "BOHC",
+};
+
 static const char *AHCIPortReg_lookup[AHCI_PORT_REG__COUNT] = {
 [AHCI_PORT_REG_LST_ADDR]= "PxCLB",
 [AHCI_PORT_REG_LST_ADDR_HI] = "PxCLBU",
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index db00c9aa39..af366db6f3 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -61,6 +61,21 @@
 #define HOST_PORTS_IMPL   0x0c /* bitmap of implemented ports */
 #define HOST_VERSION  0x10 /* AHCI spec. version compliancy */
 
+enum AHCIHostReg {
+AHCI_HOST_REG_CAP= 0,  /* CAP: host capabilities */
+AHCI_HOST_REG_CTL= 1,  /* GHC: global host control */
+AHCI_HOST_REG_IRQ_STAT   = 2,  /* IS: interrupt status */
+AHCI_HOST_REG_PORTS_IMPL = 3,  /* PI: bitmap of implemented ports */
+AHCI_HOST_REG_VERSION= 4,  /* VS: AHCI spec. version compliancy */
+AHCI_HOST_REG_CCC_CTL= 5,  /* CCC_CTL: CCC Control */
+AHCI_HOST_REG_CCC_PORTS  = 6,  /* CCC_PORTS: CCC Ports */
+AHCI_HOST_REG_EM_LOC = 7,  /* EM_LOC: Enclosure Mgmt Location */
+AHCI_HOST_REG_EM_CTL = 8,  /* EM_CTL: Enclosure Mgmt Control */
+AHCI_HOST_REG_CAP2   = 9,  /* CAP2: host capabilities, extended */
+AHCI_HOST_REG_BOHC   = 10, /* BOHC: firmare/os handoff ctrl & status */
+AHCI_HOST_REG__COUNT = 11
+};
+
 /* HOST_CTL bits */
 #define HOST_CTL_RESET(1 << 0)  /* reset controller; self-clear */
 #define HOST_CTL_IRQ_EN   (1 << 1)  /* global IRQ enable */
-- 
2.14.3




[Qemu-devel] [PATCH v2 02/16] ahci: modify ahci_port_read to use register numbers

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c | 38 +++---
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 48130c6439..5187bf34ad 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -93,41 +93,42 @@ static const char *AHCIPortIRQ_lookup[AHCI_PORT_IRQ__COUNT] 
= {
 [AHCI_PORT_IRQ_BIT_CPDS] = "CPDS"
 };
 
-static uint32_t  ahci_port_read(AHCIState *s, int port, int offset)
+static uint32_t ahci_port_read(AHCIState *s, int port, int offset)
 {
 uint32_t val;
-AHCIPortRegs *pr;
-pr = &s->dev[port].port_regs;
+AHCIPortRegs *pr = &s->dev[port].port_regs;
+enum AHCIPortReg regnum = offset / sizeof(uint32_t);
+assert(regnum < (AHCI_PORT_ADDR_OFFSET_LEN / sizeof(uint32_t)));
 
-switch (offset) {
-case PORT_LST_ADDR:
+switch (regnum) {
+case AHCI_PORT_REG_LST_ADDR:
 val = pr->lst_addr;
 break;
-case PORT_LST_ADDR_HI:
+case AHCI_PORT_REG_LST_ADDR_HI:
 val = pr->lst_addr_hi;
 break;
-case PORT_FIS_ADDR:
+case AHCI_PORT_REG_FIS_ADDR:
 val = pr->fis_addr;
 break;
-case PORT_FIS_ADDR_HI:
+case AHCI_PORT_REG_FIS_ADDR_HI:
 val = pr->fis_addr_hi;
 break;
-case PORT_IRQ_STAT:
+case AHCI_PORT_REG_IRQ_STAT:
 val = pr->irq_stat;
 break;
-case PORT_IRQ_MASK:
+case AHCI_PORT_REG_IRQ_MASK:
 val = pr->irq_mask;
 break;
-case PORT_CMD:
+case AHCI_PORT_REG_CMD:
 val = pr->cmd;
 break;
-case PORT_TFDATA:
+case AHCI_PORT_REG_TFDATA:
 val = pr->tfdata;
 break;
-case PORT_SIG:
+case AHCI_PORT_REG_SIG:
 val = pr->sig;
 break;
-case PORT_SCR_STAT:
+case AHCI_PORT_REG_SCR_STAT:
 if (s->dev[port].port.ifs[0].blk) {
 val = SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP |
   SATA_SCR_SSTATUS_SPD_GEN1 | SATA_SCR_SSTATUS_IPM_ACTIVE;
@@ -135,19 +136,18 @@ static uint32_t  ahci_port_read(AHCIState *s, int port, 
int offset)
 val = SATA_SCR_SSTATUS_DET_NODEV;
 }
 break;
-case PORT_SCR_CTL:
+case AHCI_PORT_REG_SCR_CTL:
 val = pr->scr_ctl;
 break;
-case PORT_SCR_ERR:
+case AHCI_PORT_REG_SCR_ERR:
 val = pr->scr_err;
 break;
-case PORT_SCR_ACT:
+case AHCI_PORT_REG_SCR_ACT:
 val = pr->scr_act;
 break;
-case PORT_CMD_ISSUE:
+case AHCI_PORT_REG_CMD_ISSUE:
 val = pr->cmd_issue;
 break;
-case PORT_RESERVED:
 default:
 val = 0;
 }
-- 
2.14.3




[Qemu-devel] [PATCH v2 13/16] ahci: fix spacing damage on ahci_mem_write

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c | 47 +++
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 5e902c6615..568e6b848a 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -469,37 +469,36 @@ static void ahci_mem_write(void *opaque, hwaddr addr,
 
 if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) {
 switch (addr) {
-case HOST_CAP: /* R/WO, RO */
-/* FIXME handle R/WO */
-break;
-case HOST_CTL: /* R/W */
-if (val & HOST_CTL_RESET) {
-ahci_reset(s);
-} else {
-s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN;
-ahci_check_irq(s);
-}
-break;
-case HOST_IRQ_STAT: /* R/WC, RO */
-s->control_regs.irqstatus &= ~val;
+case HOST_CAP: /* R/WO, RO */
+/* FIXME handle R/WO */
+break;
+case HOST_CTL: /* R/W */
+if (val & HOST_CTL_RESET) {
+ahci_reset(s);
+} else {
+s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN;
 ahci_check_irq(s);
-break;
-case HOST_PORTS_IMPL: /* R/WO, RO */
-/* FIXME handle R/WO */
-break;
-case HOST_VERSION: /* RO */
-/* FIXME report write? */
-break;
-default:
-trace_ahci_mem_write_unknown(s, size, addr, val);
+}
+break;
+case HOST_IRQ_STAT: /* R/WC, RO */
+s->control_regs.irqstatus &= ~val;
+ahci_check_irq(s);
+break;
+case HOST_PORTS_IMPL: /* R/WO, RO */
+/* FIXME handle R/WO */
+break;
+case HOST_VERSION: /* RO */
+/* FIXME report write? */
+break;
+default:
+trace_ahci_mem_write_unknown(s, size, addr, val);
 }
 } else if ((addr >= AHCI_PORT_REGS_START_ADDR) &&
(addr < (AHCI_PORT_REGS_START_ADDR +
-(s->ports * AHCI_PORT_ADDR_OFFSET_LEN {
+(s->ports * AHCI_PORT_ADDR_OFFSET_LEN {
 ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7,
 addr & AHCI_PORT_ADDR_OFFSET_MASK, val);
 }
-
 }
 
 static const MemoryRegionOps ahci_mem_ops = {
-- 
2.14.3




[Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements

2018-05-31 Thread John Snow
This set just adds register names so that the read/write traces make
more sense on their own without having to memorize register offsets.
It also splits read/write traces into supported/unsupported subsets,
so you can just monitor for things that QEMU is likely doing wrong.

v2:
 - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces
   for writes to unknown/unsupported registers. (Philippe)

John Snow (16):
  ahci: add port register enumeration
  ahci: modify ahci_port_read to use register numbers
  ahci: make port read traces more descriptive
  ahci: fix spacing damage on ahci_port_write
  ahci: combine identical clauses in port write
  ahci: modify ahci_port_write to use register numbers
  ahci: make port write traces more descriptive
  ahci: delete old port register address definitions
  ahci: add host register enumeration
  ahci: fix host register max address
  ahci: modify ahci_mem_read_32 to work on register numbers
  ahci: make mem_read_32 traces more descriptive
  ahci: fix spacing damage on ahci_mem_write
  ahci: adjust ahci_mem_write to work on registers
  ahci: delete old host register address definitions
  ahci: make ahci_mem_write traces more descriptive

 hw/ide/ahci.c  | 314 ++---
 hw/ide/ahci_internal.h |  63 ++
 hw/ide/trace-events|  13 +-
 3 files changed, 241 insertions(+), 149 deletions(-)

-- 
2.14.3




[Qemu-devel] [PATCH v2 10/16] ahci: fix host register max address

2018-05-31 Thread John Snow
Yes, comment, it ought to be 0x2C.

Signed-off-by: John Snow 
---
 hw/ide/ahci_internal.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index af366db6f3..f9dcf8b6e6 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -224,8 +224,7 @@ enum AHCIPortIRQ {
 #define SATA_SIGNATURE_CDROM   0xeb140101
 #define SATA_SIGNATURE_DISK0x0101
 
-#define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x20
-/* Shouldn't this be 0x2c? */
+#define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x2c
 
 #define AHCI_PORT_REGS_START_ADDR  0x100
 #define AHCI_PORT_ADDR_OFFSET_MASK 0x7f
-- 
2.14.3




[Qemu-devel] [PATCH v2 04/16] ahci: fix spacing damage on ahci_port_write

2018-05-31 Thread John Snow
Churn.

Signed-off-by: John Snow 
---
 hw/ide/ahci.c | 142 +-
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 5b5ab823f4..92780990a3 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -279,85 +279,85 @@ static int ahci_cond_start_engines(AHCIDevice *ad)
 return 0;
 }
 
-static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
+static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
 {
 AHCIPortRegs *pr = &s->dev[port].port_regs;
 
 trace_ahci_port_write(s, port, offset, val);
 switch (offset) {
-case PORT_LST_ADDR:
-pr->lst_addr = val;
-break;
-case PORT_LST_ADDR_HI:
-pr->lst_addr_hi = val;
-break;
-case PORT_FIS_ADDR:
-pr->fis_addr = val;
-break;
-case PORT_FIS_ADDR_HI:
-pr->fis_addr_hi = val;
-break;
-case PORT_IRQ_STAT:
-pr->irq_stat &= ~val;
-ahci_check_irq(s);
-break;
-case PORT_IRQ_MASK:
-pr->irq_mask = val & 0xfdc000ff;
-ahci_check_irq(s);
-break;
-case PORT_CMD:
-/* Block any Read-only fields from being set;
- * including LIST_ON and FIS_ON.
- * The spec requires to set ICC bits to zero after the ICC change
- * is done. We don't support ICC state changes, therefore always
- * force the ICC bits to zero.
- */
-pr->cmd = (pr->cmd & PORT_CMD_RO_MASK) |
-  (val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK));
+case PORT_LST_ADDR:
+pr->lst_addr = val;
+break;
+case PORT_LST_ADDR_HI:
+pr->lst_addr_hi = val;
+break;
+case PORT_FIS_ADDR:
+pr->fis_addr = val;
+break;
+case PORT_FIS_ADDR_HI:
+pr->fis_addr_hi = val;
+break;
+case PORT_IRQ_STAT:
+pr->irq_stat &= ~val;
+ahci_check_irq(s);
+break;
+case PORT_IRQ_MASK:
+pr->irq_mask = val & 0xfdc000ff;
+ahci_check_irq(s);
+break;
+case PORT_CMD:
+/* Block any Read-only fields from being set;
+ * including LIST_ON and FIS_ON.
+ * The spec requires to set ICC bits to zero after the ICC change
+ * is done. We don't support ICC state changes, therefore always
+ * force the ICC bits to zero.
+ */
+pr->cmd = (pr->cmd & PORT_CMD_RO_MASK) |
+(val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK));
 
-/* Check FIS RX and CLB engines */
-ahci_cond_start_engines(&s->dev[port]);
+/* Check FIS RX and CLB engines */
+ahci_cond_start_engines(&s->dev[port]);
 
-/* XXX usually the FIS would be pending on the bus here and
-   issuing deferred until the OS enables FIS receival.
-   Instead, we only submit it once - which works in most
-   cases, but is a hack. */
-if ((pr->cmd & PORT_CMD_FIS_ON) &&
-!s->dev[port].init_d2h_sent) {
-ahci_init_d2h(&s->dev[port]);
-}
+/* XXX usually the FIS would be pending on the bus here and
+   issuing deferred until the OS enables FIS receival.
+   Instead, we only submit it once - which works in most
+   cases, but is a hack. */
+if ((pr->cmd & PORT_CMD_FIS_ON) &&
+!s->dev[port].init_d2h_sent) {
+ahci_init_d2h(&s->dev[port]);
+}
 
-check_cmd(s, port);
-break;
-case PORT_TFDATA:
-/* Read Only. */
-break;
-case PORT_SIG:
-/* Read Only */
-break;
-case PORT_SCR_STAT:
-/* Read Only */
-break;
-case PORT_SCR_CTL:
-if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
-((val & AHCI_SCR_SCTL_DET) == 0)) {
-ahci_reset_port(s, port);
-}
-pr->scr_ctl = val;
-break;
-case PORT_SCR_ERR:
-pr->scr_err &= ~val;
-break;
-case PORT_SCR_ACT:
-/* RW1 */
-pr->scr_act |= val;
-break;
-case PORT_CMD_ISSUE:
-pr->cmd_issue |= val;
-check_cmd(s, port);
-break;
-default:
-break;
+check_cmd(s, port);
+break;
+case PORT_TFDATA:
+/* Read Only. */
+break;
+case PORT_SIG:
+/* Read Only */
+break;
+case PORT_SCR_STAT:
+/* Read Only */
+break;
+case PORT_SCR_CTL:
+if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
+((val & AHCI_SCR_SCTL_DET) == 0)) {
+ahci_reset_port(s, port);
+}
+pr->scr_ctl = val;
+break;
+case

[Qemu-devel] [PATCH v2 05/16] ahci: combine identical clauses in port write

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 92780990a3..476841df58 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -330,11 +330,7 @@ static void ahci_port_write(AHCIState *s, int port, int 
offset, uint32_t val)
 check_cmd(s, port);
 break;
 case PORT_TFDATA:
-/* Read Only. */
-break;
 case PORT_SIG:
-/* Read Only */
-break;
 case PORT_SCR_STAT:
 /* Read Only */
 break;
-- 
2.14.3




[Qemu-devel] [PATCH v2 08/16] ahci: delete old port register address definitions

2018-05-31 Thread John Snow
They're now unused.

Signed-off-by: John Snow 
---
 hw/ide/ahci_internal.h | 18 --
 1 file changed, 18 deletions(-)

diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index eb7e1eefc0..db00c9aa39 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -102,24 +102,6 @@ enum AHCIPortReg {
 AHCI_PORT_REG__COUNT  = 32
 };
 
-/* registers for each SATA port */
-#define PORT_LST_ADDR 0x00 /* command list DMA addr */
-#define PORT_LST_ADDR_HI  0x04 /* command list DMA addr hi */
-#define PORT_FIS_ADDR 0x08 /* FIS rx buf addr */
-#define PORT_FIS_ADDR_HI  0x0c /* FIS rx buf addr hi */
-#define PORT_IRQ_STAT 0x10 /* interrupt status */
-#define PORT_IRQ_MASK 0x14 /* interrupt enable/disable mask */
-#define PORT_CMD  0x18 /* port command */
-
-#define PORT_TFDATA   0x20 /* taskfile data */
-#define PORT_SIG  0x24 /* device TF signature */
-#define PORT_SCR_STAT 0x28 /* SATA phy register: SStatus */
-#define PORT_SCR_CTL  0x2c /* SATA phy register: SControl */
-#define PORT_SCR_ERR  0x30 /* SATA phy register: SError */
-#define PORT_SCR_ACT  0x34 /* SATA phy register: SActive */
-#define PORT_CMD_ISSUE0x38 /* command issue */
-#define PORT_RESERVED 0x3c /* reserved */
-
 /* Port interrupt bit descriptors */
 enum AHCIPortIRQ {
 AHCI_PORT_IRQ_BIT_DHRS = 0,
-- 
2.14.3




[Qemu-devel] [PATCH v2 06/16] ahci: modify ahci_port_write to use register numbers

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c | 32 +---
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 476841df58..efecf849a9 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -282,30 +282,32 @@ static int ahci_cond_start_engines(AHCIDevice *ad)
 static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
 {
 AHCIPortRegs *pr = &s->dev[port].port_regs;
+enum AHCIPortReg regnum = offset / sizeof(uint32_t);
+assert(regnum < (AHCI_PORT_ADDR_OFFSET_LEN / sizeof(uint32_t)));
 
 trace_ahci_port_write(s, port, offset, val);
-switch (offset) {
-case PORT_LST_ADDR:
+switch (regnum) {
+case AHCI_PORT_REG_LST_ADDR:
 pr->lst_addr = val;
 break;
-case PORT_LST_ADDR_HI:
+case AHCI_PORT_REG_LST_ADDR_HI:
 pr->lst_addr_hi = val;
 break;
-case PORT_FIS_ADDR:
+case AHCI_PORT_REG_FIS_ADDR:
 pr->fis_addr = val;
 break;
-case PORT_FIS_ADDR_HI:
+case AHCI_PORT_REG_FIS_ADDR_HI:
 pr->fis_addr_hi = val;
 break;
-case PORT_IRQ_STAT:
+case AHCI_PORT_REG_IRQ_STAT:
 pr->irq_stat &= ~val;
 ahci_check_irq(s);
 break;
-case PORT_IRQ_MASK:
+case AHCI_PORT_REG_IRQ_MASK:
 pr->irq_mask = val & 0xfdc000ff;
 ahci_check_irq(s);
 break;
-case PORT_CMD:
+case AHCI_PORT_REG_CMD:
 /* Block any Read-only fields from being set;
  * including LIST_ON and FIS_ON.
  * The spec requires to set ICC bits to zero after the ICC change
@@ -329,26 +331,26 @@ static void ahci_port_write(AHCIState *s, int port, int 
offset, uint32_t val)
 
 check_cmd(s, port);
 break;
-case PORT_TFDATA:
-case PORT_SIG:
-case PORT_SCR_STAT:
+case AHCI_PORT_REG_TFDATA:
+case AHCI_PORT_REG_SIG:
+case AHCI_PORT_REG_SCR_STAT:
 /* Read Only */
 break;
-case PORT_SCR_CTL:
+case AHCI_PORT_REG_SCR_CTL:
 if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
 ((val & AHCI_SCR_SCTL_DET) == 0)) {
 ahci_reset_port(s, port);
 }
 pr->scr_ctl = val;
 break;
-case PORT_SCR_ERR:
+case AHCI_PORT_REG_SCR_ERR:
 pr->scr_err &= ~val;
 break;
-case PORT_SCR_ACT:
+case AHCI_PORT_REG_SCR_ACT:
 /* RW1 */
 pr->scr_act |= val;
 break;
-case PORT_CMD_ISSUE:
+case AHCI_PORT_REG_CMD_ISSUE:
 pr->cmd_issue |= val;
 check_cmd(s, port);
 break;
-- 
2.14.3




[Qemu-devel] [PATCH v2 07/16] ahci: make port write traces more descriptive

2018-05-31 Thread John Snow
Signed-off-by: John Snow 
---
 hw/ide/ahci.c   | 8 +++-
 hw/ide/trace-events | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index efecf849a9..01463f0f51 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -27,6 +27,7 @@
 #include "hw/pci/pci.h"
 
 #include "qemu/error-report.h"
+#include "qemu/log.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/dma.h"
 #include "hw/ide/internal.h"
@@ -284,8 +285,8 @@ static void ahci_port_write(AHCIState *s, int port, int 
offset, uint32_t val)
 AHCIPortRegs *pr = &s->dev[port].port_regs;
 enum AHCIPortReg regnum = offset / sizeof(uint32_t);
 assert(regnum < (AHCI_PORT_ADDR_OFFSET_LEN / sizeof(uint32_t)));
+trace_ahci_port_write(s, port, AHCIPortReg_lookup[regnum], offset, val);
 
-trace_ahci_port_write(s, port, offset, val);
 switch (regnum) {
 case AHCI_PORT_REG_LST_ADDR:
 pr->lst_addr = val;
@@ -355,6 +356,11 @@ static void ahci_port_write(AHCIState *s, int port, int 
offset, uint32_t val)
 check_cmd(s, port);
 break;
 default:
+trace_ahci_port_write_unimpl(s, port, AHCIPortReg_lookup[regnum],
+ offset, val);
+qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register: "
+  "AHCI port %d register %s, offset 0x%x: 0x%"PRIu32,
+  port, AHCIPortReg_lookup[regnum], offset, val);
 break;
 }
 }
diff --git a/hw/ide/trace-events b/hw/ide/trace-events
index 0db18d8271..1efbbb8114 100644
--- a/hw/ide/trace-events
+++ b/hw/ide/trace-events
@@ -69,7 +69,8 @@ ahci_irq_raise(void *s) "ahci(%p): raise irq"
 ahci_irq_lower(void *s) "ahci(%p): lower irq"
 ahci_check_irq(void *s, uint32_t old, uint32_t new) "ahci(%p): check irq 
0x%08x --> 0x%08x"
 ahci_trigger_irq(void *s, int port, const char *name, uint32_t val, uint32_t 
old, uint32_t new, uint32_t effective) "ahci(%p)[%d]: trigger irq +%s (0x%08x); 
irqstat: 0x%08x --> 0x%08x; effective: 0x%08x"
-ahci_port_write(void *s, int port, int offset, uint32_t val) "ahci(%p)[%d]: 
port write @ 0x%x: 0x%08x"
+ahci_port_write(void *s, int port, const char *reg, int offset, uint32_t val) 
"ahci(%p)[%d]: port write [reg:%s] @ 0x%x: 0x%08x"
+ahci_port_write_unimpl(void *s, int port, const char *reg, int offset, 
uint32_t val) "ahci(%p)[%d]: unimplemented port write [reg:%s] @ 0x%x: 0x%08x"
 ahci_mem_read_32(void *s, uint64_t addr, uint32_t val) "ahci(%p): mem read @ 
0x%"PRIx64": 0x%08x"
 ahci_mem_read(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): 
read%u @ 0x%"PRIx64": 0x%016"PRIx64
 ahci_mem_write(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): 
write%u @ 0x%"PRIx64": 0x%016"PRIx64
-- 
2.14.3




Re: [Qemu-devel] [qemu PATCH v4 0/4] support NFIT platform capabilities

2018-05-31 Thread Ross Zwisler
On Fri, May 25, 2018 at 08:51:22PM +0300, Michael S. Tsirkin wrote:
> On Mon, May 21, 2018 at 10:31:59AM -0600, Ross Zwisler wrote:
> > Changes since v3:
> >  * Updated the text in docs/nvdimm.txt to make it clear that the value
> >being passed in on the command line in an integer made up of various
> >bit fields. (Rob Elliott)
> >  
> >  * Updated the "Highest Valid Capability" byte to be dynamic based on
> >the highest valid bit in the user's input. (Rob Elliott)
> 
> Igor could you review pls? I think your comments have been addressed.

Ping?  Igor, can you pick this up?



Re: [Qemu-devel] [Qemu-block] [PATCH] block: Ignore generated job QAPI files

2018-05-31 Thread Jeff Cody
On Thu, May 31, 2018 at 04:24:35PM -0500, Eric Blake wrote:
> Commit bf42508f introduced new generated files; make sure they
> don't get accidentally committed from an in-tree build.
> 
> Signed-off-by: Eric Blake 

Reviewed-by: Jeff Cody 

> ---
>  .gitignore | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/.gitignore b/.gitignore
> index 81e1f2fb0f1..9da3b3e6267 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -36,6 +36,7 @@
>  /qapi/qapi-commands-common.[ch]
>  /qapi/qapi-commands-crypto.[ch]
>  /qapi/qapi-commands-introspect.[ch]
> +/qapi/qapi-commands-job.[ch]
>  /qapi/qapi-commands-migration.[ch]
>  /qapi/qapi-commands-misc.[ch]
>  /qapi/qapi-commands-net.[ch]
> @@ -53,6 +54,7 @@
>  /qapi/qapi-events-common.[ch]
>  /qapi/qapi-events-crypto.[ch]
>  /qapi/qapi-events-introspect.[ch]
> +/qapi/qapi-events-job.[ch]
>  /qapi/qapi-events-migration.[ch]
>  /qapi/qapi-events-misc.[ch]
>  /qapi/qapi-events-net.[ch]
> @@ -71,6 +73,7 @@
>  /qapi/qapi-types-common.[ch]
>  /qapi/qapi-types-crypto.[ch]
>  /qapi/qapi-types-introspect.[ch]
> +/qapi/qapi-types-job.[ch]
>  /qapi/qapi-types-migration.[ch]
>  /qapi/qapi-types-misc.[ch]
>  /qapi/qapi-types-net.[ch]
> @@ -88,6 +91,7 @@
>  /qapi/qapi-visit-common.[ch]
>  /qapi/qapi-visit-crypto.[ch]
>  /qapi/qapi-visit-introspect.[ch]
> +/qapi/qapi-visit-job.[ch]
>  /qapi/qapi-visit-migration.[ch]
>  /qapi/qapi-visit-misc.[ch]
>  /qapi/qapi-visit-net.[ch]
> -- 
> 2.14.3
> 
> 



Re: [Qemu-devel] [PATCH 2/3] pc-bios/s390-ccw/net: Add support for pxelinux-style config files

2018-05-31 Thread Farhan Ali




On 05/30/2018 05:16 AM, Thomas Huth wrote:

Since it is quite cumbersome to manually create a combined kernel with
initrd image for network booting, we now support loading via pxelinux
configuration files, too. In these files, the kernel, initrd and command
line parameters can be specified seperately, and the firmware then takes
care of glueing everything together in memory after the files have been
downloaded. See this URL for details about the config file layout:
https://www.syslinux.org/wiki/index.php?title=PXELINUX

The user can either specify a config file directly as bootfile via DHCP
(but in this case, the file has to start either with "default" or a "#"
comment so we can distinguish it from binary kernels), or a folder (i.e.
the bootfile name must end with "/") where the firmware should look for
the typical pxelinux.cfg file names, e.g. based on MAC or IP address.
We also support the pxelinux.cfg DHCP options 209 and 210 from RFC 5071.

Signed-off-by: Thomas Huth 
---
  pc-bios/s390-ccw/netboot.mak |  7 ++--
  pc-bios/s390-ccw/netmain.c   | 79 +++-
  2 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak
index a73be36..8af0cfd 100644
--- a/pc-bios/s390-ccw/netboot.mak
+++ b/pc-bios/s390-ccw/netboot.mak
@@ -25,8 +25,9 @@ CTYPE_OBJS = isdigit.o isxdigit.o toupper.o
  %.o : $(SLOF_DIR)/lib/libc/ctype/%.c
$(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@ 
$<,"CC","$(TARGET_DIR)$@")
  
-STRING_OBJS = strcat.o strchr.o strcmp.o strcpy.o strlen.o strncmp.o strncpy.o \

- strstr.o memset.o memcpy.o memmove.o memcmp.o
+STRING_OBJS = strcat.o strchr.o strrchr.o strcpy.o strlen.o strncpy.o \
+ strcmp.o strncmp.o strcasecmp.o strncasecmp.o strstr.o \
+ memset.o memcpy.o memmove.o memcmp.o
  %.o : $(SLOF_DIR)/lib/libc/string/%.c
$(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@ 
$<,"CC","$(TARGET_DIR)$@")
  
@@ -50,7 +51,7 @@ libc.a: $(LIBCOBJS)

  # libnet files:
  
  LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \

- dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o
+ dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o
  LIBNETCFLAGS := $(QEMU_CFLAGS) -DDHCPARCH=0x1F $(LIBC_INC) $(LIBNET_INC)
  
  %.o : $(SLOF_DIR)/lib/libnet/%.c

diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c
index 7533cf7..e84bb2b 100644
--- a/pc-bios/s390-ccw/netmain.c
+++ b/pc-bios/s390-ccw/netmain.c
@@ -30,6 +30,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "s390-ccw.h"

  #include "virtio.h"
@@ -41,12 +42,14 @@ extern char _start[];
  
  #define KERNEL_ADDR ((void *)0L)

  #define KERNEL_MAX_SIZE ((long)_start)
+#define ARCH_COMMAND_LINE_SIZE  896  /* Taken from Linux kernel */
  
  char stack[PAGE_SIZE * 8] __attribute__((aligned(PAGE_SIZE)));

  IplParameterBlock iplb __attribute__((aligned(PAGE_SIZE)));
  static char cfgbuf[2048];
  
  static SubChannelId net_schid = { .one = 1 };

+static uint8_t mac[6];
  static uint64_t dest_timer;
  
  static uint64_t get_timer_ms(void)

@@ -158,7 +161,6 @@ static int tftp_load(filename_ip_t *fnip, void *buffer, int 
len)
  
  static int net_init(filename_ip_t *fn_ip)

  {
-uint8_t mac[6];
  int rc;
  
  memset(fn_ip, 0, sizeof(filename_ip_t));

@@ -233,6 +235,66 @@ static void net_release(filename_ip_t *fn_ip)
  }
  
  /**

+ * Load a kernel with initrd (i.e. with the information that we've got from
+ * a pxelinux.cfg config file)
+ */
+static int load_kernel_with_initrd(filename_ip_t *fn_ip,
+   struct pl_cfg_entry *entry)
+{
+int rc;
+
+printf("Loading pxelinux.cfg entry '%s'\n", entry->label);
+
+if (!entry->kernel) {
+printf("Kernel entry is missing!\n");
+return -1;
+}
+
+strncpy(fn_ip->filename, entry->kernel, sizeof(fn_ip->filename));
+rc = tftp_load(fn_ip, KERNEL_ADDR, KERNEL_MAX_SIZE);
+if (rc < 0) {
+return rc;
+}
+
+if (entry->initrd) {
+uint64_t iaddr = (rc + 0xfff) & ~0xfffUL;
+
+strncpy(fn_ip->filename, entry->initrd, sizeof(fn_ip->filename));
+rc = tftp_load(fn_ip, (void *)iaddr, KERNEL_MAX_SIZE - iaddr);
+if (rc < 0) {
+return rc;
+}
+/* Patch location and size: */
+*(uint64_t *)0x10408 = iaddr;
+*(uint64_t *)0x10410 = rc;
+rc += iaddr;
+}
+
+if (entry->append) {
+strncpy((char *)0x10480, entry->append, ARCH_COMMAND_LINE_SIZE);
+}
+
+return rc;
+}
+
+#define MAX_PXELINUX_ENTRIES 16
+
+static int net_try_pxelinux_cfg(filename_ip_t *fn_ip)
+{
+struct pl_cfg_entry entries[MAX_PXELINUX_ENTRIES];
+int num_ent, def_ent = 0;
+
+num_ent = pxelinux_load_parse_cfg(fn_ip, mac, NULL, DEFAULT_TFTP_RETRIES,
+  cfgbuf, sizeof(cfgbuf),
+   

[Qemu-devel] [PATCH] block: Ignore generated job QAPI files

2018-05-31 Thread Eric Blake
Commit bf42508f introduced new generated files; make sure they
don't get accidentally committed from an in-tree build.

Signed-off-by: Eric Blake 
---
 .gitignore | 4 
 1 file changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index 81e1f2fb0f1..9da3b3e6267 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@
 /qapi/qapi-commands-common.[ch]
 /qapi/qapi-commands-crypto.[ch]
 /qapi/qapi-commands-introspect.[ch]
+/qapi/qapi-commands-job.[ch]
 /qapi/qapi-commands-migration.[ch]
 /qapi/qapi-commands-misc.[ch]
 /qapi/qapi-commands-net.[ch]
@@ -53,6 +54,7 @@
 /qapi/qapi-events-common.[ch]
 /qapi/qapi-events-crypto.[ch]
 /qapi/qapi-events-introspect.[ch]
+/qapi/qapi-events-job.[ch]
 /qapi/qapi-events-migration.[ch]
 /qapi/qapi-events-misc.[ch]
 /qapi/qapi-events-net.[ch]
@@ -71,6 +73,7 @@
 /qapi/qapi-types-common.[ch]
 /qapi/qapi-types-crypto.[ch]
 /qapi/qapi-types-introspect.[ch]
+/qapi/qapi-types-job.[ch]
 /qapi/qapi-types-migration.[ch]
 /qapi/qapi-types-misc.[ch]
 /qapi/qapi-types-net.[ch]
@@ -88,6 +91,7 @@
 /qapi/qapi-visit-common.[ch]
 /qapi/qapi-visit-crypto.[ch]
 /qapi/qapi-visit-introspect.[ch]
+/qapi/qapi-visit-job.[ch]
 /qapi/qapi-visit-migration.[ch]
 /qapi/qapi-visit-misc.[ch]
 /qapi/qapi-visit-net.[ch]
-- 
2.14.3




[Qemu-devel] [PATCH 1/5] block: Add blklogwrites

2018-05-31 Thread Ari Sundholm
From: Aapo Vienamo 

Implements a block device write logging system, similar to Linux kernel
device mapper dm-log-writes. The write operations that are performed
on a block device are logged to a file or another block device. The
write log format is identical to the dm-log-writes format. Currently,
log markers are not supported.

This functionality can be used for fail-safe and fs consistency
testing. By implementing it in qemu, tests utilizing write logs can be
be used to test non-Linux drivers and older kernels.

The implementation is based on the blkverify and blkdebug block drivers.

Signed-off-by: Aapo Vienamo 
Signed-off-by: Ari Sundholm 
---
 block.c   |   6 -
 block/Makefile.objs   |   1 +
 block/blklogwrites.c  | 441 ++
 include/block/block.h |   7 +
 4 files changed, 449 insertions(+), 6 deletions(-)
 create mode 100644 block/blklogwrites.c

diff --git a/block.c b/block.c
index 501b64c..c8cffe1 100644
--- a/block.c
+++ b/block.c
@@ -1914,12 +1914,6 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, 
uint64_t shared,
 return 0;
 }
 
-#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
- | BLK_PERM_WRITE \
- | BLK_PERM_WRITE_UNCHANGED \
- | BLK_PERM_RESIZE)
-#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
-
 void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 899bfb5..c8337bf 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -5,6 +5,7 @@ block-obj-y += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
+block-obj-y += blklogwrites.o
 block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += file-posix.o
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
new file mode 100644
index 000..78835bf
--- /dev/null
+++ b/block/blklogwrites.c
@@ -0,0 +1,441 @@
+/*
+ * Write logging blk driver based on blkverify and blkdebug.
+ *
+ * Copyright (c) 2017 Tuomas Tynkkynen 
+ * Copyright (c) 2018 Aapo Vienamo 
+ * Copyright (c) 2018 Ari Sundholm 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
+#include "block/block_int.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+#include "qemu/option.h"
+
+/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */
+
+#define LOG_FLUSH_FLAG (1 << 0)
+#define LOG_FUA_FLAG (1 << 1)
+#define LOG_DISCARD_FLAG (1 << 2)
+#define LOG_MARK_FLAG (1 << 3)
+
+#define WRITE_LOG_VERSION 1ULL
+#define WRITE_LOG_MAGIC 0x6a736677736872ULL
+
+/* All fields are little-endian. */
+struct log_write_super {
+uint64_t magic;
+uint64_t version;
+uint64_t nr_entries;
+uint32_t sectorsize;
+};
+
+struct log_write_entry {
+uint64_t sector;
+uint64_t nr_sectors;
+uint64_t flags;
+uint64_t data_len;
+};
+
+/* End of disk format structures. */
+
+typedef struct {
+BdrvChild *log_file;
+uint64_t cur_log_sector;
+uint64_t nr_entries;
+} BDRVBlkLogWritesState;
+
+/* Valid blk_log_writes filenames look like:
+ *  blk_log_writes:path/to/raw_image:path/to/logfile */
+static void blk_log_writes_parse_filename(const char *filename, QDict *options,
+  Error **errp)
+{
+const char *c;
+QString *raw_path;
+
+/* Parse the blk_log_writes: prefix */
+if (!strstart(filename, "blk_log_writes:", &filename)) {
+/* There was no prefix; therefore, all options have to be already
+ * present in the QDict (except for the filename) */
+qdict_put(options, "x-log", qstring_from_str(filename));
+return;
+}
+
+/* Parse the raw image filename */
+c = strchr(filename, ':');
+if (c == NULL) {
+error_setg(errp,
+   "blk_log_writes requires paths to both image and log");
+return;
+}
+
+raw_path = qstring_from_substr(filename, 0, c - filename - 1);
+qdict_put(options, "x-raw", raw_path);
+
+filename = c + 1;
+qdict_put(options, "x-log", qstring_from_str(filename));
+}
+
+static QemuOptsList runtime_opts = {
+.name = "blk_log_writes",
+.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+.desc = {
+{
+.name = "x-raw",
+.type = QEMU_OPT_STRING,
+.help = "[internal use only, will be removed]",
+},
+{
+.name = "x-log",
+

  1   2   3   4   >