[Qemu-devel] [PATCH v8 10/11] block-backend: Add blk_co_copy_range
It's a BlockBackend wrapper of the BDS interface. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi --- block/block-backend.c | 18 ++ include/sysemu/block-backend.h | 4 2 files changed, 22 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 89f47b00ea..d55c328736 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2211,3 +2211,21 @@ void blk_unregister_buf(BlockBackend *blk, void *host) { bdrv_unregister_buf(blk_bs(blk), host); } + +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int bytes, BdrvRequestFlags flags) +{ +int r; +r = blk_check_byte_request(blk_in, off_in, bytes); +if (r) { +return r; +} +r = blk_check_byte_request(blk_out, off_out, bytes); +if (r) { +return r; +} +return bdrv_co_copy_range(blk_in->root, off_in, + blk_out->root, off_out, + bytes, flags); +} diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 92ab624fac..8d03d493c2 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -232,4 +232,8 @@ void blk_set_force_allow_inactivate(BlockBackend *blk); void blk_register_buf(BlockBackend *blk, void *host, size_t size); void blk_unregister_buf(BlockBackend *blk, void *host); +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int bytes, BdrvRequestFlags flags); + #endif -- 2.17.0
[Qemu-devel] [PATCH v8 11/11] qemu-img: Convert with copy offloading
The new blk_co_copy_range interface offers a more efficient way in the case of network based storage. Make use of it to allow faster convert operation. Since copy offloading cannot do zero detection ('-S') and compression (-c), only try it when these options are not used. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi --- qemu-img.c | 50 -- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 976b437da0..75f1610aa0 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1547,6 +1547,7 @@ typedef struct ImgConvertState { bool compressed; bool target_has_backing; bool wr_in_order; +bool copy_range; int min_sparse; size_t cluster_sectors; size_t buf_sectors; @@ -1740,6 +1741,37 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, return 0; } +static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num, + int nb_sectors) +{ +int n, ret; + +while (nb_sectors > 0) { +BlockBackend *blk; +int src_cur; +int64_t bs_sectors, src_cur_offset; +int64_t offset; + +convert_select_part(s, sector_num, &src_cur, &src_cur_offset); +offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS; +blk = s->src[src_cur]; +bs_sectors = s->src_sectors[src_cur]; + +n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); + +ret = blk_co_copy_range(blk, offset, s->target, +sector_num << BDRV_SECTOR_BITS, +n << BDRV_SECTOR_BITS, 0); +if (ret < 0) { +return ret; +} + +sector_num += n; +nb_sectors -= n; +} +return 0; +} + static void coroutine_fn convert_co_do_copy(void *opaque) { ImgConvertState *s = opaque; @@ -1762,6 +1794,7 @@ static void coroutine_fn convert_co_do_copy(void *opaque) int n; int64_t sector_num; enum ImgConvertBlockStatus status; +bool copy_range; qemu_co_mutex_lock(&s->lock); if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { @@ -1791,7 +1824,9 @@ static void coroutine_fn convert_co_do_copy(void *opaque) s->allocated_sectors, 0); } -if (status == BLK_DATA) { +retry: +copy_range = s->copy_range && s->status == BLK_DATA; +if (status == BLK_DATA && !copy_range) { ret = convert_co_read(s, sector_num, n, buf); if (ret < 0) { error_report("error while reading sector %" PRId64 @@ -1813,7 +1848,15 @@ static void coroutine_fn convert_co_do_copy(void *opaque) } if (s->ret == -EINPROGRESS) { -ret = convert_co_write(s, sector_num, n, buf, status); +if (copy_range) { +ret = convert_co_copy_range(s, sector_num, n); +if (ret) { +s->copy_range = false; +goto retry; +} +} else { +ret = convert_co_write(s, sector_num, n, buf, status); +} if (ret < 0) { error_report("error while writing sector %" PRId64 ": %s", sector_num, strerror(-ret)); @@ -1936,6 +1979,7 @@ static int img_convert(int argc, char **argv) ImgConvertState s = (ImgConvertState) { /* Need at least 4k of zeros for sparse detection */ .min_sparse = 8, +.copy_range = true, .buf_sectors= IO_BUF_SIZE / BDRV_SECTOR_SIZE, .wr_in_order= true, .num_coroutines = 8, @@ -1976,6 +2020,7 @@ static int img_convert(int argc, char **argv) break; case 'c': s.compressed = true; +s.copy_range = false; break; case 'o': if (!is_valid_option_list(optarg)) { @@ -2017,6 +2062,7 @@ static int img_convert(int argc, char **argv) } s.min_sparse = sval / BDRV_SECTOR_SIZE; +s.copy_range = false; break; } case 'p': -- 2.17.0
[Qemu-devel] [PATCH v8 04/11] raw: Implement copy offloading
Just pass down to ->file. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi --- block/raw-format.c | 32 1 file changed, 32 insertions(+) diff --git a/block/raw-format.c b/block/raw-format.c index b69a0674b3..f2e468df6f 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -497,6 +497,36 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) return bdrv_probe_geometry(bs->file->bs, geo); } +static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, + BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags flags) +{ +int ret; + +ret = raw_adjust_offset(bs, &src_offset, bytes, false); +if (ret) { +return ret; +} +return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset, + bytes, flags); +} + +static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, + BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags flags) +{ +int ret; + +ret = raw_adjust_offset(bs, &dst_offset, bytes, true); +if (ret) { +return ret; +} +return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes, + flags); +} + BlockDriver bdrv_raw = { .format_name = "raw", .instance_size= sizeof(BDRVRawState), @@ -513,6 +543,8 @@ BlockDriver bdrv_raw = { .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, .bdrv_co_pdiscard = &raw_co_pdiscard, .bdrv_co_block_status = &raw_co_block_status, +.bdrv_co_copy_range_from = &raw_co_copy_range_from, +.bdrv_co_copy_range_to = &raw_co_copy_range_to, .bdrv_truncate= &raw_truncate, .bdrv_getlength = &raw_getlength, .has_variable_length = true, -- 2.17.0
[Qemu-devel] [PATCH v8 06/11] file-posix: Implement bdrv_co_copy_range
With copy_file_range(2), we can implement the bdrv_co_copy_range semantics. Signed-off-by: Fam Zheng --- block/file-posix.c | 98 +++-- configure | 17 +++ include/block/raw-aio.h | 10 - 3 files changed, 120 insertions(+), 5 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 5a602cfe37..513d371bb1 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -59,6 +59,7 @@ #ifdef __linux__ #include #include +#include #include #include #include @@ -187,6 +188,8 @@ typedef struct RawPosixAIOData { #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ off_t aio_offset; int aio_type; +int aio_fd2; +off_t aio_offset2; } RawPosixAIOData; #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) @@ -1446,6 +1449,49 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) return -ENOTSUP; } +#ifndef HAVE_COPY_FILE_RANGE +static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, + off_t *out_off, size_t len, unsigned int flags) +{ +#ifdef __NR_copy_file_range +return syscall(__NR_copy_file_range, in_fd, in_off, out_fd, + out_off, len, flags); +#else +errno = ENOSYS; +return -1; +#endif +} +#endif + +static ssize_t handle_aiocb_copy_range(RawPosixAIOData *aiocb) +{ +uint64_t bytes = aiocb->aio_nbytes; +off_t in_off = aiocb->aio_offset; +off_t out_off = aiocb->aio_offset2; + +while (bytes) { +ssize_t ret = copy_file_range(aiocb->aio_fildes, &in_off, + aiocb->aio_fd2, &out_off, + bytes, 0); +if (ret == -EINTR) { +continue; +} +if (ret < 0) { +if (errno == ENOSYS) { +return -ENOTSUP; +} else { +return -errno; +} +} +if (!ret) { +/* No progress (e.g. when beyond EOF), fall back to buffer I/O. */ +return -ENOTSUP; +} +bytes -= ret; +} +return 0; +} + static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) { int ret = -EOPNOTSUPP; @@ -1526,6 +1572,9 @@ static int aio_worker(void *arg) case QEMU_AIO_WRITE_ZEROES: ret = handle_aiocb_write_zeroes(aiocb); break; +case QEMU_AIO_COPY_RANGE: +ret = handle_aiocb_copy_range(aiocb); +break; default: fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); ret = -EINVAL; @@ -1536,9 +1585,10 @@ static int aio_worker(void *arg) return ret; } -static int paio_submit_co(BlockDriverState *bs, int fd, - int64_t offset, QEMUIOVector *qiov, - int bytes, int type) +static int paio_submit_co_full(BlockDriverState *bs, int fd, + int64_t offset, int fd2, int64_t offset2, + QEMUIOVector *qiov, + int bytes, int type) { RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); ThreadPool *pool; @@ -1546,6 +1596,8 @@ static int paio_submit_co(BlockDriverState *bs, int fd, acb->bs = bs; acb->aio_type = type; acb->aio_fildes = fd; +acb->aio_fd2 = fd2; +acb->aio_offset2 = offset2; acb->aio_nbytes = bytes; acb->aio_offset = offset; @@ -1561,6 +1613,13 @@ static int paio_submit_co(BlockDriverState *bs, int fd, return thread_pool_submit_co(pool, aio_worker, acb); } +static inline int paio_submit_co(BlockDriverState *bs, int fd, + int64_t offset, QEMUIOVector *qiov, + int bytes, int type) +{ +return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type); +} + static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, int64_t offset, QEMUIOVector *qiov, int bytes, BlockCompletionFunc *cb, void *opaque, int type) @@ -2451,6 +2510,35 @@ static void raw_abort_perm_update(BlockDriverState *bs) raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); } +static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, + BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags flags) +{ +return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags); +} + +static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, + BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags flags) +{ +BDRVRawState *s = bs->opaque; +BDRVRawState *src_s; + +assert(dst->bs =
[Qemu-devel] [PATCH v8 03/11] raw: Check byte range uniformly
We don't verify the request range against s->size in the I/O callbacks except for raw_co_pwritev. This is inconsistent (especially for raw_co_pwrite_zeroes and raw_co_pdiscard), so fix them, in the meanwhile make the helper reusable by the coming new callbacks. Note that in most cases the block layer already verifies the request byte range against our reported image length, before invoking the driver callbacks. The exception is during image creating, after blk_set_allow_write_beyond_eof(blk, true) is called. But in that case, the requests are not directly from the user or guest. So there is no visible behavior change in adding the check code. The int64_t -> uint64_t inconsistency, as shown by the type casting, is pre-existing due to the interface. Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Blake Signed-off-by: Fam Zheng --- block/raw-format.c | 64 -- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/block/raw-format.c b/block/raw-format.c index fe33693a2d..b69a0674b3 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -167,16 +167,37 @@ static void raw_reopen_abort(BDRVReopenState *state) state->opaque = NULL; } +/* Check and adjust the offset, against 'offset' and 'size' options. */ +static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, +uint64_t bytes, bool is_write) +{ +BDRVRawState *s = bs->opaque; + +if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) { +/* There's not enough space for the write, or the read request is + * out-of-range. Don't read/write anything to prevent leaking out of + * the size specified in options. */ +return is_write ? -ENOSPC : -EINVAL;; +} + +if (*offset > INT64_MAX - s->offset) { +return -EINVAL; +} +*offset += s->offset; + +return 0; +} + static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { -BDRVRawState *s = bs->opaque; +int ret; -if (offset > UINT64_MAX - s->offset) { -return -EINVAL; +ret = raw_adjust_offset(bs, &offset, bytes, false); +if (ret) { +return ret; } -offset += s->offset; BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); @@ -186,23 +207,11 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { -BDRVRawState *s = bs->opaque; void *buf = NULL; BlockDriver *drv; QEMUIOVector local_qiov; int ret; -if (s->has_size && (offset > s->size || bytes > (s->size - offset))) { -/* There's not enough space for the data. Don't write anything and just - * fail to prevent leaking out of the size specified in options. */ -return -ENOSPC; -} - -if (offset > UINT64_MAX - s->offset) { -ret = -EINVAL; -goto fail; -} - if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { /* Handling partial writes would be a pain - so we just * require that guests have 512-byte request alignment if @@ -237,7 +246,10 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, qiov = &local_qiov; } -offset += s->offset; +ret = raw_adjust_offset(bs, &offset, bytes, true); +if (ret) { +goto fail; +} BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); @@ -267,22 +279,24 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { -BDRVRawState *s = bs->opaque; -if (offset > UINT64_MAX - s->offset) { -return -EINVAL; +int ret; + +ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); +if (ret) { +return ret; } -offset += s->offset; return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); } static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) { -BDRVRawState *s = bs->opaque; -if (offset > UINT64_MAX - s->offset) { -return -EINVAL; +int ret; + +ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); +if (ret) { +return ret; } -offset += s->offset; return bdrv_co_pdiscard(bs->file->bs, offset, bytes); } -- 2.17.0
[Qemu-devel] [PATCH v8 08/11] iscsi: Create and use iscsi_co_wait_for_task
This loop is repeated a growing number times. Make a helper. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Blake --- block/iscsi.c | 54 --- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 6d0035d4b9..6a365cb07b 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -556,6 +556,17 @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, offset / iscsilun->cluster_size) == size); } +static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask, +IscsiLun *iscsilun) +{ +while (!iTask->complete) { +iscsi_set_events(iscsilun); +qemu_mutex_unlock(&iscsilun->mutex); +qemu_coroutine_yield(); +qemu_mutex_lock(&iscsilun->mutex); +} +} + static int coroutine_fn iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov, int flags) @@ -617,12 +628,7 @@ retry: scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); #endif -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_mutex_unlock(&iscsilun->mutex); -qemu_coroutine_yield(); -qemu_mutex_lock(&iscsilun->mutex); -} +iscsi_co_wait_for_task(&iTask, iscsilun); if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); @@ -693,13 +699,7 @@ retry: ret = -ENOMEM; goto out_unlock; } - -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_mutex_unlock(&iscsilun->mutex); -qemu_coroutine_yield(); -qemu_mutex_lock(&iscsilun->mutex); -} +iscsi_co_wait_for_task(&iTask, iscsilun); if (iTask.do_retry) { if (iTask.task != NULL) { @@ -863,13 +863,8 @@ retry: #if LIBISCSI_API_VERSION < (20160603) scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); #endif -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_mutex_unlock(&iscsilun->mutex); -qemu_coroutine_yield(); -qemu_mutex_lock(&iscsilun->mutex); -} +iscsi_co_wait_for_task(&iTask, iscsilun); if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); iTask.task = NULL; @@ -906,12 +901,7 @@ retry: return -ENOMEM; } -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_mutex_unlock(&iscsilun->mutex); -qemu_coroutine_yield(); -qemu_mutex_lock(&iscsilun->mutex); -} +iscsi_co_wait_for_task(&iTask, iscsilun); if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); @@ -1143,12 +1133,7 @@ retry: goto out_unlock; } -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_mutex_unlock(&iscsilun->mutex); -qemu_coroutine_yield(); -qemu_mutex_lock(&iscsilun->mutex); -} +iscsi_co_wait_for_task(&iTask, iscsilun); if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); @@ -1244,12 +1229,7 @@ retry: return -ENOMEM; } -while (!iTask.complete) { -iscsi_set_events(iscsilun); -qemu_mutex_unlock(&iscsilun->mutex); -qemu_coroutine_yield(); -qemu_mutex_lock(&iscsilun->mutex); -} +iscsi_co_wait_for_task(&iTask, iscsilun); if (iTask.status == SCSI_STATUS_CHECK_CONDITION && iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST && -- 2.17.0
[Qemu-devel] [PATCH v8 05/11] qcow2: Implement copy offloading
The two callbacks are implemented quite similarly to the read/write functions: bdrv_co_copy_range_from maps for read and calls into bs->file or bs->backing depending on the allocation status; bdrv_co_copy_range_to maps for write and calls into bs->file. Reviewed-by: Stefan Hajnoczi Signed-off-by: Fam Zheng --- block/qcow2.c | 229 +++--- 1 file changed, 199 insertions(+), 30 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 6d532470a8..8f89c4fe72 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1762,6 +1762,39 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, return status; } +static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, +QCowL2Meta **pl2meta, +bool link_l2) +{ +int ret = 0; +QCowL2Meta *l2meta = *pl2meta; + +while (l2meta != NULL) { +QCowL2Meta *next; + +if (!ret && link_l2) { +ret = qcow2_alloc_cluster_link_l2(bs, l2meta); +if (ret) { +goto out; +} +} + +/* Take the request off the list of running requests */ +if (l2meta->nb_clusters != 0) { +QLIST_REMOVE(l2meta, next_in_flight); +} + +qemu_co_queue_restart_all(&l2meta->dependent_requests); + +next = l2meta->next; +g_free(l2meta); +l2meta = next; +} +out: +*pl2meta = l2meta; +return ret; +} + static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) @@ -2048,24 +2081,9 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, } } -while (l2meta != NULL) { -QCowL2Meta *next; - -ret = qcow2_alloc_cluster_link_l2(bs, l2meta); -if (ret < 0) { -goto fail; -} - -/* Take the request off the list of running requests */ -if (l2meta->nb_clusters != 0) { -QLIST_REMOVE(l2meta, next_in_flight); -} - -qemu_co_queue_restart_all(&l2meta->dependent_requests); - -next = l2meta->next; -g_free(l2meta); -l2meta = next; +ret = qcow2_handle_l2meta(bs, &l2meta, true); +if (ret) { +goto fail; } bytes -= cur_bytes; @@ -2076,18 +2094,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, ret = 0; fail: -while (l2meta != NULL) { -QCowL2Meta *next; - -if (l2meta->nb_clusters != 0) { -QLIST_REMOVE(l2meta, next_in_flight); -} -qemu_co_queue_restart_all(&l2meta->dependent_requests); - -next = l2meta->next; -g_free(l2meta); -l2meta = next; -} +qcow2_handle_l2meta(bs, &l2meta, false); qemu_co_mutex_unlock(&s->lock); @@ -3274,6 +3281,166 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, return ret; } +static int coroutine_fn +qcow2_co_copy_range_from(BlockDriverState *bs, + BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags flags) +{ +BDRVQcow2State *s = bs->opaque; +int ret; +unsigned int cur_bytes; /* number of bytes in current iteration */ +BdrvChild *child = NULL; +BdrvRequestFlags cur_flags; + +assert(!bs->encrypted); +qemu_co_mutex_lock(&s->lock); + +while (bytes != 0) { +uint64_t copy_offset = 0; +/* prepare next request */ +cur_bytes = MIN(bytes, INT_MAX); +cur_flags = flags; + +ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset); +if (ret < 0) { +goto out; +} + +switch (ret) { +case QCOW2_CLUSTER_UNALLOCATED: +if (bs->backing && bs->backing->bs) { +int64_t backing_length = bdrv_getlength(bs->backing->bs); +if (src_offset >= backing_length) { +cur_flags |= BDRV_REQ_ZERO_WRITE; +} else { +child = bs->backing; +cur_bytes = MIN(cur_bytes, backing_length - src_offset); +copy_offset = src_offset; +} +} else { +cur_flags |= BDRV_REQ_ZERO_WRITE; +} +break; + +case QCOW2_CLUSTER_ZERO_PLAIN: +case QCOW2_CLUSTER_ZERO_ALLOC: +cur_flags |= BDRV_REQ_ZERO_WRITE; +break; + +case QCOW2_CLUSTER_COMPRESSED: +ret = -ENOTSUP; +goto out; +break; + +case QCOW2_CLUSTER_NORMAL: +child = bs->file; +c
[Qemu-devel] [PATCH v8 02/11] block: Introduce API for copy offloading
Introduce the bdrv_co_copy_range() API for copy offloading. Block drivers implementing this API support efficient copy operations that avoid reading each block from the source device and writing it to the destination devices. Examples of copy offload primitives are SCSI EXTENDED COPY and Linux copy_file_range(2). Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi --- block/io.c| 97 +++ include/block/block.h | 32 + include/block/block_int.h | 38 +++ 3 files changed, 167 insertions(+) diff --git a/block/io.c b/block/io.c index ca96b487eb..b7beaeeb9f 100644 --- a/block/io.c +++ b/block/io.c @@ -2835,3 +2835,100 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host) bdrv_unregister_buf(child->bs, host); } } + +static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src, +uint64_t src_offset, +BdrvChild *dst, +uint64_t dst_offset, +uint64_t bytes, +BdrvRequestFlags flags, +bool recurse_src) +{ +int ret; + +if (!src || !dst || !src->bs || !dst->bs) { +return -ENOMEDIUM; +} +ret = bdrv_check_byte_request(src->bs, src_offset, bytes); +if (ret) { +return ret; +} + +ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes); +if (ret) { +return ret; +} +if (flags & BDRV_REQ_ZERO_WRITE) { +return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags); +} + +if (!src->bs->drv->bdrv_co_copy_range_from +|| !dst->bs->drv->bdrv_co_copy_range_to +|| src->bs->encrypted || dst->bs->encrypted) { +return -ENOTSUP; +} +if (recurse_src) { +return src->bs->drv->bdrv_co_copy_range_from(src->bs, + src, src_offset, + dst, dst_offset, + bytes, flags); +} else { +return dst->bs->drv->bdrv_co_copy_range_to(dst->bs, + src, src_offset, + dst, dst_offset, + bytes, flags); +} +} + +/* Copy range from @src to @dst. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. */ +int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags flags) +{ +return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, + bytes, flags, true); +} + +/* Copy range from @src to @dst. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. */ +int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, + BdrvChild *dst, uint64_t dst_offset, + uint64_t bytes, BdrvRequestFlags flags) +{ +return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, + bytes, flags, false); +} + +int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset, +BdrvChild *dst, uint64_t dst_offset, +uint64_t bytes, BdrvRequestFlags flags) +{ +BdrvTrackedRequest src_req, dst_req; +BlockDriverState *src_bs = src->bs; +BlockDriverState *dst_bs = dst->bs; +int ret; + +bdrv_inc_in_flight(src_bs); +bdrv_inc_in_flight(dst_bs); +tracked_request_begin(&src_req, src_bs, src_offset, + bytes, BDRV_TRACKED_READ); +tracked_request_begin(&dst_req, dst_bs, dst_offset, + bytes, BDRV_TRACKED_WRITE); + +wait_serialising_requests(&src_req); +wait_serialising_requests(&dst_req); +ret = bdrv_co_copy_range_from(src, src_offset, + dst, dst_offset, + bytes, flags); + +tracked_request_end(&src_req); +tracked_request_end(&dst_req); +bdrv_dec_in_flight(src_bs); +bdrv_dec_in_flight(dst_bs); +return ret; +} diff --git a/include/block/block.h b/include/block/block.h index 3894edda9d..6cc6c7e699 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -611,4 +611,36 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, */ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); void bdrv_unregister_buf(BlockDriver
[Qemu-devel] [PATCH v8 07/11] iscsi: Query and save device designator when opening
The device designator data returned in INQUIRY command will be useful to fill in source/target fields during copy offloading. Do this when connecting to the target and save the data for later use. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi --- block/iscsi.c | 41 + 1 file changed, 41 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index 3fd7203916..6d0035d4b9 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -68,6 +68,7 @@ typedef struct IscsiLun { QemuMutex mutex; struct scsi_inquiry_logical_block_provisioning lbp; struct scsi_inquiry_block_limits bl; +struct scsi_inquiry_device_designator *dd; unsigned char *zeroblock; /* The allocmap tracks which clusters (pages) on the iSCSI target are * allocated and which are not. In case a target returns zeros for @@ -1740,6 +1741,30 @@ static QemuOptsList runtime_opts = { }, }; +static void iscsi_save_designator(IscsiLun *lun, + struct scsi_inquiry_device_identification *inq_di) +{ +struct scsi_inquiry_device_designator *desig, *copy = NULL; + +for (desig = inq_di->designators; desig; desig = desig->next) { +if (desig->association || +desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) { +continue; +} +/* NAA works better than T10 vendor ID based designator. */ +if (!copy || copy->designator_type < desig->designator_type) { +copy = desig; +} +} +if (copy) { +lun->dd = g_new(struct scsi_inquiry_device_designator, 1); +*lun->dd = *copy; +lun->dd->next = NULL; +lun->dd->designator = g_malloc(copy->designator_length); +memcpy(lun->dd->designator, copy->designator, copy->designator_length); +} +} + static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1922,6 +1947,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, struct scsi_task *inq_task; struct scsi_inquiry_logical_block_provisioning *inq_lbp; struct scsi_inquiry_block_limits *inq_bl; +struct scsi_inquiry_device_identification *inq_di; switch (inq_vpd->pages[i]) { case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING: inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, @@ -1947,6 +1973,17 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, sizeof(struct scsi_inquiry_block_limits)); scsi_free_scsi_task(inq_task); break; +case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION: +inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1, + SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION, +(void **) &inq_di, errp); +if (inq_task == NULL) { +ret = -EINVAL; +goto out; +} +iscsi_save_designator(iscsilun, inq_di); +scsi_free_scsi_task(inq_task); +break; default: break; } @@ -2003,6 +2040,10 @@ static void iscsi_close(BlockDriverState *bs) iscsi_logout_sync(iscsi); } iscsi_destroy_context(iscsi); +if (iscsilun->dd) { +g_free(iscsilun->dd->designator); +g_free(iscsilun->dd); +} g_free(iscsilun->zeroblock); iscsi_allocmap_free(iscsilun); qemu_mutex_destroy(&iscsilun->mutex); -- 2.17.0
[Qemu-devel] [PATCH v8 09/11] iscsi: Implement copy offloading
Issue EXTENDED COPY (LID1) command to implement the copy_range API. The parameter data construction code is modified from libiscsi's iscsi-dd.c. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi --- block/iscsi.c| 219 +++ include/scsi/constants.h | 4 + 2 files changed, 223 insertions(+) diff --git a/block/iscsi.c b/block/iscsi.c index 6a365cb07b..5ea75646d9 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2205,6 +2205,221 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs, iscsi_allocmap_invalidate(iscsilun); } +static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs, + BdrvChild *src, + uint64_t src_offset, + BdrvChild *dst, + uint64_t dst_offset, + uint64_t bytes, + BdrvRequestFlags flags) +{ +return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, flags); +} + +static struct scsi_task *iscsi_xcopy_task(int param_len) +{ +struct scsi_task *task; + +task = g_new0(struct scsi_task, 1); + +task->cdb[0] = EXTENDED_COPY; +task->cdb[10]= (param_len >> 24) & 0xFF; +task->cdb[11]= (param_len >> 16) & 0xFF; +task->cdb[12]= (param_len >> 8) & 0xFF; +task->cdb[13]= param_len & 0xFF; +task->cdb_size = 16; +task->xfer_dir = SCSI_XFER_WRITE; +task->expxferlen = param_len; + +return task; +} + +static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun) +{ +struct scsi_inquiry_device_designator *dd = lun->dd; + +memset(desc, 0, 32); +desc[0] = IDENT_DESCR_TGT_DESCR; +desc[4] = dd->code_set; +desc[5] = (dd->designator_type & 0xF) +| ((dd->association & 3) << 4); +desc[7] = dd->designator_length; +memcpy(desc + 8, dd->designator, dd->designator_length); + +desc[28] = 0; +desc[29] = (lun->block_size >> 16) & 0xFF; +desc[30] = (lun->block_size >> 8) & 0xFF; +desc[31] = lun->block_size & 0xFF; +} + +static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index, + int dst_index) +{ +hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */ +hdr[1] = ((dc << 1) | cat) & 0xFF; +hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF; +/* don't account for the first 4 bytes in descriptor header*/ +hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF; +hdr[4] = (src_index >> 8) & 0xFF; +hdr[5] = src_index & 0xFF; +hdr[6] = (dst_index >> 8) & 0xFF; +hdr[7] = dst_index & 0xFF; +} + +static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat, + int src_index, int dst_index, int num_blks, + uint64_t src_lba, uint64_t dst_lba) +{ +iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index); + +/* The caller should verify the request size */ +assert(num_blks < 65536); +desc[10] = (num_blks >> 8) & 0xFF; +desc[11] = num_blks & 0xFF; +desc[12] = (src_lba >> 56) & 0xFF; +desc[13] = (src_lba >> 48) & 0xFF; +desc[14] = (src_lba >> 40) & 0xFF; +desc[15] = (src_lba >> 32) & 0xFF; +desc[16] = (src_lba >> 24) & 0xFF; +desc[17] = (src_lba >> 16) & 0xFF; +desc[18] = (src_lba >> 8) & 0xFF; +desc[19] = src_lba & 0xFF; +desc[20] = (dst_lba >> 56) & 0xFF; +desc[21] = (dst_lba >> 48) & 0xFF; +desc[22] = (dst_lba >> 40) & 0xFF; +desc[23] = (dst_lba >> 32) & 0xFF; +desc[24] = (dst_lba >> 24) & 0xFF; +desc[25] = (dst_lba >> 16) & 0xFF; +desc[26] = (dst_lba >> 8) & 0xFF; +desc[27] = dst_lba & 0xFF; +} + +static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str, +int list_id_usage, int prio, +int tgt_desc_len, +int seg_desc_len, int inline_data_len) +{ +buf[0] = list_id; +buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7); +buf[2] = (tgt_desc_len >> 8) & 0xFF; +buf[3] = tgt_desc_len & 0xFF; +buf[8] = (seg_desc_len >> 24) & 0xFF; +buf[9] = (seg_desc_len >> 16) & 0xFF; +buf[10] = (seg_desc_len >> 8) & 0xFF; +buf[11] = seg_desc_len & 0xFF; +buf[12] = (inline_data_len >> 24) & 0xFF; +buf[13] = (inline_data_len >> 16) & 0xFF; +buf[14] = (inline_data_len >> 8) & 0xFF; +buf[15] = inline_data_len & 0xFF; +} + +static void iscsi_xcopy_data(struct iscsi_data *data, + IscsiLun *src, int64_t src_lba, + IscsiLun *dst, int64_t dst_lba, + uint16_t num_blocks) +{ +uint8_t *buf; +
[Qemu-devel] [PATCH v8 01/11] docker: Update fedora image to 28
Signed-off-by: Fam Zheng --- tests/docker/dockerfiles/fedora.docker | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index b706f42405..65d7761cf5 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -1,4 +1,4 @@ -FROM fedora:27 +FROM fedora:28 ENV PACKAGES \ ccache gettext git tar PyYAML sparse flex bison python3 bzip2 hostname \ gcc gcc-c++ llvm clang make perl which bc findutils glib2-devel \ -- 2.17.0
[Qemu-devel] [PATCH v8 00/11] qemu-img convert with copy offloading
v8: Fix compiling against new glibc and libiscsi on Fedora 28 where v7 had conflict definitions. [Stefan, myself] - Add HAVE_COPY_FILE_RANGE in configure. - Drop IDENT_DESCR_TGT_DESCR from scsi constants header. v7: Fix qcow2. v6: Pick up rev-by from Stefan and Eric. Tweak patch 2 commit message. v5: - Fix raw offset/bytes check for read. [Eric] - Fix qcow2_handle_l2meta. [Stefan] - Add coroutine_fn whereever appropriate. [Stefan] v4: - Fix raw offset and size. [Eric] - iscsi: Drop unnecessary return values and variables in favor of constants. [Stefan] - qcow2: Handle small backing case. [Stefan] - file-posix: Translate ENOSYS to ENOTSUP. [Stefan] - API documentation and commit message. [Stefan] - Add rev-by to patches 3, 5 - 10. [Stefan, Eric] This series introduces block layer API for copy offloading and makes use of it in qemu-img convert. For now we implemented the operation in local file protocol with copy_file_range(2). Besides that it's possible to add similar to iscsi, nfs and potentially more. As far as its usage goes, in addition to qemu-img convert, we can emulate offloading in scsi-disk (handle EXTENDED COPY command), and use the API in block jobs too. Fam Zheng (11): docker: Update fedora image to 28 block: Introduce API for copy offloading raw: Check byte range uniformly raw: Implement copy offloading qcow2: Implement copy offloading file-posix: Implement bdrv_co_copy_range iscsi: Query and save device designator when opening iscsi: Create and use iscsi_co_wait_for_task iscsi: Implement copy offloading block-backend: Add blk_co_copy_range qemu-img: Convert with copy offloading block/block-backend.c | 18 ++ block/file-posix.c | 98 +++- block/io.c | 97 block/iscsi.c | 314 ++--- block/qcow2.c | 229 +++--- block/raw-format.c | 96 ++-- configure | 17 ++ include/block/block.h | 32 +++ include/block/block_int.h | 38 +++ include/block/raw-aio.h| 10 +- include/scsi/constants.h | 4 + include/sysemu/block-backend.h | 4 + qemu-img.c | 50 +++- tests/docker/dockerfiles/fedora.docker | 2 +- 14 files changed, 909 insertions(+), 100 deletions(-) -- 2.17.0
Re: [Qemu-devel] Questions about the flow of interrupt simulation
Thanks deeply for your clear explanation! Let me try to conclude what I learned from your description. qemu_set_irq() qemu_set_irq() [ device ] -> [ GIC ] -> [ CPU ] Detailed: [device] =>qemu_set_irq() [GIC] =>gic_set_irq() =>gic_update() =>qemu_set_irq() [CPU] => arm_cpu_set_irq() GIC decides to send an irq to CPU or not depends on the level that device sends or others reason like interrupt mask. GIC turns the interrupt into an interrupt type: ARM_CPU_HARD/FIQ/VIRQ/VFIQ. In arm_cpu_set_irq(), it turns ARM_CPU_HARD/FIQ/VIRQ/VFIQ into CPU_INTERRUPT_HARD/FIQ/VIRQ/VFIQ. cpu_interrupt() sets cpu->interrupt_request. Then I have some following questions: 1. There are two kinds of interrupt: edge triggered and level triggered. I have seen two code segment related to the level: gic_set_irq() and arm_cpu_set_irq(). In gic_set_irq(), if level == GIC_TEST_LEVEL(irq, cm), which means the level is not changed, will return. In arm_cpu_set_irq() said that if level ==1, call cpu_interrupt(). if level==0, call cpu_reset_interrupt(), which will clean up that irq bits.. Does that mean all interrupt in arm are level triggered(high level)? How to know the triggered type of interrupt? 2. interrupt signal will be passed through GIC from device to CPU. There are four types of interrupt in CPU: CPU_INTERRUPT_HARD/FIQ/VIRQ/VFIQ. Where exactly define the CPU_INTERRUPT_{type} that device's interrupt corresponded? 3. I have seen others device's code under qemu/hw directory. Almost all device will call qemu_set_irq() at the end of device's read/write. Is that for the purpose of a device to tell CPU that it has done some works? but the second parameter of qemu_set_irq(), level, will be set to a different value(not always 1 or 0), which sometimes will cause the interrupt return at gic_set_irq() instead of passing to CPU. What does the interrupt at the end of device_read/write(device_update()) mean? I think I might ask some basic questions of interrupt. If there is any documents or tutorial about interrupt, please share with me, I am willing to study it. Sincerely, Eva 2018-05-24 21:40 GMT+08:00 Peter Maydell : > On 23 May 2018 at 11:45, Eva Chen wrote: > > Hi, > > > > I have added a new virtual device under qemu/hw, and a device driver in > the > > source code of guest OS (I use arm linux). > > Since I want to add an interrupt to the virtual device, I am tracing the > > flow of interrupt simulation of QEMU. > > However, I have some questions about the flow of interrupt, and wondering > > if I could ask here for some help. > > > > I use $qemu-system-aarch64 -machine virt. > > Take the flow of interrupt that generated from the UART pl011 device as > > example. > > First an interrupt, CPU_INTERRUPT_HARD, will be sent from device to GIC > for > > telling CPU that this device finished some job, then this interrupt wiil > be > > transformed into EXCP_IRQ in aarch64_cpu_do_interrupt() and interrupt > > handled by CPU. > > This isn't quite the way it works. > > What happens is more like: > * pl011 device calls qemu_set_irq() on its outbound IRQ line > * the board model has connected that line up to one of the GIC's >input lines > * the GIC function on the other end of the qemu_irq is called >and does the necessary processing of the input line state change. >(For the gicv2 this is the gic_set_irq() function.) > * if the GIC state is such that it should now signal an IRQ to >the CPU it will do that. (This is not necessarily going to happen: >for instance the interrupt might be disabled in the GIC, or >another higher priority interrupt might already be being signaled.) >The GIC does this by calling qemu_set_irq() on its outbound >IRQ or FIQ line. > * the CPU object on the other end of that IRQ or FIQ line will have >its arm_cpu_set_irq() function called. All this function does is >make a note so that we stop executing code and start doing >something else; we do that by calling cpu_interrupt(). We use >CPU_INTERRUPT_HARD/FIQ/VIRQ/VFIQ here to distinguish which of our >4 interrupt lines this is; nothing outside the CPU cares about >those names. > * cpu_interrupt() itself doesn't do anything, it just sets a flag >which the thread executing guest cpu code will check. > * in the guest-cpu-execution code path you have traced the relevant >flow here: >> 2. CPU handling : cpu_exec() => arm_cpu_exec_interrupt() => >> aarch64_cpu_do_interrupt() >> Guest CPU's pc (env->pc) will be changed to the address of interrupt >> service routine in aarch64_cpu_do_interrupt(). > > Some notes: > (1) I've assumed GICv2 in the above; the GICv3 is a bit more complex > but the basic idea is similar > (2) The GIC is a device model that is not "special" just because > it happens to be an interrupt controller. It's just a model of a > bit of hardware that has some input lines and some outp
[Qemu-devel] [PATCH v4] qga: add mountpoint usage to GuestFilesystemInfo
From: Chen Hanxiao This patch adds support for getting the usage of mounted filesystem. It's very useful when we try to monitor guest's filesystem. Cc: Michael Roth Cc: Eric Blake Signed-off-by: Chen Hanxiao --- v2: add description in qapi-schema and version numbers v3: use float for usage to get more precision. v4: make usage as a best-effort query and mark it as optional. qga/commands-posix.c | 17 + qga/qapi-schema.json | 3 ++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/qga/commands-posix.c b/qga/commands-posix.c index 0dc219dbcf..4facc76953 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -46,6 +46,7 @@ extern char **environ; #include #include #include +#include #ifdef FIFREEZE #define CONFIG_FSFREEZE @@ -1072,6 +1073,9 @@ static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount, Error **errp) { GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs)); +struct statvfs buf; +unsigned long used, nonroot_total; +double usage; char *devpath = g_strdup_printf("/sys/dev/block/%u:%u", mount->devmajor, mount->devminor); @@ -1079,7 +1083,20 @@ static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount, fs->type = g_strdup(mount->devtype); build_guest_fsinfo_for_device(devpath, fs, errp); +if (statvfs(fs->mountpoint, &buf)) { +fs->has_usage = false; +fs->usage = -1; +} else { +used = buf.f_blocks - buf.f_bfree; +nonroot_total = used + buf.f_bavail; +usage = (double) used / nonroot_total; + +fs->has_usage = true; +fs->usage = usage; +} + g_free(devpath); + return fs; } diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index 17884c7c70..fcd427e86d 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -846,6 +846,7 @@ # @name: disk name # @mountpoint: mount point path # @type: file system type string +# @usage: file system usage, fraction between 0 and 1 (since 3.0) # @disk: an array of disk hardware information that the volume lies on, #which may be empty if the disk type is not supported # @@ -853,7 +854,7 @@ ## { 'struct': 'GuestFilesystemInfo', 'data': {'name': 'str', 'mountpoint': 'str', 'type': 'str', - 'disk': ['GuestDiskAddress']} } + '*usage': 'number', 'disk': ['GuestDiskAddress']} } ## # @guest-get-fsinfo: -- 2.17.0
Re: [Qemu-devel] [PATCH 2/3] pc-bios/s390-ccw/net: Add support for pxelinux-style config files
On 30.05.2018 13:07, Viktor VM Mihajlovski wrote: > On 30.05.2018 11:16, Thomas Huth wrote: >> Since it is quite cumbersome to manually create a combined kernel with >> initrd image for network booting, we now support loading via pxelinux >> configuration files, too. In these files, the kernel, initrd and command >> line parameters can be specified seperately, and the firmware then takes >> care of glueing everything together in memory after the files have been >> downloaded. See this URL for details about the config file layout: >> https://www.syslinux.org/wiki/index.php?title=PXELINUX >> >> The user can either specify a config file directly as bootfile via DHCP >> (but in this case, the file has to start either with "default" or a "#" >> comment so we can distinguish it from binary kernels), or a folder (i.e. >> the bootfile name must end with "/") where the firmware should look for >> the typical pxelinux.cfg file names, e.g. based on MAC or IP address. >> We also support the pxelinux.cfg DHCP options 209 and 210 from RFC 5071. >> >> Signed-off-by: Thomas Huth >> --- >> pc-bios/s390-ccw/netboot.mak | 7 ++-- >> pc-bios/s390-ccw/netmain.c | 79 >> +++- >> 2 files changed, 82 insertions(+), 4 deletions(-) > [...] >> diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c >> index 7533cf7..e84bb2b 100644 >> --- a/pc-bios/s390-ccw/netmain.c >> +++ b/pc-bios/s390-ccw/netmain.c > [...] >> @@ -301,6 +363,18 @@ static int net_try_direct_tftp_load(filename_ip_t >> *fn_ip) >> if (!strncmp("* ", cfgbuf, 2)) { >> return handle_ins_cfg(fn_ip, cfgbuf, rc); >> } >> +if (!strncasecmp("default", cfgbuf, 7) || !strncmp("# ", cfgbuf, >> 2)) { > Minor, but I'm wondering whether this is not too cautious and could rule > out valid config files. You might just unconditionally call > pxelinux_parse_cfg and let it find out if this is as pxelinux config > file or not. I thought about this for a while, but I think I'd rather avoid this. It's also possible that the user tried to load a small binary which accidentally contained a string like "label" and "kernel", and then the bios would try to interpret this as config file instead of just running the binary. If you feel really unhappy about the magic string matching with "default" and "# " here, I think it would be better to remove this magic completely instead. The original pxelinux loader and petitboot also do not support this, but rely on the DHCP options 209 and 210 instead (which we also support now). I only added this magic here for my own convenience, since the built-in DHCP server of QEMU does not support the options 209 and 210, and I still wanted to have a way to quickly change from one config file to another... but now that the code is basically up and running, it's not really required anymore, so removing this should be fine. Alternatively, we could use a more sophisticated magic here, like "# pxelinux" for example, just for the developers' convenience ... what do you think? Thomas
Re: [Qemu-devel] Recording I/O activity after KVM does a VMEXIT
Hi, I’m not familiar with KVM, but I know successful attempts of replaying the execution by logging IO and MMIO in TCG mode. The difference in CPU I/O and VM I/O is the following. In icount we record anything coming into the VM, but not into the CPU. It means that the whole packet is recorded. Virtual hardware behaves deterministically and therefore CPU will get identical input in case of replay, because the whole recorded packet is injected again by the filter. Pavel Dovgalyuk From: Arnabjyoti Kalita [mailto:akal...@cs.stonybrook.edu] Sent: Thursday, May 31, 2018 11:14 PM To: Pavel Dovgalyuk Cc: Stefan Hajnoczi; qemu-devel@nongnu.org; Pavel Dovgalyuk Subject: Re: [Qemu-devel] Recording I/O activity after KVM does a VMEXIT Dear Pavel, Thank you for your answer. I am not being able to understand the difference between CPU I/Os and VM I/Os. Would any network packet that comes into the Guest OS from the outside be a part of VM I/O or CPU I/O ? I am only interested in "recording" and "replaying" those network packets that come from the outside into the networking backend and not the other way around. Say for example when I get a VMExit because of the arrival of a network packet, I will use the VMExit reason : "KVM_EXIT_MMIO" to trace back to "e1000_mmio_write()" which I expect should be enough to record network packets that come from the outside and write to the guest address space for "e1000" devices. In such a case, I think I will not have to use the "network-filter" backend that you use to record VM I/O only. Let me know if you find errors in my approach. I will try to see how I can record disk packets. If disk packets use other ways of writing to the guest memory apart from a normal VMExit, I will try to find it out. Eventually I hope that it will use one of the available disk front-end functions to write to the guest memory from the disk, just like e1000 does with an "e1000_mmio_write()" call. Thanks and best regards, Arnab On Thu, May 31, 2018 at 8:44 AM, Pavel Dovgalyuk wrote: > From: Stefan Hajnoczi [mailto:stefa...@gmail.com] > On Wed, May 30, 2018 at 11:19:13PM -0400, Arnabjyoti Kalita wrote: > > I am trying to implement a 'minimal' record-replay mechanism for KVM, which > > is similar to the one existing for TCG via -icount. I am trying to record > > I/O events only (specifically disk and network events) when KVM does a > > VMEXIT. This has led me to the function kvm_cpu_exec where I can clearly > > see the different ways of handling all of the possible VMExit cases (like > > PIO, MMIO etc.). To record network packets, I am working with the e1000 > > hardware device. > > > > Can I make sure that all of the network I/O, atleast for the e1000 device > > happens through the KVM_EXIT_MMIO case and subsequent use of the > > address_space_rw() function ? Do I also need to look at other functions as > > well ? Also for recording disk activity, can I make sure that looking out > > for the KVM_EXIT_MMIO and/or KVM_EXIT_PIO cases in the vmexit mechanism, > > will be enough ? > > > > Let me know if there are other details that I need to take care of. I am > > using QEMU 2.11 on a x86-64 CPU and the guest runs a Linux Kernel 4.4 with > > Ubuntu 16.04. The main icount-based record/replay advantage is that we don't record any CPU IO. We record only VM IO (e.g., by using the network filter). Disk devices may transfer data to CPU using DMA, therefore intercepting only VMExit cases will not be enough. Pavel Dovgalyuk
Re: [Qemu-devel] [PATCH 2/9 V5] hostmem-file: add the 'pmem' option
Because we have disk backend imitation for NVDimm kind memory, I think it is more flexible for user to specify the real or not real pmem rather than we check in qemu using the pmem_is_pmem API From: Stefan Hajnoczi Sent: Thursday, May 31, 2018 1:09:42 PM To: junyan...@gmx.com Cc: qemu-devel@nongnu.org; Haozhong Zhang; xiaoguangrong.e...@gmail.com; crosthwaite.pe...@gmail.com; m...@redhat.com; dgilb...@redhat.com; ehabk...@redhat.com; quint...@redhat.com; Junyan He; stefa...@redhat.com; pbonz...@redhat.com; imamm...@redhat.com; r...@twiddle.net Subject: Re: [Qemu-devel] [PATCH 2/9 V5] hostmem-file: add the 'pmem' option On Thu, May 10, 2018 at 10:08:51AM +0800, junyan...@gmx.com wrote: > From: Junyan He > > When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it > needs to know whether the backend storage is a real persistent memory, > in order to decide whether special operations should be performed to > ensure the data persistence. > > This boolean option 'pmem' allows users to specify whether the backend > storage of memory-backend-file is a real persistent memory. If > 'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the > corresponding memory region. I'm still not sure if this option is necessary since pmem_is_pmem() is available with the introduction of the libpmem dependency. Why can't it be used? Stefan
Re: [Qemu-devel] [qemu PATCH v4 0/4] support NFIT platform capabilities
> Ping on this series. Rob, I think I've addressed all your feedback. > Can you please verify? I haven't tested it, but it reads OK. I'm OK with just extending the valid count for bits set to one for now; we can add a new argument later if a need arises for extending it to express new bits set to zero.
Re: [Qemu-devel] [PATCH v7 0/5] virtio-balloon: free page hint reporting support
On Fri, Jun 01, 2018 at 12:58:24PM +0800, Peter Xu wrote: > On Tue, Apr 24, 2018 at 02:13:43PM +0800, Wei Wang wrote: > > This is the deivce part implementation to add a new feature, > > VIRTIO_BALLOON_F_FREE_PAGE_HINT to the virtio-balloon device. The device > > receives the guest free page hints from the driver and clears the > > corresponding bits in the dirty bitmap, so that those free pages are > > not transferred by the migration thread to the destination. > > > > - Test Environment > > Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz > > Guest: 8G RAM, 4 vCPU > > Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second > > > > - Test Results > > - Idle Guest Live Migration Time (results are averaged over 10 runs): > > - Optimization v.s. Legacy = 271ms vs 1769ms --> ~86% reduction > > - Guest with Linux Compilation Workload (make bzImage -j4): > > - Live Migration Time (average) > > Optimization v.s. Legacy = 1265ms v.s. 2634ms --> ~51% reduction > > - Linux Compilation Time > > Optimization v.s. Legacy = 4min56s v.s. 5min3s > > --> no obvious difference > > > > - Source Code > > - QEMU: https://github.com/wei-w-wang/qemu-free-page-lm.git > > - Linux: https://github.com/wei-w-wang/linux-free-page-lm.git > > Hi, Wei, > > I have a very high-level question to the series. > > IIUC the core idea for this series is that we can avoid sending some > of the pages if we know that we don't need to send them. I think this > is based on the fact that on the destination side all the pages are by > default zero after they are malloced. While before this series, IIUC > any migration will send every single page to destination, no matter > whether it's zeroed or not. So I'm uncertain about whether this will > affect the received bitmap on the destination side. Say, before this > series, the received bitmap will directly cover the whole RAM bitmap > after migration is finished, now it's won't. Will there be any side > effect? I don't see obvious issue now, but just raise this question > up. > > Meanwhile, this reminds me about a more funny idea: whether we can > just avoid sending the zero pages directly from QEMU's perspective. > In other words, can we just do nothing if save_zero_page() detected > that the page is zero (I guess the is_zero_range() can be fast too, > but I don't know exactly how fast it is)? And how that would be > differed from this page hinting way in either performance and other > aspects. I noticed a problem (after I wrote the above paragraph 5 minutes ago...): when a page was valid and sent to the destination (with non-zero data), however after a while that page was zeroed. Then if we don't send zero pages at all, we won't send the page after it's zeroed. Then on the destination side we'll have a stale non-zero page. Is my understanding correct? Will that be a problem to this series too where a valid page can be possibly freed and hinted? > > I haven't digged into the kernel patches yet so I have totally no idea > on the detailed implementation of the page hinting. Please feel free > to correct me if there is obvious misunderstandings. > > Regards, > > -- > Peter Xu -- Peter Xu
Re: [Qemu-devel] [PATCH v7 0/5] virtio-balloon: free page hint reporting support
On Tue, Apr 24, 2018 at 02:13:43PM +0800, Wei Wang wrote: > This is the deivce part implementation to add a new feature, > VIRTIO_BALLOON_F_FREE_PAGE_HINT to the virtio-balloon device. The device > receives the guest free page hints from the driver and clears the > corresponding bits in the dirty bitmap, so that those free pages are > not transferred by the migration thread to the destination. > > - Test Environment > Host: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz > Guest: 8G RAM, 4 vCPU > Migration setup: migrate_set_speed 100G, migrate_set_downtime 2 second > > - Test Results > - Idle Guest Live Migration Time (results are averaged over 10 runs): > - Optimization v.s. Legacy = 271ms vs 1769ms --> ~86% reduction > - Guest with Linux Compilation Workload (make bzImage -j4): > - Live Migration Time (average) > Optimization v.s. Legacy = 1265ms v.s. 2634ms --> ~51% reduction > - Linux Compilation Time > Optimization v.s. Legacy = 4min56s v.s. 5min3s > --> no obvious difference > > - Source Code > - QEMU: https://github.com/wei-w-wang/qemu-free-page-lm.git > - Linux: https://github.com/wei-w-wang/linux-free-page-lm.git Hi, Wei, I have a very high-level question to the series. IIUC the core idea for this series is that we can avoid sending some of the pages if we know that we don't need to send them. I think this is based on the fact that on the destination side all the pages are by default zero after they are malloced. While before this series, IIUC any migration will send every single page to destination, no matter whether it's zeroed or not. So I'm uncertain about whether this will affect the received bitmap on the destination side. Say, before this series, the received bitmap will directly cover the whole RAM bitmap after migration is finished, now it's won't. Will there be any side effect? I don't see obvious issue now, but just raise this question up. Meanwhile, this reminds me about a more funny idea: whether we can just avoid sending the zero pages directly from QEMU's perspective. In other words, can we just do nothing if save_zero_page() detected that the page is zero (I guess the is_zero_range() can be fast too, but I don't know exactly how fast it is)? And how that would be differed from this page hinting way in either performance and other aspects. I haven't digged into the kernel patches yet so I have totally no idea on the detailed implementation of the page hinting. Please feel free to correct me if there is obvious misunderstandings. Regards, -- Peter Xu
Re: [Qemu-devel] [PATCH v7 3/5] migration: API to clear bits of guest free pages from the dirty bitmap
On Tue, Apr 24, 2018 at 02:13:46PM +0800, Wei Wang wrote: > This patch adds an API to clear bits corresponding to guest free pages > from the dirty bitmap. Spilt the free page block if it crosses the QEMU > RAMBlock boundary. > > Signed-off-by: Wei Wang > CC: Dr. David Alan Gilbert > CC: Juan Quintela > CC: Michael S. Tsirkin > --- > include/migration/misc.h | 2 ++ > migration/ram.c | 44 > 2 files changed, 46 insertions(+) > > diff --git a/include/migration/misc.h b/include/migration/misc.h > index 4ebf24c..113320e 100644 > --- a/include/migration/misc.h > +++ b/include/migration/misc.h > @@ -14,11 +14,13 @@ > #ifndef MIGRATION_MISC_H > #define MIGRATION_MISC_H > > +#include "exec/cpu-common.h" > #include "qemu/notify.h" > > /* migration/ram.c */ > > void ram_mig_init(void); > +void qemu_guest_free_page_hint(void *addr, size_t len); > > /* migration/block.c */ > > diff --git a/migration/ram.c b/migration/ram.c > index 9a72b1a..0147548 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -2198,6 +2198,50 @@ static int ram_init_all(RAMState **rsp) > } > > /* > + * This function clears bits of the free pages reported by the caller from > the > + * migration dirty bitmap. @addr is the host address corresponding to the > + * start of the continuous guest free pages, and @len is the total bytes of > + * those pages. > + */ > +void qemu_guest_free_page_hint(void *addr, size_t len) > +{ > +RAMBlock *block; > +ram_addr_t offset; > +size_t used_len, start, npages; Do we need to check here on whether a migration is in progress? Since if not I'm not sure whether this hint still makes any sense any more, and more importantly it seems to me that block->bmap below at [1] is only valid during a migration. So I'm not sure whether QEMU will crash if this function is called without a running migration. > + > +for (; len > 0; len -= used_len) { > +block = qemu_ram_block_from_host(addr, false, &offset); > +if (unlikely(!block)) { > +return; We should never reach here, should we? Assuming the callers of this function should always pass in a correct host address. If we are very sure that the host addr should be valid, could we just assert? > +} > + > +/* > + * This handles the case that the RAMBlock is resized after the free > + * page hint is reported. > + */ > +if (unlikely(offset > block->used_length)) { > +return; > +} > + > +if (len <= block->used_length - offset) { > +used_len = len; > +} else { > +used_len = block->used_length - offset; > +addr += used_len; > +} > + > +start = offset >> TARGET_PAGE_BITS; > +npages = used_len >> TARGET_PAGE_BITS; > + > +qemu_mutex_lock(&ram_state->bitmap_mutex); So now I think I understand the lock can still be meaningful since this function now can be called outside the migration thread (e.g., in vcpu thread). But still it would be nice to mention it somewhere on the truth of the lock. Regards, > +ram_state->migration_dirty_pages -= > + bitmap_count_one_with_offset(block->bmap, start, > npages); > +bitmap_clear(block->bmap, start, npages); [1] > +qemu_mutex_unlock(&ram_state->bitmap_mutex); > +} > +} > + > +/* > * Each of ram_save_setup, ram_save_iterate and ram_save_complete has > * long-running RCU critical section. When rcu-reclaims in the code > * start to become numerous it will be necessary to reduce the > -- > 1.8.3.1 > > -- Peter Xu
Re: [Qemu-devel] [Qemu-block] [PATCH v2 8/8] block: Remove unused sector-based vectored I/O
On Thu, May 31, 2018 at 03:50:46PM -0500, Eric Blake wrote: > We are gradually moving away from sector-based interfaces, towards > byte-based. Now that all callers of vectored I/O have been converted > to use our preferred byte-based bdrv_co_p{read,write}v(), we can > delete the unused bdrv_co_{read,write}v(). > > Furthermore, this gets rid of the signature difference between the > public bdrv_co_writev() and the callback .bdrv_co_writev (the > latter still exists, because some drivers still need more work > before they are fully byte-based). > > Signed-off-by: Eric Blake > Reviewed-by: Stefan Hajnoczi Reviewed-by: Jeff Cody > --- > v2: commit typo fix [Kashyap] > --- > include/block/block.h | 4 > block/io.c| 36 > 2 files changed, 40 deletions(-) > > diff --git a/include/block/block.h b/include/block/block.h > index 3894edda9de..fe40d2929ac 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -285,10 +285,6 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const > void *buf, int bytes); > int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov); > int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, > const void *buf, int count); > -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, > - int nb_sectors, QEMUIOVector *qiov); > -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, > - int nb_sectors, QEMUIOVector *qiov); > /* > * Efficiently zero a region of the disk image. Note that this is a regular > * I/O request like read or write and should have a reasonable size. This > diff --git a/block/io.c b/block/io.c > index ca96b487eb8..1d86bfc0072 100644 > --- a/block/io.c > +++ b/block/io.c > @@ -1341,24 +1341,6 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, > return ret; > } > > -static int coroutine_fn bdrv_co_do_readv(BdrvChild *child, > -int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, > -BdrvRequestFlags flags) > -{ > -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { > -return -EINVAL; > -} > - > -return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS, > - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); > -} > - > -int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num, > - int nb_sectors, QEMUIOVector *qiov) > -{ > -return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0); > -} > - > static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, > int64_t offset, int bytes, BdrvRequestFlags flags) > { > @@ -1801,24 +1783,6 @@ out: > return ret; > } > > -static int coroutine_fn bdrv_co_do_writev(BdrvChild *child, > -int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, > -BdrvRequestFlags flags) > -{ > -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { > -return -EINVAL; > -} > - > -return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS, > - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); > -} > - > -int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num, > -int nb_sectors, QEMUIOVector *qiov) > -{ > -return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0); > -} > - > int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, > int bytes, BdrvRequestFlags flags) > { > -- > 2.14.3 > >
Re: [Qemu-devel] [Qemu-block] [PATCH v2 6/8] replication: Switch to byte-based calls
On Thu, May 31, 2018 at 03:50:44PM -0500, Eric Blake wrote: > We are gradually moving away from sector-based interfaces, towards > byte-based. Make the change for the last few sector-based calls > into the block layer from the replication driver. > > Ideally, the replication driver should switch to doing everything > byte-based, but that's a more invasive change that requires a > bit more auditing. > > Signed-off-by: Eric Blake Reviewed-by: Jeff Cody > --- > block/replication.c | 14 -- > 1 file changed, 8 insertions(+), 6 deletions(-) > > diff --git a/block/replication.c b/block/replication.c > index 826db7b3049..6349d6958e4 100644 > --- a/block/replication.c > +++ b/block/replication.c > @@ -246,13 +246,14 @@ static coroutine_fn int > replication_co_readv(BlockDriverState *bs, > backup_cow_request_begin(&req, child->bs->job, > sector_num * BDRV_SECTOR_SIZE, > remaining_bytes); > -ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, > -qiov); > +ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, > + remaining_bytes, qiov, 0); > backup_cow_request_end(&req); > goto out; > } > > -ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov); > +ret = bdrv_co_preadv(bs->file, sector_num * BDRV_SECTOR_SIZE, > + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); > out: > return replication_return_value(s, ret); > } > @@ -279,8 +280,8 @@ static coroutine_fn int > replication_co_writev(BlockDriverState *bs, > } > > if (ret == 0) { > -ret = bdrv_co_writev(top, sector_num, > - remaining_sectors, qiov); > +ret = bdrv_co_pwritev(top, sector_num * BDRV_SECTOR_SIZE, > + remaining_sectors * BDRV_SECTOR_SIZE, qiov, 0); > return replication_return_value(s, ret); > } > > @@ -306,7 +307,8 @@ static coroutine_fn int > replication_co_writev(BlockDriverState *bs, > qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count); > > target = ret ? top : base; > -ret = bdrv_co_writev(target, sector_num, n, &hd_qiov); > +ret = bdrv_co_pwritev(target, sector_num * BDRV_SECTOR_SIZE, > + n * BDRV_SECTOR_SIZE, &hd_qiov, 0); > if (ret < 0) { > goto out1; > } > -- > 2.14.3 > >
[Qemu-devel] vIOMMU Posted-interrupt implementation - atomic operation?
Hi, I'm implementing Posted-interrupt functionality in vIOMMU. According to Vt-d spec 5.2.3, IOMMU performs a coherent atomic read-modify-write operation of the posted-interrupt descriptor. I wonder how can we achieve this considering the guest can modify the same posted-interrupt descriptor anytime. Is there any existing mechanism that I can use in QEMU? Thanks, Jintack
Re: [Qemu-devel] [PATCH v7 2/5] migration: use bitmap_mutex in migration_bitmap_clear_dirty
On Tue, Apr 24, 2018 at 02:13:45PM +0800, Wei Wang wrote: > The bitmap mutex is used to synchronize threads to update the dirty > bitmap and the migration_dirty_pages counter. This patch makes > migration_bitmap_clear_dirty update the bitmap and counter under the > mutex. > > Signed-off-by: Wei Wang > CC: Dr. David Alan Gilbert > CC: Juan Quintela > CC: Michael S. Tsirkin > --- > migration/ram.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/migration/ram.c b/migration/ram.c > index 0e90efa..9a72b1a 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -795,11 +795,14 @@ static inline bool > migration_bitmap_clear_dirty(RAMState *rs, > { > bool ret; > > +qemu_mutex_lock(&rs->bitmap_mutex); > ret = test_and_clear_bit(page, rb->bmap); > > if (ret) { > rs->migration_dirty_pages--; > } > +qemu_mutex_unlock(&rs->bitmap_mutex); > + > return ret; > } > > -- > 1.8.3.1 > > Do we need the lock after all? I see that we introduced this lock due to device hotplug at dd63169766 ("migration: extend migration_bitmap", 2015-07-07), however now we actually don't allow that to happen any more after commit b06424de62 ("migration: Disable hotplug/unplug during migration", 2017-04-21). I'm not sure whether it means that the lock is not needed now. If so, this patch seems to be unecessary. Regards, -- Peter Xu
Re: [Qemu-devel] [PATCH 2/3] pc-bios/s390-ccw/net: Add support for pxelinux-style config files
On 31.05.2018 23:25, Farhan Ali wrote: > > > On 05/30/2018 05:16 AM, Thomas Huth wrote: >> Since it is quite cumbersome to manually create a combined kernel with >> initrd image for network booting, we now support loading via pxelinux >> configuration files, too. In these files, the kernel, initrd and command >> line parameters can be specified seperately, and the firmware then takes >> care of glueing everything together in memory after the files have been >> downloaded. See this URL for details about the config file layout: >> https://www.syslinux.org/wiki/index.php?title=PXELINUX >> >> The user can either specify a config file directly as bootfile via DHCP >> (but in this case, the file has to start either with "default" or a "#" >> comment so we can distinguish it from binary kernels), or a folder (i.e. >> the bootfile name must end with "/") where the firmware should look for >> the typical pxelinux.cfg file names, e.g. based on MAC or IP address. >> We also support the pxelinux.cfg DHCP options 209 and 210 from RFC 5071. >> >> Signed-off-by: Thomas Huth >> --- >> pc-bios/s390-ccw/netboot.mak | 7 ++-- >> pc-bios/s390-ccw/netmain.c | 79 >> +++- >> 2 files changed, 82 insertions(+), 4 deletions(-) >> >> diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak >> index a73be36..8af0cfd 100644 >> --- a/pc-bios/s390-ccw/netboot.mak >> +++ b/pc-bios/s390-ccw/netboot.mak >> @@ -25,8 +25,9 @@ CTYPE_OBJS = isdigit.o isxdigit.o toupper.o >> %.o : $(SLOF_DIR)/lib/libc/ctype/%.c >> $(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@ >> $<,"CC","$(TARGET_DIR)$@") >> -STRING_OBJS = strcat.o strchr.o strcmp.o strcpy.o strlen.o >> strncmp.o strncpy.o \ >> - strstr.o memset.o memcpy.o memmove.o memcmp.o >> +STRING_OBJS = strcat.o strchr.o strrchr.o strcpy.o strlen.o strncpy.o \ >> + strcmp.o strncmp.o strcasecmp.o strncasecmp.o strstr.o \ >> + memset.o memcpy.o memmove.o memcmp.o >> %.o : $(SLOF_DIR)/lib/libc/string/%.c >> $(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@ >> $<,"CC","$(TARGET_DIR)$@") >> @@ -50,7 +51,7 @@ libc.a: $(LIBCOBJS) >> # libnet files: >> LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o >> bootp.o \ >> - dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o >> + dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o >> LIBNETCFLAGS := $(QEMU_CFLAGS) -DDHCPARCH=0x1F $(LIBC_INC) >> $(LIBNET_INC) >> %.o : $(SLOF_DIR)/lib/libnet/%.c >> diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c >> index 7533cf7..e84bb2b 100644 >> --- a/pc-bios/s390-ccw/netmain.c >> +++ b/pc-bios/s390-ccw/netmain.c >> @@ -30,6 +30,7 @@ >> #include >> #include >> #include >> +#include >> #include "s390-ccw.h" >> #include "virtio.h" >> @@ -41,12 +42,14 @@ extern char _start[]; >> #define KERNEL_ADDR ((void *)0L) >> #define KERNEL_MAX_SIZE ((long)_start) >> +#define ARCH_COMMAND_LINE_SIZE 896 /* Taken from Linux >> kernel */ >> char stack[PAGE_SIZE * 8] __attribute__((aligned(PAGE_SIZE))); >> IplParameterBlock iplb __attribute__((aligned(PAGE_SIZE))); >> static char cfgbuf[2048]; >> static SubChannelId net_schid = { .one = 1 }; >> +static uint8_t mac[6]; >> static uint64_t dest_timer; >> static uint64_t get_timer_ms(void) >> @@ -158,7 +161,6 @@ static int tftp_load(filename_ip_t *fnip, void >> *buffer, int len) >> static int net_init(filename_ip_t *fn_ip) >> { >> - uint8_t mac[6]; >> int rc; >> memset(fn_ip, 0, sizeof(filename_ip_t)); >> @@ -233,6 +235,66 @@ static void net_release(filename_ip_t *fn_ip) >> } >> /** >> + * Load a kernel with initrd (i.e. with the information that we've >> got from >> + * a pxelinux.cfg config file) >> + */ >> +static int load_kernel_with_initrd(filename_ip_t *fn_ip, >> + struct pl_cfg_entry *entry) >> +{ >> + int rc; >> + >> + printf("Loading pxelinux.cfg entry '%s'\n", entry->label); >> + >> + if (!entry->kernel) { >> + printf("Kernel entry is missing!\n"); >> + return -1; >> + } >> + >> + strncpy(fn_ip->filename, entry->kernel, sizeof(fn_ip->filename)); >> + rc = tftp_load(fn_ip, KERNEL_ADDR, KERNEL_MAX_SIZE); >> + if (rc < 0) { >> + return rc; >> + } >> + >> + if (entry->initrd) { >> + uint64_t iaddr = (rc + 0xfff) & ~0xfffUL; >> + >> + strncpy(fn_ip->filename, entry->initrd, >> sizeof(fn_ip->filename)); >> + rc = tftp_load(fn_ip, (void *)iaddr, KERNEL_MAX_SIZE - iaddr); >> + if (rc < 0) { >> + return rc; >> + } >> + /* Patch location and size: */ >> + *(uint64_t *)0x10408 = iaddr; >> + *(uint64_t *)0x10410 = rc; >> + rc += iaddr; >> + } >> + >> + if (entry->append) { >> + strncpy((char *)0x10480, entry->append, ARCH_COMMAND_LINE_SIZE); >> + } >> + >> + return rc;
Re: [Qemu-devel] [PATCH RFC] qemu-error: introduce error_report_once
On Thu, May 31, 2018 at 12:03:17PM +0100, Stefan Hajnoczi wrote: > On Tue, May 15, 2018 at 05:13:56PM +0800, Peter Xu wrote: > > I stole the printk_once() macro. > > > > I always wanted to be able to print some error directly if there is a > > buffer to dump, however we can't use error_report() really quite often > > when there can be any DDOS attack. To avoid that, we can introduce a > > print-once function for it. > > I like the idea. It solves the problem of guest-triggerable error > messages that we should know about for troubleshooting but can't due to > DoS. > > Stefan Thanks for the positive feedback. Please feel free to have a look on the latest version: Subject: [PATCH v4 0/2] error-report: introduce {error|warn}_report_once Message-Id: <20180524044454.11792-1-pet...@redhat.com> Regards, -- Peter Xu
Re: [Qemu-devel] [PATCH v7 4/5] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
On 06/01/2018 01:42 AM, Michael S. Tsirkin wrote: On Thu, May 31, 2018 at 10:27:00AM +0800, Wei Wang wrote: On 05/30/2018 08:47 PM, Michael S. Tsirkin wrote: On Wed, May 30, 2018 at 05:12:09PM +0800, Wei Wang wrote: On 05/29/2018 11:24 PM, Michael S. Tsirkin wrote: On Tue, Apr 24, 2018 at 02:13:47PM +0800, Wei Wang wrote: +/* + * Balloon will report pages which were free at the time of this call. As the + * reporting happens asynchronously, dirty bit logging must be enabled before + * this call is made. + */ +void balloon_free_page_start(void) +{ +balloon_free_page_start_fn(balloon_opaque); +} Please create notifier support, not a single global. OK. The start is called at the end of bitmap_sync, and the stop is called at the beginning of bitmap_sync. In this case, we will need to add two migration states, MIGRATION_STATUS_BEFORE_BITMAP_SYNC and MIGRATION_STATUS_AFTER_BITMAP_SYNC, right? If that's the way you do it, you need to ask migration guys, not me. Yeah, I know.. thanks for the virtio part. + +static bool virtio_balloon_free_page_support(void *opaque) +{ +VirtIOBalloon *s = opaque; +VirtIODevice *vdev = VIRTIO_DEVICE(s); + +return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT); or if poison is negotiated. Will make it return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT) && !virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON) I mean the reverse: virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT) || virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON) If poison has been negotiated you must migrate the guest supplied value even if you don't use it for hints. Just a little confused with the logic. Writing it that way means that we are taking this possibility "virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)=fasle, virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)=true" into account, and let the support function return true when F_FREE_PAGE_HINT isn't supported. All I am saying is that in this configuration, you must migrate the poison value programmed by guest even if you do not yet use it without VIRTIO_BALLOON_F_FREE_PAGE_HINT. Right now you have a section: +.needed = virtio_balloon_free_page_support, which includes the poison value. So if guest migrates after writing the poison value, it's lost. Not nice. If guest doesn't support F_FREE_PAGE_HINT, it doesn't support the free page reporting (even the free page vq). I'm not sure why we tell the migration thread that the free page reporting feature is supported via this support function. If the support function simply returns false when F_FREE_PAGE_HINT isn't negotiated, the legacy migration already migrates the poisoned pages (not skipped, but may be compressed). I think it would be better to simply use the original "return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)" here. So maybe you should put the poison value in a separate section then. Yes, that looks good to me, thanks. Best, Wei
Re: [Qemu-devel] [PATCH v2 7/8] vhdx: Switch to byte-based calls
On Thu, May 31, 2018 at 03:50:45PM -0500, Eric Blake wrote: > We are gradually moving away from sector-based interfaces, towards > byte-based. Make the change for the last few sector-based calls > into the block layer from the vhdx driver. > > Ideally, the vhdx driver should switch to doing everything > byte-based, but that's a more invasive change that requires a > bit more auditing. > > Signed-off-by: Eric Blake Reviewed-by: Jeff Cody > --- > block/vhdx.c | 12 ++-- > 1 file changed, 6 insertions(+), 6 deletions(-) > > diff --git a/block/vhdx.c b/block/vhdx.c > index 0b1e21c7501..295d3276120 100644 > --- a/block/vhdx.c > +++ b/block/vhdx.c > @@ -1126,9 +1126,9 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState > *bs, int64_t sector_num, > break; > case PAYLOAD_BLOCK_FULLY_PRESENT: > qemu_co_mutex_unlock(&s->lock); > -ret = bdrv_co_readv(bs->file, > -sinfo.file_offset >> BDRV_SECTOR_BITS, > -sinfo.sectors_avail, &hd_qiov); > +ret = bdrv_co_preadv(bs->file, sinfo.file_offset, > + sinfo.sectors_avail * BDRV_SECTOR_SIZE, > + &hd_qiov, 0); > qemu_co_mutex_lock(&s->lock); > if (ret < 0) { > goto exit; > @@ -1348,9 +1348,9 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState > *bs, int64_t sector_num, > } > /* block exists, so we can just overwrite it */ > qemu_co_mutex_unlock(&s->lock); > -ret = bdrv_co_writev(bs->file, > -sinfo.file_offset >> BDRV_SECTOR_BITS, > -sectors_to_write, &hd_qiov); > +ret = bdrv_co_pwritev(bs->file, sinfo.file_offset, > + sectors_to_write * BDRV_SECTOR_SIZE, > + &hd_qiov, 0); > qemu_co_mutex_lock(&s->lock); > if (ret < 0) { > goto error_bat_restore; > -- > 2.14.3 >
[Qemu-devel] [PATCH] docker: Update fedora image to 28
Signed-off-by: Fam Zheng --- tests/docker/dockerfiles/fedora.docker | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index b706f42405..65d7761cf5 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -1,4 +1,4 @@ -FROM fedora:27 +FROM fedora:28 ENV PACKAGES \ ccache gettext git tar PyYAML sparse flex bison python3 bzip2 hostname \ gcc gcc-c++ llvm clang make perl which bc findutils glib2-devel \ -- 2.14.3
Re: [Qemu-devel] [PATCH v7 00/10] qemu-img convert with copy offloading
On Wed, 05/30 17:06, Stefan Hajnoczi wrote: > On Tue, May 29, 2018 at 01:59:49PM +0800, Fam Zheng wrote: > > v7: Fix qcow2. > > > > v6: Pick up rev-by from Stefan and Eric. > > Tweak patch 2 commit message. > > > > v5: - Fix raw offset/bytes check for read. [Eric] > > - Fix qcow2_handle_l2meta. [Stefan] > > - Add coroutine_fn whereever appropriate. [Stefan] > > > > v4: - Fix raw offset and size. [Eric] > > - iscsi: Drop unnecessary return values and variables in favor of > > constants. [Stefan] > > - qcow2: Handle small backing case. [Stefan] > > - file-posix: Translate ENOSYS to ENOTSUP. [Stefan] > > - API documentation and commit message. [Stefan] > > - Add rev-by to patches 3, 5 - 10. [Stefan, Eric] > > > > This series introduces block layer API for copy offloading and makes use of > > it > > in qemu-img convert. > > > > For now we implemented the operation in local file protocol with > > copy_file_range(2). Besides that it's possible to add similar to iscsi, nfs > > and potentially more. > > > > As far as its usage goes, in addition to qemu-img convert, we can emulate > > offloading in scsi-disk (handle EXTENDED COPY command), and use the API in > > block jobs too. > > Fails to compile on Fedora 28. > > Oops, I guess my glibc is too new. This type of bug doesn't happen very > often :D :D :D. > > block/file-posix.c:1452:14: error: static declaration of ‘copy_file_range’ > follows non-static declaration > static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, > ^~~ > In file included from /home/stefanha/qemu/include/qemu/osdep.h:75, > from block/file-posix.c:25: > /usr/include/unistd.h:1107:9: note: previous declaration of ‘copy_file_range’ > was here > ssize_t copy_file_range (int __infd, __off64_t *__pinoff, > ^~~ It means it's time to update our Fedora docker image! Will fix this by adding a configure test. Fam
Re: [Qemu-devel] [Qemu-block] [PATCH v2 1/8] parallels: Switch to byte-based calls
On Thu, May 31, 2018 at 03:50:39PM -0500, Eric Blake wrote: > We are gradually moving away from sector-based interfaces, towards > byte-based. Make the change for the last few sector-based calls > into the block layer from the parallels driver. > > Ideally, the parallels driver should switch to doing everything > byte-based, but that's a more invasive change that requires a > bit more auditing. > > Signed-off-by: Eric Blake > Reviewed-by: Stefan Hajnoczi > Reviewed-by: Denis V. Lunev Reviewed-by: Jeff Cody > --- > block/parallels.c | 16 ++-- > 1 file changed, 10 insertions(+), 6 deletions(-) > > diff --git a/block/parallels.c b/block/parallels.c > index 6e9c37f44e1..a761c896d35 100644 > --- a/block/parallels.c > +++ b/block/parallels.c > @@ -226,14 +226,15 @@ static int64_t allocate_clusters(BlockDriverState *bs, > int64_t sector_num, > }; > qemu_iovec_init_external(&qiov, &iov, 1); > > -ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors, > -&qiov); > +ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, > + nb_cow_bytes, &qiov, 0); > if (ret < 0) { > qemu_vfree(iov.iov_base); > return ret; > } > > -ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov); > +ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE, > + nb_cow_bytes, &qiov, 0); > qemu_vfree(iov.iov_base); > if (ret < 0) { > return ret; > @@ -339,7 +340,8 @@ static coroutine_fn int > parallels_co_writev(BlockDriverState *bs, > qemu_iovec_reset(&hd_qiov); > qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); > > -ret = bdrv_co_writev(bs->file, position, n, &hd_qiov); > +ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes, > + &hd_qiov, 0); > if (ret < 0) { > break; > } > @@ -378,7 +380,8 @@ static coroutine_fn int > parallels_co_readv(BlockDriverState *bs, > > if (position < 0) { > if (bs->backing) { > -ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); > +ret = bdrv_co_preadv(bs->backing, sector_num * > BDRV_SECTOR_SIZE, > + nbytes, &hd_qiov, 0); > if (ret < 0) { > break; > } > @@ -386,7 +389,8 @@ static coroutine_fn int > parallels_co_readv(BlockDriverState *bs, > qemu_iovec_memset(&hd_qiov, 0, 0, nbytes); > } > } else { > -ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); > +ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, > nbytes, > + &hd_qiov, 0); > if (ret < 0) { > break; > } > -- > 2.14.3 > >
[Qemu-devel] [PATCH v2 16/20] 9p: darwin: Compatibility for f/l*xattr
On darwin `fgetxattr` takes two extra optional arguments, and the l* variants are not defined (in favor of an extra flag to the regular variants. Signed-off-by: Keno Fischer --- Changes from v1: New patch, qemu_fgetxattr had previously been moved to 9p-util as fgetxattr_follow. The remaining functions are required by the proxy-helper. Makefile| 6 ++ fsdev/virtfs-proxy-helper.c | 9 + hw/9pfs/9p-local.c | 12 hw/9pfs/9p-util.h | 17 + 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 6d588d1..dac6efd 100644 --- a/Makefile +++ b/Makefile @@ -544,7 +544,13 @@ qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS) qemu-keymap$(EXESUF): qemu-keymap.o ui/input-keymap.o $(COMMON_LDADDS) fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS) +ifdef CONFIG_DARWIN +fsdev/virtfs-proxy-helper$(EXESUF): hw/9pfs/9p-util-darwin.o +endif +ifdef CONFIG_LINUX +fsdev/virtfs-proxy-helper$(EXESUF): hw/9pfs/9p-util-linux.o fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap +endif scsi/qemu-pr-helper$(EXESUF): scsi/qemu-pr-helper.o scsi/utils.o $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) $(COMMON_LDADDS) ifdef CONFIG_MPATH diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index 3bc1269..a26f8b8 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -28,6 +28,7 @@ #include "qemu/statfs.h" #include "9p-iov-marshal.h" #include "hw/9pfs/9p-proxy.h" +#include "hw/9pfs/9p-util.h" #include "fsdev/9p-iov-marshal.h" #define PROGNAME "virtfs-proxy-helper" @@ -459,7 +460,7 @@ static int do_getxattr(int type, struct iovec *iovec, struct iovec *out_iovec) v9fs_string_init(&name); retval = proxy_unmarshal(iovec, offset, "s", &name); if (retval > 0) { -retval = lgetxattr(path.data, name.data, xattr.data, size); +retval = qemu_lgetxattr(path.data, name.data, xattr.data, size); if (retval < 0) { retval = -errno; } else { @@ -469,7 +470,7 @@ static int do_getxattr(int type, struct iovec *iovec, struct iovec *out_iovec) v9fs_string_free(&name); break; case T_LLISTXATTR: -retval = llistxattr(path.data, xattr.data, size); +retval = qemu_llistxattr(path.data, xattr.data, size); if (retval < 0) { retval = -errno; } else { @@ -1000,7 +1001,7 @@ static int process_requests(int sock) retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sssdd", &path, &name, &value, &size, &flags); if (retval > 0) { -retval = lsetxattr(path.data, +retval = qemu_lsetxattr(path.data, name.data, value.data, size, flags); if (retval < 0) { retval = -errno; @@ -1016,7 +1017,7 @@ static int process_requests(int sock) retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "ss", &path, &name); if (retval > 0) { -retval = lremovexattr(path.data, name.data); +retval = qemu_lremovexattr(path.data, name.data); if (retval < 0) { retval = -errno; } diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 1f0b1b0..7830526 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -776,16 +776,20 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, mode_t tmp_mode; dev_t tmp_dev; -if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { +if (qemu_fgetxattr(fd, "user.virtfs.uid", + &tmp_uid, sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } -if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { +if (qemu_fgetxattr(fd, "user.virtfs.gid", + &tmp_gid, sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } -if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, sizeof(mode_t)) > 0) { +if (qemu_fgetxattr(fd, "user.virtfs.mode", + &tmp_mode, sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } -if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { +if (qemu_fgetxattr(fd, "user.virtfs.rdev", + &tmp_dev, sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index 79ed6b2..50a03c7 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -19,6 +19,23 @@ #define O_PATH_9P_UTIL 0 #endif +#ifdef
[Qemu-devel] [PATCH v2 20/20] 9p: darwin: configure: Allow VirtFS on Darwin
Signed-off-by: Keno Fischer --- Changes from v1: Now builds the proxy-helper on Darwin. Makefile.objs | 1 + configure | 22 +++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Makefile.objs b/Makefile.objs index c6c3554..a2245c9 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -104,6 +104,7 @@ common-obj-$(CONFIG_WIN32) += os-win32.o common-obj-$(CONFIG_POSIX) += os-posix.o common-obj-$(CONFIG_LINUX) += fsdev/ +common-obj-$(CONFIG_DARWIN) += fsdev/ common-obj-y += migration/ diff --git a/configure b/configure index a6a4616..4808459 100755 --- a/configure +++ b/configure @@ -5535,16 +5535,28 @@ if test "$want_tools" = "yes" ; then fi fi if test "$softmmu" = yes ; then - if test "$linux" = yes; then -if test "$virtfs" != no && test "$cap" = yes && test "$attr" = yes ; then + if test "$virtfs" != no; then +if test "$linux" = yes; then + if test "$cap" = yes && test "$attr" = yes ; then +virtfs=yes +tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)" + else +if test "$virtfs" = yes; then + error_exit "VirtFS requires libcap devel and libattr devel under Linux" +fi +virtfs=no + fi +elif test "$darwin" = yes; then virtfs=yes tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)" else if test "$virtfs" = yes; then -error_exit "VirtFS requires libcap devel and libattr devel" +error_exit "VirtFS is supported only on Linux and Darwin" fi virtfs=no fi + fi + if test "$linux" = yes; then if test "$mpath" != no && test "$mpathpersist" = yes ; then mpath=yes else @@ -,10 +5567,6 @@ if test "$softmmu" = yes ; then fi tools="$tools scsi/qemu-pr-helper\$(EXESUF)" else -if test "$virtfs" = yes; then - error_exit "VirtFS is supported only on Linux" -fi -virtfs=no if test "$mpath" = yes; then error_exit "Multipath is supported only on Linux" fi -- 2.8.1
[Qemu-devel] [PATCH v2 18/20] 9p: darwin: Implement compatibility for mknodat
Darwin does not support mknodat. However, to avoid race conditions with later setting the permissions, we must avoid using mknod on the full path instead. We could try to fchdir, but that would cause problems if multiple threads try to call mknodat at the same time. However, luckily there is a solution: Darwin as an (unexposed in the C library) system call that sets the cwd for the current thread only. This should suffice to use mknod safely. Signed-off-by: Keno Fischer --- Changes from v1: New patch. The previous series marked mknodat unsupported. hw/9pfs/9p-local.c | 5 +++-- hw/9pfs/9p-util-darwin.c | 25 + hw/9pfs/9p-util-linux.c | 5 + hw/9pfs/9p-util.h| 2 ++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 47e8580..c7a2b08 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -668,7 +668,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { -err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); +err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); if (err == -1) { goto out; } @@ -683,7 +683,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, } } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || fs_ctx->export_flags & V9FS_SM_NONE) { -err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); +err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } @@ -696,6 +696,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, err_end: unlinkat_preserve_errno(dirfd, name, 0); + out: close_preserve_errno(dirfd); return err; diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c index ac414bc..49fe7d3 100644 --- a/hw/9pfs/9p-util-darwin.c +++ b/hw/9pfs/9p-util-darwin.c @@ -158,3 +158,28 @@ done: close_preserve_errno(fd); return ret; } + +#ifndef SYS___pthread_fchdir +# define SYS___pthread_fchdir 349 +#endif + +static int fchdir_thread_local(int fd) +{ +return syscall(SYS___pthread_fchdir, fd); +} + +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev) +{ +int preserved_errno, err; +if (fchdir_thread_local(dirfd) < 0) { +return -1; +} +err = mknod(filename, mode, dev); +preserved_errno = errno; +/* Stop using the thread-local cwd */ +fchdir_thread_local(-1); +if (err < 0) { +errno = preserved_errno; +} +return err; +} diff --git a/hw/9pfs/9p-util-linux.c b/hw/9pfs/9p-util-linux.c index 3902378..06399c5 100644 --- a/hw/9pfs/9p-util-linux.c +++ b/hw/9pfs/9p-util-linux.c @@ -63,3 +63,8 @@ int utimensat_nofollow(int dirfd, const char *filename, { return utimensat(dirfd, filename, times, AT_SYMLINK_NOFOLLOW); } + +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev) +{ +return mknodat(dirfd, filename, mode, dev); +} diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index b1dc08a..127564d 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -90,4 +90,6 @@ ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, int utimensat_nofollow(int dirfd, const char *filename, const struct timespec times[2]); +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev); + #endif -- 2.8.1
[Qemu-devel] [PATCH v6] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR
While we skip the GIC_INTERNAL irqs, we don't change the register offset accordingly. This will overlap the GICR registers value and leave the last GIC_INTERNAL irq's registers out of update. Fix this by skipping the registers banked by GICR. Also for migration compatibility if the migration source (old version qemu) doesn't send gicd_no_migration_shift_bug = 1 to destination, then we shift the data of PPI to get the right data for SPI. Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920 Cc: qemu-sta...@nongnu.org Reviewed-by: Eric Auger Reviewed-by: Peter Maydell Signed-off-by: Shannon Zhao --- V6: Fix typos and keep same with offset for clroffset --- hw/intc/arm_gicv3_common.c | 79 ++ hw/intc/arm_gicv3_kvm.c| 38 ++ include/hw/intc/arm_gicv3_common.h | 1 + 3 files changed, 118 insertions(+) diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 7b54d52..864b7c6 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -27,6 +27,7 @@ #include "hw/intc/arm_gicv3_common.h" #include "gicv3_internal.h" #include "hw/arm/linux-boot-if.h" +#include "sysemu/kvm.h" static int gicv3_pre_save(void *opaque) { @@ -141,6 +142,79 @@ static const VMStateDescription vmstate_gicv3_cpu = { } }; +static int gicv3_gicd_no_migration_shift_bug_pre_load(void *opaque) +{ +GICv3State *cs = opaque; + + /* +* The gicd_no_migration_shift_bug flag is used for migration compatibility +* for old version QEMU which may have the GICD bmp shift bug under KVM mode. +* Strictly, what we want to know is whether the migration source is using +* KVM. Since we don't have any way to determine that, we look at whether the +* destination is using KVM; this is close enough because for the older QEMU +* versions with this bug KVM -> TCG migration didn't work anyway. If the +* source is a newer QEMU without this bug it will transmit the migration +* subsection which sets the flag to true; otherwise it will remain set to +* the value we select here. +*/ +if (kvm_enabled()) { +cs->gicd_no_migration_shift_bug = false; +} + +return 0; +} + +static int gicv3_gicd_no_migration_shift_bug_post_load(void *opaque, + int version_id) +{ +GICv3State *cs = opaque; + +if (cs->gicd_no_migration_shift_bug) { +return 0; +} + +/* Older versions of QEMU had a bug in the handling of state save/restore + * to the KVM GICv3: they got the offset in the bitmap arrays wrong, + * so that instead of the data for external interrupts 32 and up + * starting at bit position 32 in the bitmap, it started at bit + * position 64. If we're receiving data from a QEMU with that bug, + * we must move the data down into the right place. + */ +memmove(cs->group, (uint8_t *)cs->group + GIC_INTERNAL / 8, +sizeof(cs->group) - GIC_INTERNAL / 8); +memmove(cs->grpmod, (uint8_t *)cs->grpmod + GIC_INTERNAL / 8, +sizeof(cs->grpmod) - GIC_INTERNAL / 8); +memmove(cs->enabled, (uint8_t *)cs->enabled + GIC_INTERNAL / 8, +sizeof(cs->enabled) - GIC_INTERNAL / 8); +memmove(cs->pending, (uint8_t *)cs->pending + GIC_INTERNAL / 8, +sizeof(cs->pending) - GIC_INTERNAL / 8); +memmove(cs->active, (uint8_t *)cs->active + GIC_INTERNAL / 8, +sizeof(cs->active) - GIC_INTERNAL / 8); +memmove(cs->edge_trigger, (uint8_t *)cs->edge_trigger + GIC_INTERNAL / 8, +sizeof(cs->edge_trigger) - GIC_INTERNAL / 8); + +/* + * While this new version QEMU doesn't have this kind of bug as we fix it, + * so it needs to set the flag to true to indicate that and it's necessary + * for next migration to work from this new version QEMU. + */ +cs->gicd_no_migration_shift_bug = true; + +return 0; +} + +const VMStateDescription vmstate_gicv3_gicd_no_migration_shift_bug = { +.name = "arm_gicv3/gicd_no_migration_shift_bug", +.version_id = 1, +.minimum_version_id = 1, +.pre_load = gicv3_gicd_no_migration_shift_bug_pre_load, +.post_load = gicv3_gicd_no_migration_shift_bug_post_load, +.fields = (VMStateField[]) { +VMSTATE_BOOL(gicd_no_migration_shift_bug, GICv3State), +VMSTATE_END_OF_LIST() +} +}; + static const VMStateDescription vmstate_gicv3 = { .name = "arm_gicv3", .version_id = 1, @@ -165,6 +239,10 @@ static const VMStateDescription vmstate_gicv3 = { VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu, vmstate_gicv3_cpu, GICv3CPUState), VMSTATE_END_OF_LIST() +}, +.subsections = (const VMStateDescription * []) { +&vmstate_gicv3_gicd_no_migration_shift_bug, +NULL } }; @@ -364,6 +442,7 @@ static void arm_gicv3_common_reset(DeviceState *dev) gicv3_gicd_group_
[Qemu-devel] [PATCH v2 19/20] 9p: darwin: virtfs-proxy: Implement setuid code for darwin
Darwin does not have linux capabilities, so make that code linux-only. Darwin also does not have setresuid/gid. The correct way to temporarily drop capabilities is to call seteuid/gid. Also factor out the code that acquires acquire_dac_override into a separate function in the linux implementation. I had originally done this when I thought it made sense to have only one `setugid` function, but I retained this because it seems clearer this way. Signed-off-by: Keno Fischer --- Changes from v1: New patch. fsdev/virtfs-proxy-helper.c | 200 +++- 1 file changed, 125 insertions(+), 75 deletions(-) diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index d8dd3f5..6baf2a6 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -82,6 +82,7 @@ static void do_perror(const char *string) } } +#ifdef CONFIG_LINUX static int do_cap_set(cap_value_t *cap_value, int size, int reset) { cap_t caps; @@ -121,6 +122,85 @@ error: return -1; } +static int acquire_dac_override(void) +{ +cap_value_t cap_list[] = { +CAP_DAC_OVERRIDE, +}; +return do_cap_set(cap_list, ARRAY_SIZE(cap_list), 0); +} + +/* + * from man 7 capabilities, section + * Effect of User ID Changes on Capabilities: + * If the effective user ID is changed from nonzero to 0, then the permitted + * set is copied to the effective set. If the effective user ID is changed + * from 0 to nonzero, then all capabilities are are cleared from the effective + * set. + * + * The setfsuid/setfsgid man pages warn that changing the effective user ID may + * expose the program to unwanted signals, but this is not true anymore: for an + * unprivileged (without CAP_KILL) program to send a signal, the real or + * effective user ID of the sending process must equal the real or saved user + * ID of the target process. Even when dropping privileges, it is enough to + * keep the saved UID to a "privileged" value and virtfs-proxy-helper won't + * be exposed to signals. So just use setresuid/setresgid. + */ +static int setugid(int uid, int gid, int *suid, int *sgid) +{ +int retval; + +*suid = geteuid(); +*sgid = getegid(); + +if (setresgid(-1, gid, *sgid) == -1) { +retval = -errno; +goto err_out; +} + +if (setresuid(-1, uid, *suid) == -1) { +retval = -errno; +goto err_sgid; +} + +if (uid != 0 || gid != 0) { +/* +* We still need DAC_OVERRIDE because we don't change +* supplementary group ids, and hence may be subjected DAC rules +*/ +if (acquire_dac_override() < 0) { +retval = -errno; +goto err_suid; +} +} +return 0; + +err_suid: +if (setresuid(-1, *suid, *suid) == -1) { +abort(); +} +err_sgid: +if (setresgid(-1, *sgid, *sgid) == -1) { +abort(); +} +err_out: +return retval; +} + +/* + * This is used to reset the ugid back with the saved values + * There is nothing much we can do checking error values here. + */ +static void resetugid(int suid, int sgid) +{ +if (setresgid(-1, sgid, sgid) == -1) { +abort(); +} +if (setresuid(-1, suid, suid) == -1) { +abort(); +} +} + static int init_capabilities(void) { /* helper needs following capabilities only */ @@ -135,6 +215,51 @@ static int init_capabilities(void) }; return do_cap_set(cap_list, ARRAY_SIZE(cap_list), 1); } +#else +static int setugid(int uid, int gid, int *suid, int *sgid) +{ +int retval; + +*suid = geteuid(); +*sgid = getegid(); + +if (setegid(gid) == -1) { +retval = -errno; +goto err_out; +} + +if (seteuid(uid) == -1) { +retval = -errno; +goto err_sgid; +} + +err_sgid: +if (setgid(*sgid) == -1) { +abort(); +} +err_out: +return retval; +} + +/* + * This is used to reset the ugid back with the saved values + * There is nothing much we can do checking error values here. + */ +static void resetugid(int suid, int sgid) +{ +if (setegid(sgid) == -1) { +abort(); +} +if (seteuid(suid) == -1) { +abort(); +} +} + +static int init_capabilities(void) +{ +return 0; +} +#endif static int socket_read(int sockfd, void *buff, ssize_t size) { @@ -279,81 +404,6 @@ static int send_status(int sockfd, struct iovec *iovec, int status) } /* - * from man 7 capabilities, section - * Effect of User ID Changes on Capabilities: - * If the effective user ID is changed from nonzero to 0, then the permitted - * set is copied to the effective set. If the effective user ID is changed - * from 0 to nonzero, then all capabilities are are cleared from the effective - * set. - * - * The setfsuid/setfsgid man pages warn that changing the effective user ID may - * expose the program to unwanted signals, but this is not true anymore: for an - * unprivileged (without CAP_KILL) program to send a signa
[Qemu-devel] [PATCH v2 12/20] 9p: darwin: Explicitly cast comparisons of mode_t with -1
Comparisons of mode_t with -1 require an explicit cast, since mode_t is unsigned on Darwin. Signed-off-by: Keno Fischer --- Changes from v1: Split from strchrnul change hw/9pfs/9p-local.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 6222891..1f0b1b0 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -310,7 +310,7 @@ update_map_file: if (credp->fc_gid != -1) { gid = credp->fc_gid; } -if (credp->fc_mode != -1) { +if (credp->fc_mode != (mode_t)-1) { mode = credp->fc_mode; } if (credp->fc_rdev != -1) { @@ -416,7 +416,7 @@ static int local_set_xattrat(int dirfd, const char *path, FsCred *credp) return err; } } -if (credp->fc_mode != -1) { +if (credp->fc_mode != (mode_t)-1) { uint32_t tmp_mode = cpu_to_le32(credp->fc_mode); err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0); -- 2.8.1
[Qemu-devel] [PATCH v2 15/20] 9p: darwin: *xattr_nofollow implementations
This implements the darwin equivalent of the functions that were moved to 9p-util(-linux) earlier in this series in the new 9p-util-darwin file. Signed-off-by: Keno Fischer --- Changes from v1: * New 9p-util-darwin.c rather than ifdefs in 9p-util.c * Drop incorrect AT_NOFOLLOW from the actual call hw/9pfs/9p-util-darwin.c | 64 hw/9pfs/Makefile.objs| 1 + 2 files changed, 65 insertions(+) create mode 100644 hw/9pfs/9p-util-darwin.c diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c new file mode 100644 index 000..cdb4c9e --- /dev/null +++ b/hw/9pfs/9p-util-darwin.c @@ -0,0 +1,64 @@ +/* + * 9p utilities (Darwin Implementation) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "9p-util.h" + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ +int ret; +int fd = openat_file(dirfd, filename, + O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0); +if (fd == -1) { +return -1; +} +ret = fgetxattr(fd, name, value, size, 0, 0); +close_preserve_errno(fd); +return ret; +} + +ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ +int ret; +int fd = openat_file(dirfd, filename, + O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0); +if (fd == -1) { +return -1; +} +ret = flistxattr(fd, list, size, 0); +close_preserve_errno(fd); +return ret; +} + +ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, +const char *name) +{ +int ret; +int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0); +if (fd == -1) { +return -1; +} +ret = fremovexattr(fd, name, 0); +close_preserve_errno(fd); +return ret; +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ +int ret; +int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0); +if (fd == -1) { +return -1; +} +ret = fsetxattr(fd, name, value, size, 0, flags); +close_preserve_errno(fd); +return ret; +} diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs index 083508f..24a8695 100644 --- a/hw/9pfs/Makefile.objs +++ b/hw/9pfs/Makefile.objs @@ -1,5 +1,6 @@ common-obj-y = 9p.o common-obj-$(CONFIG_LINUX) += 9p-util-linux.o +common-obj-$(CONFIG_DARWIN) += 9p-util-darwin.o common-obj-y += 9p-local.o 9p-xattr.o common-obj-y += 9p-xattr-user.o 9p-posix-acl.o common-obj-y += coth.o cofs.o codir.o cofile.o -- 2.8.1
[Qemu-devel] [PATCH v2 17/20] 9p: darwin: Provide a fallback implementation for utimensat
This function is new in Mac OS 10.13. Provide a fallback implementation when building against older SDKs. The complication in the definition comes having to separately handle the used SDK version and the target OS version. - If the SDK version is too low (__MAC_10_13 not defined), utimensat is not defined in the header, so we must not try to use it (doing so would error). - Otherwise, if the targetted OS version is at least 10.13, we know this function is available, so we can unconditionally call it. - Lastly, we check for the availability of the __builtin_available macro to potentially insert a dynamic check for this OS version. However, __builtin_available is only available with sufficiently recent versions of clang and while all Apple clang versions that ship with Xcode versions that support the 10.13 SDK support with builtin, we want to allow building with compilers other than Apple clang that may not support this builtin. Signed-off-by: Keno Fischer --- Changes from v1: * Correct calculation of tv_usec * Support UTIME_NOW/UTIME/OMIT * Now covers fsdev/virtfs-proxy-helper.c fsdev/virtfs-proxy-helper.c | 3 +- hw/9pfs/9p-local.c | 2 +- hw/9pfs/9p-util-darwin.c| 96 + hw/9pfs/9p-util-linux.c | 6 +++ hw/9pfs/9p-util.h | 8 hw/9pfs/9p.c| 1 + 6 files changed, 113 insertions(+), 3 deletions(-) diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index a26f8b8..d8dd3f5 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -957,8 +957,7 @@ static int process_requests(int sock) &spec[0].tv_sec, &spec[0].tv_nsec, &spec[1].tv_sec, &spec[1].tv_nsec); if (retval > 0) { -retval = utimensat(AT_FDCWD, path.data, spec, - AT_SYMLINK_NOFOLLOW); +retval = utimensat_nofollow(AT_FDCWD, path.data, spec); if (retval < 0) { retval = -errno; } diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 7830526..47e8580 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -1071,7 +1071,7 @@ static int local_utimensat(FsContext *s, V9fsPath *fs_path, goto out; } -ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW); +ret = utimensat_nofollow(dirfd, name, buf); close_preserve_errno(dirfd); out: g_free(dirpath); diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c index cdb4c9e..ac414bc 100644 --- a/hw/9pfs/9p-util-darwin.c +++ b/hw/9pfs/9p-util-darwin.c @@ -62,3 +62,99 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, close_preserve_errno(fd); return ret; } + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +static int update_times_from_stat(int fd, struct timespec times[2], + int update0, int update1) +{ +struct stat buf; +int ret = fstat(fd, &buf); +if (ret == -1) { +return ret; +} +if (update0) { +times[0] = buf.st_atimespec; +} +if (update1) { +times[1] = buf.st_mtimespec; +} +return 0; +} + +int utimensat_nofollow(int dirfd, const char *filename, + const struct timespec times_in[2]) +{ +int ret, fd; +int special0, special1; +struct timeval futimes_buf[2]; +struct timespec times[2]; +memcpy(times, times_in, 2 * sizeof(struct timespec)); + +/* Check whether we have an SDK version that defines utimensat */ +#if defined(__MAC_10_13) +# if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_13 +# define UTIMENSAT_AVAILABLE 1 +# elif __has_builtin(__builtin_available) +# define UTIMENSAT_AVAILABLE __builtin_available(macos 10.13, *) +# else +# define UTIMENSAT_AVAILABLE 0 +# endif +if (UTIMENSAT_AVAILABLE) { +return utimensat(dirfd, filename, times, AT_SYMLINK_NOFOLLOW); +} +#endif + +/* utimensat not available. Use futimes. */ +fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0); +if (fd == -1) { +return -1; +} + +special0 = times[0].tv_nsec == UTIME_OMIT; +special1 = times[1].tv_nsec == UTIME_OMIT; +if (special0 || special1) { +/* If both are set, nothing to do */ +if (special0 && special1) { +ret = 0; +goto done; +} + +ret = update_times_from_stat(fd, times, special0, special1); +if (ret < 0) { +goto done; +} +} + +special0 = times[0].tv_nsec == UTIME_NOW; +special1 = times[1].tv_nsec == UTIME_NOW; +if (special0 || special1) { +ret = futimes(fd, NULL); +if (ret < 0) { +goto done; +} + +/* If both are set, we are done */ +if (special0 && special1) { +ret = 0; +goto done; +} + +
[Qemu-devel] [PATCH v2 10/20] 9p: darwin: Handle struct stat(fs) differences
Signed-off-by: Keno Fischer --- Changes since v1: * Now also covers fsdev/virtfs-proxy-helper.c fsdev/virtfs-proxy-helper.c | 14 +++--- hw/9pfs/9p-proxy.c | 17 ++--- hw/9pfs/9p-synth.c | 2 ++ hw/9pfs/9p.c| 16 ++-- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index 94fb069..3bc1269 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -506,12 +506,15 @@ static void stat_to_prstat(ProxyStat *pr_stat, struct stat *stat) pr_stat->st_size = stat->st_size; pr_stat->st_blksize = stat->st_blksize; pr_stat->st_blocks = stat->st_blocks; +#ifdef CONFIG_DARWIN +pr_stat->st_atim_nsec = stat->st_atimespec.tv_nsec; +pr_stat->st_mtim_nsec = stat->st_mtimespec.tv_nsec; +pr_stat->st_ctim_nsec = stat->st_ctimespec.tv_nsec; +#else pr_stat->st_atim_sec = stat->st_atim.tv_sec; -pr_stat->st_atim_nsec = stat->st_atim.tv_nsec; pr_stat->st_mtim_sec = stat->st_mtim.tv_sec; -pr_stat->st_mtim_nsec = stat->st_mtim.tv_nsec; pr_stat->st_ctim_sec = stat->st_ctim.tv_sec; -pr_stat->st_ctim_nsec = stat->st_ctim.tv_nsec; +#endif } static void statfs_to_prstatfs(ProxyStatFS *pr_stfs, struct statfs *stfs) @@ -524,10 +527,15 @@ static void statfs_to_prstatfs(ProxyStatFS *pr_stfs, struct statfs *stfs) pr_stfs->f_bavail = stfs->f_bavail; pr_stfs->f_files = stfs->f_files; pr_stfs->f_ffree = stfs->f_ffree; +#ifdef CONFIG_DARWIN +pr_stfs->f_fsid[0] = stfs->f_fsid.val[0]; +pr_stfs->f_fsid[1] = stfs->f_fsid.val[1]; +#else pr_stfs->f_fsid[0] = stfs->f_fsid.__val[0]; pr_stfs->f_fsid[1] = stfs->f_fsid.__val[1]; pr_stfs->f_namelen = stfs->f_namelen; pr_stfs->f_frsize = stfs->f_frsize; +#endif } /* diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c index 47a94e0..8a2c174 100644 --- a/hw/9pfs/9p-proxy.c +++ b/hw/9pfs/9p-proxy.c @@ -117,10 +117,15 @@ static void prstatfs_to_statfs(struct statfs *stfs, ProxyStatFS *prstfs) stfs->f_bavail = prstfs->f_bavail; stfs->f_files = prstfs->f_files; stfs->f_ffree = prstfs->f_ffree; +#ifdef CONFIG_DARWIN +stfs->f_fsid.val[0] = prstfs->f_fsid[0] & 0xU; +stfs->f_fsid.val[1] = prstfs->f_fsid[1] >> 32 & 0xU; +#else stfs->f_fsid.__val[0] = prstfs->f_fsid[0] & 0xU; stfs->f_fsid.__val[1] = prstfs->f_fsid[1] >> 32 & 0xU; stfs->f_namelen = prstfs->f_namelen; stfs->f_frsize = prstfs->f_frsize; +#endif } /* Converts proxy_stat structure to VFS stat structure */ @@ -137,12 +142,18 @@ static void prstat_to_stat(struct stat *stbuf, ProxyStat *prstat) stbuf->st_size = prstat->st_size; stbuf->st_blksize = prstat->st_blksize; stbuf->st_blocks = prstat->st_blocks; - stbuf->st_atim.tv_sec = prstat->st_atim_sec; - stbuf->st_atim.tv_nsec = prstat->st_atim_nsec; + stbuf->st_atime = prstat->st_atim_sec; stbuf->st_mtime = prstat->st_mtim_sec; - stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec; stbuf->st_ctime = prstat->st_ctim_sec; +#ifdef CONFIG_DARWIN + stbuf->st_atimespec.tv_nsec = prstat->st_atim_nsec; + stbuf->st_mtimespec.tv_nsec = prstat->st_mtim_nsec; + stbuf->st_ctimespec.tv_nsec = prstat->st_ctim_nsec; +#else + stbuf->st_atim.tv_nsec = prstat->st_atim_nsec; + stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec; stbuf->st_ctim.tv_nsec = prstat->st_ctim_nsec; +#endif } /* diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index 54239c9..eb68b42 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -426,7 +426,9 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path, stbuf->f_bsize = 512; stbuf->f_blocks = 0; stbuf->f_files = synth_node_count; +#ifndef CONFIG_DARWIN stbuf->f_namelen = NAME_MAX; +#endif return 0; } diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index a757374..212f569 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -905,11 +905,17 @@ static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf, v9lstat->st_blksize = stbuf->st_blksize; v9lstat->st_blocks = stbuf->st_blocks; v9lstat->st_atime_sec = stbuf->st_atime; -v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; v9lstat->st_mtime_sec = stbuf->st_mtime; -v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; v9lstat->st_ctime_sec = stbuf->st_ctime; +#ifdef CONFIG_DARWIN +v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec; +v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec; +v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec; +#else +v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; +v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec; +#endif /* Currently we only support BASIC fields in stat */ v9lstat->st_result_mask = P9_STATS_BASIC; @@ -2959,9 +2965,15 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct
[Qemu-devel] [PATCH v2 09/20] 9p: Properly check/translate flags in unlinkat
This code previously relied on P9_DOTL_AT_REMOVEDIR and AT_REMOVEDIR having the same numerical value and deferred any errorchecking to the syscall itself. However, while the former assumption is true on Linux, it is not true in general. Thus, add appropriate error checking and translation to the 9p unlinkat server code. Signed-off-by: Keno Fischer --- Changes since v1: * Code was moved from 9p-local.c to server entry point in 9p.c hw/9pfs/9p.c | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index b80db65..a757374 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -2522,7 +2522,7 @@ static void coroutine_fn v9fs_unlinkat(void *opaque) { int err = 0; V9fsString name; -int32_t dfid, flags; +int32_t dfid, flags, rflags = 0; size_t offset = 7; V9fsPath path; V9fsFidState *dfidp; @@ -2549,6 +2549,15 @@ static void coroutine_fn v9fs_unlinkat(void *opaque) goto out_nofid; } +if (flags & ~P9_DOTL_AT_REMOVEDIR) { +err = -EINVAL; +goto out_nofid; +} + +if (flags & P9_DOTL_AT_REMOVEDIR) { +rflags |= AT_REMOVEDIR; +} + dfidp = get_fid(pdu, dfid); if (dfidp == NULL) { err = -EINVAL; @@ -2567,7 +2576,7 @@ static void coroutine_fn v9fs_unlinkat(void *opaque) if (err < 0) { goto out_err; } -err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, flags); +err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags); if (!err) { err = offset; } -- 2.8.1
[Qemu-devel] [PATCH v2 14/20] 9p: darwin: Provide a compatibility definition for XATTR_SIZE_MAX
Signed-off-by: Keno Fischer --- No change from v1. hw/9pfs/9p.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 70cfab9..24802b9 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -3374,6 +3374,13 @@ out_nofid: v9fs_string_free(&name); } +#if defined(CONFIG_DARWIN) && !defined(XATTR_SIZE_MAX) +/* Darwin doesn't seem to define a maximum xattr size in its user + user space header, but looking at the kernel source, HFS supports + up to INT32_MAX, so use that as the maximum. +*/ +#define XATTR_SIZE_MAX INT32_MAX +#endif static void coroutine_fn v9fs_xattrcreate(void *opaque) { int flags; -- 2.8.1
[Qemu-devel] [PATCH v2 08/20] 9p: Rename 9p-util -> 9p-util-linux
The current file only has the Linux versions of these functions. Rename the file accordingly and update the Makefile to only build it on Linux. A Darwin version of these will follow later in the series. Signed-off-by: Keno Fischer --- Changes since v1: New patch hw/9pfs/9p-util-linux.c | 59 + hw/9pfs/9p-util.c | 59 - hw/9pfs/Makefile.objs | 3 ++- 3 files changed, 61 insertions(+), 60 deletions(-) create mode 100644 hw/9pfs/9p-util-linux.c delete mode 100644 hw/9pfs/9p-util.c diff --git a/hw/9pfs/9p-util-linux.c b/hw/9pfs/9p-util-linux.c new file mode 100644 index 000..defa3a4 --- /dev/null +++ b/hw/9pfs/9p-util-linux.c @@ -0,0 +1,59 @@ +/* + * 9p utilities (Linux Implementation) + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "9p-util.h" + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); +int ret; + +ret = lgetxattr(proc_path, name, value, size); +g_free(proc_path); +return ret; +} + +ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); +int ret; + +ret = llistxattr(proc_path, list, size); +g_free(proc_path); +return ret; +} + +ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, +const char *name) +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); +int ret; + +ret = lremovexattr(proc_path, name); +g_free(proc_path); +return ret; +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); +int ret; + +ret = lsetxattr(proc_path, name, value, size, flags); +g_free(proc_path); +return ret; +} diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c deleted file mode 100644 index 614b7fc..000 --- a/hw/9pfs/9p-util.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * 9p utilities - * - * Copyright IBM, Corp. 2017 - * - * Authors: - * Greg Kurz - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qemu/xattr.h" -#include "9p-util.h" - -ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, - void *value, size_t size) -{ -char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); -int ret; - -ret = lgetxattr(proc_path, name, value, size); -g_free(proc_path); -return ret; -} - -ssize_t flistxattrat_nofollow(int dirfd, const char *filename, - char *list, size_t size) -{ -char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); -int ret; - -ret = llistxattr(proc_path, list, size); -g_free(proc_path); -return ret; -} - -ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, -const char *name) -{ -char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); -int ret; - -ret = lremovexattr(proc_path, name); -g_free(proc_path); -return ret; -} - -int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, - void *value, size_t size, int flags) -{ -char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); -int ret; - -ret = lsetxattr(proc_path, name, value, size, flags); -g_free(proc_path); -return ret; -} diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs index fd90b62..083508f 100644 --- a/hw/9pfs/Makefile.objs +++ b/hw/9pfs/Makefile.objs @@ -1,4 +1,5 @@ -common-obj-y = 9p.o 9p-util.o +common-obj-y = 9p.o +common-obj-$(CONFIG_LINUX) += 9p-util-linux.o common-obj-y += 9p-local.o 9p-xattr.o common-obj-y += 9p-xattr-user.o 9p-posix-acl.o common-obj-y += coth.o cofs.o codir.o cofile.o -- 2.8.1
[Qemu-devel] [PATCH v2 13/20] 9p: darwin: Ignore O_{NOATIME, DIRECT}
Darwin doesn't have either of these flags. Darwin does have F_NOCACHE, which is similar to O_DIRECT, but has different enough semantics that other projects don't generally map them automatically. In any case, we don't support O_DIRECT on Linux at the moment either. Signed-off-by: Keno Fischer --- Changes from v1: Undo accidental formatting change hw/9pfs/9p.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 9751246..70cfab9 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -123,11 +123,18 @@ static int dotl_to_open_flags(int flags) { P9_DOTL_NONBLOCK, O_NONBLOCK } , { P9_DOTL_DSYNC, O_DSYNC }, { P9_DOTL_FASYNC, FASYNC }, +#ifndef CONFIG_DARWIN +{ P9_DOTL_NOATIME, O_NOATIME }, +/* On Darwin, we could map to F_NOCACHE, which is + similar, but doesn't quite have the same + semantics. However, we don't support O_DIRECT + even on linux at the moment, so we just ignore + it here. */ { P9_DOTL_DIRECT, O_DIRECT }, +#endif { P9_DOTL_LARGEFILE, O_LARGEFILE }, { P9_DOTL_DIRECTORY, O_DIRECTORY }, { P9_DOTL_NOFOLLOW, O_NOFOLLOW }, -{ P9_DOTL_NOATIME, O_NOATIME }, { P9_DOTL_SYNC, O_SYNC }, }; @@ -156,10 +163,12 @@ static int get_dotl_openflags(V9fsState *s, int oflags) */ flags = dotl_to_open_flags(oflags); flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT); +#ifndef CONFIG_DARWIN /* * Ignore direct disk access hint until the server supports it. */ flags &= ~O_DIRECT; +#endif return flags; } -- 2.8.1
[Qemu-devel] [PATCH v2 07/20] 9p: Move a couple xattr functions to 9p-util
These functions will need custom implementations on Darwin. Since the implementation is very similar among all of them, and 9p-util already has the _nofollow version of fgetxattrat, let's move them all there. Signed-off-by: Keno Fischer --- Changes since v1: * fgetxattr_follow is dropped in favor of a different approach later in the series. hw/9pfs/9p-util.c | 33 + hw/9pfs/9p-util.h | 4 hw/9pfs/9p-xattr.c | 33 - 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c index f709c27..614b7fc 100644 --- a/hw/9pfs/9p-util.c +++ b/hw/9pfs/9p-util.c @@ -24,3 +24,36 @@ ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, g_free(proc_path); return ret; } + +ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); +int ret; + +ret = llistxattr(proc_path, list, size); +g_free(proc_path); +return ret; +} + +ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, +const char *name) +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); +int ret; + +ret = lremovexattr(proc_path, name); +g_free(proc_path); +return ret; +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ +char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); +int ret; + +ret = lsetxattr(proc_path, name, value, size, flags); +g_free(proc_path); +return ret; +} diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index dc0d2e2..79ed6b2 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -60,5 +60,9 @@ ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, void *value, size_t size); int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, void *value, size_t size, int flags); +ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size); +ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, +const char *name); #endif diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index d05c1a1..c696d8f 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -60,17 +60,6 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path, return name_size; } -static ssize_t flistxattrat_nofollow(int dirfd, const char *filename, - char *list, size_t size) -{ -char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); -int ret; - -ret = llistxattr(proc_path, list, size); -g_free(proc_path); -return ret; -} - /* * Get the list and pass to each layer to find out whether * to send the data or not @@ -196,17 +185,6 @@ ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, return local_getxattr_nofollow(ctx, path, name, value, size); } -int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, - void *value, size_t size, int flags) -{ -char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); -int ret; - -ret = lsetxattr(proc_path, name, value, size, flags); -g_free(proc_path); -return ret; -} - ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) @@ -235,17 +213,6 @@ int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, return local_setxattr_nofollow(ctx, path, name, value, size, flags); } -static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, - const char *name) -{ -char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); -int ret; - -ret = lremovexattr(proc_path, name); -g_free(proc_path); -return ret; -} - ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, const char *name) { -- 2.8.1
[Qemu-devel] [PATCH v2 05/20] 9p: Properly set errp in fstatfs error path
In the review of 9p: Avoid warning if FS_IOC_GETVERSION is not defined Grep Kurz noted this error path was failing to set errp. Fix that. Signed-off-by: Keno Fischer --- Changes since v1: New patch hw/9pfs/9p-local.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index adc169a..576c8e3 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -1420,6 +1420,8 @@ static int local_init(FsContext *ctx, Error **errp) */ if (fstatfs(data->mountfd, &stbuf) < 0) { close_preserve_errno(data->mountfd); +error_setg_errno(errp, errno, +"failed to stat file system at '%s'", ctx->fs_root); goto err; } switch (stbuf.f_type) { -- 2.8.1
[Qemu-devel] [PATCH v2 11/20] 9p: darwin: Handle struct dirent differences
On darwin d_seekoff exists, but is optional and does not seem to be commonly used by file systems. Use `telldir` instead to obtain the seek offset. Signed-off-by: Keno Fischer --- Changes since v1: * Drop setting d_seekoff in synth_direntry * Factor telldir vs d_off logic into v9fs_dent_telldir * Error path for telldir failure hw/9pfs/9p-synth.c | 2 ++ hw/9pfs/9p.c | 36 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index eb68b42..a312f8c 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -221,7 +221,9 @@ static void synth_direntry(V9fsSynthNode *node, { strcpy(entry->d_name, node->name); entry->d_ino = node->attr->inode; +#ifndef CONFIG_DARWIN entry->d_off = off + 1; +#endif } static struct dirent *synth_get_dentry(V9fsSynthNode *dir, diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 212f569..9751246 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -1738,6 +1738,25 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, return offset; } +/** + * Get the seek offset of a dirent. If not available from the structure itself, + * obtain it by calling telldir. + */ +static int v9fs_dent_telldir(V9fsPDU *pdu, V9fsFidState *fidp, + struct dirent *dent) +{ +#ifdef CONFIG_DARWIN +/* + * Darwin has d_seekoff, which appears to function similarly to d_off. + * However, it does not appear to be supported on all file systems, + * so use telldir for correctness. + */ +return v9fs_co_telldir(pdu, fidp); +#else +return dent->d_off; +#endif +} + static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu, V9fsFidState *fidp, uint32_t max_count) @@ -1801,7 +1820,11 @@ static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu, count += len; v9fs_stat_free(&v9stat); v9fs_path_free(&path); -saved_dir_pos = dent->d_off; +saved_dir_pos = v9fs_dent_telldir(pdu, fidp, dent); +if (saved_dir_pos < 0) { +err = saved_dir_pos; +break; +} } v9fs_readdir_unlock(&fidp->fs.dir); @@ -1915,7 +1938,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString name; int len, err = 0; int32_t count = 0; -off_t saved_dir_pos; +off_t saved_dir_pos, off; struct dirent *dent; /* save the directory position */ @@ -1951,10 +1974,15 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, /* Fill the other fields with dummy values */ qid.type = 0; qid.version = 0; +off = v9fs_dent_telldir(pdu, fidp, dent); +if (off < 0) { +err = off; +break; +} /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ len = pdu_marshal(pdu, 11 + count, "Qqbs", - &qid, dent->d_off, + &qid, off, dent->d_type, &name); v9fs_readdir_unlock(&fidp->fs.dir); @@ -1966,7 +1994,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, } count += len; v9fs_string_free(&name); -saved_dir_pos = dent->d_off; +saved_dir_pos = off; } v9fs_readdir_unlock(&fidp->fs.dir); -- 2.8.1
[Qemu-devel] [PATCH v2 06/20] 9p: Avoid warning if FS_IOC_GETVERSION is not defined
Both `stbuf` and `local_ioc_getversion` where unused when FS_IOC_GETVERSION was not defined, causing a compiler warning. Reorgnaize the code to avoid this warning. Signed-off-by: Keno Fischer --- Changes since v1: * As request in review, logic is factored into a local_ioc_getversion_init function. hw/9pfs/9p-local.c | 43 +-- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 576c8e3..6222891 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -1375,10 +1375,10 @@ static int local_unlinkat(FsContext *ctx, V9fsPath *dir, return ret; } +#ifdef FS_IOC_GETVERSION static int local_ioc_getversion(FsContext *ctx, V9fsPath *path, mode_t st_mode, uint64_t *st_gen) { -#ifdef FS_IOC_GETVERSION int err; V9fsFidOpenState fid_open; @@ -1397,32 +1397,19 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path, err = ioctl(fid_open.fd, FS_IOC_GETVERSION, st_gen); local_close(ctx, &fid_open); return err; -#else -errno = ENOTTY; -return -1; -#endif } +#endif -static int local_init(FsContext *ctx, Error **errp) +static int local_ioc_getversion_init(FsContext *ctx, LocalData *data) { +#ifdef FS_IOC_GETVERSION struct statfs stbuf; -LocalData *data = g_malloc(sizeof(*data)); -data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY); -if (data->mountfd == -1) { -error_setg_errno(errp, errno, "failed to open '%s'", ctx->fs_root); -goto err; -} - -#ifdef FS_IOC_GETVERSION /* * use ioc_getversion only if the ioctl is definied */ if (fstatfs(data->mountfd, &stbuf) < 0) { -close_preserve_errno(data->mountfd); -error_setg_errno(errp, errno, -"failed to stat file system at '%s'", ctx->fs_root); -goto err; +return -1; } switch (stbuf.f_type) { case EXT2_SUPER_MAGIC: @@ -1433,6 +1420,26 @@ static int local_init(FsContext *ctx, Error **errp) break; } #endif +return 0; +} + +static int local_init(FsContext *ctx, Error **errp) +{ +LocalData *data = g_malloc(sizeof(*data)); + +data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY); +if (data->mountfd == -1) { +error_setg_errno(errp, errno, "failed to open '%s'", ctx->fs_root); +goto err; +} + +if (local_ioc_getversion_init(ctx, data) < 0) { +close_preserve_errno(data->mountfd); +error_setg_errno(errp, errno, +"failed initialize ioc_getversion for file system at '%s'", +ctx->fs_root); +goto err; +} if (ctx->export_flags & V9FS_SM_PASSTHROUGH) { ctx->xops = passthrough_xattr_ops; -- 2.8.1
[Qemu-devel] [PATCH v2 02/20] 9p: proxy: Fix size passed to `connect`
The size to pass to the `connect` call is the size of the entire `struct sockaddr_un`. Passing anything shorter than this causes errors on darwin. Signed-off-by: Keno Fischer --- Changes since v1: New patch hw/9pfs/9p-proxy.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c index e2e0329..47a94e0 100644 --- a/hw/9pfs/9p-proxy.c +++ b/hw/9pfs/9p-proxy.c @@ -1088,7 +1088,7 @@ static int proxy_ioc_getversion(FsContext *fs_ctx, V9fsPath *path, static int connect_namedsocket(const char *path, Error **errp) { -int sockfd, size; +int sockfd; struct sockaddr_un helper; if (strlen(path) >= sizeof(helper.sun_path)) { @@ -1102,8 +1102,7 @@ static int connect_namedsocket(const char *path, Error **errp) } strcpy(helper.sun_path, path); helper.sun_family = AF_UNIX; -size = strlen(helper.sun_path) + sizeof(helper.sun_family); -if (connect(sockfd, (struct sockaddr *)&helper, size) < 0) { +if (connect(sockfd, (struct sockaddr *)&helper, sizeof(helper)) < 0) { error_setg_errno(errp, errno, "failed to connect to '%s'", path); close(sockfd); return -1; -- 2.8.1
[Qemu-devel] [PATCH v2 04/20] 9p: linux: Fix a couple Linux assumptions
- Guard Linux only headers. - Add qemu/statfs.h header to abstract over the which headers are needed for struct statfs - Define `ENOATTR` only if not only defined (it's defined in system headers on Darwin). Signed-off-by: Keno Fischer --- Changes since v1: * New qemu/statfs.h header to factor out the header selection to a common place. I did not write a configure check, since the rest of 9p only supports linux/darwin anyway, so there didn't seem to be much point, but I can write one if requested. * Now also covers fsdev/virtfs-proxy-helper.c fsdev/file-op-9p.h | 2 +- fsdev/virtfs-proxy-helper.c | 4 +++- hw/9pfs/9p-local.c | 2 ++ include/qemu/statfs.h | 19 +++ include/qemu/xattr.h| 4 +++- 5 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 include/qemu/statfs.h diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index 3fa062b..111f804 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -16,7 +16,7 @@ #include #include -#include +#include "qemu/statfs.h" #include "qemu-fsdev-throttle.h" #define SM_LOCAL_MODE_BITS0600 diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index 6f132c5..94fb069 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -13,17 +13,19 @@ #include #include #include +#ifdef CONFIG_LINUX #include #include -#include #include #include #ifdef CONFIG_LINUX_MAGIC_H #include #endif +#endif #include "qemu-common.h" #include "qemu/sockets.h" #include "qemu/xattr.h" +#include "qemu/statfs.h" #include "9p-iov-marshal.h" #include "hw/9pfs/9p-proxy.h" #include "fsdev/9p-iov-marshal.h" diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index bcf2798..adc169a 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -27,10 +27,12 @@ #include "qemu/error-report.h" #include "qemu/option.h" #include +#ifdef CONFIG_LINUX #include #ifdef CONFIG_LINUX_MAGIC_H #include #endif +#endif #include #ifndef XFS_SUPER_MAGIC diff --git a/include/qemu/statfs.h b/include/qemu/statfs.h new file mode 100644 index 000..dde289f --- /dev/null +++ b/include/qemu/statfs.h @@ -0,0 +1,19 @@ +/* + * Host statfs header abstraction + * + * This work is licensed under the terms of the GNU GPL, version 2, or any + * later version. See the COPYING file in the top-level directory. + * + */ +#ifndef QEMU_STATFS_H +#define QEMU_STATFS_H + +#ifdef CONFIG_LINUX +# include +#endif +#ifdef CONFIG_DARWIN +# include +# include +#endif + +#endif diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h index a83fe8e..f1d0f7b 100644 --- a/include/qemu/xattr.h +++ b/include/qemu/xattr.h @@ -22,7 +22,9 @@ #ifdef CONFIG_LIBATTR # include #else -# define ENOATTR ENODATA +# if !defined(ENOATTR) +#define ENOATTR ENODATA +# endif # include #endif -- 2.8.1
[Qemu-devel] [PATCH v2 03/20] 9p: xattr: Fix crash due to free of uninitialized value
If the size returned from llistxattr is 0, we skipped the malloc call, leaving xattr.value uninitialized. However, this value is later passed to `g_free` without any further checks, causing an error. Fix that by always calling g_malloc unconditionally. If `size` is 0, it will return a pointer that is safe to pass to g_free, likely NULL. Signed-off-by: Keno Fischer --- Changes since v1: New patch hw/9pfs/9p.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index d74302d..b80db65 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -3256,8 +3256,8 @@ static void coroutine_fn v9fs_xattrwalk(void *opaque) xattr_fidp->fs.xattr.len = size; xattr_fidp->fid_type = P9_FID_XATTR; xattr_fidp->fs.xattr.xattrwalk_fid = true; +xattr_fidp->fs.xattr.value = g_malloc0(size); if (size) { -xattr_fidp->fs.xattr.value = g_malloc0(size); err = v9fs_co_llistxattr(pdu, &xattr_fidp->path, xattr_fidp->fs.xattr.value, xattr_fidp->fs.xattr.len); -- 2.8.1
[Qemu-devel] [PATCH v2 01/20] cutils: Provide strchrnul
strchrnul is a GNU extension and thus unavailable on a number of targets. In the review for a commit removing strchrnul from 9p, I was asked to create a qemu_strchrnul helper to factor out this functionality. Do so, and use it in a number of other places in the code base that inlined the replacement pattern in a place where strchrnul could be used. Signed-off-by: Keno Fischer --- Changes since v1: New patch hw/9pfs/9p-local.c| 2 +- include/qemu/cutils.h | 1 + monitor.c | 8 ++-- util/cutils.c | 13 + util/qemu-option.c| 6 +- util/uri.c| 6 ++ 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index b37b1db..bcf2798 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -65,7 +65,7 @@ int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, assert(*path != '/'); head = g_strdup(path); -c = strchrnul(path, '/'); +c = qemu_strchrnul(path, '/'); if (*c) { /* Intermediate path element */ head[c - path] = 0; diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h index a663340..bc40c30 100644 --- a/include/qemu/cutils.h +++ b/include/qemu/cutils.h @@ -122,6 +122,7 @@ int qemu_strnlen(const char *s, int max_len); * Returns: the pointer originally in @input. */ char *qemu_strsep(char **input, const char *delim); +const char *qemu_strchrnul(const char *s, int c); time_t mktimegm(struct tm *tm); int qemu_fdatasync(int fd); int fcntl_setfl(int fd, int flag); diff --git a/monitor.c b/monitor.c index 922cfc0..e1f01c4 100644 --- a/monitor.c +++ b/monitor.c @@ -798,9 +798,7 @@ static int compare_cmd(const char *name, const char *list) p = list; for(;;) { pstart = p; -p = strchr(p, '|'); -if (!p) -p = pstart + strlen(pstart); +p = qemu_strchrnul(p, '|'); if ((p - pstart) == len && !memcmp(pstart, name, len)) return 1; if (*p == '\0') @@ -3401,9 +3399,7 @@ static void cmd_completion(Monitor *mon, const char *name, const char *list) p = list; for(;;) { pstart = p; -p = strchr(p, '|'); -if (!p) -p = pstart + strlen(pstart); +p = qemu_strchrnul(p, '|'); len = p - pstart; if (len > sizeof(cmd) - 2) len = sizeof(cmd) - 2; diff --git a/util/cutils.c b/util/cutils.c index 0de69e6..6e078b0 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -545,6 +545,19 @@ int qemu_strtou64(const char *nptr, const char **endptr, int base, } /** + * Searches for the first occurrence of 'c' in 's', and returns a pointer + * to the trailing null byte if none was found. + */ +const char *qemu_strchrnul(const char *s, int c) +{ +const char *e = strchr(s, c); +if (!e) { +e = s + strlen(s); +} +return e; +} + +/** * parse_uint: * * @s: String to parse diff --git a/util/qemu-option.c b/util/qemu-option.c index 58d1c23..54eca12 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -77,11 +77,7 @@ const char *get_opt_value(const char *p, char **value) *value = NULL; while (1) { -offset = strchr(p, ','); -if (!offset) { -offset = p + strlen(p); -} - +offset = qemu_strchrnul(p, ','); length = offset - p; if (*offset != '\0' && *(offset + 1) == ',') { length++; diff --git a/util/uri.c b/util/uri.c index 8624a7a..8bdef84 100644 --- a/util/uri.c +++ b/util/uri.c @@ -52,6 +52,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qemu/uri.h" @@ -2266,10 +2267,7 @@ struct QueryParams *query_params_parse(const char *query) /* Find the next separator, or end of the string. */ end = strchr(query, '&'); if (!end) { -end = strchr(query, ';'); -} -if (!end) { -end = query + strlen(query); +end = qemu_strchrnul(query, ';'); } /* Find the first '=' character between here and end. */ -- 2.8.1
[Qemu-devel] [PATCH v2 00/20] 9p: Add support for Darwin
This is v2 of my patch series to provide 9p server support for Darwin. The patches in this series address review from v1, now support building the virtfs proxy, as well as fix bugs found since v1. Keno Fischer (20): cutils: Provide strchrnul 9p: proxy: Fix size passed to `connect` 9p: xattr: Fix crash due to free of uninitialized value 9p: linux: Fix a couple Linux assumptions 9p: Properly set errp in fstatfs error path 9p: Avoid warning if FS_IOC_GETVERSION is not defined 9p: Move a couple xattr functions to 9p-util 9p: Rename 9p-util -> 9p-util-linux 9p: Properly error check and translate flags in unlinkat 9p: darwin: Handle struct stat(fs) differences 9p: darwin: Handle struct dirent differences 9p: darwin: Explicitly cast comparisons of mode_t with -1 9p: darwin: Ignore O_{NOATIME, DIRECT} 9p: darwin: Provide a compatibility definition for XATTR_SIZE_MAX 9p: darwin: *xattr_nofollow implementations 9p: darwin: Compatibility for f/l*xattr 9p: darwin: Provide a fallback implementation for utimensat 9p: darwin: Implement compatibility for mknodat 9p: darwin: virtfs-proxy: Implement setuid code for darwin 9p: darwin: configure: Allow VirtFS on Darwin Makefile| 6 ++ Makefile.objs | 1 + configure | 22 +++-- fsdev/file-op-9p.h | 2 +- fsdev/virtfs-proxy-helper.c | 230 hw/9pfs/9p-local.c | 68 - hw/9pfs/9p-proxy.c | 22 +++-- hw/9pfs/9p-synth.c | 4 + hw/9pfs/9p-util-darwin.c| 185 +++ hw/9pfs/9p-util-linux.c | 70 ++ hw/9pfs/9p-util.c | 26 - hw/9pfs/9p-util.h | 31 ++ hw/9pfs/9p-xattr.c | 33 --- hw/9pfs/9p.c| 86 +++-- hw/9pfs/Makefile.objs | 4 +- include/qemu/cutils.h | 1 + include/qemu/statfs.h | 19 include/qemu/xattr.h| 4 +- monitor.c | 8 +- util/cutils.c | 13 +++ util/qemu-option.c | 6 +- util/uri.c | 6 +- 22 files changed, 636 insertions(+), 211 deletions(-) create mode 100644 hw/9pfs/9p-util-darwin.c create mode 100644 hw/9pfs/9p-util-linux.c delete mode 100644 hw/9pfs/9p-util.c create mode 100644 include/qemu/statfs.h -- 2.8.1
Re: [Qemu-devel] [PATCH 1/6] gdbstub: Return the fd from gdbserver_start
On 05/31/2018 04:15 PM, Philippe Mathieu-Daudé wrote: > Hi Richard, > > On 05/31/2018 07:49 PM, Richard Henderson wrote: >> This will allow us to protect gdbserver_fd from the guest. >> >> Signed-off-by: Richard Henderson >> --- >> gdbstub.c | 5 +++-- >> 1 file changed, 3 insertions(+), 2 deletions(-) >> >> diff --git a/gdbstub.c b/gdbstub.c >> index 6081e719c5..057d0d65c5 100644 >> --- a/gdbstub.c >> +++ b/gdbstub.c >> @@ -1890,15 +1890,16 @@ static int gdbserver_open(int port) >> int gdbserver_start(int port) >> { >> gdbserver_fd = gdbserver_open(port); >> -if (gdbserver_fd < 0) >> +if (gdbserver_fd < 0) { >> return -1; >> +} >> /* accept connections */ >> if (!gdb_accept()) { >> close(gdbserver_fd); >> gdbserver_fd = -1; >> return -1; >> } >> -return 0; >> +return gdbserver_fd; > > I agree with your change, but what about !CONFIG_USER_ONLY? It's still a non-negative number, so the success value is still the same. r~
Re: [Qemu-devel] [PATCH v3 0/4] Add new CD-ROM related qtests
On 04/27/2018 05:40 AM, Thomas Huth wrote: > With one of my clean-up patches (see commit 1454509726719e0933c800), I > recently accidentially broke the "-cdrom" parameter (more precisely > "-drive if=scsi") on a couple of boards, since there was no error > detected during the "make check" regression testing. This is clearly an > indication that we are lacking tests in this area. > So this small patch series now introduces some tests for CD-ROM drives: > The first two patches introduce the possibility to check that booting > from CD-ROM drives still works fine for x86 and s390x, and the third > patch adds a test that certain machines can at least still be started > with the "-cdrom" parameter (i.e. that test would have catched the > mistake that I did with my SCSI cleanup patch). > > v3: > - Rebased to current master branch > - Add a final patch that adds an entry to the MAINTAINERS file > > v2: > - Use g_spawn_sync() instead of execlp() to run genisoimage > - The "-cdrom" parameter test is now run on all architectures (with >machine "none" for the machines that are not explicitly checked) > - Some rewordings and improved comments here and there > > Thomas Huth (4): > tests/boot-sector: Add magic bytes to s390x boot code header > tests/cdrom-test: Test booting from CD-ROM ISO image file > tests/cdrom-test: Test that -cdrom parameter is working > MAINTAINERS: Add the cdrom-test to John's section > > MAINTAINERS| 1 + > tests/Makefile.include | 2 + > tests/boot-sector.c| 9 +- > tests/cdrom-test.c | 222 > + > 4 files changed, 231 insertions(+), 3 deletions(-) > create mode 100644 tests/cdrom-test.c > *coughs* Thanks, applied to my IDE tree: https://github.com/jnsnow/qemu/commits/ide https://github.com/jnsnow/qemu.git --js
Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
On 05/31/2018 06:28 PM, John Snow wrote: > This set just adds register names so that the read/write traces make > more sense on their own without having to memorize register offsets. > It also splits read/write traces into supported/unsupported subsets, > so you can just monitor for things that QEMU is likely doing wrong. > > v2: > - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces >for writes to unknown/unsupported registers. (Philippe) > > John Snow (16): > ahci: add port register enumeration > ahci: modify ahci_port_read to use register numbers > ahci: make port read traces more descriptive > ahci: fix spacing damage on ahci_port_write > ahci: combine identical clauses in port write > ahci: modify ahci_port_write to use register numbers > ahci: make port write traces more descriptive > ahci: delete old port register address definitions > ahci: add host register enumeration > ahci: fix host register max address > ahci: modify ahci_mem_read_32 to work on register numbers > ahci: make mem_read_32 traces more descriptive > ahci: fix spacing damage on ahci_mem_write > ahci: adjust ahci_mem_write to work on registers > ahci: delete old host register address definitions > ahci: make ahci_mem_write traces more descriptive > > hw/ide/ahci.c | 314 > ++--- > hw/ide/ahci_internal.h | 63 ++ > hw/ide/trace-events| 13 +- > 3 files changed, 241 insertions(+), 149 deletions(-) > Touched up the format strings, and Thanks, applied to my IDE tree: https://github.com/jnsnow/qemu/commits/ide https://github.com/jnsnow/qemu.git --js
Re: [Qemu-devel] [PATCH 0/3] ahci: fix completion race condition
On 05/30/2018 08:43 PM, John Snow wrote: > Commit d759c951f changed the main thread lock release/reacquisition, > and in so doing apparently jostled loose a race condition in the AHCI > code. > > Patch 2 should be sufficient to fix this, and patches 1 and 3 are just > little trivial fixes. > > This might be sufficient to fix the bug as reported at > https://bugs.launchpad.net/qemu/+bug/1769189 > but the nature of the timing changes make it difficult to confirm, > so I am posting this patchset for the reporters to help test. > > John Snow (3): > ahci: trim signatures on raise/lower > ahci: fix PxCI register race > ahci: don't schedule unnecessary BH > > hw/ide/ahci.c | 24 +++- > 1 file changed, 11 insertions(+), 13 deletions(-) > Thanks for the testing and reviews, everyone! Thanks, applied to my IDE tree: https://github.com/jnsnow/qemu/commits/ide https://github.com/jnsnow/qemu.git --js
Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
On 05/31/2018 06:49 PM, no-re...@patchew.org wrote: > Hi, > > This series failed docker-mingw@fedora build test. Please find the testing > commands and > their output below. If you have Docker installed, you can probably reproduce > it > locally. > [ blah blah blah ] > In file included from /tmp/qemu-test/src/hw/ide/ahci.c:30:0: > /tmp/qemu-test/src/hw/ide/ahci.c: In function 'ahci_mem_write': > /tmp/qemu-test/src/hw/ide/ahci.c:497:38: error: format '%lx' expects argument > of type 'long unsigned int', but argument 3 has type 'hwaddr {aka long long > unsigned int}' [-Werror=format=] > qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented > register:" > ^ > /tmp/qemu-test/src/include/qemu/log.h:85:22: note: in definition of macro > 'qemu_log_mask' > qemu_log(FMT, ## __VA_ARGS__); \ > ^~~ > /tmp/qemu-test/src/hw/ide/ahci.c:498:63: note: format string is defined here >" AHCI host register %s, offset 0x%lx: 0x%"PRIu64, > ~~^ > %llx > In file included from /tmp/qemu-test/src/hw/ide/ahci.c:30:0: > /tmp/qemu-test/src/hw/ide/ahci.c:511:34: error: format '%lx' expects argument > of type 'long unsigned int', but argument 2 has type 'hwaddr {aka long long > unsigned int}' [-Werror=format=] > qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register: > " > ^ > /tmp/qemu-test/src/include/qemu/log.h:85:22: note: in definition of macro > 'qemu_log_mask' > qemu_log(FMT, ## __VA_ARGS__); \ > ^~~ > /tmp/qemu-test/src/hw/ide/ahci.c:512:59: note: format string is defined here >"AHCI global register at offset 0x%lx: 0x%"PRIu64, > ~~^ > %llx Rookie stuff; I forgot hwaddr was just uint64_t, which is implemented as long on x86_64. Replaced address and value specifications both with "0x%"PRIx64 in both cases. --js
Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
On 05/31/2018 06:47 PM, no-re...@patchew.org wrote: > Hi, > > This series seems to have some coding style problems. See output below for > more information: > > Type: series > Message-id: 20180531222835.16558-1-js...@redhat.com > Subject: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements > > === TEST SCRIPT BEGIN === > #!/bin/bash > > BASE=base > n=1 > total=$(git log --oneline $BASE.. | wc -l) > failed=0 > > git config --local diff.renamelimit 0 > git config --local diff.renames True > git config --local diff.algorithm histogram > > commits="$(git log --format=%H --reverse $BASE..)" > for c in $commits; do > echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..." > if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; > then > failed=1 > echo > fi > n=$((n+1)) > done > > exit $failed > === TEST SCRIPT END === > > Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 > From https://github.com/patchew-project/qemu > t [tag update]patchew/20180531212435.165261-1-ebl...@redhat.com > -> patchew/20180531212435.165261-1-ebl...@redhat.com > * [new tag] patchew/20180531222835.16558-1-js...@redhat.com -> > patchew/20180531222835.16558-1-js...@redhat.com > Switched to a new branch 'test' > 9932482af2 ahci: make ahci_mem_write traces more descriptive > c89f4076bd ahci: delete old host register address definitions > 70f11b4d95 ahci: adjust ahci_mem_write to work on registers > c13dce7973 ahci: fix spacing damage on ahci_mem_write > fab5307968 ahci: make mem_read_32 traces more descriptive > ace1b4ffac ahci: modify ahci_mem_read_32 to work on register numbers > 3021a82d31 ahci: fix host register max address > 3c4808a257 ahci: add host register enumeration > f57758869b ahci: delete old port register address definitions > 52d04d9b2c ahci: make port write traces more descriptive > 08f435fad3 ahci: modify ahci_port_write to use register numbers > 634c79d46b ahci: combine identical clauses in port write > 50b162ae60 ahci: fix spacing damage on ahci_port_write > 5895ec8f28 ahci: make port read traces more descriptive > 68fd051a9a ahci: modify ahci_port_read to use register numbers > 0478088585 ahci: add port register enumeration > > === OUTPUT BEGIN === > Checking PATCH 1/16: ahci: add port register enumeration... > WARNING: line over 80 characters > #71: FILE: hw/ide/ahci_internal.h:94: > +AHCI_PORT_REG_SCR_NOTIF = 15, /* PxSNTF: SATA phy register: > SNotification */ > > total: 0 errors, 1 warnings, 72 lines checked > > Your patch has style problems, please review. If any of these errors > are false positives report them to the maintainer, see > CHECKPATCH in MAINTAINERS. > Checking PATCH 2/16: ahci: modify ahci_port_read to use register numbers... > Checking PATCH 3/16: ahci: make port read traces more descriptive... > Checking PATCH 4/16: ahci: fix spacing damage on ahci_port_write... > ERROR: spaces required around that '|' (ctx:VxV) > #83: FILE: hw/ide/ahci.c:316: > +(val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK)); I'll fix that in this patch. > ^ > > total: 1 errors, 0 warnings, 156 lines checked > > Your patch has style problems, please review. If any of these errors > are false positives report them to the maintainer, see > CHECKPATCH in MAINTAINERS. > > Checking PATCH 5/16: ahci: combine identical clauses in port write... > Checking PATCH 6/16: ahci: modify ahci_port_write to use register numbers... > Checking PATCH 7/16: ahci: make port write traces more descriptive... > Checking PATCH 8/16: ahci: delete old port register address definitions... > Checking PATCH 9/16: ahci: add host register enumeration... > Checking PATCH 10/16: ahci: fix host register max address... > Checking PATCH 11/16: ahci: modify ahci_mem_read_32 to work on register > numbers... > Checking PATCH 12/16: ahci: make mem_read_32 traces more descriptive... > Checking PATCH 13/16: ahci: fix spacing damage on ahci_mem_write... > Checking PATCH 14/16: ahci: adjust ahci_mem_write to work on registers... > Checking PATCH 15/16: ahci: delete old host register address definitions... > Checking PATCH 16/16: ahci: make ahci_mem_write traces more descriptive... > WARNING: line over 80 characters > #18: FILE: hw/ide/ahci.c:497: > +qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented > register:" > TIL that my emacs configuration considers column 80 in-bounds. Fixed. > total: 0 errors, 1 warnings, 35 lines checked > > Your patch has style problems, please review. If any of these errors > are false positives report them to the maintainer, see > CHECKPATCH in MAINTAINERS. > === OUTPUT END === > > Test command exited with code: 1 > > > --- > Email generated automatically by Patchew [http://patchew.org/]. > Please send your feedback to patchew-de...@redhat.com > You're a harsh master, patchew. --js
Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
On 05/31/2018 07:24 PM, Philippe Mathieu-Daudé wrote: > On 05/31/2018 07:28 PM, John Snow wrote: >> This set just adds register names so that the read/write traces make >> more sense on their own without having to memorize register offsets. >> It also splits read/write traces into supported/unsupported subsets, >> so you can just monitor for things that QEMU is likely doing wrong. >> >> v2: >> - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces >>for writes to unknown/unsupported registers. (Philippe) > > Thanks, > > With format string and style issues reported by checkpatch fixed: > Reviewed-by: Philippe Mathieu-Daudé > > (I recommend you to setup the scripts/git.orderfile to ease reviews). > Ah, yeah, my environment is kind of broken at the moment, sorry :( Just haven't had the time to figure out why git-publish is broken so I'm missing a few of my usual pieces. --js
Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
On 05/31/2018 07:28 PM, John Snow wrote: > This set just adds register names so that the read/write traces make > more sense on their own without having to memorize register offsets. > It also splits read/write traces into supported/unsupported subsets, > so you can just monitor for things that QEMU is likely doing wrong. > > v2: > - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces >for writes to unknown/unsupported registers. (Philippe) Thanks, With format string and style issues reported by checkpatch fixed: Reviewed-by: Philippe Mathieu-Daudé (I recommend you to setup the scripts/git.orderfile to ease reviews). > > John Snow (16): > ahci: add port register enumeration > ahci: modify ahci_port_read to use register numbers > ahci: make port read traces more descriptive > ahci: fix spacing damage on ahci_port_write > ahci: combine identical clauses in port write > ahci: modify ahci_port_write to use register numbers > ahci: make port write traces more descriptive > ahci: delete old port register address definitions > ahci: add host register enumeration > ahci: fix host register max address > ahci: modify ahci_mem_read_32 to work on register numbers > ahci: make mem_read_32 traces more descriptive > ahci: fix spacing damage on ahci_mem_write > ahci: adjust ahci_mem_write to work on registers > ahci: delete old host register address definitions > ahci: make ahci_mem_write traces more descriptive > > hw/ide/ahci.c | 314 > ++--- > hw/ide/ahci_internal.h | 63 ++ > hw/ide/trace-events| 13 +- > 3 files changed, 241 insertions(+), 149 deletions(-) >
Re: [Qemu-devel] [PATCH 11/13] 9p: darwin: Mark mknod as unsupported
On Thu, May 31, 2018 at 7:06 PM, Keno Fischer wrote: > On Thu, May 31, 2018 at 6:56 PM, Keno Fischer wrote: My concern was that allowing this would cause unexpected behavior, since the device numbers will differ between OS X and Linux. Though maybe this isn't the place to worry about that. >>> >>> The numbers may differ indeed but we don't really care since the >>> server never opens device files. This is just a directory entry. >> >> Ok, let me try to implement it. However, I don't think it is possible >> to implement mknodat (or at least I can't think of how) on Darwin >> directly. I could use regular mknod, but presumably, this is used >> to avoid a race condition between creating the device and setting >> the permissions. Can you think of a good way to resolve that? > > Would it work to fchdir in to the directory and use a cwd-relative > mknod, then fchdir back? Or do we need to maintain the cwd > while in this code? Sorry for the triple-self-post here, but I took a look at the Darwin kernel source and there's an unexposed (from the Darwin C library) syscall that only changes the current thread's cwd. That seems like it should be safe, so I'll go ahead and use that to implement mknodat. I'll also submit a feature request to apple to implement mknodat.
Re: [Qemu-devel] [PATCH 1/6] gdbstub: Return the fd from gdbserver_start
Hi Richard, On 05/31/2018 07:49 PM, Richard Henderson wrote: > This will allow us to protect gdbserver_fd from the guest. > > Signed-off-by: Richard Henderson > --- > gdbstub.c | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/gdbstub.c b/gdbstub.c > index 6081e719c5..057d0d65c5 100644 > --- a/gdbstub.c > +++ b/gdbstub.c > @@ -1890,15 +1890,16 @@ static int gdbserver_open(int port) > int gdbserver_start(int port) > { > gdbserver_fd = gdbserver_open(port); > -if (gdbserver_fd < 0) > +if (gdbserver_fd < 0) { > return -1; > +} > /* accept connections */ > if (!gdb_accept()) { > close(gdbserver_fd); > gdbserver_fd = -1; > return -1; > } > -return 0; > +return gdbserver_fd; I agree with your change, but what about !CONFIG_USER_ONLY? It should be safe enough documenting the different behaviors in include/exec/gdbstub.h. > } > > /* Disable gdb stub for child processes. */ >
Re: [Qemu-devel] [PATCH v1 4/8] docker: update Travis docker image
On 05/31/2018 05:14 PM, Alex Bennée wrote: > > Philippe Mathieu-Daudé writes: > >> Hi Alex, >> >> On 05/30/2018 08:06 AM, Alex Bennée wrote: >>> This is still poorly documented by Travis but according to: >>> >>> >>> https://docs.travis-ci.com/user/common-build-problems/#Running-a-Container-Based-Docker-Image-Locally >>> >>> their reference images are now hosted on Docker Hub. So we update the >>> FROM line to refer to the new default image. We also need a few >>> additional tweaks: >>> >>> - re-enable deb-src lines for our build-dep install >>> - add explicit PATH definition for tools >> >> I don't understand how this is related to QEMU testing, isn't it rather >> some Travis-ci bug? We don't need to use PhantomJS / Neo4j / Maven. > > It's oddly constructed I'll grant you but I just set the path to what a > running image has. The normal image is started up with a full systemd > init whereas we drop directly into the shell. OK, can you add a comment about it? (so we don't remove what seems unrelated). > >> >>> - force the build USER to be Travis >>> - add clang to FEATURES for our test-clang machinery >>> >>> Signed-off-by: Alex Bennée >>> --- >>> tests/docker/dockerfiles/travis.docker | 7 +-- >>> 1 file changed, 5 insertions(+), 2 deletions(-) >>> >>> diff --git a/tests/docker/dockerfiles/travis.docker >>> b/tests/docker/dockerfiles/travis.docker >>> index 605b6e429b..6e90f033d5 100644 >>> --- a/tests/docker/dockerfiles/travis.docker >>> +++ b/tests/docker/dockerfiles/travis.docker >>> @@ -1,8 +1,11 @@ >>> -FROM quay.io/travisci/travis-ruby >>> +FROM travisci/ci-garnet:packer-1512502276-986baf0 >>> ENV DEBIAN_FRONTEND noninteractive >>> ENV LANG en_US.UTF-8 >>> ENV LC_ALL en_US.UTF-8 >>> +RUN cat /etc/apt/sources.list | sed "s/# deb-src/deb-src/" >> >>> /etc/apt/sources.list >>> RUN apt-get update >>> RUN apt-get -y build-dep qemu >>> RUN apt-get -y install device-tree-compiler python2.7 python-yaml >>> dh-autoreconf gdb strace lsof net-tools >>> -ENV FEATURES pyyaml # Travis tools require PhantomJS / Neo4j / Maven accessible # in their PATH (QEMU build won't access them). Reviewed-by: Philippe Mathieu-Daudé >>> +ENV PATH >>> /usr/local/phantomjs/bin:/usr/local/phantomjs:/usr/local/neo4j-3.2.7/bin:/usr/local/maven-3.5.2/bin:/usr/local/cmake-3.9.2/bin:/usr/local/clang-5.0.0/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin >>> +ENV FEATURES clang pyyaml >>> +USER travis >>> > > > -- > Alex Bennée >
Re: [Qemu-devel] [PATCH 11/13] 9p: darwin: Mark mknod as unsupported
On Thu, May 31, 2018 at 6:56 PM, Keno Fischer wrote: >>> My concern was that allowing this would cause unexpected >>> behavior, since the device numbers will differ between OS X >>> and Linux. Though maybe this isn't the place to worry about >>> that. >> >> The numbers may differ indeed but we don't really care since the >> server never opens device files. This is just a directory entry. > > Ok, let me try to implement it. However, I don't think it is possible > to implement mknodat (or at least I can't think of how) on Darwin > directly. I could use regular mknod, but presumably, this is used > to avoid a race condition between creating the device and setting > the permissions. Can you think of a good way to resolve that? Would it work to fchdir in to the directory and use a cwd-relative mknod, then fchdir back? Or do we need to maintain the cwd while in this code?
Re: [Qemu-devel] [PATCH v5 0/6] linux-user: Reorg interp_prefix handling
Hi, This series seems to have some coding style problems. See output below for more information: Type: series Message-id: 20180531224911.23725-1-richard.hender...@linaro.org Subject: [Qemu-devel] [PATCH v5 0/6] linux-user: Reorg interp_prefix handling === TEST SCRIPT BEGIN === #!/bin/bash BASE=base n=1 total=$(git log --oneline $BASE.. | wc -l) failed=0 git config --local diff.renamelimit 0 git config --local diff.renames True git config --local diff.algorithm histogram commits="$(git log --format=%H --reverse $BASE..)" for c in $commits; do echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..." if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then failed=1 echo fi n=$((n+1)) done exit $failed === TEST SCRIPT END === Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 From https://github.com/patchew-project/qemu * [new tag] patchew/20180531224911.23725-1-richard.hender...@linaro.org -> patchew/20180531224911.23725-1-richard.hender...@linaro.org Switched to a new branch 'test' 2755fa1986 linux-user: Use *at functions to implement interp_prefix 8af5f75489 linux-user: Check is_hostfd in mmap syscalls 83374c532a linux-user: Check contains_hostfd in select syscalls 41b80ec144 linux-user: Check is_hostfd in do_syscall fb3aa3a0d8 linux-user: Add host_fds and manipulators 11567891c5 gdbstub: Return the fd from gdbserver_start === OUTPUT BEGIN === Checking PATCH 1/6: gdbstub: Return the fd from gdbserver_start... Checking PATCH 2/6: linux-user: Add host_fds and manipulators... Checking PATCH 3/6: linux-user: Check is_hostfd in do_syscall... Checking PATCH 4/6: linux-user: Check contains_hostfd in select syscalls... Checking PATCH 5/6: linux-user: Check is_hostfd in mmap syscalls... Checking PATCH 6/6: linux-user: Use *at functions to implement interp_prefix... ERROR: do not use assignment in if condition #200: FILE: linux-user/syscall.c:8541: +if (!(fn = lock_user_string(arg2))) { ERROR: do not use assignment in if condition #218: FILE: linux-user/syscall.c:8562: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #236: FILE: linux-user/syscall.c:8577: +if (!(fn = lock_user_string(arg2))) { ERROR: do not use assignment in if condition #254: FILE: linux-user/syscall.c:8735: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #333: FILE: linux-user/syscall.c:9769: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #349: FILE: linux-user/syscall.c:9809: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #365: FILE: linux-user/syscall.c:10099: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #380: FILE: linux-user/syscall.c:10111: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #398: FILE: linux-user/syscall.c:11303: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #415: FILE: linux-user/syscall.c:11317: +if (!(fn = lock_user_string(arg1))) { ERROR: do not use assignment in if condition #433: FILE: linux-user/syscall.c:11349: +if (!(fn = lock_user_string(arg2))) { ERROR: do not use assignment in if condition #452: FILE: linux-user/syscall.c:12394: +if (!(fn = lock_user_string(arg2))) { ERROR: do not use assignment in if condition #472: FILE: linux-user/syscall.c:12433: +if (!(fn = lock_user_string(arg2))) { total: 13 errors, 0 warnings, 423 lines checked Your patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. === OUTPUT END === Test command exited with code: 1 --- Email generated automatically by Patchew [http://patchew.org/]. Please send your feedback to patchew-de...@redhat.com
Re: [Qemu-devel] [PATCHv2] slirp: Fix spurious error report when sending directly
On 05/31/2018 05:31 PM, Samuel Thibault wrote: > Move check to where it actually is useful, and reduce scope of 'len' > variable along the way. > > Signed-off-by: Samuel Thibault Thanks! Reviewed-by: Philippe Mathieu-Daudé > --- > > Difference from v1: > - move check instead of initializing len. > > slirp/socket.c | 14 +++--- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/slirp/socket.c b/slirp/socket.c > index e2a71c9b04..08fe98907d 100644 > --- a/slirp/socket.c > +++ b/slirp/socket.c > @@ -340,7 +340,7 @@ sosendoob(struct socket *so) > struct sbuf *sb = &so->so_rcv; > char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ > > - int n, len; > + int n; > > DEBUG_CALL("sosendoob"); > DEBUG_ARG("so = %p", so); > @@ -359,7 +359,7 @@ sosendoob(struct socket *so) >* send it all >*/ > uint32_t urgc = so->so_urgc; > - len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; > + int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; > if (len > urgc) { > len = urgc; > } > @@ -374,13 +374,13 @@ sosendoob(struct socket *so) > len += n; > } > n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ > - } > - > #ifdef DEBUG > - if (n != len) { > - DEBUG_ERROR((dfd, "Didn't send all data urgently X\n")); > - } > + if (n != len) { > + DEBUG_ERROR((dfd, "Didn't send all data urgently > X\n")); > + } > #endif > + } > + > if (n < 0) { > return n; > } >
Re: [Qemu-devel] [PATCH 11/13] 9p: darwin: Mark mknod as unsupported
>> My concern was that allowing this would cause unexpected >> behavior, since the device numbers will differ between OS X >> and Linux. Though maybe this isn't the place to worry about >> that. > > The numbers may differ indeed but we don't really care since the > server never opens device files. This is just a directory entry. Ok, let me try to implement it. However, I don't think it is possible to implement mknodat (or at least I can't think of how) on Darwin directly. I could use regular mknod, but presumably, this is used to avoid a race condition between creating the device and setting the permissions. Can you think of a good way to resolve that?
[Qemu-devel] [PATCH 3/6] linux-user: Check is_hostfd in do_syscall
This is the vast majority of all fd inputs, but not all. Signed-off-by: Richard Henderson --- linux-user/syscall.c | 303 +++ 1 file changed, 280 insertions(+), 23 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index d02c16bbc6..5339f0bc1c 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8034,6 +8034,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, if (arg3 == 0) ret = 0; else { +if (is_hostfd(arg1)) { +goto ebadf; +} if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0))) goto efault; ret = get_errno(safe_read(arg1, p, arg3)); @@ -8045,6 +8048,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; case TARGET_NR_write: +if (is_hostfd(arg1)) { +goto ebadf; +} if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1))) goto efault; if (fd_trans_target_to_host_data(arg1)) { @@ -8072,6 +8078,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #endif case TARGET_NR_openat: +if (is_hostfd(arg1)) { +goto ebadf; +} if (!(p = lock_user_string(arg2))) goto efault; ret = get_errno(do_openat(cpu_env, arg1, p, @@ -8082,16 +8091,25 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) case TARGET_NR_name_to_handle_at: +if (is_hostfd(arg1)) { +goto ebadf; +} ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5); break; #endif #if defined(TARGET_NR_open_by_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) case TARGET_NR_open_by_handle_at: +if (is_hostfd(arg1)) { +goto ebadf; +} ret = do_open_by_handle_at(arg1, arg2, arg3); fd_trans_unregister(ret); break; #endif case TARGET_NR_close: +if (is_hostfd(arg1)) { +goto ebadf; +} fd_trans_unregister(arg1); ret = get_errno(close(arg1)); break; @@ -8155,7 +8173,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(TARGET_NR_linkat) case TARGET_NR_linkat: -{ +if (is_hostfd(arg1)) { +goto ebadf; +} else { void * p2 = NULL; if (!arg2 || !arg4) goto efault; @@ -8180,6 +8200,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(TARGET_NR_unlinkat) case TARGET_NR_unlinkat: +if (is_hostfd(arg1)) { +goto ebadf; +} if (!(p = lock_user_string(arg2))) goto efault; ret = get_errno(unlinkat(arg1, p, arg3)); @@ -8311,6 +8334,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(TARGET_NR_mknodat) case TARGET_NR_mknodat: +if (is_hostfd(arg1)) { +goto ebadf; +} if (!(p = lock_user_string(arg2))) goto efault; ret = get_errno(mknodat(arg1, p, arg3, arg4)); @@ -8334,6 +8360,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, goto unimplemented; #endif case TARGET_NR_lseek: +if (is_hostfd(arg1)) { +goto ebadf; +} ret = get_errno(lseek(arg1, arg2, arg3)); break; #if defined(TARGET_NR_getxpid) && defined(TARGET_ALPHA) @@ -8484,7 +8513,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(TARGET_NR_futimesat) case TARGET_NR_futimesat: -{ +if (is_hostfd(arg1)) { +goto ebadf; +} else { struct timeval *tvp, tv[2]; if (arg3) { if (copy_from_user_timeval(&tv[0], arg3) @@ -8520,6 +8551,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(TARGET_NR_faccessat) && defined(__NR_faccessat) case TARGET_NR_faccessat: +if (is_hostfd(arg1)) { +goto ebadf; +} if (!(p = lock_user_string(arg2))) goto efault; ret = get_errno(faccessat(arg1, p, arg3, 0)); @@ -8564,7 +8598,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(TARGET_NR_renameat) case TARGET_NR_renameat: -{ +if (is_hostfd(arg1)) { +goto ebadf; +} else { void *p2; p = lock_user_string(arg2); p2 = lock_user_string(arg4); @@ -8579,7 +8615,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(TARGET_NR_renameat2) case TARGET_NR_renameat2: -{ +if (is_hostfd(arg1)) { +goto ebadf; +} else { void *p2; p = lock_user_string(arg2);
Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
Hi, This series failed docker-mingw@fedora build test. Please find the testing commands and their output below. If you have Docker installed, you can probably reproduce it locally. Type: series Message-id: 20180531222835.16558-1-js...@redhat.com Subject: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements === TEST SCRIPT BEGIN === #!/bin/bash set -e git submodule update --init dtc # Let docker tests dump environment info export SHOW_ENV=1 export J=8 time make docker-test-mingw@fedora === TEST SCRIPT END === Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 Switched to a new branch 'test' 9932482af2 ahci: make ahci_mem_write traces more descriptive c89f4076bd ahci: delete old host register address definitions 70f11b4d95 ahci: adjust ahci_mem_write to work on registers c13dce7973 ahci: fix spacing damage on ahci_mem_write fab5307968 ahci: make mem_read_32 traces more descriptive ace1b4ffac ahci: modify ahci_mem_read_32 to work on register numbers 3021a82d31 ahci: fix host register max address 3c4808a257 ahci: add host register enumeration f57758869b ahci: delete old port register address definitions 52d04d9b2c ahci: make port write traces more descriptive 08f435fad3 ahci: modify ahci_port_write to use register numbers 634c79d46b ahci: combine identical clauses in port write 50b162ae60 ahci: fix spacing damage on ahci_port_write 5895ec8f28 ahci: make port read traces more descriptive 68fd051a9a ahci: modify ahci_port_read to use register numbers 0478088585 ahci: add port register enumeration === OUTPUT BEGIN === Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc' Cloning into '/var/tmp/patchew-tester-tmp-v_cvirkw/src/dtc'... Submodule path 'dtc': checked out 'e54388015af1fb4bf04d0bca99caba1074d9cc42' BUILD fedora make[1]: Entering directory '/var/tmp/patchew-tester-tmp-v_cvirkw/src' GEN /var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar Cloning into '/var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar.vroot'... done. Checking out files: 49% (3050/6180) Checking out files: 50% (3090/6180) Checking out files: 51% (3152/6180) Checking out files: 52% (3214/6180) Checking out files: 53% (3276/6180) Checking out files: 54% (3338/6180) Checking out files: 55% (3399/6180) Checking out files: 56% (3461/6180) Checking out files: 57% (3523/6180) Checking out files: 58% (3585/6180) Checking out files: 59% (3647/6180) Checking out files: 60% (3708/6180) Checking out files: 61% (3770/6180) Checking out files: 62% (3832/6180) Checking out files: 63% (3894/6180) Checking out files: 64% (3956/6180) Checking out files: 65% (4017/6180) Checking out files: 66% (4079/6180) Checking out files: 67% (4141/6180) Checking out files: 68% (4203/6180) Checking out files: 69% (4265/6180) Checking out files: 70% (4326/6180) Checking out files: 71% (4388/6180) Checking out files: 72% (4450/6180) Checking out files: 73% (4512/6180) Checking out files: 74% (4574/6180) Checking out files: 75% (4635/6180) Checking out files: 76% (4697/6180) Checking out files: 77% (4759/6180) Checking out files: 78% (4821/6180) Checking out files: 79% (4883/6180) Checking out files: 80% (4944/6180) Checking out files: 81% (5006/6180) Checking out files: 82% (5068/6180) Checking out files: 83% (5130/6180) Checking out files: 84% (5192/6180) Checking out files: 85% (5253/6180) Checking out files: 86% (5315/6180) Checking out files: 87% (5377/6180) Checking out files: 88% (5439/6180) Checking out files: 89% (5501/6180) Checking out files: 90% (5562/6180) Checking out files: 91% (5624/6180) Checking out files: 92% (5686/6180) Checking out files: 93% (5748/6180) Checking out files: 94% (5810/6180) Checking out files: 95% (5871/6180) Checking out files: 96% (5933/6180) Checking out files: 97% (5995/6180) Checking out files: 98% (6057/6180) Checking out files: 98% (6105/6180) Checking out files: 99% (6119/6180) Checking out files: 100% (6180/6180) Checking out files: 100% (6180/6180), done. Your branch is up-to-date with 'origin/test'. Submodule 'dtc' (git://git.qemu-project.org/dtc.git) registered for path 'dtc' Cloning into '/var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar.vroot/dtc'... Submodule path 'dtc': checked out 'e54388015af1fb4bf04d0bca99caba1074d9cc42' Submodule 'ui/keycodemapdb' (git://git.qemu.org/keycodemapdb.git) registered for path 'ui/keycodemapdb' Cloning into '/var/tmp/patchew-tester-tmp-v_cvirkw/src/docker-src.2018-05-31-18.47.55.18742/qemu.tar.vroot/ui/keycodemapdb'... Submodule path 'ui/keycodemapdb': checked out '6b3d716e2b6472eb7189d3220552280ef3d832ce' COPYRUNNER RUN test-mingw in qemu:fedora Packages installed: PyYAML-3.12-5.fc27.x86_64 SDL2-devel-2.0.7-2.fc27.x86
[Qemu-devel] [PATCH 6/6] linux-user: Use *at functions to implement interp_prefix
If the interp_prefix is a complete chroot, it may have a *lot* of files. Setting up the cache for this is quite expensive. For the most part, we can use the *at versions of various syscalls to attempt the operation in the prefix. For the few cases that remain, attempt the operation in the prefix via concatenation and then retry if that fails. Signed-off-by: Richard Henderson --- linux-user/qemu.h| 37 ++ linux-user/elfload.c | 5 +- linux-user/main.c| 26 ++- linux-user/syscall.c | 163 +-- 4 files changed, 174 insertions(+), 57 deletions(-) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 33dafbe0e4..05a82a3628 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -471,7 +471,44 @@ void mmap_fork_start(void); void mmap_fork_end(int child); /* main.c */ +extern int interp_dirfd; extern unsigned long guest_stack_size; +char *interp_prefix_path(const char *path); + +/* If PATH is absolute, attempt an operation first within interp_dirfd, + * using OPENAT_EXPR. If that fails with ENOENT, or if PATH is not + * absolute, only use NORMAL_EXPR. + */ +#define TRY_INTERP_FD(RET, PATH, OPENAT_EXPR, NORMAL_EXPR) \ +do {\ +if (interp_dirfd >= 0 && (PATH)[0] == '/') {\ +RET = OPENAT_EXPR; \ +if (RET != -1 || errno != ENOENT) { \ +break; \ +} \ +} \ +RET = NORMAL_EXPR; \ +} while (0) + +/* If PATH is absolute, attempt an operation first with interp_prefix + * prefixed. If that fails with ENOENT, or if PATH is not absolute, + * only attempt with PATH. + */ +#define TRY_INTERP_PATH(RET, PATH, EXPR)\ +do {\ +char *new_##PATH = interp_prefix_path(PATH);\ +if (new_##PATH) { \ +__typeof(PATH) save_##PATH = PATH; \ +PATH = new_##PATH; \ +RET = EXPR; \ +free(new_##PATH); \ +PATH = save_##PATH; \ +if (RET != -1 || errno != ENOENT) { \ +break; \ +} \ +} \ +RET = EXPR; \ +} while (0) /* user access */ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 13bc78d0c8..abdf5bbf01 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -6,7 +6,6 @@ #include "qemu.h" #include "disas/disas.h" -#include "qemu/path.h" #ifdef _ARCH_PPC64 #undef ARCH_DLINFO @@ -2375,7 +2374,9 @@ static void load_elf_interp(const char *filename, struct image_info *info, { int fd, retval; -fd = open(path(filename), O_RDONLY); +TRY_INTERP_FD(fd, filename, + openat(interp_dirfd, filename + 1, O_RDONLY), + open(filename, O_RDONLY)); if (fd < 0) { goto exit_perror; } diff --git a/linux-user/main.c b/linux-user/main.c index ee3f323c08..4e956fc00c 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -23,7 +23,6 @@ #include "qapi/error.h" #include "qemu.h" -#include "qemu/path.h" #include "qemu/config-file.h" #include "qemu/cutils.h" #include "qemu/help_option.h" @@ -89,9 +88,27 @@ unsigned long reserved_va; static void usage(int exitcode); +int interp_dirfd; static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; const char *qemu_uname_release; +char *interp_prefix_path(const char *path) +{ +size_t i_len, p_len; +char *ret; + +if (interp_prefix == NULL || path[0] != '/') { +return NULL; +} +i_len = strlen(interp_prefix); +p_len = strlen(path) + 1; +ret = g_malloc(i_len + p_len); + +memcpy(ret, interp_prefix, i_len); +memcpy(ret + i_len, path, p_len); +return ret; +} + /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so we allocate a bigger stack. Need a better solution, for example by remapping the process stack directly at the right place */ @@ -671,7 +688,12 @@ int main(int argc, char **argv, char **envp) memset(&bprm, 0, sizeof (bprm)); /* Scan interp_prefix dir for replacement files. */ -init_paths(interp_prefix); +interp_dirfd = open(interp_prefix, O_CLOEXEC | O_DIRECTORY | O_PATH); +if (interp_dirfd >= 0) { +add_hostfd(interp_dirfd); +} else { +interp_prefix = NULL; +} init_qemu_uname_release(); diff --git a/linu
[Qemu-devel] [PATCH 4/6] linux-user: Check contains_hostfd in select syscalls
Signed-off-by: Richard Henderson --- linux-user/syscall.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 5339f0bc1c..b98125829b 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -1506,6 +1506,11 @@ static abi_long do_select(int n, if (ret) { return ret; } +if (contains_hostfd(&rfds) || +contains_hostfd(&wfds) || +contains_hostfd(&efds)) { +return -TARGET_EBADF; +} if (target_tv_addr) { if (copy_from_user_timeval(&tv, target_tv_addr)) @@ -9392,6 +9397,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, if (ret) { goto fail; } +if (contains_hostfd(&rfds) || +contains_hostfd(&wfds) || +contains_hostfd(&efds)) { +goto ebadf; +} /* * This takes a timespec, and not a timeval, so we cannot -- 2.17.0
[Qemu-devel] [PATCH 2/6] linux-user: Add host_fds and manipulators
This allows emulation of guest syscalls to reject manipulations to fds used by the host. Signed-off-by: Richard Henderson --- linux-user/qemu.h | 30 ++ linux-user/main.c | 27 ++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index c55c8e294b..33dafbe0e4 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -155,6 +155,36 @@ void task_settid(TaskState *); void stop_all_tasks(void); extern const char *qemu_uname_release; extern unsigned long mmap_min_addr; +extern fd_set host_fds; + +/** + * is_hostfd: + * @fd: file descriptor to check + * + * Return true if @fd is being used by the host and therefore any + * guest system call referencing @fd should return EBADF. + */ +static inline bool is_hostfd(int fd) +{ +return fd >= 0 && fd < FD_SETSIZE && FD_ISSET(fd, &host_fds); +} + +/** + * contains_hostfd: + * @fds: fd_set of descriptors to check + * + * Return true if any descriptor in @fds are being used by the host + * and therefore the guest system call should return EBADF. + */ +bool contains_hostfd(const fd_set *fds); + +/** + * add_hostfd: + * @fd: file descriptor to reserve + * + * Add @fd to the set of file descriptors to reserve for the host. + */ +void add_hostfd(int fd); /* ??? See if we can avoid exposing so much of the loader internals. */ diff --git a/linux-user/main.c b/linux-user/main.c index 78d6d3e7eb..ee3f323c08 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -49,6 +49,7 @@ static const char *cpu_type; unsigned long mmap_min_addr; unsigned long guest_base; int have_guest_base; +fd_set host_fds; /* * When running 32-on-64 we should make sure we can fit all of the possible @@ -112,6 +113,23 @@ int cpu_get_pic_interrupt(CPUX86State *env) } #endif +bool contains_hostfd(const fd_set *fds) +{ +int i; +for (i = 0; i < ARRAY_SIZE(__FDS_BITS(fds)); ++i) { +if (__FDS_BITS(fds)[i] & __FDS_BITS(&host_fds)[i]) { +return true; +} +} +return true; +} + +void add_hostfd(int fd) +{ +g_assert(fd >= 0 && fd < FD_SETSIZE); +FD_SET(fd, &host_fds); +} + /***/ /* Helper routines for implementing atomic operations. */ @@ -805,12 +823,19 @@ int main(int argc, char **argv, char **envp) target_cpu_copy_regs(env, regs); +/* Prevent the guest from closing the log file. */ +if (qemu_logfile && qemu_logfile != stderr) { +add_hostfd(fileno(qemu_logfile)); +} + if (gdbstub_port) { -if (gdbserver_start(gdbstub_port) < 0) { +int fd = gdbserver_start(gdbstub_port); +if (fd < 0) { fprintf(stderr, "qemu: could not open gdbserver on port %d\n", gdbstub_port); exit(EXIT_FAILURE); } +add_hostfd(fd); gdb_handlesig(cpu, 0); } cpu_loop(env); -- 2.17.0
[Qemu-devel] [PATCH v5 0/6] linux-user: Reorg interp_prefix handling
Changes since v4 (2018-01-28) * Protect host fds from guest manipulation. * Rename the interp_dirfd macro to TRY_INTERP_FD * Add TRY_INTERP_PATH for acct, statfs, inotify_add_watch Changes since v3 (2017-12-29) * Use DO/WHILE as the control construct; wrap it in a macro. * Introduce linux_user_path to handle the cases *at syscalls do not cover. Changes since v2 (2017-12-04) * Use IF as the control construct instead of SWITCH. Changes since v1 (2016-11-??) * Require interp_dirfd set before trying the *at path. r~ Richard Henderson (6): gdbstub: Return the fd from gdbserver_start linux-user: Add host_fds and manipulators linux-user: Check is_hostfd in do_syscall linux-user: Check contains_hostfd in select syscalls linux-user: Check is_hostfd in mmap syscalls linux-user: Use *at functions to implement interp_prefix linux-user/qemu.h| 67 ++ gdbstub.c| 5 +- linux-user/elfload.c | 5 +- linux-user/main.c| 53 - linux-user/syscall.c | 485 --- 5 files changed, 532 insertions(+), 83 deletions(-) -- 2.17.0
[Qemu-devel] [PATCH 5/6] linux-user: Check is_hostfd in mmap syscalls
Signed-off-by: Richard Henderson --- linux-user/syscall.c | 9 + 1 file changed, 9 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b98125829b..d7513d5dac 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -9605,11 +9605,17 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, v5 = tswapal(v[4]); v6 = tswapal(v[5]); unlock_user(v, arg1, 0); +if (is_hostfd(v5)) { +goto ebadf; +} ret = get_errno(target_mmap(v1, v2, v3, target_to_host_bitmask(v4, mmap_flags_tbl), v5, v6)); } #else +if (is_hostfd(arg5)) { +goto ebadf; +} ret = get_errno(target_mmap(arg1, arg2, arg3, target_to_host_bitmask(arg4, mmap_flags_tbl), arg5, @@ -9622,6 +9628,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifndef MMAP_SHIFT #define MMAP_SHIFT 12 #endif +if (is_hostfd(arg5)) { +goto ebadf; +} ret = get_errno(target_mmap(arg1, arg2, arg3, target_to_host_bitmask(arg4, mmap_flags_tbl), arg5, -- 2.17.0
[Qemu-devel] [PATCH 1/6] gdbstub: Return the fd from gdbserver_start
This will allow us to protect gdbserver_fd from the guest. Signed-off-by: Richard Henderson --- gdbstub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gdbstub.c b/gdbstub.c index 6081e719c5..057d0d65c5 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -1890,15 +1890,16 @@ static int gdbserver_open(int port) int gdbserver_start(int port) { gdbserver_fd = gdbserver_open(port); -if (gdbserver_fd < 0) +if (gdbserver_fd < 0) { return -1; +} /* accept connections */ if (!gdb_accept()) { close(gdbserver_fd); gdbserver_fd = -1; return -1; } -return 0; +return gdbserver_fd; } /* Disable gdb stub for child processes. */ -- 2.17.0
Re: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
Hi, This series seems to have some coding style problems. See output below for more information: Type: series Message-id: 20180531222835.16558-1-js...@redhat.com Subject: [Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements === TEST SCRIPT BEGIN === #!/bin/bash BASE=base n=1 total=$(git log --oneline $BASE.. | wc -l) failed=0 git config --local diff.renamelimit 0 git config --local diff.renames True git config --local diff.algorithm histogram commits="$(git log --format=%H --reverse $BASE..)" for c in $commits; do echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..." if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then failed=1 echo fi n=$((n+1)) done exit $failed === TEST SCRIPT END === Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 From https://github.com/patchew-project/qemu t [tag update]patchew/20180531212435.165261-1-ebl...@redhat.com -> patchew/20180531212435.165261-1-ebl...@redhat.com * [new tag] patchew/20180531222835.16558-1-js...@redhat.com -> patchew/20180531222835.16558-1-js...@redhat.com Switched to a new branch 'test' 9932482af2 ahci: make ahci_mem_write traces more descriptive c89f4076bd ahci: delete old host register address definitions 70f11b4d95 ahci: adjust ahci_mem_write to work on registers c13dce7973 ahci: fix spacing damage on ahci_mem_write fab5307968 ahci: make mem_read_32 traces more descriptive ace1b4ffac ahci: modify ahci_mem_read_32 to work on register numbers 3021a82d31 ahci: fix host register max address 3c4808a257 ahci: add host register enumeration f57758869b ahci: delete old port register address definitions 52d04d9b2c ahci: make port write traces more descriptive 08f435fad3 ahci: modify ahci_port_write to use register numbers 634c79d46b ahci: combine identical clauses in port write 50b162ae60 ahci: fix spacing damage on ahci_port_write 5895ec8f28 ahci: make port read traces more descriptive 68fd051a9a ahci: modify ahci_port_read to use register numbers 0478088585 ahci: add port register enumeration === OUTPUT BEGIN === Checking PATCH 1/16: ahci: add port register enumeration... WARNING: line over 80 characters #71: FILE: hw/ide/ahci_internal.h:94: +AHCI_PORT_REG_SCR_NOTIF = 15, /* PxSNTF: SATA phy register: SNotification */ total: 0 errors, 1 warnings, 72 lines checked Your patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. Checking PATCH 2/16: ahci: modify ahci_port_read to use register numbers... Checking PATCH 3/16: ahci: make port read traces more descriptive... Checking PATCH 4/16: ahci: fix spacing damage on ahci_port_write... ERROR: spaces required around that '|' (ctx:VxV) #83: FILE: hw/ide/ahci.c:316: +(val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK)); ^ total: 1 errors, 0 warnings, 156 lines checked Your patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. Checking PATCH 5/16: ahci: combine identical clauses in port write... Checking PATCH 6/16: ahci: modify ahci_port_write to use register numbers... Checking PATCH 7/16: ahci: make port write traces more descriptive... Checking PATCH 8/16: ahci: delete old port register address definitions... Checking PATCH 9/16: ahci: add host register enumeration... Checking PATCH 10/16: ahci: fix host register max address... Checking PATCH 11/16: ahci: modify ahci_mem_read_32 to work on register numbers... Checking PATCH 12/16: ahci: make mem_read_32 traces more descriptive... Checking PATCH 13/16: ahci: fix spacing damage on ahci_mem_write... Checking PATCH 14/16: ahci: adjust ahci_mem_write to work on registers... Checking PATCH 15/16: ahci: delete old host register address definitions... Checking PATCH 16/16: ahci: make ahci_mem_write traces more descriptive... WARNING: line over 80 characters #18: FILE: hw/ide/ahci.c:497: +qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register:" total: 0 errors, 1 warnings, 35 lines checked Your patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. === OUTPUT END === Test command exited with code: 1 --- Email generated automatically by Patchew [http://patchew.org/]. Please send your feedback to patchew-de...@redhat.com
[Qemu-devel] [PATCH v2 16/16] ahci: make ahci_mem_write traces more descriptive
Signed-off-by: John Snow --- hw/ide/ahci.c | 13 - hw/ide/trace-events | 4 +++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 1309f80458..6cab853741 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -494,13 +494,24 @@ static void ahci_mem_write(void *opaque, hwaddr addr, /* FIXME report write? */ break; default: -trace_ahci_mem_write_unknown(s, size, addr, val); +qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register:" + " AHCI host register %s, offset 0x%lx: 0x%"PRIu64, + AHCIHostReg_lookup[regnum], addr, val); +trace_ahci_mem_write_host_unimpl(s, size, + AHCIHostReg_lookup[regnum], addr); } +trace_ahci_mem_write_host(s, size, AHCIHostReg_lookup[regnum], + addr, val); } else if ((addr >= AHCI_PORT_REGS_START_ADDR) && (addr < (AHCI_PORT_REGS_START_ADDR + (s->ports * AHCI_PORT_ADDR_OFFSET_LEN { ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7, addr & AHCI_PORT_ADDR_OFFSET_MASK, val); +} else { +qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register: " + "AHCI global register at offset 0x%lx: 0x%"PRIu64, + addr, val); +trace_ahci_mem_write_unimpl(s, size, addr, val); } } diff --git a/hw/ide/trace-events b/hw/ide/trace-events index 8149a54db8..e6bd95f52f 100644 --- a/hw/ide/trace-events +++ b/hw/ide/trace-events @@ -77,7 +77,9 @@ ahci_mem_read_32_host(void *s, const char *reg, uint64_t addr, uint32_t val) "ah ahci_mem_read_32_host_default(void *s, const char *reg, uint64_t addr) "ahci(%p): unimplemented mem read [reg:%s] @ 0x%"PRIx64 ahci_mem_read(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): read%u @ 0x%"PRIx64": 0x%016"PRIx64 ahci_mem_write(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): write%u @ 0x%"PRIx64": 0x%016"PRIx64 -ahci_mem_write_unknown(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): write%u to unknown register 0x%"PRIx64": 0x%016"PRIx64 +ahci_mem_write_host_unimpl(void *s, unsigned size, const char *reg, uint64_t addr) "ahci(%p) unimplemented write%u [reg:%s] @ 0x%"PRIx64 +ahci_mem_write_host(void *s, unsigned size, const char *reg, uint64_t addr, uint64_t val) "ahci(%p) write%u [reg:%s] @ 0x%"PRIx64": 0x%016"PRIx64 +ahci_mem_write_unimpl(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): write%u to unknown register 0x%"PRIx64": 0x%016"PRIx64 ahci_set_signature(void *s, int port, uint8_t nsector, uint8_t sector, uint8_t lcyl, uint8_t hcyl, uint32_t sig) "ahci(%p)[%d]: set signature sector:0x%02x nsector:0x%02x lcyl:0x%02x hcyl:0x%02x (cumulatively: 0x%08x)" ahci_reset_port(void *s, int port) "ahci(%p)[%d]: reset port" ahci_unmap_fis_address_null(void *s, int port) "ahci(%p)[%d]: Attempt to unmap NULL FIS address" -- 2.14.3
[Qemu-devel] [PATCH v2 12/16] ahci: make mem_read_32 traces more descriptive
Signed-off-by: John Snow --- hw/ide/ahci.c | 7 +-- hw/ide/trace-events | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 7edbb30f60..5e902c6615 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -47,7 +47,6 @@ static bool ahci_map_fis_address(AHCIDevice *ad); static void ahci_unmap_clb_address(AHCIDevice *ad); static void ahci_unmap_fis_address(AHCIDevice *ad); -__attribute__((__unused__)) /* TODO */ static const char *AHCIHostReg_lookup[AHCI_HOST_REG__COUNT] = { [AHCI_HOST_REG_CAP]= "CAP", [AHCI_HOST_REG_CTL]= "GHC", @@ -406,13 +405,17 @@ static uint64_t ahci_mem_read_32(void *opaque, hwaddr addr) val = s->control_regs.version; break; default: -break; +trace_ahci_mem_read_32_host_default(s, AHCIHostReg_lookup[regnum], +addr); } +trace_ahci_mem_read_32_host(s, AHCIHostReg_lookup[regnum], addr, val); } else if ((addr >= AHCI_PORT_REGS_START_ADDR) && (addr < (AHCI_PORT_REGS_START_ADDR + (s->ports * AHCI_PORT_ADDR_OFFSET_LEN { val = ahci_port_read(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7, addr & AHCI_PORT_ADDR_OFFSET_MASK); +} else { +trace_ahci_mem_read_32_default(s, addr, val); } trace_ahci_mem_read_32(s, addr, val); diff --git a/hw/ide/trace-events b/hw/ide/trace-events index 1efbbb8114..8149a54db8 100644 --- a/hw/ide/trace-events +++ b/hw/ide/trace-events @@ -72,6 +72,9 @@ ahci_trigger_irq(void *s, int port, const char *name, uint32_t val, uint32_t old ahci_port_write(void *s, int port, const char *reg, int offset, uint32_t val) "ahci(%p)[%d]: port write [reg:%s] @ 0x%x: 0x%08x" ahci_port_write_unimpl(void *s, int port, const char *reg, int offset, uint32_t val) "ahci(%p)[%d]: unimplemented port write [reg:%s] @ 0x%x: 0x%08x" ahci_mem_read_32(void *s, uint64_t addr, uint32_t val) "ahci(%p): mem read @ 0x%"PRIx64": 0x%08x" +ahci_mem_read_32_default(void *s, uint64_t addr, uint32_t val) "ahci(%p): mem read @ 0x%"PRIx64": 0x%08x" +ahci_mem_read_32_host(void *s, const char *reg, uint64_t addr, uint32_t val) "ahci(%p): mem read [reg:%s] @ 0x%"PRIx64": 0x%08x" +ahci_mem_read_32_host_default(void *s, const char *reg, uint64_t addr) "ahci(%p): unimplemented mem read [reg:%s] @ 0x%"PRIx64 ahci_mem_read(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): read%u @ 0x%"PRIx64": 0x%016"PRIx64 ahci_mem_write(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): write%u @ 0x%"PRIx64": 0x%016"PRIx64 ahci_mem_write_unknown(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): write%u to unknown register 0x%"PRIx64": 0x%016"PRIx64 -- 2.14.3
[Qemu-devel] [PATCH v2 11/16] ahci: modify ahci_mem_read_32 to work on register numbers
Signed-off-by: John Snow --- hw/ide/ahci.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index f2e8bce797..7edbb30f60 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -386,22 +386,27 @@ static uint64_t ahci_mem_read_32(void *opaque, hwaddr addr) uint32_t val = 0; if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) { -switch (addr) { -case HOST_CAP: +enum AHCIHostReg regnum = addr / 4; +assert(regnum < AHCI_HOST_REG__COUNT); + +switch (regnum) { +case AHCI_HOST_REG_CAP: val = s->control_regs.cap; break; -case HOST_CTL: +case AHCI_HOST_REG_CTL: val = s->control_regs.ghc; break; -case HOST_IRQ_STAT: +case AHCI_HOST_REG_IRQ_STAT: val = s->control_regs.irqstatus; break; -case HOST_PORTS_IMPL: +case AHCI_HOST_REG_PORTS_IMPL: val = s->control_regs.impl; break; -case HOST_VERSION: +case AHCI_HOST_REG_VERSION: val = s->control_regs.version; break; +default: +break; } } else if ((addr >= AHCI_PORT_REGS_START_ADDR) && (addr < (AHCI_PORT_REGS_START_ADDR + -- 2.14.3
[Qemu-devel] [PATCH v2 01/16] ahci: add port register enumeration
Instead of tracking offsets, lets count the registers. Signed-off-by: John Snow --- hw/ide/ahci.c | 25 + hw/ide/ahci_internal.h | 29 + 2 files changed, 54 insertions(+) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index e22d7be05f..48130c6439 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -46,6 +46,31 @@ static bool ahci_map_fis_address(AHCIDevice *ad); static void ahci_unmap_clb_address(AHCIDevice *ad); static void ahci_unmap_fis_address(AHCIDevice *ad); +__attribute__((__unused__)) /* TODO */ +static const char *AHCIPortReg_lookup[AHCI_PORT_REG__COUNT] = { +[AHCI_PORT_REG_LST_ADDR]= "PxCLB", +[AHCI_PORT_REG_LST_ADDR_HI] = "PxCLBU", +[AHCI_PORT_REG_FIS_ADDR]= "PxFB", +[AHCI_PORT_REG_FIS_ADDR_HI] = "PxFBU", +[AHCI_PORT_REG_IRQ_STAT]= "PxIS", +[AHCI_PORT_REG_IRQ_MASK]= "PXIE", +[AHCI_PORT_REG_CMD] = "PxCMD", +[7] = "Reserved", +[AHCI_PORT_REG_TFDATA] = "PxTFD", +[AHCI_PORT_REG_SIG] = "PxSIG", +[AHCI_PORT_REG_SCR_STAT]= "PxSSTS", +[AHCI_PORT_REG_SCR_CTL] = "PxSCTL", +[AHCI_PORT_REG_SCR_ERR] = "PxSERR", +[AHCI_PORT_REG_SCR_ACT] = "PxSACT", +[AHCI_PORT_REG_CMD_ISSUE] = "PxCI", +[AHCI_PORT_REG_SCR_NOTIF] = "PxSNTF", +[AHCI_PORT_REG_FIS_CTL] = "PxFBS", +[AHCI_PORT_REG_DEV_SLEEP] = "PxDEVSLP", +[18 ... 27] = "Reserved", +[AHCI_PORT_REG_VENDOR_1 ... + AHCI_PORT_REG_VENDOR_4]= "PxVS", +}; + static const char *AHCIPortIRQ_lookup[AHCI_PORT_IRQ__COUNT] = { [AHCI_PORT_IRQ_BIT_DHRS] = "DHRS", [AHCI_PORT_IRQ_BIT_PSS] = "PSS", diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index 1a25d6c039..eb7e1eefc0 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -74,6 +74,34 @@ #define HOST_CAP_NCQ (1 << 30) /* Native Command Queueing */ #define HOST_CAP_64 (1U << 31) /* PCI DAC (64-bit DMA) support */ +/* registers for each SATA port */ +enum AHCIPortReg { +AHCI_PORT_REG_LST_ADDR= 0, /* PxCLB: command list DMA addr */ +AHCI_PORT_REG_LST_ADDR_HI = 1, /* PxCLBU: command list DMA addr hi */ +AHCI_PORT_REG_FIS_ADDR= 2, /* PxFB: FIS rx buf addr */ +AHCI_PORT_REG_FIS_ADDR_HI = 3, /* PxFBU: FIX rx buf addr hi */ +AHCI_PORT_REG_IRQ_STAT= 4, /* PxIS: interrupt status */ +AHCI_PORT_REG_IRQ_MASK= 5, /* PxIE: interrupt enable/disable mask */ +AHCI_PORT_REG_CMD = 6, /* PxCMD: port command */ +/* RESERVED */ +AHCI_PORT_REG_TFDATA = 8, /* PxTFD: taskfile data */ +AHCI_PORT_REG_SIG = 9, /* PxSIG: device TF signature */ +AHCI_PORT_REG_SCR_STAT= 10, /* PxSSTS: SATA phy register: SStatus */ +AHCI_PORT_REG_SCR_CTL = 11, /* PxSCTL: SATA phy register: SControl */ +AHCI_PORT_REG_SCR_ERR = 12, /* PxSERR: SATA phy register: SError */ +AHCI_PORT_REG_SCR_ACT = 13, /* PxSACT: SATA phy register: SActive */ +AHCI_PORT_REG_CMD_ISSUE = 14, /* PxCI: command issue */ +AHCI_PORT_REG_SCR_NOTIF = 15, /* PxSNTF: SATA phy register: SNotification */ +AHCI_PORT_REG_FIS_CTL = 16, /* PxFBS: Port multiplier switching ctl */ +AHCI_PORT_REG_DEV_SLEEP = 17, /* PxDEVSLP: device sleep control */ +/* RESERVED */ +AHCI_PORT_REG_VENDOR_1= 28, /* PxVS: Vendor Specific */ +AHCI_PORT_REG_VENDOR_2= 29, +AHCI_PORT_REG_VENDOR_3= 30, +AHCI_PORT_REG_VENDOR_4= 31, +AHCI_PORT_REG__COUNT = 32 +}; + /* registers for each SATA port */ #define PORT_LST_ADDR 0x00 /* command list DMA addr */ #define PORT_LST_ADDR_HI 0x04 /* command list DMA addr hi */ @@ -82,6 +110,7 @@ #define PORT_IRQ_STAT 0x10 /* interrupt status */ #define PORT_IRQ_MASK 0x14 /* interrupt enable/disable mask */ #define PORT_CMD 0x18 /* port command */ + #define PORT_TFDATA 0x20 /* taskfile data */ #define PORT_SIG 0x24 /* device TF signature */ #define PORT_SCR_STAT 0x28 /* SATA phy register: SStatus */ -- 2.14.3
[Qemu-devel] [PATCH v2 14/16] ahci: adjust ahci_mem_write to work on registers
Actually, this function looks pretty broken, but for now, let's finish up what this series of commits came here to do. Signed-off-by: John Snow --- hw/ide/ahci.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 568e6b848a..1309f80458 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -468,11 +468,14 @@ static void ahci_mem_write(void *opaque, hwaddr addr, } if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) { -switch (addr) { -case HOST_CAP: /* R/WO, RO */ +enum AHCIHostReg regnum = addr / 4; +assert(regnum < AHCI_HOST_REG__COUNT); + +switch (regnum) { +case AHCI_HOST_REG_CAP: /* R/WO, RO */ /* FIXME handle R/WO */ break; -case HOST_CTL: /* R/W */ +case AHCI_HOST_REG_CTL: /* R/W */ if (val & HOST_CTL_RESET) { ahci_reset(s); } else { @@ -480,14 +483,14 @@ static void ahci_mem_write(void *opaque, hwaddr addr, ahci_check_irq(s); } break; -case HOST_IRQ_STAT: /* R/WC, RO */ +case AHCI_HOST_REG_IRQ_STAT: /* R/WC, RO */ s->control_regs.irqstatus &= ~val; ahci_check_irq(s); break; -case HOST_PORTS_IMPL: /* R/WO, RO */ +case AHCI_HOST_REG_PORTS_IMPL: /* R/WO, RO */ /* FIXME handle R/WO */ break; -case HOST_VERSION: /* RO */ +case AHCI_HOST_REG_VERSION: /* RO */ /* FIXME report write? */ break; default: -- 2.14.3
[Qemu-devel] [PATCH v2 03/16] ahci: make port read traces more descriptive
A trace is added to let us watch unimplemented registers specifically, as these are more likely to cause us trouble. Otherwise, the port read traces now tell us what register is getting hit, which is nicer. Signed-off-by: John Snow --- hw/ide/ahci.c | 5 +++-- hw/ide/trace-events | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 5187bf34ad..5b5ab823f4 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -46,7 +46,6 @@ static bool ahci_map_fis_address(AHCIDevice *ad); static void ahci_unmap_clb_address(AHCIDevice *ad); static void ahci_unmap_fis_address(AHCIDevice *ad); -__attribute__((__unused__)) /* TODO */ static const char *AHCIPortReg_lookup[AHCI_PORT_REG__COUNT] = { [AHCI_PORT_REG_LST_ADDR]= "PxCLB", [AHCI_PORT_REG_LST_ADDR_HI] = "PxCLBU", @@ -149,10 +148,12 @@ static uint32_t ahci_port_read(AHCIState *s, int port, int offset) val = pr->cmd_issue; break; default: +trace_ahci_port_read_default(s, port, AHCIPortReg_lookup[regnum], + offset); val = 0; } -trace_ahci_port_read(s, port, offset, val); +trace_ahci_port_read(s, port, AHCIPortReg_lookup[regnum], offset, val); return val; } diff --git a/hw/ide/trace-events b/hw/ide/trace-events index 5c0e59ec42..0db18d8271 100644 --- a/hw/ide/trace-events +++ b/hw/ide/trace-events @@ -63,7 +63,8 @@ ide_atapi_cmd_read_dma_cb_aio(void *s, int lba, int n) "IDEState: %p; aio read: ide_atapi_cmd_packet(void *s, uint16_t limit, const char *packet) "IDEState: %p; limit=0x%x packet: %s" # hw/ide/ahci.c -ahci_port_read(void *s, int port, int offset, uint32_t ret) "ahci(%p)[%d]: port read @ 0x%x: 0x%08x" +ahci_port_read(void *s, int port, const char *reg, int offset, uint32_t ret) "ahci(%p)[%d]: port read [reg:%s] @ 0x%x: 0x%08x" +ahci_port_read_default(void *s, int port, const char *reg, int offset) "ahci(%p)[%d]: unimplemented port read [reg:%s] @ 0x%x" ahci_irq_raise(void *s) "ahci(%p): raise irq" ahci_irq_lower(void *s) "ahci(%p): lower irq" ahci_check_irq(void *s, uint32_t old, uint32_t new) "ahci(%p): check irq 0x%08x --> 0x%08x" -- 2.14.3
[Qemu-devel] [PATCH v2 15/16] ahci: delete old host register address definitions
Signed-off-by: John Snow --- hw/ide/ahci_internal.h | 6 -- 1 file changed, 6 deletions(-) diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index f9dcf8b6e6..2953243929 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -55,12 +55,6 @@ #define RX_FIS_UNK0x60 /* offset of Unknown FIS data */ /* global controller registers */ -#define HOST_CAP 0x00 /* host capabilities */ -#define HOST_CTL 0x04 /* global host control */ -#define HOST_IRQ_STAT 0x08 /* interrupt status */ -#define HOST_PORTS_IMPL 0x0c /* bitmap of implemented ports */ -#define HOST_VERSION 0x10 /* AHCI spec. version compliancy */ - enum AHCIHostReg { AHCI_HOST_REG_CAP= 0, /* CAP: host capabilities */ AHCI_HOST_REG_CTL= 1, /* GHC: global host control */ -- 2.14.3
[Qemu-devel] [PATCH v2 09/16] ahci: add host register enumeration
Signed-off-by: John Snow --- hw/ide/ahci.c | 15 +++ hw/ide/ahci_internal.h | 15 +++ 2 files changed, 30 insertions(+) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 01463f0f51..f2e8bce797 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -47,6 +47,21 @@ static bool ahci_map_fis_address(AHCIDevice *ad); static void ahci_unmap_clb_address(AHCIDevice *ad); static void ahci_unmap_fis_address(AHCIDevice *ad); +__attribute__((__unused__)) /* TODO */ +static const char *AHCIHostReg_lookup[AHCI_HOST_REG__COUNT] = { +[AHCI_HOST_REG_CAP]= "CAP", +[AHCI_HOST_REG_CTL]= "GHC", +[AHCI_HOST_REG_IRQ_STAT] = "IS", +[AHCI_HOST_REG_PORTS_IMPL] = "PI", +[AHCI_HOST_REG_VERSION]= "VS", +[AHCI_HOST_REG_CCC_CTL]= "CCC_CTL", +[AHCI_HOST_REG_CCC_PORTS] = "CCC_PORTS", +[AHCI_HOST_REG_EM_LOC] = "EM_LOC", +[AHCI_HOST_REG_EM_CTL] = "EM_CTL", +[AHCI_HOST_REG_CAP2] = "CAP2", +[AHCI_HOST_REG_BOHC] = "BOHC", +}; + static const char *AHCIPortReg_lookup[AHCI_PORT_REG__COUNT] = { [AHCI_PORT_REG_LST_ADDR]= "PxCLB", [AHCI_PORT_REG_LST_ADDR_HI] = "PxCLBU", diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index db00c9aa39..af366db6f3 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -61,6 +61,21 @@ #define HOST_PORTS_IMPL 0x0c /* bitmap of implemented ports */ #define HOST_VERSION 0x10 /* AHCI spec. version compliancy */ +enum AHCIHostReg { +AHCI_HOST_REG_CAP= 0, /* CAP: host capabilities */ +AHCI_HOST_REG_CTL= 1, /* GHC: global host control */ +AHCI_HOST_REG_IRQ_STAT = 2, /* IS: interrupt status */ +AHCI_HOST_REG_PORTS_IMPL = 3, /* PI: bitmap of implemented ports */ +AHCI_HOST_REG_VERSION= 4, /* VS: AHCI spec. version compliancy */ +AHCI_HOST_REG_CCC_CTL= 5, /* CCC_CTL: CCC Control */ +AHCI_HOST_REG_CCC_PORTS = 6, /* CCC_PORTS: CCC Ports */ +AHCI_HOST_REG_EM_LOC = 7, /* EM_LOC: Enclosure Mgmt Location */ +AHCI_HOST_REG_EM_CTL = 8, /* EM_CTL: Enclosure Mgmt Control */ +AHCI_HOST_REG_CAP2 = 9, /* CAP2: host capabilities, extended */ +AHCI_HOST_REG_BOHC = 10, /* BOHC: firmare/os handoff ctrl & status */ +AHCI_HOST_REG__COUNT = 11 +}; + /* HOST_CTL bits */ #define HOST_CTL_RESET(1 << 0) /* reset controller; self-clear */ #define HOST_CTL_IRQ_EN (1 << 1) /* global IRQ enable */ -- 2.14.3
[Qemu-devel] [PATCH v2 02/16] ahci: modify ahci_port_read to use register numbers
Signed-off-by: John Snow --- hw/ide/ahci.c | 38 +++--- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 48130c6439..5187bf34ad 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -93,41 +93,42 @@ static const char *AHCIPortIRQ_lookup[AHCI_PORT_IRQ__COUNT] = { [AHCI_PORT_IRQ_BIT_CPDS] = "CPDS" }; -static uint32_t ahci_port_read(AHCIState *s, int port, int offset) +static uint32_t ahci_port_read(AHCIState *s, int port, int offset) { uint32_t val; -AHCIPortRegs *pr; -pr = &s->dev[port].port_regs; +AHCIPortRegs *pr = &s->dev[port].port_regs; +enum AHCIPortReg regnum = offset / sizeof(uint32_t); +assert(regnum < (AHCI_PORT_ADDR_OFFSET_LEN / sizeof(uint32_t))); -switch (offset) { -case PORT_LST_ADDR: +switch (regnum) { +case AHCI_PORT_REG_LST_ADDR: val = pr->lst_addr; break; -case PORT_LST_ADDR_HI: +case AHCI_PORT_REG_LST_ADDR_HI: val = pr->lst_addr_hi; break; -case PORT_FIS_ADDR: +case AHCI_PORT_REG_FIS_ADDR: val = pr->fis_addr; break; -case PORT_FIS_ADDR_HI: +case AHCI_PORT_REG_FIS_ADDR_HI: val = pr->fis_addr_hi; break; -case PORT_IRQ_STAT: +case AHCI_PORT_REG_IRQ_STAT: val = pr->irq_stat; break; -case PORT_IRQ_MASK: +case AHCI_PORT_REG_IRQ_MASK: val = pr->irq_mask; break; -case PORT_CMD: +case AHCI_PORT_REG_CMD: val = pr->cmd; break; -case PORT_TFDATA: +case AHCI_PORT_REG_TFDATA: val = pr->tfdata; break; -case PORT_SIG: +case AHCI_PORT_REG_SIG: val = pr->sig; break; -case PORT_SCR_STAT: +case AHCI_PORT_REG_SCR_STAT: if (s->dev[port].port.ifs[0].blk) { val = SATA_SCR_SSTATUS_DET_DEV_PRESENT_PHY_UP | SATA_SCR_SSTATUS_SPD_GEN1 | SATA_SCR_SSTATUS_IPM_ACTIVE; @@ -135,19 +136,18 @@ static uint32_t ahci_port_read(AHCIState *s, int port, int offset) val = SATA_SCR_SSTATUS_DET_NODEV; } break; -case PORT_SCR_CTL: +case AHCI_PORT_REG_SCR_CTL: val = pr->scr_ctl; break; -case PORT_SCR_ERR: +case AHCI_PORT_REG_SCR_ERR: val = pr->scr_err; break; -case PORT_SCR_ACT: +case AHCI_PORT_REG_SCR_ACT: val = pr->scr_act; break; -case PORT_CMD_ISSUE: +case AHCI_PORT_REG_CMD_ISSUE: val = pr->cmd_issue; break; -case PORT_RESERVED: default: val = 0; } -- 2.14.3
[Qemu-devel] [PATCH v2 13/16] ahci: fix spacing damage on ahci_mem_write
Signed-off-by: John Snow --- hw/ide/ahci.c | 47 +++ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 5e902c6615..568e6b848a 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -469,37 +469,36 @@ static void ahci_mem_write(void *opaque, hwaddr addr, if (addr < AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR) { switch (addr) { -case HOST_CAP: /* R/WO, RO */ -/* FIXME handle R/WO */ -break; -case HOST_CTL: /* R/W */ -if (val & HOST_CTL_RESET) { -ahci_reset(s); -} else { -s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN; -ahci_check_irq(s); -} -break; -case HOST_IRQ_STAT: /* R/WC, RO */ -s->control_regs.irqstatus &= ~val; +case HOST_CAP: /* R/WO, RO */ +/* FIXME handle R/WO */ +break; +case HOST_CTL: /* R/W */ +if (val & HOST_CTL_RESET) { +ahci_reset(s); +} else { +s->control_regs.ghc = (val & 0x3) | HOST_CTL_AHCI_EN; ahci_check_irq(s); -break; -case HOST_PORTS_IMPL: /* R/WO, RO */ -/* FIXME handle R/WO */ -break; -case HOST_VERSION: /* RO */ -/* FIXME report write? */ -break; -default: -trace_ahci_mem_write_unknown(s, size, addr, val); +} +break; +case HOST_IRQ_STAT: /* R/WC, RO */ +s->control_regs.irqstatus &= ~val; +ahci_check_irq(s); +break; +case HOST_PORTS_IMPL: /* R/WO, RO */ +/* FIXME handle R/WO */ +break; +case HOST_VERSION: /* RO */ +/* FIXME report write? */ +break; +default: +trace_ahci_mem_write_unknown(s, size, addr, val); } } else if ((addr >= AHCI_PORT_REGS_START_ADDR) && (addr < (AHCI_PORT_REGS_START_ADDR + -(s->ports * AHCI_PORT_ADDR_OFFSET_LEN { +(s->ports * AHCI_PORT_ADDR_OFFSET_LEN { ahci_port_write(s, (addr - AHCI_PORT_REGS_START_ADDR) >> 7, addr & AHCI_PORT_ADDR_OFFSET_MASK, val); } - } static const MemoryRegionOps ahci_mem_ops = { -- 2.14.3
[Qemu-devel] [PATCH v2 00/16] AHCI: tracing improvements
This set just adds register names so that the read/write traces make more sense on their own without having to memorize register offsets. It also splits read/write traces into supported/unsupported subsets, so you can just monitor for things that QEMU is likely doing wrong. v2: - Added qemu_log_mask(LOG_UNIMP, ...) statements in addition to traces for writes to unknown/unsupported registers. (Philippe) John Snow (16): ahci: add port register enumeration ahci: modify ahci_port_read to use register numbers ahci: make port read traces more descriptive ahci: fix spacing damage on ahci_port_write ahci: combine identical clauses in port write ahci: modify ahci_port_write to use register numbers ahci: make port write traces more descriptive ahci: delete old port register address definitions ahci: add host register enumeration ahci: fix host register max address ahci: modify ahci_mem_read_32 to work on register numbers ahci: make mem_read_32 traces more descriptive ahci: fix spacing damage on ahci_mem_write ahci: adjust ahci_mem_write to work on registers ahci: delete old host register address definitions ahci: make ahci_mem_write traces more descriptive hw/ide/ahci.c | 314 ++--- hw/ide/ahci_internal.h | 63 ++ hw/ide/trace-events| 13 +- 3 files changed, 241 insertions(+), 149 deletions(-) -- 2.14.3
[Qemu-devel] [PATCH v2 10/16] ahci: fix host register max address
Yes, comment, it ought to be 0x2C. Signed-off-by: John Snow --- hw/ide/ahci_internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index af366db6f3..f9dcf8b6e6 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -224,8 +224,7 @@ enum AHCIPortIRQ { #define SATA_SIGNATURE_CDROM 0xeb140101 #define SATA_SIGNATURE_DISK0x0101 -#define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x20 -/* Shouldn't this be 0x2c? */ +#define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x2c #define AHCI_PORT_REGS_START_ADDR 0x100 #define AHCI_PORT_ADDR_OFFSET_MASK 0x7f -- 2.14.3
[Qemu-devel] [PATCH v2 04/16] ahci: fix spacing damage on ahci_port_write
Churn. Signed-off-by: John Snow --- hw/ide/ahci.c | 142 +- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 5b5ab823f4..92780990a3 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -279,85 +279,85 @@ static int ahci_cond_start_engines(AHCIDevice *ad) return 0; } -static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) +static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) { AHCIPortRegs *pr = &s->dev[port].port_regs; trace_ahci_port_write(s, port, offset, val); switch (offset) { -case PORT_LST_ADDR: -pr->lst_addr = val; -break; -case PORT_LST_ADDR_HI: -pr->lst_addr_hi = val; -break; -case PORT_FIS_ADDR: -pr->fis_addr = val; -break; -case PORT_FIS_ADDR_HI: -pr->fis_addr_hi = val; -break; -case PORT_IRQ_STAT: -pr->irq_stat &= ~val; -ahci_check_irq(s); -break; -case PORT_IRQ_MASK: -pr->irq_mask = val & 0xfdc000ff; -ahci_check_irq(s); -break; -case PORT_CMD: -/* Block any Read-only fields from being set; - * including LIST_ON and FIS_ON. - * The spec requires to set ICC bits to zero after the ICC change - * is done. We don't support ICC state changes, therefore always - * force the ICC bits to zero. - */ -pr->cmd = (pr->cmd & PORT_CMD_RO_MASK) | - (val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK)); +case PORT_LST_ADDR: +pr->lst_addr = val; +break; +case PORT_LST_ADDR_HI: +pr->lst_addr_hi = val; +break; +case PORT_FIS_ADDR: +pr->fis_addr = val; +break; +case PORT_FIS_ADDR_HI: +pr->fis_addr_hi = val; +break; +case PORT_IRQ_STAT: +pr->irq_stat &= ~val; +ahci_check_irq(s); +break; +case PORT_IRQ_MASK: +pr->irq_mask = val & 0xfdc000ff; +ahci_check_irq(s); +break; +case PORT_CMD: +/* Block any Read-only fields from being set; + * including LIST_ON and FIS_ON. + * The spec requires to set ICC bits to zero after the ICC change + * is done. We don't support ICC state changes, therefore always + * force the ICC bits to zero. + */ +pr->cmd = (pr->cmd & PORT_CMD_RO_MASK) | +(val & ~(PORT_CMD_RO_MASK|PORT_CMD_ICC_MASK)); -/* Check FIS RX and CLB engines */ -ahci_cond_start_engines(&s->dev[port]); +/* Check FIS RX and CLB engines */ +ahci_cond_start_engines(&s->dev[port]); -/* XXX usually the FIS would be pending on the bus here and - issuing deferred until the OS enables FIS receival. - Instead, we only submit it once - which works in most - cases, but is a hack. */ -if ((pr->cmd & PORT_CMD_FIS_ON) && -!s->dev[port].init_d2h_sent) { -ahci_init_d2h(&s->dev[port]); -} +/* XXX usually the FIS would be pending on the bus here and + issuing deferred until the OS enables FIS receival. + Instead, we only submit it once - which works in most + cases, but is a hack. */ +if ((pr->cmd & PORT_CMD_FIS_ON) && +!s->dev[port].init_d2h_sent) { +ahci_init_d2h(&s->dev[port]); +} -check_cmd(s, port); -break; -case PORT_TFDATA: -/* Read Only. */ -break; -case PORT_SIG: -/* Read Only */ -break; -case PORT_SCR_STAT: -/* Read Only */ -break; -case PORT_SCR_CTL: -if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) && -((val & AHCI_SCR_SCTL_DET) == 0)) { -ahci_reset_port(s, port); -} -pr->scr_ctl = val; -break; -case PORT_SCR_ERR: -pr->scr_err &= ~val; -break; -case PORT_SCR_ACT: -/* RW1 */ -pr->scr_act |= val; -break; -case PORT_CMD_ISSUE: -pr->cmd_issue |= val; -check_cmd(s, port); -break; -default: -break; +check_cmd(s, port); +break; +case PORT_TFDATA: +/* Read Only. */ +break; +case PORT_SIG: +/* Read Only */ +break; +case PORT_SCR_STAT: +/* Read Only */ +break; +case PORT_SCR_CTL: +if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) && +((val & AHCI_SCR_SCTL_DET) == 0)) { +ahci_reset_port(s, port); +} +pr->scr_ctl = val; +break; +case
[Qemu-devel] [PATCH v2 05/16] ahci: combine identical clauses in port write
Signed-off-by: John Snow --- hw/ide/ahci.c | 4 1 file changed, 4 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 92780990a3..476841df58 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -330,11 +330,7 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) check_cmd(s, port); break; case PORT_TFDATA: -/* Read Only. */ -break; case PORT_SIG: -/* Read Only */ -break; case PORT_SCR_STAT: /* Read Only */ break; -- 2.14.3
[Qemu-devel] [PATCH v2 08/16] ahci: delete old port register address definitions
They're now unused. Signed-off-by: John Snow --- hw/ide/ahci_internal.h | 18 -- 1 file changed, 18 deletions(-) diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index eb7e1eefc0..db00c9aa39 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -102,24 +102,6 @@ enum AHCIPortReg { AHCI_PORT_REG__COUNT = 32 }; -/* registers for each SATA port */ -#define PORT_LST_ADDR 0x00 /* command list DMA addr */ -#define PORT_LST_ADDR_HI 0x04 /* command list DMA addr hi */ -#define PORT_FIS_ADDR 0x08 /* FIS rx buf addr */ -#define PORT_FIS_ADDR_HI 0x0c /* FIS rx buf addr hi */ -#define PORT_IRQ_STAT 0x10 /* interrupt status */ -#define PORT_IRQ_MASK 0x14 /* interrupt enable/disable mask */ -#define PORT_CMD 0x18 /* port command */ - -#define PORT_TFDATA 0x20 /* taskfile data */ -#define PORT_SIG 0x24 /* device TF signature */ -#define PORT_SCR_STAT 0x28 /* SATA phy register: SStatus */ -#define PORT_SCR_CTL 0x2c /* SATA phy register: SControl */ -#define PORT_SCR_ERR 0x30 /* SATA phy register: SError */ -#define PORT_SCR_ACT 0x34 /* SATA phy register: SActive */ -#define PORT_CMD_ISSUE0x38 /* command issue */ -#define PORT_RESERVED 0x3c /* reserved */ - /* Port interrupt bit descriptors */ enum AHCIPortIRQ { AHCI_PORT_IRQ_BIT_DHRS = 0, -- 2.14.3
[Qemu-devel] [PATCH v2 06/16] ahci: modify ahci_port_write to use register numbers
Signed-off-by: John Snow --- hw/ide/ahci.c | 32 +--- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 476841df58..efecf849a9 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -282,30 +282,32 @@ static int ahci_cond_start_engines(AHCIDevice *ad) static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) { AHCIPortRegs *pr = &s->dev[port].port_regs; +enum AHCIPortReg regnum = offset / sizeof(uint32_t); +assert(regnum < (AHCI_PORT_ADDR_OFFSET_LEN / sizeof(uint32_t))); trace_ahci_port_write(s, port, offset, val); -switch (offset) { -case PORT_LST_ADDR: +switch (regnum) { +case AHCI_PORT_REG_LST_ADDR: pr->lst_addr = val; break; -case PORT_LST_ADDR_HI: +case AHCI_PORT_REG_LST_ADDR_HI: pr->lst_addr_hi = val; break; -case PORT_FIS_ADDR: +case AHCI_PORT_REG_FIS_ADDR: pr->fis_addr = val; break; -case PORT_FIS_ADDR_HI: +case AHCI_PORT_REG_FIS_ADDR_HI: pr->fis_addr_hi = val; break; -case PORT_IRQ_STAT: +case AHCI_PORT_REG_IRQ_STAT: pr->irq_stat &= ~val; ahci_check_irq(s); break; -case PORT_IRQ_MASK: +case AHCI_PORT_REG_IRQ_MASK: pr->irq_mask = val & 0xfdc000ff; ahci_check_irq(s); break; -case PORT_CMD: +case AHCI_PORT_REG_CMD: /* Block any Read-only fields from being set; * including LIST_ON and FIS_ON. * The spec requires to set ICC bits to zero after the ICC change @@ -329,26 +331,26 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) check_cmd(s, port); break; -case PORT_TFDATA: -case PORT_SIG: -case PORT_SCR_STAT: +case AHCI_PORT_REG_TFDATA: +case AHCI_PORT_REG_SIG: +case AHCI_PORT_REG_SCR_STAT: /* Read Only */ break; -case PORT_SCR_CTL: +case AHCI_PORT_REG_SCR_CTL: if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) && ((val & AHCI_SCR_SCTL_DET) == 0)) { ahci_reset_port(s, port); } pr->scr_ctl = val; break; -case PORT_SCR_ERR: +case AHCI_PORT_REG_SCR_ERR: pr->scr_err &= ~val; break; -case PORT_SCR_ACT: +case AHCI_PORT_REG_SCR_ACT: /* RW1 */ pr->scr_act |= val; break; -case PORT_CMD_ISSUE: +case AHCI_PORT_REG_CMD_ISSUE: pr->cmd_issue |= val; check_cmd(s, port); break; -- 2.14.3
[Qemu-devel] [PATCH v2 07/16] ahci: make port write traces more descriptive
Signed-off-by: John Snow --- hw/ide/ahci.c | 8 +++- hw/ide/trace-events | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index efecf849a9..01463f0f51 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -27,6 +27,7 @@ #include "hw/pci/pci.h" #include "qemu/error-report.h" +#include "qemu/log.h" #include "sysemu/block-backend.h" #include "sysemu/dma.h" #include "hw/ide/internal.h" @@ -284,8 +285,8 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) AHCIPortRegs *pr = &s->dev[port].port_regs; enum AHCIPortReg regnum = offset / sizeof(uint32_t); assert(regnum < (AHCI_PORT_ADDR_OFFSET_LEN / sizeof(uint32_t))); +trace_ahci_port_write(s, port, AHCIPortReg_lookup[regnum], offset, val); -trace_ahci_port_write(s, port, offset, val); switch (regnum) { case AHCI_PORT_REG_LST_ADDR: pr->lst_addr = val; @@ -355,6 +356,11 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) check_cmd(s, port); break; default: +trace_ahci_port_write_unimpl(s, port, AHCIPortReg_lookup[regnum], + offset, val); +qemu_log_mask(LOG_UNIMP, "Attempted write to unimplemented register: " + "AHCI port %d register %s, offset 0x%x: 0x%"PRIu32, + port, AHCIPortReg_lookup[regnum], offset, val); break; } } diff --git a/hw/ide/trace-events b/hw/ide/trace-events index 0db18d8271..1efbbb8114 100644 --- a/hw/ide/trace-events +++ b/hw/ide/trace-events @@ -69,7 +69,8 @@ ahci_irq_raise(void *s) "ahci(%p): raise irq" ahci_irq_lower(void *s) "ahci(%p): lower irq" ahci_check_irq(void *s, uint32_t old, uint32_t new) "ahci(%p): check irq 0x%08x --> 0x%08x" ahci_trigger_irq(void *s, int port, const char *name, uint32_t val, uint32_t old, uint32_t new, uint32_t effective) "ahci(%p)[%d]: trigger irq +%s (0x%08x); irqstat: 0x%08x --> 0x%08x; effective: 0x%08x" -ahci_port_write(void *s, int port, int offset, uint32_t val) "ahci(%p)[%d]: port write @ 0x%x: 0x%08x" +ahci_port_write(void *s, int port, const char *reg, int offset, uint32_t val) "ahci(%p)[%d]: port write [reg:%s] @ 0x%x: 0x%08x" +ahci_port_write_unimpl(void *s, int port, const char *reg, int offset, uint32_t val) "ahci(%p)[%d]: unimplemented port write [reg:%s] @ 0x%x: 0x%08x" ahci_mem_read_32(void *s, uint64_t addr, uint32_t val) "ahci(%p): mem read @ 0x%"PRIx64": 0x%08x" ahci_mem_read(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): read%u @ 0x%"PRIx64": 0x%016"PRIx64 ahci_mem_write(void *s, unsigned size, uint64_t addr, uint64_t val) "ahci(%p): write%u @ 0x%"PRIx64": 0x%016"PRIx64 -- 2.14.3
Re: [Qemu-devel] [qemu PATCH v4 0/4] support NFIT platform capabilities
On Fri, May 25, 2018 at 08:51:22PM +0300, Michael S. Tsirkin wrote: > On Mon, May 21, 2018 at 10:31:59AM -0600, Ross Zwisler wrote: > > Changes since v3: > > * Updated the text in docs/nvdimm.txt to make it clear that the value > >being passed in on the command line in an integer made up of various > >bit fields. (Rob Elliott) > > > > * Updated the "Highest Valid Capability" byte to be dynamic based on > >the highest valid bit in the user's input. (Rob Elliott) > > Igor could you review pls? I think your comments have been addressed. Ping? Igor, can you pick this up?
Re: [Qemu-devel] [Qemu-block] [PATCH] block: Ignore generated job QAPI files
On Thu, May 31, 2018 at 04:24:35PM -0500, Eric Blake wrote: > Commit bf42508f introduced new generated files; make sure they > don't get accidentally committed from an in-tree build. > > Signed-off-by: Eric Blake Reviewed-by: Jeff Cody > --- > .gitignore | 4 > 1 file changed, 4 insertions(+) > > diff --git a/.gitignore b/.gitignore > index 81e1f2fb0f1..9da3b3e6267 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -36,6 +36,7 @@ > /qapi/qapi-commands-common.[ch] > /qapi/qapi-commands-crypto.[ch] > /qapi/qapi-commands-introspect.[ch] > +/qapi/qapi-commands-job.[ch] > /qapi/qapi-commands-migration.[ch] > /qapi/qapi-commands-misc.[ch] > /qapi/qapi-commands-net.[ch] > @@ -53,6 +54,7 @@ > /qapi/qapi-events-common.[ch] > /qapi/qapi-events-crypto.[ch] > /qapi/qapi-events-introspect.[ch] > +/qapi/qapi-events-job.[ch] > /qapi/qapi-events-migration.[ch] > /qapi/qapi-events-misc.[ch] > /qapi/qapi-events-net.[ch] > @@ -71,6 +73,7 @@ > /qapi/qapi-types-common.[ch] > /qapi/qapi-types-crypto.[ch] > /qapi/qapi-types-introspect.[ch] > +/qapi/qapi-types-job.[ch] > /qapi/qapi-types-migration.[ch] > /qapi/qapi-types-misc.[ch] > /qapi/qapi-types-net.[ch] > @@ -88,6 +91,7 @@ > /qapi/qapi-visit-common.[ch] > /qapi/qapi-visit-crypto.[ch] > /qapi/qapi-visit-introspect.[ch] > +/qapi/qapi-visit-job.[ch] > /qapi/qapi-visit-migration.[ch] > /qapi/qapi-visit-misc.[ch] > /qapi/qapi-visit-net.[ch] > -- > 2.14.3 > >
Re: [Qemu-devel] [PATCH 2/3] pc-bios/s390-ccw/net: Add support for pxelinux-style config files
On 05/30/2018 05:16 AM, Thomas Huth wrote: Since it is quite cumbersome to manually create a combined kernel with initrd image for network booting, we now support loading via pxelinux configuration files, too. In these files, the kernel, initrd and command line parameters can be specified seperately, and the firmware then takes care of glueing everything together in memory after the files have been downloaded. See this URL for details about the config file layout: https://www.syslinux.org/wiki/index.php?title=PXELINUX The user can either specify a config file directly as bootfile via DHCP (but in this case, the file has to start either with "default" or a "#" comment so we can distinguish it from binary kernels), or a folder (i.e. the bootfile name must end with "/") where the firmware should look for the typical pxelinux.cfg file names, e.g. based on MAC or IP address. We also support the pxelinux.cfg DHCP options 209 and 210 from RFC 5071. Signed-off-by: Thomas Huth --- pc-bios/s390-ccw/netboot.mak | 7 ++-- pc-bios/s390-ccw/netmain.c | 79 +++- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak index a73be36..8af0cfd 100644 --- a/pc-bios/s390-ccw/netboot.mak +++ b/pc-bios/s390-ccw/netboot.mak @@ -25,8 +25,9 @@ CTYPE_OBJS = isdigit.o isxdigit.o toupper.o %.o : $(SLOF_DIR)/lib/libc/ctype/%.c $(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@ $<,"CC","$(TARGET_DIR)$@") -STRING_OBJS = strcat.o strchr.o strcmp.o strcpy.o strlen.o strncmp.o strncpy.o \ - strstr.o memset.o memcpy.o memmove.o memcmp.o +STRING_OBJS = strcat.o strchr.o strrchr.o strcpy.o strlen.o strncpy.o \ + strcmp.o strncmp.o strcasecmp.o strncasecmp.o strstr.o \ + memset.o memcpy.o memmove.o memcmp.o %.o : $(SLOF_DIR)/lib/libc/string/%.c $(call quiet-command,$(CC) $(LIBC_CFLAGS) -c -o $@ $<,"CC","$(TARGET_DIR)$@") @@ -50,7 +51,7 @@ libc.a: $(LIBCOBJS) # libnet files: LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \ - dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o + dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o LIBNETCFLAGS := $(QEMU_CFLAGS) -DDHCPARCH=0x1F $(LIBC_INC) $(LIBNET_INC) %.o : $(SLOF_DIR)/lib/libnet/%.c diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c index 7533cf7..e84bb2b 100644 --- a/pc-bios/s390-ccw/netmain.c +++ b/pc-bios/s390-ccw/netmain.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "s390-ccw.h" #include "virtio.h" @@ -41,12 +42,14 @@ extern char _start[]; #define KERNEL_ADDR ((void *)0L) #define KERNEL_MAX_SIZE ((long)_start) +#define ARCH_COMMAND_LINE_SIZE 896 /* Taken from Linux kernel */ char stack[PAGE_SIZE * 8] __attribute__((aligned(PAGE_SIZE))); IplParameterBlock iplb __attribute__((aligned(PAGE_SIZE))); static char cfgbuf[2048]; static SubChannelId net_schid = { .one = 1 }; +static uint8_t mac[6]; static uint64_t dest_timer; static uint64_t get_timer_ms(void) @@ -158,7 +161,6 @@ static int tftp_load(filename_ip_t *fnip, void *buffer, int len) static int net_init(filename_ip_t *fn_ip) { -uint8_t mac[6]; int rc; memset(fn_ip, 0, sizeof(filename_ip_t)); @@ -233,6 +235,66 @@ static void net_release(filename_ip_t *fn_ip) } /** + * Load a kernel with initrd (i.e. with the information that we've got from + * a pxelinux.cfg config file) + */ +static int load_kernel_with_initrd(filename_ip_t *fn_ip, + struct pl_cfg_entry *entry) +{ +int rc; + +printf("Loading pxelinux.cfg entry '%s'\n", entry->label); + +if (!entry->kernel) { +printf("Kernel entry is missing!\n"); +return -1; +} + +strncpy(fn_ip->filename, entry->kernel, sizeof(fn_ip->filename)); +rc = tftp_load(fn_ip, KERNEL_ADDR, KERNEL_MAX_SIZE); +if (rc < 0) { +return rc; +} + +if (entry->initrd) { +uint64_t iaddr = (rc + 0xfff) & ~0xfffUL; + +strncpy(fn_ip->filename, entry->initrd, sizeof(fn_ip->filename)); +rc = tftp_load(fn_ip, (void *)iaddr, KERNEL_MAX_SIZE - iaddr); +if (rc < 0) { +return rc; +} +/* Patch location and size: */ +*(uint64_t *)0x10408 = iaddr; +*(uint64_t *)0x10410 = rc; +rc += iaddr; +} + +if (entry->append) { +strncpy((char *)0x10480, entry->append, ARCH_COMMAND_LINE_SIZE); +} + +return rc; +} + +#define MAX_PXELINUX_ENTRIES 16 + +static int net_try_pxelinux_cfg(filename_ip_t *fn_ip) +{ +struct pl_cfg_entry entries[MAX_PXELINUX_ENTRIES]; +int num_ent, def_ent = 0; + +num_ent = pxelinux_load_parse_cfg(fn_ip, mac, NULL, DEFAULT_TFTP_RETRIES, + cfgbuf, sizeof(cfgbuf), +
[Qemu-devel] [PATCH] block: Ignore generated job QAPI files
Commit bf42508f introduced new generated files; make sure they don't get accidentally committed from an in-tree build. Signed-off-by: Eric Blake --- .gitignore | 4 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 81e1f2fb0f1..9da3b3e6267 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ /qapi/qapi-commands-common.[ch] /qapi/qapi-commands-crypto.[ch] /qapi/qapi-commands-introspect.[ch] +/qapi/qapi-commands-job.[ch] /qapi/qapi-commands-migration.[ch] /qapi/qapi-commands-misc.[ch] /qapi/qapi-commands-net.[ch] @@ -53,6 +54,7 @@ /qapi/qapi-events-common.[ch] /qapi/qapi-events-crypto.[ch] /qapi/qapi-events-introspect.[ch] +/qapi/qapi-events-job.[ch] /qapi/qapi-events-migration.[ch] /qapi/qapi-events-misc.[ch] /qapi/qapi-events-net.[ch] @@ -71,6 +73,7 @@ /qapi/qapi-types-common.[ch] /qapi/qapi-types-crypto.[ch] /qapi/qapi-types-introspect.[ch] +/qapi/qapi-types-job.[ch] /qapi/qapi-types-migration.[ch] /qapi/qapi-types-misc.[ch] /qapi/qapi-types-net.[ch] @@ -88,6 +91,7 @@ /qapi/qapi-visit-common.[ch] /qapi/qapi-visit-crypto.[ch] /qapi/qapi-visit-introspect.[ch] +/qapi/qapi-visit-job.[ch] /qapi/qapi-visit-migration.[ch] /qapi/qapi-visit-misc.[ch] /qapi/qapi-visit-net.[ch] -- 2.14.3
[Qemu-devel] [PATCH 1/5] block: Add blklogwrites
From: Aapo Vienamo Implements a block device write logging system, similar to Linux kernel device mapper dm-log-writes. The write operations that are performed on a block device are logged to a file or another block device. The write log format is identical to the dm-log-writes format. Currently, log markers are not supported. This functionality can be used for fail-safe and fs consistency testing. By implementing it in qemu, tests utilizing write logs can be be used to test non-Linux drivers and older kernels. The implementation is based on the blkverify and blkdebug block drivers. Signed-off-by: Aapo Vienamo Signed-off-by: Ari Sundholm --- block.c | 6 - block/Makefile.objs | 1 + block/blklogwrites.c | 441 ++ include/block/block.h | 7 + 4 files changed, 449 insertions(+), 6 deletions(-) create mode 100644 block/blklogwrites.c diff --git a/block.c b/block.c index 501b64c..c8cffe1 100644 --- a/block.c +++ b/block.c @@ -1914,12 +1914,6 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, return 0; } -#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ - | BLK_PERM_WRITE \ - | BLK_PERM_WRITE_UNCHANGED \ - | BLK_PERM_RESIZE) -#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) - void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, BlockReopenQueue *reopen_queue, diff --git a/block/Makefile.objs b/block/Makefile.objs index 899bfb5..c8337bf 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -5,6 +5,7 @@ block-obj-y += qed-check.o block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o +block-obj-y += blklogwrites.o block-obj-y += block-backend.o snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += file-posix.o diff --git a/block/blklogwrites.c b/block/blklogwrites.c new file mode 100644 index 000..78835bf --- /dev/null +++ b/block/blklogwrites.c @@ -0,0 +1,441 @@ +/* + * Write logging blk driver based on blkverify and blkdebug. + * + * Copyright (c) 2017 Tuomas Tynkkynen + * Copyright (c) 2018 Aapo Vienamo + * Copyright (c) 2018 Ari Sundholm + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "block/block_int.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" +#include "qemu/option.h" + +/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */ + +#define LOG_FLUSH_FLAG (1 << 0) +#define LOG_FUA_FLAG (1 << 1) +#define LOG_DISCARD_FLAG (1 << 2) +#define LOG_MARK_FLAG (1 << 3) + +#define WRITE_LOG_VERSION 1ULL +#define WRITE_LOG_MAGIC 0x6a736677736872ULL + +/* All fields are little-endian. */ +struct log_write_super { +uint64_t magic; +uint64_t version; +uint64_t nr_entries; +uint32_t sectorsize; +}; + +struct log_write_entry { +uint64_t sector; +uint64_t nr_sectors; +uint64_t flags; +uint64_t data_len; +}; + +/* End of disk format structures. */ + +typedef struct { +BdrvChild *log_file; +uint64_t cur_log_sector; +uint64_t nr_entries; +} BDRVBlkLogWritesState; + +/* Valid blk_log_writes filenames look like: + * blk_log_writes:path/to/raw_image:path/to/logfile */ +static void blk_log_writes_parse_filename(const char *filename, QDict *options, + Error **errp) +{ +const char *c; +QString *raw_path; + +/* Parse the blk_log_writes: prefix */ +if (!strstart(filename, "blk_log_writes:", &filename)) { +/* There was no prefix; therefore, all options have to be already + * present in the QDict (except for the filename) */ +qdict_put(options, "x-log", qstring_from_str(filename)); +return; +} + +/* Parse the raw image filename */ +c = strchr(filename, ':'); +if (c == NULL) { +error_setg(errp, + "blk_log_writes requires paths to both image and log"); +return; +} + +raw_path = qstring_from_substr(filename, 0, c - filename - 1); +qdict_put(options, "x-raw", raw_path); + +filename = c + 1; +qdict_put(options, "x-log", qstring_from_str(filename)); +} + +static QemuOptsList runtime_opts = { +.name = "blk_log_writes", +.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), +.desc = { +{ +.name = "x-raw", +.type = QEMU_OPT_STRING, +.help = "[internal use only, will be removed]", +}, +{ +.name = "x-log", +