[Qemu-devel] [PATCH kernel v4 7/7] virtio-balloon: tell host vm's unused page info
Support the request for vm's unused page information, response with a page bitmap. QEMU can make use of this bitmap and the dirty page logging mechanism to skip the transportation of some of these unused pages, this is very helpful to reduce the network traffic and speed up the live migration process. Signed-off-by: Liang Li Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Cornelia Huck Cc: Amit Shah Cc: Dave Hansen --- drivers/virtio/virtio_balloon.c | 128 +--- 1 file changed, 121 insertions(+), 7 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index c6c94b6..ba2d37b 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -56,7 +56,7 @@ struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *req_vq; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; @@ -83,6 +83,8 @@ struct virtio_balloon { unsigned int nr_page_bmap; /* Used to record the processed pfn range */ unsigned long min_pfn, max_pfn, start_pfn, end_pfn; + /* Request header */ + struct virtio_balloon_req_hdr req_hdr; /* * The pages we've told the Host we're not using are enqueued * at vb_dev_info->pages list. @@ -552,6 +554,63 @@ static void update_balloon_stats(struct virtio_balloon *vb) pages_to_bytes(available)); } +static void send_unused_pages_info(struct virtio_balloon *vb, + unsigned long req_id) +{ + struct scatterlist sg_in; + unsigned long pfn = 0, bmap_len, pfn_limit, last_pfn, nr_pfn; + struct virtqueue *vq = vb->req_vq; + struct virtio_balloon_resp_hdr *hdr = vb->resp_hdr; + int ret = 1, used_nr_bmap = 0, i; + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP) && + vb->nr_page_bmap == 1) + extend_page_bitmap(vb); + + pfn_limit = PFNS_PER_BMAP * vb->nr_page_bmap; + mutex_lock(&vb->balloon_lock); + last_pfn = get_max_pfn(); + + while (ret) { + clear_page_bitmap(vb); + ret = get_unused_pages(pfn, pfn + pfn_limit, vb->page_bitmap, +PFNS_PER_BMAP, vb->nr_page_bmap); + if (ret < 0) + break; + hdr->cmd = BALLOON_GET_UNUSED_PAGES; + hdr->id = req_id; + bmap_len = BALLOON_BMAP_SIZE * vb->nr_page_bmap; + + if (!ret) { + hdr->flag = BALLOON_FLAG_DONE; + nr_pfn = last_pfn - pfn; + used_nr_bmap = nr_pfn / PFNS_PER_BMAP; + if (nr_pfn % PFNS_PER_BMAP) + used_nr_bmap++; + bmap_len = nr_pfn / BITS_PER_BYTE; + } else { + hdr->flag = BALLOON_FLAG_CONT; + used_nr_bmap = vb->nr_page_bmap; + } + for (i = 0; i < used_nr_bmap; i++) { + unsigned int bmap_size = BALLOON_BMAP_SIZE; + + if (i + 1 == used_nr_bmap) + bmap_size = bmap_len - BALLOON_BMAP_SIZE * i; + set_bulk_pages(vb, vq, pfn + i * PFNS_PER_BMAP, +vb->page_bitmap[i], bmap_size, true); + } + if (vb->resp_pos > 0) + send_resp_data(vb, vq, true); + pfn += pfn_limit; + } + + mutex_unlock(&vb->balloon_lock); + sg_init_one(&sg_in, &vb->req_hdr, sizeof(vb->req_hdr)); + virtqueue_add_inbuf(vq, &sg_in, 1, &vb->req_hdr, GFP_KERNEL); + virtqueue_kick(vq); +} + /* * While most virtqueues communicate guest-initiated requests to the hypervisor, * the stats queue operates in reverse. The driver initializes the virtqueue @@ -686,18 +745,56 @@ static void update_balloon_size_func(struct work_struct *work) queue_work(system_freezable_wq, work); } +static void misc_handle_rq(struct virtio_balloon *vb) +{ + struct virtio_balloon_req_hdr *ptr_hdr; + unsigned int len; + + ptr_hdr = virtqueue_get_buf(vb->req_vq, &len); + if (!ptr_hdr || len != sizeof(vb->req_hdr)) + return; + + switch (ptr_hdr->cmd) { + case BALLOON_GET_UNUSED_PAGES: + send_unused_pages_info(vb, ptr_hdr->param); + break; + default: + break; + } +} + +static void misc_request(struct virtqueue *vq) +{ + struct virtio_balloon *vb = vq->vdev->priv; + + misc_handle_rq(vb); +} + static int init_vqs(struct virtio_balloon *vb) { - struct virtqueue *vqs[3]; - vq_callback_t *callback
[Qemu-devel] [PATCH kernel v4 6/7] virtio-balloon: define flags and head for host request vq
Define the flags and head struct for a new host request virtual queue. Guest can get requests from host and then responds to them on this new virtual queue. Host can make use of this virtual queue to request the guest do some operations, e.g. drop page cache, synchronize file system, etc. And the hypervisor can get some of guest's runtime information through this virtual queue too, e.g. the guest's unused page information, which can be used for live migration optimization. Signed-off-by: Liang Li Cc: Andrew Morton Cc: Mel Gorman Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Cornelia Huck Cc: Amit Shah Cc: Dave Hansen --- include/uapi/linux/virtio_balloon.h | 22 ++ 1 file changed, 22 insertions(+) diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index bed6f41..c4e34d0 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -35,6 +35,7 @@ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_PAGE_BITMAP 3 /* Send page info with bitmap */ +#define VIRTIO_BALLOON_F_HOST_REQ_VQ 4 /* Host request virtqueue */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -101,4 +102,25 @@ struct virtio_balloon_bmap_hdr { __le64 bmap[0]; }; +enum virtio_balloon_req_id { + /* Get unused page information */ + BALLOON_GET_UNUSED_PAGES, +}; + +enum virtio_balloon_flag { + /* Have more data for a request */ + BALLOON_FLAG_CONT, + /* No more data for a request */ + BALLOON_FLAG_DONE, +}; + +struct virtio_balloon_req_hdr { + /* Used to distinguish different requests */ + __le16 cmd; + /* Reserved */ + __le16 reserved[3]; + /* Request parameter */ + __le64 param; +}; + #endif /* _LINUX_VIRTIO_BALLOON_H */ -- 1.8.3.1
[Qemu-devel] [PATCH kernel v4 5/7] mm: add the related functions to get unused page
Save the unused page info into a split page bitmap. The virtio balloon driver will use this new API to get the unused page bitmap and send the bitmap to hypervisor(QEMU) to speed up live migration. During sending the bitmap, some the pages may be modified and are no free anymore, this inaccuracy can be corrected by the dirty page logging mechanism. Signed-off-by: Liang Li Cc: Andrew Morton Cc: Mel Gorman Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Cornelia Huck Cc: Amit Shah Cc: Dave Hansen --- include/linux/mm.h | 2 ++ mm/page_alloc.c| 85 ++ 2 files changed, 87 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index f47862a..7014d8a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1773,6 +1773,8 @@ extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); extern void free_initmem(void); extern unsigned long get_max_pfn(void); +extern int get_unused_pages(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *bitmap[], unsigned long len, unsigned int nr_bmap); /* * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 12cc8ed..72537cc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4438,6 +4438,91 @@ unsigned long get_max_pfn(void) } EXPORT_SYMBOL(get_max_pfn); +static void mark_unused_pages_bitmap(struct zone *zone, + unsigned long start_pfn, unsigned long end_pfn, + unsigned long *bitmap[], unsigned long bits, + unsigned int nr_bmap) +{ + unsigned long pfn, flags, nr_pg, pos, *bmap; + unsigned int order, i, t, bmap_idx; + struct list_head *curr; + + if (zone_is_empty(zone)) + return; + + end_pfn = min(start_pfn + nr_bmap * bits, end_pfn); + spin_lock_irqsave(&zone->lock, flags); + + for_each_migratetype_order(order, t) { + list_for_each(curr, &zone->free_area[order].free_list[t]) { + pfn = page_to_pfn(list_entry(curr, struct page, lru)); + if (pfn < start_pfn || pfn >= end_pfn) + continue; + nr_pg = 1UL << order; + if (pfn + nr_pg > end_pfn) + nr_pg = end_pfn - pfn; + bmap_idx = (pfn - start_pfn) / bits; + if (bmap_idx == (pfn + nr_pg - start_pfn) / bits) { + bmap = bitmap[bmap_idx]; + pos = (pfn - start_pfn) % bits; + bitmap_set(bmap, pos, nr_pg); + } else + for (i = 0; i < nr_pg; i++) { + pos = pfn - start_pfn + i; + bmap_idx = pos / bits; + bmap = bitmap[bmap_idx]; + pos = pos % bits; + bitmap_set(bmap, pos, 1); + } + } + } + + spin_unlock_irqrestore(&zone->lock, flags); +} + +/* + * During live migration, page is always discardable unless it's + * content is needed by the system. + * get_unused_pages provides an API to get the unused pages, these + * unused pages can be discarded if there is no modification since + * the request. Some other mechanism, like the dirty page logging + * can be used to track the modification. + * + * This function scans the free page list to get the unused pages + * whose pfn are range from start_pfn to end_pfn, and set the + * corresponding bit in the bitmap if an unused page is found. + * + * Allocating a large bitmap may fail because of fragmentation, + * instead of using a single bitmap, we use a scatter/gather bitmap. + * The 'bitmap' is the start address of an array which contains + * 'nr_bmap' separate small bitmaps, each bitmap contains 'bits' bits. + * + * return -1 if parameters are invalid + * return 0 when end_pfn >= max_pfn + * return 1 when end_pfn < max_pfn + */ +int get_unused_pages(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *bitmap[], unsigned long bits, unsigned int nr_bmap) +{ + struct zone *zone; + int ret = 0; + + if (bitmap == NULL || *bitmap == NULL || nr_bmap == 0 || +bits == 0 || start_pfn > end_pfn) + return -1; + if (end_pfn < max_pfn) + ret = 1; + if (end_pfn >= max_pfn) + ret = 0; + + for_each_populated_zone(zone) + mark_unused_pages_bitmap(zone, start_pfn, end_pfn, bitmap, +bits, nr_bmap); + + return ret; +} +EXPORT_SYMBOL(get_unused_pages); + static void zoneref_set_zone(struct zone *zone, struct zon
[Qemu-devel] [PATCH kernel v4 4/7] virtio-balloon: speed up inflate/deflate process
The implementation of the current virtio-balloon is not very efficient, the time spends on different stages of inflating the balloon to 7GB of a 8GB idle guest: a. allocating pages (6.5%) b. sending PFNs to host (68.3%) c. address translation (6.1%) d. madvise (19%) It takes about 4126ms for the inflating process to complete. Debugging shows that the bottle neck are the stage b and stage d. If using a bitmap to send the page info instead of the PFNs, we can reduce the overhead in stage b quite a lot. Furthermore, we can do the address translation and call madvise() with a bulk of RAM pages, instead of the current page per page way, the overhead of stage c and stage d can also be reduced a lot. This patch is the kernel side implementation which is intended to speed up the inflating & deflating process by adding a new feature to the virtio-balloon device. With this new feature, inflating the balloon to 7GB of a 8GB idle guest only takes 590ms, the performance improvement is about 85%. TODO: optimize stage a by allocating/freeing a chunk of pages instead of a single page at a time. Signed-off-by: Liang Li Suggested-by: Michael S. Tsirkin Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Cornelia Huck Cc: Amit Shah Cc: Dave Hansen --- drivers/virtio/virtio_balloon.c | 398 +--- 1 file changed, 369 insertions(+), 29 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 59ffe5a..c6c94b6 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -42,6 +42,10 @@ #define OOM_VBALLOON_DEFAULT_PAGES 256 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 +#define BALLOON_BMAP_SIZE (8 * PAGE_SIZE) +#define PFNS_PER_BMAP (BALLOON_BMAP_SIZE * BITS_PER_BYTE) +#define BALLOON_BMAP_COUNT 32 + static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; module_param(oom_pages, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); @@ -67,6 +71,18 @@ struct virtio_balloon { /* Number of balloon pages we've told the Host we're not using. */ unsigned int num_pages; + /* Pointer to the response header. */ + void *resp_hdr; + /* Pointer to the start address of response data. */ + unsigned long *resp_data; + /* Pointer offset of the response data. */ + unsigned long resp_pos; + /* Bitmap and bitmap count used to tell the host the pages */ + unsigned long *page_bitmap[BALLOON_BMAP_COUNT]; + /* Number of split page bitmaps */ + unsigned int nr_page_bmap; + /* Used to record the processed pfn range */ + unsigned long min_pfn, max_pfn, start_pfn, end_pfn; /* * The pages we've told the Host we're not using are enqueued * at vb_dev_info->pages list. @@ -110,20 +126,227 @@ static void balloon_ack(struct virtqueue *vq) wake_up(&vb->acked); } -static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) +static inline void init_bmap_pfn_range(struct virtio_balloon *vb) { - struct scatterlist sg; + vb->min_pfn = ULONG_MAX; + vb->max_pfn = 0; +} + +static inline void update_bmap_pfn_range(struct virtio_balloon *vb, +struct page *page) +{ + unsigned long balloon_pfn = page_to_balloon_pfn(page); + + vb->min_pfn = min(balloon_pfn, vb->min_pfn); + vb->max_pfn = max(balloon_pfn, vb->max_pfn); +} + +static void extend_page_bitmap(struct virtio_balloon *vb) +{ + int i, bmap_count; + unsigned long bmap_len; + + bmap_len = ALIGN(get_max_pfn(), BITS_PER_LONG) / BITS_PER_BYTE; + bmap_len = ALIGN(bmap_len, BALLOON_BMAP_SIZE); + bmap_count = min((int)(bmap_len / BALLOON_BMAP_SIZE), +BALLOON_BMAP_COUNT); + + for (i = 1; i < bmap_count; i++) { + vb->page_bitmap[i] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL); + if (vb->page_bitmap[i]) + vb->nr_page_bmap++; + else + break; + } +} + +static void free_extended_page_bitmap(struct virtio_balloon *vb) +{ + int i, bmap_count = vb->nr_page_bmap; + + + for (i = 1; i < bmap_count; i++) { + kfree(vb->page_bitmap[i]); + vb->page_bitmap[i] = NULL; + vb->nr_page_bmap--; + } +} + +static void kfree_page_bitmap(struct virtio_balloon *vb) +{ + int i; + + for (i = 0; i < vb->nr_page_bmap; i++) + kfree(vb->page_bitmap[i]); +} + +static void clear_page_bitmap(struct virtio_balloon *vb) +{ + int i; + + for (i = 0; i < vb->nr_page_bmap; i++) + memset(vb->page_bitmap[i], 0, BALLOON_BMAP_SIZE); +} + +static unsigned long do_set_resp_bitmap(struct virtio_balloon *vb, + unsigned long *bitmap, unsigned long base_pfn, + unsigned long pos, int nr_page) + +{ + struct virtio_balloon_bmap_hdr *hdr; + uns
[Qemu-devel] [PATCH kernel v4 3/7] mm: add a function to get the max pfn
Expose the function to get the max pfn, so it can be used in the virtio-balloon device driver. Simply include the 'linux/bootmem.h' is not enough, if the device driver is built to a module, directly refer the max_pfn lead to build failed. Signed-off-by: Liang Li Cc: Andrew Morton Cc: Mel Gorman Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Cornelia Huck Cc: Amit Shah Cc: Dave Hansen --- include/linux/mm.h | 1 + mm/page_alloc.c| 10 ++ 2 files changed, 11 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index a92c8d7..f47862a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1772,6 +1772,7 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); extern void free_initmem(void); +extern unsigned long get_max_pfn(void); /* * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8fd42aa..12cc8ed 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4428,6 +4428,16 @@ void show_free_areas(unsigned int filter) show_swap_cache_info(); } +/* + * The max_pfn can change because of memory hot plug, so it's only good + * as a hint. e.g. for sizing data structures. + */ +unsigned long get_max_pfn(void) +{ + return max_pfn; +} +EXPORT_SYMBOL(get_max_pfn); + static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) { zoneref->zone = zone; -- 1.8.3.1
[Qemu-devel] [PATCH kernel v4 2/7] virtio-balloon: define new feature bit and head struct
Add a new feature which supports sending the page information with a bitmap. The current implementation uses PFNs array, which is not very efficient. Using bitmap can improve the performance of inflating/deflating significantly The page bitmap header will used to tell the host some information about the page bitmap. e.g. the page size, page bitmap length and start pfn. Signed-off-by: Liang Li Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Cornelia Huck Cc: Amit Shah Cc: Dave Hansen --- include/uapi/linux/virtio_balloon.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 343d7dd..bed6f41 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -34,6 +34,7 @@ #define VIRTIO_BALLOON_F_MUST_TELL_HOST0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM2 /* Deflate balloon on OOM */ +#define VIRTIO_BALLOON_F_PAGE_BITMAP 3 /* Send page info with bitmap */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -82,4 +83,22 @@ struct virtio_balloon_stat { __virtio64 val; } __attribute__((packed)); +/* Response header structure */ +struct virtio_balloon_resp_hdr { + __le64 cmd : 8; /* Distinguish different requests type */ + __le64 flag: 8; /* Mark status for a specific request type */ + __le64 id : 16; /* Distinguish requests of a specific type */ + __le64 data_len: 32; /* Length of the following data, in bytes */ +}; + +/* Page bitmap header structure */ +struct virtio_balloon_bmap_hdr { + struct { + __le64 start_pfn : 52; /* start pfn for the bitmap */ + __le64 page_shift : 6; /* page shift width, in bytes */ + __le64 bmap_len : 6; /* bitmap length, in bytes */ + } head; + __le64 bmap[0]; +}; + #endif /* _LINUX_VIRTIO_BALLOON_H */ -- 1.8.3.1
[Qemu-devel] [PATCH kernel v4 0/7] Extend virtio-balloon for fast (de)inflating & fast live migration
This patch set contains two parts of changes to the virtio-balloon. One is the change for speeding up the inflating & deflating process, the main idea of this optimization is to use bitmap to send the page information to host instead of the PFNs, to reduce the overhead of virtio data transmission, address translation and madvise(). This can help to improve the performance by about 85%. Another change is for speeding up live migration. By skipping process guest's unused pages in the first round of data copy, to reduce needless data processing, this can help to save quite a lot of CPU cycles and network bandwidth. We put guest's unused page information in a bitmap and send it to host with the virt queue of virtio-balloon. For an idle guest with 8GB RAM, this can help to shorten the total live migration time from 2Sec to about 500ms in 10Gbps network environment. Changes from v3 to v4: * Use the new scheme suggested by Dave Hansen to encode the bitmap. * Add code which is missed in v3 to handle migrate page. * Free the memory for bitmap intime once the operation is done. * Address some of the comments in v3. Changes from v2 to v3: * Change the name of 'free page' to 'unused page'. * Use the scatter & gather bitmap instead of a 1MB page bitmap. * Fix overwriting the page bitmap after kicking. * Some of MST's comments for v2. Changes from v1 to v2: * Abandon the patch for dropping page cache. * Put some structures to uapi head file. * Use a new way to determine the page bitmap size. * Use a unified way to send the free page information with the bitmap * Address the issues referred in MST's comments Liang Li (7): virtio-balloon: rework deflate to add page to a list virtio-balloon: define new feature bit and head struct mm: add a function to get the max pfn virtio-balloon: speed up inflate/deflate process mm: add the related functions to get unused page virtio-balloon: define flags and head for host request vq virtio-balloon: tell host vm's unused page info drivers/virtio/virtio_balloon.c | 546 include/linux/mm.h | 3 + include/uapi/linux/virtio_balloon.h | 41 +++ mm/page_alloc.c | 95 +++ 4 files changed, 636 insertions(+), 49 deletions(-) -- 1.8.3.1
[Qemu-devel] [PATCH kernel v4 1/7] virtio-balloon: rework deflate to add page to a list
When doing the inflating/deflating operation, the current virtio-balloon implementation uses an array to save 256 PFNS, then send these PFNS to host through virtio and process each PFN one by one. This way is not efficient when inflating/deflating a large mount of memory because too many times of the following operations: 1. Virtio data transmission 2. Page allocate/free 3. Address translation(GPA->HVA) 4. madvise The over head of these operations will consume a lot of CPU cycles and will take a long time to complete, it may impact the QoS of the guest as well as the host. The overhead will be reduced a lot if batch processing is used. E.g. If there are several pages whose address are physical contiguous in the guest, these bulk pages can be processed in one operation. The main idea for the optimization is to reduce the above operations as much as possible. And it can be achieved by using a bitmap instead of an PFN array. Comparing with PFN array, for a specific size buffer, bitmap can present more pages, which is very important for batch processing. Using bitmap instead of PFN is not very helpful when inflating/deflating a small mount of pages, in this case, using PFNs is better. But using bitmap will not impact the QoS of guest or host heavily because the operation will be completed very soon for a small mount of pages, and we will use some methods to make sure the efficiency not drop too much. This patch saves the deflated pages to a list instead of the PFN array, which will allow faster notifications using a bitmap down the road. balloon_pfn_to_page() can be removed because it's useless. Signed-off-by: Liang Li Signed-off-by: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Cornelia Huck Cc: Amit Shah Cc: Dave Hansen --- drivers/virtio/virtio_balloon.c | 22 -- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 4e7003d..59ffe5a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -103,12 +103,6 @@ static u32 page_to_balloon_pfn(struct page *page) return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; } -static struct page *balloon_pfn_to_page(u32 pfn) -{ - BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE); - return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE); -} - static void balloon_ack(struct virtqueue *vq) { struct virtio_balloon *vb = vq->vdev->priv; @@ -181,18 +175,16 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) return num_allocated_pages; } -static void release_pages_balloon(struct virtio_balloon *vb) +static void release_pages_balloon(struct virtio_balloon *vb, +struct list_head *pages) { - unsigned int i; - struct page *page; + struct page *page, *next; - /* Find pfns pointing at start of each page, get pages and free them. */ - for (i = 0; i < vb->num_pfns; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { - page = balloon_pfn_to_page(virtio32_to_cpu(vb->vdev, - vb->pfns[i])); + list_for_each_entry_safe(page, next, pages, lru) { if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) adjust_managed_page_count(page, 1); + list_del(&page->lru); put_page(page); /* balloon reference */ } } @@ -202,6 +194,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) unsigned num_freed_pages; struct page *page; struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; + LIST_HEAD(pages); /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); @@ -215,6 +208,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) if (!page) break; set_page_pfns(vb, vb->pfns + vb->num_pfns, page); + list_add(&page->lru, &pages); vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; } @@ -226,7 +220,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) */ if (vb->num_pfns != 0) tell_host(vb, vb->deflate_vq); - release_pages_balloon(vb); + release_pages_balloon(vb, &pages); mutex_unlock(&vb->balloon_lock); return num_freed_pages; } -- 1.8.3.1
Re: [Qemu-devel] [PATCH v1] vhost-pci-net: design vhost-pci-net for the transmission of network packets between VMs
On Monday, October 24, 2016 3:09 PM, Wei Wang wrote: > To: virtio-comm...@lists.oasis-open.org; qemu-devel@nongnu.org; > m...@redhat.com; marcandre.lur...@gmail.com; stefa...@redhat.com; > pbonz...@redhat.com > Cc: Wang, Wei W > Subject: [PATCH v1] vhost-pci-net: design vhost-pci-net for the transmission > of > network packets between VMs > +\subsection{Device configuration layout}\label{sec:Device Types / > +Vhost-pci Device / Device configuration layout} > + None currently defined. I thought about it more - I think it would be better to define a device specific config field, "rxq_num " , so that the driver can allocate and initialize the num of rx virtqs in the initialization (i.e. the probe() ) when the controlq pair hasn't been ready to be used for the iteration between the device and driver. Best, Wei
Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features
On Tue, Nov 01, 2016 at 03:22:01PM +, Peter Maydell wrote: > On 30 October 2016 at 21:23, Michael S. Tsirkin wrote: > > The following changes since commit 5b2ecabaeabc17f032197246c4846b9ba95ba8a6: > > > > Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20161028-1' > > into staging (2016-10-28 17:59:04 +0100) > > > > are available in the git repository at: > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > for you to fetch changes up to f082ec0225bd15c71e0b4697d2df3af7bad65d7f: > > > > acpi: fix assert failure caused by commit 35c5a52d (2016-10-30 20:06:25 > > +0200) > > > > > > virtio, pc: fixes and features > > > > nvdimm hotplug support > > virtio migration and ioeventfd rework > > virtio crypto device > > ipmi fixes > > > > Signed-off-by: Michael S. Tsirkin > > > > Hi; this fails to build on OSX with format string issues: > > /Users/pm215/src/qemu-for-merges/hw/virtio/virtio-crypto.c:770:20: > error: format specifies type 'unsign > ed short' but the argument has type 'uint32_t' (aka 'unsigned int') > [-Werror,-Wformat] >vcrypto->max_queues, VIRTIO_QUEUE_MAX); > ~~~^ > /Users/pm215/src/qemu-for-merges/include/qapi/error.h:163:35: note: > expanded from macro 'error_setg' > (fmt), ## __VA_ARGS__) > ^ > > Fun fact: in struct vhost_dev, max_queues is a uint64_t; > in struct VirtIONet it is a uint16_t; and in VirtIOCrypto > it is a uint32_t... > > thanks > -- PMM Just to make sure : I fixed that and pushed to same tag. I don't think it makes sense to repost the pull request - pls take it from git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream -- MST
Re: [Qemu-devel] [PATCH v9 04/12] vfio iommu: Add support for mediated devices
On 02/11/16 14:29, Kirti Wankhede wrote: > > > On 11/2/2016 6:54 AM, Alexey Kardashevskiy wrote: >> On 02/11/16 01:01, Kirti Wankhede wrote: >>> >>> >>> On 10/28/2016 7:48 AM, Alexey Kardashevskiy wrote: On 27/10/16 23:31, Kirti Wankhede wrote: > > > On 10/27/2016 12:50 PM, Alexey Kardashevskiy wrote: >> On 18/10/16 08:22, Kirti Wankhede wrote: >>> VFIO IOMMU drivers are designed for the devices which are IOMMU capable. >>> Mediated device only uses IOMMU APIs, the underlying hardware can be >>> managed by an IOMMU domain. >>> >>> Aim of this change is: >>> - To use most of the code of TYPE1 IOMMU driver for mediated devices >>> - To support direct assigned device and mediated device in single module >>> >>> Added two new callback functions to struct vfio_iommu_driver_ops. >>> Backend >>> IOMMU module that supports pining and unpinning pages for mdev devices >>> should provide these functions. >>> Added APIs for pining and unpining pages to VFIO module. These calls >>> back >>> into backend iommu module to actually pin and unpin pages. >>> >>> This change adds pin and unpin support for mediated device to TYPE1 >>> IOMMU >>> backend module. More details: >>> - When iommu_group of mediated devices is attached, task structure is >>> cached which is used later to pin pages and page accounting. >> >> >> For SPAPR TCE IOMMU driver, I ended up caching mm_struct with >> atomic_inc(&container->mm->mm_count) (patches are on the way) instead of >> using @current or task as the process might be gone while VFIO container >> is >> still alive and @mm might be needed to do proper cleanup; this might not >> be >> an issue with this patchset now but still you seem to only use @mm from >> task_struct. >> > > Consider the example of QEMU process which creates VFIO container, QEMU > in its teardown path would release the container. How could container be > alive when process is gone? do_exit() in kernel/exit.c calls exit_mm() (which sets NULL to tsk->mm) first, and then releases open files by calling exit_files(). So container's release() does not have current->mm. >>> >>> Incrementing usage count (get_task_struct()) while saving task structure >>> and decementing it (put_task_struct()) from release() should work here. >>> Updating the patch. >> >> I cannot see how the task->usage counter prevents do_exit() from performing >> the exit, can you? >> > > It will not prevent exit from do_exit(), but that will make sure that we > don't have stale pointer of task structure. Then we can check whether > the task is alive and get mm pointer in teardown path as below: Or you could just reference and use @mm as KVM and others do. Or there is anything else you need from @current than just @mm? > > { > struct task_struct *task = domain->external_addr_space->task; > struct mm_struct *mm = NULL; > > put_pfn(pfn, prot); > > if (pid_alive(task)) > mm = get_task_mm(task); > > if (mm) { > if (do_accounting) > vfio_lock_acct(task, -1); > > mmput(mm); > } > } -- Alexey
Re: [Qemu-devel] [PATCH v9 04/12] vfio iommu: Add support for mediated devices
On 11/2/2016 6:54 AM, Alexey Kardashevskiy wrote: > On 02/11/16 01:01, Kirti Wankhede wrote: >> >> >> On 10/28/2016 7:48 AM, Alexey Kardashevskiy wrote: >>> On 27/10/16 23:31, Kirti Wankhede wrote: On 10/27/2016 12:50 PM, Alexey Kardashevskiy wrote: > On 18/10/16 08:22, Kirti Wankhede wrote: >> VFIO IOMMU drivers are designed for the devices which are IOMMU capable. >> Mediated device only uses IOMMU APIs, the underlying hardware can be >> managed by an IOMMU domain. >> >> Aim of this change is: >> - To use most of the code of TYPE1 IOMMU driver for mediated devices >> - To support direct assigned device and mediated device in single module >> >> Added two new callback functions to struct vfio_iommu_driver_ops. Backend >> IOMMU module that supports pining and unpinning pages for mdev devices >> should provide these functions. >> Added APIs for pining and unpining pages to VFIO module. These calls back >> into backend iommu module to actually pin and unpin pages. >> >> This change adds pin and unpin support for mediated device to TYPE1 IOMMU >> backend module. More details: >> - When iommu_group of mediated devices is attached, task structure is >> cached which is used later to pin pages and page accounting. > > > For SPAPR TCE IOMMU driver, I ended up caching mm_struct with > atomic_inc(&container->mm->mm_count) (patches are on the way) instead of > using @current or task as the process might be gone while VFIO container > is > still alive and @mm might be needed to do proper cleanup; this might not > be > an issue with this patchset now but still you seem to only use @mm from > task_struct. > Consider the example of QEMU process which creates VFIO container, QEMU in its teardown path would release the container. How could container be alive when process is gone? >>> >>> do_exit() in kernel/exit.c calls exit_mm() (which sets NULL to tsk->mm) >>> first, and then releases open files by calling exit_files(). So >>> container's release() does not have current->mm. >>> >> >> Incrementing usage count (get_task_struct()) while saving task structure >> and decementing it (put_task_struct()) from release() should work here. >> Updating the patch. > > I cannot see how the task->usage counter prevents do_exit() from performing > the exit, can you? > It will not prevent exit from do_exit(), but that will make sure that we don't have stale pointer of task structure. Then we can check whether the task is alive and get mm pointer in teardown path as below: { struct task_struct *task = domain->external_addr_space->task; struct mm_struct *mm = NULL; put_pfn(pfn, prot); if (pid_alive(task)) mm = get_task_mm(task); if (mm) { if (do_accounting) vfio_lock_acct(task, -1); mmput(mm); } } Thanks, Kirti
Re: [Qemu-devel] How to print value of float instruction argument
On Oct 31, 2016, at 4:39 AM, Peter Maydell wrote: > On 31 October 2016 at 03:13, Programmingkid wrote: >> I'm trying to print the value of the arguments sent to >> gen_fmadds() in target-ppc/translate/fp-impl.inc.c. How >> do I do this? I have tried printf("cpu_fpr[rA(ctx->opcode)] = %d\n", >> cpu_fpr[rA(ctx->opcode)]), but that always prints the same >> value of 138 even if I change the values sent to fmadds. > > At translate time we do not know the values in registers: > those are only available at runtime. The cpu_fpr[] values > are TCGv_i64 which are opaque datatypes (and just track > that FP register 5 is a unique thing that's not the same > as FP register 6, and so on). > > For runtime information you need to turn on the -d > tracing with suitable flags for what you care about. > Where generated code calls a helper function you can > put a breakpoint or tracing in the helper to show > the arguments it prints, because the helper is called > at runtime, not translate time. > > thanks > -- PMM Thank you very much for your help. I guess I should explain what I want. The C99 standard defines functions that are IEEE 754 complaint. I'm hoping to replace QEMU's fmadd implementation (for the PowerPC) with the fma() function. The fma() function does the exact same thing that fmadd does. I just don't know where exactly I can access the values being sent to the emulated fmadd instruction. Would you have a suggestion as to where I can place the fma() function?
Re: [Qemu-devel] [PATCH 1/1] net: skip virtio-net config of deleted nic's peers
On 2016年11月01日 06:42, Michael S. Tsirkin wrote: On Tue, Nov 01, 2016 at 12:01:17AM +0200, yuri.benditov...@daynix.com wrote: From: Yuri Benditovich https://bugzilla.redhat.com/show_bug.cgi?id=1373816 qemu core dump happens during repetitive unpug-plug with multiple queues and Windows RSS-capable guest. If back-end delete requested during virtio-net device initialization, driver still can try configure the device for multiple queues. The virtio-net device is expected to be removed as soon as the initialization is done. Signed-off-by: Yuri Benditovich Reviewed-by: Michael S. Tsirkin Applied to -net, thanks. --- hw/net/virtio-net.c | 4 1 file changed, 4 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 06bfe4b..77a4fae 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -508,6 +508,10 @@ static void virtio_net_set_queues(VirtIONet *n) int i; int r; +if (n->nic->peer_deleted) { +return; +} + for (i = 0; i < n->max_queues; i++) { if (i < n->curr_queues) { r = peer_attach(n, i); -- 1.9.1
Re: [Qemu-devel] [PATCH v2 3/3] hostmem-file: make option 'size' optional
On 10/31/16 16:18 -0200, Eduardo Habkost wrote: On Fri, Oct 28, 2016 at 10:06:40AM +0800, Haozhong Zhang wrote: [...] > > diff --git a/exec.c b/exec.c > > index 264a25f..89065bd 100644 > > --- a/exec.c > > +++ b/exec.c > > @@ -1234,7 +1234,7 @@ static int64_t get_file_size(int fd) > > } > > > > static void *file_ram_alloc(RAMBlock *block, > > -ram_addr_t memory, > > +ram_addr_t *memory, > > const char *path, > > Error **errp) > > { > > @@ -1245,6 +1245,7 @@ static void *file_ram_alloc(RAMBlock *block, > > void *area = MAP_FAILED; > > int fd = -1; > > int64_t file_size; > > +ram_addr_t mem_size = *memory; > > > > if (kvm_enabled() && !kvm_has_sync_mmu()) { > > error_setg(errp, > > @@ -1309,21 +1310,27 @@ static void *file_ram_alloc(RAMBlock *block, > > > > file_size = get_file_size(fd); > > > > -if (memory < block->page_size) { > > +if (!mem_size && file_size > 0) { > > +mem_size = file_size; > > Maybe we should set *memory here and not below? > Qemu currently sets the memory region size to the file size, and block length to the aligned file size, so the code here can be changed as below: memory_region_set_size(block->mr, mem_size); mem_size = HOST_PAGE_ALIGN(mem_size); *memory = mem_size; The second line is necessary because Qemu currently passes the aligned file size to file_ram_alloc(). That would duplicate the existing HOST_PAGE_ALIGN logic from qemu_ram_alloc_from_file(), won't it? I believe that's yet another reason to check file size before initializing the memory region, instead of initializing it first, and fixing up its size later. Agree. In the next version of this patch 3, I'll move file operations (open/close/get size) from file_ram_alloc() to qemu_ram_alloc_from_file(). Thanks, Haozhong
Re: [Qemu-devel] [PATCH] docs: Fix typos found by codespell
On 11/02/2016 01:52 AM, Eric Blake wrote: On 11/01/2016 12:19 PM, Stefan Weil wrote: Fix also some indefinite articles. Signed-off-by: Stefan Weil --- I still don't understand the comment in docs/colo-proxy.txt. Me neither. Which part of the docs/colo-proxy.txt? Perhaps I can explain it more clearly and make the doc better. +++ b/docs/colo-proxy.txt @@ -158,7 +158,7 @@ secondary. == Usage == -Here, we use demo ip and port discribe more clearly. +Here, we use demo ip and port describe more clearly. Primary(ip:3.3.3.3): -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 Maybe: Here is an example using demonstration IP and port addresses to more clearly describe the usage. The remaining hunks are clear improvements, so: Reviewed-by: Eric Blake It's easier to understand. Reviewed-by: Zhang Chen -- Thanks zhangchen
Re: [Qemu-devel] [PULL 08/27] hostmem-file: make option 'size' optional
On 11/01/16 12:16 -0200, Eduardo Habkost wrote: On Tue, Nov 01, 2016 at 05:32:20PM +0800, Haozhong Zhang wrote: On 10/31/16 20:22 -0200, Eduardo Habkost wrote: > On Mon, Oct 31, 2016 at 03:47:53PM -0400, Paolo Bonzini wrote: > > > > > > - Original Message - > > > From: "Eduardo Habkost" > > > To: "Paolo Bonzini" > > > Cc: qemu-devel@nongnu.org, "Haozhong Zhang" > > > Sent: Monday, October 31, 2016 7:20:10 PM > > > Subject: Re: [Qemu-devel] [PULL 08/27] hostmem-file: make option 'size' optional > > > > > > On Mon, Oct 31, 2016 at 03:37:24PM +0100, Paolo Bonzini wrote: > > > [...] > > > > @@ -1309,21 +1317,27 @@ static void *file_ram_alloc(RAMBlock *block, > > > > > > > > file_size = get_file_size(fd); > > > > > > > > -if (memory < block->page_size) { > > > > +if (!mem_size && file_size > 0) { > > > > +mem_size = file_size; > > > > +memory_region_set_size(block->mr, mem_size); > > > > +} > > > > + > > > > +if (mem_size < block->page_size) { > > > > error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to > > > > " > > > > "or larger than page size 0x%zx", > > > > - memory, block->page_size); > > > > + mem_size, block->page_size); > > > > goto error; > > > > } > > > > > > > > -if (file_size > 0 && file_size < memory) { > > > > +if (file_size > 0 && file_size < mem_size) { > > > > error_setg(errp, "backing store %s size %"PRId64 > > > > " does not match 'size' option %"PRIu64, > > > > - path, file_size, memory); > > > > + path, file_size, mem_size); > > > > goto error; > > > > } > > > > > > > > -memory = ROUND_UP(memory, block->page_size); > > > > +mem_size = ROUND_UP(mem_size, block->page_size); > > > > +*memory = mem_size; > > > > > > I suggested not touching *memory unless it was zero, and setting > > > it to the memory region size, not the rounded-up size. Haozhong > > > said this was going to be changed. > > > > > > This will have the side-effect of setting block->used_length and > > > block->max_length to the rounded up size in > > > qemu_ram_alloc_from_file() (instead of the original memory region > > > size). I don't know what could be the consequences of that. > > > > > > This patch also skip HOST_PAGE_ALIGN-ing mem_size after getting > > > the file size, which would be different from the behavior when > > > size is specified explicitly. (And I also don't know the > > > consequences of that) > > > > > > Considering that this pull request failed to build, I suggest > > > waiting for a new version from Haozhong. > > > > Yes, I'll drop these three patches. > > I believe you can keep the other two (as long as the build error > is fixed). I was already going to include them in my pull > request, but removed them. I'm a little confused. Do I need to send a following patch to fix this build error? I was going to send a new version of the entire patch series. I thought we could fix it while committing, to make sure it gets in a pull request for soft freeze. But: * Patch 1/3 ("do not truncate") is a bug fix for nvdimm, so eligible post-soft-freeze. * Patch 2/3 ("check file size") looks like a bug fix too. * Patch 3/3 ("make size optional") is not a bug fix and is riskier, so I believe it won't get into 2.8. So sending a new series is probably simpler, as patch 1-2 can be included after soft freeze. Got it. I'll resend patch 2/3 to fix the build error, and send the new version of patch 3/3 in another series for 2.9. Thanks, Haozhong
Re: [Qemu-devel] [PATCH v9 04/12] vfio iommu: Add support for mediated devices
On 02/11/16 01:01, Kirti Wankhede wrote: > > > On 10/28/2016 7:48 AM, Alexey Kardashevskiy wrote: >> On 27/10/16 23:31, Kirti Wankhede wrote: >>> >>> >>> On 10/27/2016 12:50 PM, Alexey Kardashevskiy wrote: On 18/10/16 08:22, Kirti Wankhede wrote: > VFIO IOMMU drivers are designed for the devices which are IOMMU capable. > Mediated device only uses IOMMU APIs, the underlying hardware can be > managed by an IOMMU domain. > > Aim of this change is: > - To use most of the code of TYPE1 IOMMU driver for mediated devices > - To support direct assigned device and mediated device in single module > > Added two new callback functions to struct vfio_iommu_driver_ops. Backend > IOMMU module that supports pining and unpinning pages for mdev devices > should provide these functions. > Added APIs for pining and unpining pages to VFIO module. These calls back > into backend iommu module to actually pin and unpin pages. > > This change adds pin and unpin support for mediated device to TYPE1 IOMMU > backend module. More details: > - When iommu_group of mediated devices is attached, task structure is > cached which is used later to pin pages and page accounting. For SPAPR TCE IOMMU driver, I ended up caching mm_struct with atomic_inc(&container->mm->mm_count) (patches are on the way) instead of using @current or task as the process might be gone while VFIO container is still alive and @mm might be needed to do proper cleanup; this might not be an issue with this patchset now but still you seem to only use @mm from task_struct. >>> >>> Consider the example of QEMU process which creates VFIO container, QEMU >>> in its teardown path would release the container. How could container be >>> alive when process is gone? >> >> do_exit() in kernel/exit.c calls exit_mm() (which sets NULL to tsk->mm) >> first, and then releases open files by calling exit_files(). So >> container's release() does not have current->mm. >> > > Incrementing usage count (get_task_struct()) while saving task structure > and decementing it (put_task_struct()) from release() should work here. > Updating the patch. I cannot see how the task->usage counter prevents do_exit() from performing the exit, can you? -- Alexey
Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features
> -Original Message- > From: Qemu-devel > [mailto:qemu-devel-bounces+arei.gonglei=huawei@nongnu.org] On > Behalf Of Michael S. Tsirkin > Sent: Wednesday, November 02, 2016 1:26 AM > To: Peter Maydell > Cc: QEMU Developers > Subject: Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features > > On Tue, Nov 01, 2016 at 03:22:01PM +, Peter Maydell wrote: > > On 30 October 2016 at 21:23, Michael S. Tsirkin wrote: > > > The following changes since commit > 5b2ecabaeabc17f032197246c4846b9ba95ba8a6: > > > > > > Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20161028-1' > into staging (2016-10-28 17:59:04 +0100) > > > > > > are available in the git repository at: > > > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > > > for you to fetch changes up to > f082ec0225bd15c71e0b4697d2df3af7bad65d7f: > > > > > > acpi: fix assert failure caused by commit 35c5a52d (2016-10-30 20:06:25 > +0200) > > > > > > > > > virtio, pc: fixes and features > > > > > > nvdimm hotplug support > > > virtio migration and ioeventfd rework > > > virtio crypto device > > > ipmi fixes > > > > > > Signed-off-by: Michael S. Tsirkin > > > > > > > Hi; this fails to build on OSX with format string issues: > > > > /Users/pm215/src/qemu-for-merges/hw/virtio/virtio-crypto.c:770:20: > > error: format specifies type 'unsign > > ed short' but the argument has type 'uint32_t' (aka 'unsigned int') > > [-Werror,-Wformat] > >vcrypto->max_queues, VIRTIO_QUEUE_MAX); > > ~~~^ > > /Users/pm215/src/qemu-for-merges/include/qapi/error.h:163:35: note: > > expanded from macro 'error_setg' > > (fmt), ## __VA_ARGS__) > > ^ > > > > Fun fact: in struct vhost_dev, max_queues is a uint64_t; > > in struct VirtIONet it is a uint16_t; and in VirtIOCrypto > > it is a uint32_t... > > > > thanks > > -- PMM > > Yes - I really think we should move that to virtio core > going forward. > Anyway, I fixed that up and pushed. > Sorry about that. TBH that error log messge is copied from virtio-net.c and I didn't notice their types are different. For virtio-crypto, it should be 'PRIu32'. Regards, -Gonglei
[Qemu-devel] [RESEND PATCH v2 2/3] exec.c: check memory backend file size with 'size' option
If the memory backend file is not large enough to hold the required 'size', Qemu will report error and exit. Signed-off-by: Haozhong Zhang Message-Id: <20161027042300.5929-3-haozhong.zh...@intel.com> Reviewed-by: Eduardo Habkost --- Changes in RESEND: * Use format string RAM_ADDR_FMT for variable 'memory'. This change is to fix the build error. * Add Eduardo's r-b. * Add the message id of this thread. --- exec.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/exec.c b/exec.c index a2b371a..23c21c2 100644 --- a/exec.c +++ b/exec.c @@ -1316,6 +1316,13 @@ static void *file_ram_alloc(RAMBlock *block, goto error; } +if (file_size > 0 && file_size < memory) { +error_setg(errp, "backing store %s size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + path, file_size, memory); +goto error; +} + memory = ROUND_UP(memory, block->page_size); /* -- 2.10.1
Re: [Qemu-devel] [PATCH v10 00/19] Add Mediated device support
On 11/01/2016 11:24 PM, Gerd Hoffmann wrote: >> I rebased KVMGT upon v10, with 2 minor changes: >> >> 1, get_user_pages_remote has only 7 args > > Appears to be a 4.9 merge window change. v10 as-is applies and builds > fine against 4.8, after rebasing to 4.9-rc3 it stops building due to > this. > > Can you share the patch? > >> 2, vfio iommu notifier calls vendor callback with iova instead of pfn > > And this one too? > > Also: github seems to have the v9 kvmgt version still. Can you push > the update? Zhenyu will help to push it to 01org/gvt-linux. Those 2 patches added by me are trivial, I guess Kirti will have her formal fixes in next version. > The kvmgt branch apparently depends on alot of unmerged stuff, "git > describe" says 1669 patches on top of 4.8-rc8. > > Can you outline what this is? Mostly drm-next? > How much of this landed in the 4.9 merge window? Yes, mostly drm-next, targeting 4.10 merge window. The overwhelming part is the device-model under drivers/gpu/drm/i915/gvt/. -- Thanks, Jike
Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field
On Tue, 1 Nov 2016, Eduardo Habkost wrote: > On Tue, Nov 01, 2016 at 05:44:16PM +, Wei Liu wrote: > > Introduce this field to control whether ACPI build is enabled by a > > particular machine or accelerator. > > > > It defaults to true if the machine itself supports ACPI build. Xen > > accelerator will disable it because Xen is in charge of building ACPI > > tables for the guest. > > > > Signed-off-by: Wei Liu > > Reviewed-by: Eduardo Habkost Thank you Eduardo. I can queue it up unless you have other stuff in progress.
Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 05:44:16PM +, Wei Liu wrote: > Introduce this field to control whether ACPI build is enabled by a > particular machine or accelerator. > > It defaults to true if the machine itself supports ACPI build. Xen > accelerator will disable it because Xen is in charge of building ACPI > tables for the guest. > > Signed-off-by: Wei Liu Reviewed-by: Eduardo Habkost -- Eduardo
Re: [Qemu-devel] [PATCH 00/15] optimize Qemu RSS usage
On 10/31/2016 05:00 PM, Michael R. Hines wrote: On 10/18/2016 05:47 AM, Peter Lieven wrote: Am 12.10.2016 um 23:18 schrieb Michael R. Hines: Peter, Greetings from DigitalOcean. We're experiencing the same symptoms without this patch. We have, collectively, many gigabytes of un-planned-for RSS being used per-hypervisor that we would like to get rid of =). Without explicitly trying this patch (will do that ASAP), we immediately noticed that the 192MB mentioned immediately melts away (Yay) when we disabled the coroutine thread pool explicitly, with another ~100MB in additional stack usage that would likely also go away if we applied the entirety of your patch. Is there any chance you have revisited this or have a timeline for it? Hi Michael, the current master already includes some of the patches of this original series. There are still some changes left, but what works for me is the current master + diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 5816702..3eaef68 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -25,8 +25,6 @@ enum { }; /** Free list to speed up creation */ -static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); -static unsigned int release_pool_size; static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); static __thread unsigned int alloc_pool_size; static __thread Notifier coroutine_pool_cleanup_notifier; @@ -49,20 +47,10 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry) if (CONFIG_COROUTINE_POOL) { co = QSLIST_FIRST(&alloc_pool); if (!co) { -if (release_pool_size > POOL_BATCH_SIZE) { -/* Slow path; a good place to register the destructor, too. */ -if (!coroutine_pool_cleanup_notifier.notify) { -coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; - qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); -} - -/* This is not exact; there could be a little skew between - * release_pool_size and the actual size of release_pool. But - * it is just a heuristic, it does not need to be perfect. - */ -alloc_pool_size = atomic_xchg(&release_pool_size, 0); -QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); -co = QSLIST_FIRST(&alloc_pool); +/* Slow path; a good place to register the destructor, too. */ +if (!coroutine_pool_cleanup_notifier.notify) { +coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; + qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); } } if (co) { @@ -85,11 +73,6 @@ static void coroutine_delete(Coroutine *co) co->caller = NULL; if (CONFIG_COROUTINE_POOL) { -if (release_pool_size < POOL_BATCH_SIZE * 2) { -QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); -atomic_inc(&release_pool_size); -return; -} if (alloc_pool_size < POOL_BATCH_SIZE) { QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); alloc_pool_size++; + invoking qemu with the following environemnet variable set: MALLOC_MMAP_THRESHOLD_=32768 qemu-system-x86_64 The last one makes glibc automatically using mmap when the malloced memory exceeds 32kByte. Peter, I tested the above patch (and the environment variable --- it doesn't quite come close to as lean of an RSS tally as the original patchset there's still about 70-80 MB of remaining RSS. Any chance you could trim the remaining fat before merging this? =) False alarm! I didn't set the MMAP threshold low enough. Now the results are on-par with the other patchset. Thank you!
[Qemu-devel] [PATCH 4/4] target-m68k: Use gen_ea_mode in cmpm
The writeback infrastructure takes care of ensuring that Ax == Ay both uses the updated register value from Ay for Ax, and updating the final register result twice. ??? Maybe squash into previous. Signed-off-by: Richard Henderson --- target-m68k/translate.c | 28 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index 85cdba5..aefd90c 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -2227,26 +2227,14 @@ DISAS_INSN(cmpa) DISAS_INSN(cmpm) { int opsize = insn_opsize(insn); -TCGv tmp = tcg_temp_new(); -TCGv src, dst, addr; - -src = gen_load(s, opsize, AREG(insn, 0), 1); -/* delay the update after the second gen_load() */ -tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize)); - -/* but if the we use the same address register to - * read the second value, we must use the updated address - */ -if (REG(insn, 0) == REG(insn, 9)) { -addr = tmp; -} else { -addr = AREG(insn, 9); -} - -dst = gen_load(s, opsize, addr, 1); -tcg_gen_mov_i32(AREG(insn, 0), tmp); -tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize)); -tcg_temp_free(tmp); +TCGv src, dst; + +/* Post-increment load (mode 3) from Ay. */ +src = gen_ea_mode(env, s, 3, REG(insn, 0), opsize, + NULL_QREG, NULL, EA_LOADS); +/* Post-increment load (mode 3) from Ax. */ +dst = gen_ea_mode(env, s, 3, REG(insn, 9), opsize, + NULL_QREG, NULL, EA_LOADS); gen_update_cc_cmp(s, dst, src, opsize); } -- 2.7.4
[Qemu-devel] [PATCH 3/4] target-m68k: add cmpm
From: Laurent Vivier Signed-off-by: Laurent Vivier Reviewed-by: Richard Henderson Message-Id: <1477604609-2206-2-git-send-email-laur...@vivier.eu> Signed-off-by: Richard Henderson --- target-m68k/translate.c | 28 1 file changed, 28 insertions(+) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index 00018b4..85cdba5 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -2224,6 +2224,33 @@ DISAS_INSN(cmpa) gen_update_cc_cmp(s, reg, src, opsize); } +DISAS_INSN(cmpm) +{ +int opsize = insn_opsize(insn); +TCGv tmp = tcg_temp_new(); +TCGv src, dst, addr; + +src = gen_load(s, opsize, AREG(insn, 0), 1); +/* delay the update after the second gen_load() */ +tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize)); + +/* but if the we use the same address register to + * read the second value, we must use the updated address + */ +if (REG(insn, 0) == REG(insn, 9)) { +addr = tmp; +} else { +addr = AREG(insn, 9); +} + +dst = gen_load(s, opsize, addr, 1); +tcg_gen_mov_i32(AREG(insn, 0), tmp); +tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize)); +tcg_temp_free(tmp); + +gen_update_cc_cmp(s, dst, src, opsize); +} + DISAS_INSN(eor) { TCGv src; @@ -3465,6 +3492,7 @@ void register_m68k_insns (CPUM68KState *env) INSN(cmpa, b1c0, f1c0, CF_ISA_A); INSN(cmp, b000, f100, M68000); INSN(eor, b100, f100, M68000); +INSN(cmpm, b108, f138, M68000); INSN(cmpa, b0c0, f0c0, M68000); INSN(eor, b180, f1c0, CF_ISA_A); BASE(and, c000, f000); -- 2.7.4
[Qemu-devel] [PATCH 1/4] target-m68k: Delay autoinc writeback
Signed-off-by: Richard Henderson --- target-m68k/translate.c | 83 + 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index 9ad974f..f812c4b 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -59,12 +59,12 @@ static TCGv cpu_aregs[8]; static TCGv_i64 cpu_fregs[8]; static TCGv_i64 cpu_macc[4]; -#define REG(insn, pos) (((insn) >> (pos)) & 7) +#define REG(insn, pos) (((insn) >> (pos)) & 7) #define DREG(insn, pos) cpu_dregs[REG(insn, pos)] -#define AREG(insn, pos) cpu_aregs[REG(insn, pos)] +#define AREG(insn, pos) get_areg(s, REG(insn, pos)) #define FREG(insn, pos) cpu_fregs[REG(insn, pos)] -#define MACREG(acc) cpu_macc[acc] -#define QREG_SP cpu_aregs[7] +#define MACREG(acc) cpu_macc[acc] +#define QREG_SP get_areg(s, 7) static TCGv NULL_QREG; #define IS_NULL_QREG(t) (TCGV_EQUAL(t, NULL_QREG)) @@ -141,8 +141,55 @@ typedef struct DisasContext { int singlestep_enabled; TCGv_i64 mactmp; int done_mac; +int writeback_mask; +TCGv writeback[8]; } DisasContext; +static TCGv get_areg(DisasContext *s, unsigned regno) +{ +if (s->writeback_mask & (1 << regno)) { +return s->writeback[regno]; +} else { +return cpu_aregs[regno]; +} +} + +static void delay_set_areg(DisasContext *s, unsigned regno, + TCGv val, bool give_temp) +{ +if (s->writeback_mask & (1 << regno)) { +if (give_temp) { +tcg_temp_free(s->writeback[regno]); +s->writeback[regno] = val; +} else { +tcg_gen_mov_i32(s->writeback[regno], val); +} +} else { +s->writeback_mask |= 1 << regno; +if (give_temp) { +s->writeback[regno] = val; +} else { +TCGv tmp = tcg_temp_new(); +s->writeback[regno] = tmp; +tcg_gen_mov_i32(tmp, val); +} +} +} + +static void do_writebacks(DisasContext *s) +{ +unsigned mask = s->writeback_mask; +if (mask) { +s->writeback_mask = 0; +do { +unsigned regno = ctz32(mask); +tcg_gen_mov_i32(cpu_aregs[regno], s->writeback[regno]); +tcg_temp_free(s->writeback[regno]); +mask &= mask - 1; +} while (mask); +} +} + #define DISAS_JUMP_NEXT 4 #if defined(CONFIG_USER_ONLY) @@ -331,7 +378,7 @@ static inline uint32_t read_im32(CPUM68KState *env, DisasContext *s) } /* Calculate and address index. */ -static TCGv gen_addr_index(uint16_t ext, TCGv tmp) +static TCGv gen_addr_index(DisasContext *s, uint16_t ext, TCGv tmp) { TCGv add; int scale; @@ -388,7 +435,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) tmp = tcg_temp_new(); if ((ext & 0x44) == 0) { /* pre-index */ -add = gen_addr_index(ext, tmp); +add = gen_addr_index(s, ext, tmp); } else { add = NULL_QREG; } @@ -417,7 +464,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) /* memory indirect */ base = gen_load(s, OS_LONG, add, 0); if ((ext & 0x44) == 4) { -add = gen_addr_index(ext, tmp); +add = gen_addr_index(s, ext, tmp); tcg_gen_add_i32(tmp, add, base); add = tmp; } else { @@ -441,7 +488,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) } else { /* brief extension word format */ tmp = tcg_temp_new(); -add = gen_addr_index(ext, tmp); +add = gen_addr_index(s, ext, tmp); if (!IS_NULL_QREG(base)) { tcg_gen_add_i32(tmp, add, base); if ((int8_t)ext) @@ -755,10 +802,11 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, case 3: /* Indirect postincrement. */ reg = AREG(insn, 0); result = gen_ldst(s, opsize, reg, val, what); -/* ??? This is not exception safe. The instruction may still - fault after this point. */ -if (what == EA_STORE || !addrp) -tcg_gen_addi_i32(reg, reg, opsize_bytes(opsize)); +if (what == EA_STORE || !addrp) { +TCGv tmp = tcg_temp_new(); +tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize)); +delay_set_areg(s, REG(insn, 0), tmp, true); +} return result; case 4: /* Indirect predecrememnt. */ { @@ -773,11 +821,8 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, *addrp = tmp; } result = gen_ldst(s, opsize, tmp, val, what); -/* ??? This is not exception safe. The instruction may still - fault after this point. */ if (what == EA_STORE || !addrp) { -reg = AREG(insn, 0); -
[Qemu-devel] [PATCH 0/4] target-m68k areg writeback
Here's the patch I almost wrote in the email, followed by a cleanup that allows cmpm to be written "nicely". I can test this to some extent with the coldfire kernel, but of course coldfire can't excersise any of the tricky edge cases that m68000 can. I'm particularly interested in edge cases like mov.b a0@+, a0@+ movea a0@+, a0 movea a0, a0@- The first two are not really useful and likely not show up in normal code. The third may well do so; I think our current code gets it wrong, but this will get it right. r~ Laurent Vivier (1): target-m68k: add cmpm Richard Henderson (3): target-m68k: Delay autoinc writeback target-m68k: Split gen_lea and gen_ea target-m68k: Use gen_ea_mode in cmpm target-m68k/translate.c | 207 +++- 1 file changed, 136 insertions(+), 71 deletions(-) -- 2.7.4
[Qemu-devel] [PATCH 2/4] target-m68k: Split gen_lea and gen_ea
Provide gen_lea_mode and gen_ea_mode, where the mode can be specified manually, rather than taken from the instruction. Signed-off-by: Richard Henderson --- target-m68k/translate.c | 112 +--- 1 file changed, 59 insertions(+), 53 deletions(-) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index f812c4b..00018b4 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -697,37 +697,37 @@ static void gen_partset_reg(int opsize, TCGv reg, TCGv val) /* Generate code for an "effective address". Does not adjust the base register for autoincrement addressing modes. */ -static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, -int opsize) +static TCGv gen_lea_mode(CPUM68KState *env, DisasContext *s, + int mode, int reg0, int opsize) { TCGv reg; TCGv tmp; uint16_t ext; uint32_t offset; -switch ((insn >> 3) & 7) { +switch (mode) { case 0: /* Data register direct. */ case 1: /* Address register direct. */ return NULL_QREG; case 2: /* Indirect register */ case 3: /* Indirect postincrement. */ -return AREG(insn, 0); +return get_areg(s, reg0); case 4: /* Indirect predecrememnt. */ -reg = AREG(insn, 0); +reg = get_areg(s, reg0); tmp = tcg_temp_new(); tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize)); return tmp; case 5: /* Indirect displacement. */ -reg = AREG(insn, 0); +reg = get_areg(s, reg0); tmp = tcg_temp_new(); ext = read_im16(env, s); tcg_gen_addi_i32(tmp, reg, (int16_t)ext); return tmp; case 6: /* Indirect index + displacement. */ -reg = AREG(insn, 0); +reg = get_areg(s, reg0); return gen_lea_indexed(env, s, reg); case 7: /* Other */ -switch (insn & 7) { +switch (reg0) { case 0: /* Absolute short. */ offset = (int16_t)read_im16(env, s); return tcg_const_i32(offset); @@ -749,39 +749,26 @@ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, return NULL_QREG; } -/* Helper function for gen_ea. Reuse the computed address between the - for read/write operands. */ -static inline TCGv gen_ea_once(CPUM68KState *env, DisasContext *s, - uint16_t insn, int opsize, TCGv val, - TCGv *addrp, ea_what what) +static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, +int opsize) { -TCGv tmp; - -if (addrp && what == EA_STORE) { -tmp = *addrp; -} else { -tmp = gen_lea(env, s, insn, opsize); -if (IS_NULL_QREG(tmp)) -return tmp; -if (addrp) -*addrp = tmp; -} -return gen_ldst(s, opsize, tmp, val, what); +int mode = extract32(insn, 3, 3); +int reg0 = REG(insn, 0); +return gen_lea_mode(env, s, mode, reg0, opsize); } -/* Generate code to load/store a value from/into an EA. If VAL > 0 this is +/* Generate code to load/store a value from/into an EA. If WHAT > 0 this is a write otherwise it is a read (0 == sign extend, -1 == zero extend). ADDRP is non-null for readwrite operands. */ -static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, - int opsize, TCGv val, TCGv *addrp, ea_what what) +static TCGv gen_ea_mode(CPUM68KState *env, DisasContext *s, int mode, int reg0, +int opsize, TCGv val, TCGv *addrp, ea_what what) { -TCGv reg; -TCGv result; -uint32_t offset; +TCGv reg, tmp, result; +int32_t offset; -switch ((insn >> 3) & 7) { +switch (mode) { case 0: /* Data register direct. */ -reg = DREG(insn, 0); +reg = cpu_dregs[reg0]; if (what == EA_STORE) { gen_partset_reg(opsize, reg, val); return store_dummy; @@ -789,7 +776,7 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, return gen_extend(reg, opsize, what == EA_LOADS); } case 1: /* Address register direct. */ -reg = AREG(insn, 0); +reg = get_areg(s, reg0); if (what == EA_STORE) { tcg_gen_mov_i32(reg, val); return store_dummy; @@ -797,45 +784,56 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, return gen_extend(reg, opsize, what == EA_LOADS); } case 2: /* Indirect register */ -reg = AREG(insn, 0); +reg = get_areg(s, reg0); return gen_ldst(s, opsize, reg, val, what); case 3: /* Indirect postincrement. */ -reg = AREG(insn, 0); +reg = get_areg(s, reg0); result = gen_ldst(s, opsize, reg, val, what); if (what == EA_STORE || !addrp) { TCGv tmp = tcg_temp_new(); tcg_gen_addi_i32(tmp, re
Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches
On 1 November 2016 at 19:21, Richard Henderson wrote: > On 11/01/2016 12:17 PM, Peter Maydell wrote: >> On 1 November 2016 at 17:51, Richard Henderson wrote: >>> >>> The usual pain point for me is building for 32-bit on a 64-bit >>> system, where there is no cross-prefix that one can use, and >>> PKG_CONFIG_PATH and --extra-cflags are the only changes. >> >> >> That sounds like a bug in how the 32-bit compilers work on >> x86 ;-) Ideally you should be able to get a 32-bit compile >> with i586-linux-gnu-gcc... > gcc -m32 worked very well for years, before pkg-config > came along and mucked things up by using package-local > include and library directories. As long as everything > was installed in the toolchain-aware search paths, everything > Just Worked. -m32 doesn't extend to anything other than the 32-bit/64-bit special case, though, and pkg-config does more than just tell you include paths. thanks -- PMM
Re: [Qemu-devel] [PATCH v4 2/2] target-m68k: add 680x0 divu/divs variants
On 11/01/2016 02:03 PM, Laurent Vivier wrote: Update helper to set the throwing location in case of div-by-0. Cleanup divX.w and add quad word variants of divX.l. Signed-off-by: Laurent Vivier --- linux-user/main.c | 7 ++ target-m68k/cpu.h | 4 -- target-m68k/helper.h| 8 ++- target-m68k/op_helper.c | 182 +--- target-m68k/qregs.def | 2 - target-m68k/translate.c | 84 -- 6 files changed, 217 insertions(+), 70 deletions(-) Reviewed-by: Richard Henderson r~
Re: [Qemu-devel] [PATCH] atomic.h: Use __atomic_load_n() primitive
Hi, Your series seems to have some coding style problems. See output below for more information: Type: series Subject: [Qemu-devel] [PATCH] atomic.h: Use __atomic_load_n() primitive Message-id: 20161101203953.18065-1-bobby.pr...@gmail.com === TEST SCRIPT BEGIN === #!/bin/bash BASE=base n=1 total=$(git log --oneline $BASE.. | wc -l) failed=0 # Useful git options git config --local diff.renamelimit 0 git config --local diff.renames True commits="$(git log --format=%H --reverse $BASE..)" for c in $commits; do echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..." if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then failed=1 echo fi n=$((n+1)) done exit $failed === TEST SCRIPT END === Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 From https://github.com/patchew-project/qemu - [tag update] patchew/1478022256-7089-1-git-send-email-wei.l...@citrix.com -> patchew/1478022256-7089-1-git-send-email-wei.l...@citrix.com - [tag update] patchew/20161101171927.25684-1...@weilnetz.de -> patchew/20161101171927.25684-1...@weilnetz.de * [new tag] patchew/20161101203953.18065-1-bobby.pr...@gmail.com -> patchew/20161101203953.18065-1-bobby.pr...@gmail.com Switched to a new branch 'test' 587647b atomic.h: Use __atomic_load_n() primitive === OUTPUT BEGIN === fatal: unrecognized argument: --no-patch Checking PATCH 1/1: ... ERROR: memory barrier without comment #32: FILE: include/qemu/atomic.h:130: +smp_read_barrier_depends(); \ total: 1 errors, 0 warnings, 40 lines checked Your patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. === OUTPUT END === Test command exited with code: 1 --- Email generated automatically by Patchew [http://patchew.org/]. Please send your feedback to patchew-de...@freelists.org
[Qemu-devel] [PATCH] atomic.h: Use __atomic_load_n() primitive
Use __atomic_load_n() primitive saving a load and store to a local variable. Signed-off-by: Pranith Kumar --- include/qemu/atomic.h | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 878fa07..be44094 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -120,20 +120,22 @@ * same, but this slows down atomic_rcu_read unnecessarily. */ #ifdef __SANITIZE_THREAD__ -#define atomic_rcu_read__nocheck(ptr, valptr) \ -__atomic_load(ptr, valptr, __ATOMIC_CONSUME); +#define atomic_rcu_read__nocheck(ptr) \ +__atomic_load_n(ptr, __ATOMIC_CONSUME); #else -#define atomic_rcu_read__nocheck(ptr, valptr) \ -__atomic_load(ptr, valptr, __ATOMIC_RELAXED); \ -smp_read_barrier_depends(); +#define atomic_rcu_read__nocheck(ptr) \ +({\ +typeof_strip_qual(*ptr) _val; \ +__atomic_load(ptr, &_val, __ATOMIC_RELAXED); \ +smp_read_barrier_depends(); \ +_val; \ +}) #endif #define atomic_rcu_read(ptr) \ ({\ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ -typeof_strip_qual(*ptr) _val; \ -atomic_rcu_read__nocheck(ptr, &_val); \ -_val; \ +atomic_rcu_read__nocheck(ptr);\ }) #define atomic_rcu_set(ptr, i) do { \ @@ -144,9 +146,7 @@ #define atomic_load_acquire(ptr)\ ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ -typeof_strip_qual(*ptr) _val; \ -__atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);\ -_val; \ +__atomic_load_n(ptr, __ATOMIC_ACQUIRE); \ }) #define atomic_store_release(ptr, i) do { \ -- 2.10.2
[Qemu-devel] [Bug 1629618] Re: QEMU causes host hang / reset on PPC64EL
Unfortunately, the machine just crashed again. It seems related to the use of 4K pages instead of the more typical 64K pages; the machine is rock solid with 64K pages. ** Changed in: qemu Status: Fix Released => New -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1629618 Title: QEMU causes host hang / reset on PPC64EL Status in QEMU: New Bug description: QEMU causes a host hang / reset on PPC64EL when used in KVM + HV mode (kvm_hv module). After a random amount of uptime, starting new QEMU virtual machines will cause the host to experience a soft CPU lockup. Depending on configuration and other random factors the host will either checkstop and reboot, or hang indefinitely. The following stacktrace was pulled from an instance where the host simply hung after starting a fourth virtual machine. Command line: qemu-system-ppc64 --enable-kvm -M pseries -cpu host -smp 14,cores=14,threads=1,sockets=1 -m 64G -realtime mlock=on -kernel vmlinux-4.7.0-1-powerpc64le -initrd initrd.img-4.7.0-1-powerpc64le Lockup trace: [ 527.393933] KVM guest htab at c03ae400 (order 29), LPID 4 [ 574.637695] INFO: rcu_sched self-detected stall on CPU [ 574.637799]112-...: (5249 ticks this GP) idle=699/141/0 softirq=5358/5382 fqs=5072 [ 574.637877] (t=5250 jiffies g=19853 c=19852 q=64401) [ 574.637947] Task dump for CPU 112: [ 574.637982] qemu-system-ppc R running task0 12037 11828 0x00040004 [ 574.638051] Call Trace: [ 574.638081] [c01c1cddb430] [c00f2710] sched_show_task+0xe0/0x180 (unreliable) [ 574.638164] [c01c1cddb4a0] [c01326f4] rcu_dump_cpu_stacks+0xe4/0x150 [ 574.638246] [c01c1cddb4f0] [c0137a04] rcu_check_callbacks+0x6b4/0x9c0 [ 574.638328] [c01c1cddb610] [c013f7c4] update_process_times+0x54/0xa0 [ 574.638409] [c01c1cddb640] [c0156c28] tick_sched_handle.isra.5+0x48/0xe0 [ 574.638489] [c01c1cddb680] [c0156d24] tick_sched_timer+0x64/0xd0 [ 574.638602] [c01c1cddb6c0] [c0140274] __hrtimer_run_queues+0x124/0x420 [ 574.638683] [c01c1cddb750] [c014123c] hrtimer_interrupt+0xec/0x2c0 [ 574.638765] [c01c1cddb810] [c001fe5c] __timer_interrupt+0x8c/0x270 [ 574.638847] [c01c1cddb860] [c002053c] timer_interrupt+0x9c/0xe0 [ 574.638915] [c01c1cddb890] [c0002750] decrementer_common+0x150/0x180 [ 574.639001] --- interrupt: 901 at kvmppc_hv_get_dirty_log+0x1c4/0x570 [kvm_hv] [ 574.639001] LR = kvmppc_hv_get_dirty_log+0x1f8/0x570 [kvm_hv] [ 574.639114] [c01c1cddbc30] [d0001a524980] kvm_vm_ioctl_get_dirty_log_hv+0xd0/0x170 [kvm_hv] [ 574.639209] [c01c1cddbc80] [d0001a4d4140] kvm_vm_ioctl_get_dirty_log+0x40/0x60 [kvm] [ 574.639291] [c01c1cddbcb0] [d0001a4ca3cc] kvm_vm_ioctl+0x3fc/0x760 [kvm] [ 574.639372] [c01c1cddbd40] [c02d9e18] do_vfs_ioctl+0xd8/0x8e0 [ 574.639442] [c01c1cddbde0] [c02da6f4] SyS_ioctl+0xd4/0xf0 [ 574.639512] [c01c1cddbe30] [c0009260] system_call+0x38/0x108 [ 580.601573] NMI watchdog: BUG: soft lockup - CPU#112 stuck for 22s! [qemu-system-ppc:12037] [ 580.601655] Modules linked in: xt_tcpudp(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) ext4(E) ecb(E) crc16(E) jbd2(E) mbcache(E) tun(E) btrfs(E) crc32c_generic(E) raid6_pq(E) xor(E) dm_crypt(E) xts(E) gf128mul(E) algif_skcipher(E) af_alg(E) dm_mod(E) bonding(E) cpufreq_stats(E) iptable_filter(E) ip_tables(E) x_tables(E) bridge(E) stp(E) llc(E) ipmi_devintf(E) ipmi_msghandler(E) i2c_dev(E) fuse(E) raid1(E) md_mod(E) ses(E) sd_mod(E) enclosure(E) sg(E) binfmt_misc(E) radeon(E) ttm(E) drm_kms_helper(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) drm(E) snd_hda_codec(E) snd_hda_core(E) snd_hwdep(E) snd_pcm(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) snd_timer(E) evdev(E) i2c_algo_bit(E) snd(E) soundcore(E) at24(E) ahci(E) mpt3sas(E) nvmem_core(E) libahci(E) raid_class(E) scsi_transport_sas(E) powernv_rng(E) rng_core(E) uinput(E) kvm_hv(E) kvm(E) ib_srp(E) scsi_transport_srp(E) ofpart(E) powernv_flash(E) mtd(E) nfsd(E) opal_prd(E) auth_rpcgss(E) parport_pc(E) lp(E) parport(E) autofs4(E) nfsv3(E) nfs_acl(E) nfs(E) lockd(E) grace(E) sunrpc(E) fscache(E) ib_ipoib(E) ib_umad(E) rdma_ucm(E) ib_uverbs(E) rdma_cm(E) iw_cm(E) ib_cm(E) ib_sa(E) configfs(E) hid_generic(E) usbhid(E) hid(E) xhci_pci(E) xhci_hcd(E) usbcore(E) tg3(E) usb_common(E) ptp(E) pps_core(E) libphy(E) ib_mthca(E) ib_mad(E) ib_core(E) ib_addr(E) [ 580.603295] CPU: 112 PID: 12037 Comm: qemu-system-ppc Tainted: G E 4.6.0-2-powerpc64le #1 Debian 4.6.3-1 [ 580.603386] task: c01f706f0180 ti: c01c1cdd8000 task.ti: c01c1cdd8000 [ 580.603456] NIP: d0001a52cb54 LR: d0001a52cb88 CTR:
[Qemu-devel] [PATCH v4 1/2] target-m68k: add 64bit mull
Signed-off-by: Laurent Vivier Reviewed-by: Richard Henderson --- target-m68k/translate.c | 62 +++-- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index 8433fa0..61986cc 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -1812,24 +1812,62 @@ DISAS_INSN(tas) DISAS_INSN(mull) { uint16_t ext; -TCGv reg; TCGv src1; -TCGv dest; +int sign; -/* The upper 32 bits of the product are discarded, so - muls.l and mulu.l are functionally equivalent. */ ext = read_im16(env, s); -if (ext & 0x87ff) { -gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED); + +sign = ext & 0x800; + +if (ext & 0x400) { +if (!m68k_feature(s->env, M68K_FEATURE_QUAD_MULDIV)) { +gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED); +return; +} + +SRC_EA(env, src1, OS_LONG, 0, NULL); + +if (sign) { +tcg_gen_muls2_i32(QREG_CC_Z, QREG_CC_N, src1, DREG(ext, 12)); +} else { +tcg_gen_mulu2_i32(QREG_CC_Z, QREG_CC_N, src1, DREG(ext, 12)); +} +/* if Dl == Dh, 68040 returns low word */ +tcg_gen_mov_i32(DREG(ext, 0), QREG_CC_N); +tcg_gen_mov_i32(DREG(ext, 12), QREG_CC_Z); +tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_N); + +tcg_gen_movi_i32(QREG_CC_V, 0); +tcg_gen_movi_i32(QREG_CC_C, 0); + +set_cc_op(s, CC_OP_FLAGS); return; } -reg = DREG(ext, 12); SRC_EA(env, src1, OS_LONG, 0, NULL); -dest = tcg_temp_new(); -tcg_gen_mul_i32(dest, src1, reg); -tcg_gen_mov_i32(reg, dest); -/* Unlike m68k, coldfire always clears the overflow bit. */ -gen_logic_cc(s, dest, OS_LONG); +if (m68k_feature(s->env, M68K_FEATURE_M68000)) { +tcg_gen_movi_i32(QREG_CC_C, 0); +if (sign) { +tcg_gen_muls2_i32(QREG_CC_N, QREG_CC_V, src1, DREG(ext, 12)); +/* QREG_CC_V is -(QREG_CC_V != (QREG_CC_N >> 31)) */ +tcg_gen_sari_i32(QREG_CC_Z, QREG_CC_N, 31); +tcg_gen_setcond_i32(TCG_COND_NE, QREG_CC_V, QREG_CC_V, QREG_CC_Z); +} else { +tcg_gen_mulu2_i32(QREG_CC_N, QREG_CC_V, src1, DREG(ext, 12)); +/* QREG_CC_V is -(QREG_CC_V != 0), use QREG_CC_C as 0 */ +tcg_gen_setcond_i32(TCG_COND_NE, QREG_CC_V, QREG_CC_V, QREG_CC_C); +} +tcg_gen_neg_i32(QREG_CC_V, QREG_CC_V); +tcg_gen_mov_i32(DREG(ext, 12), QREG_CC_N); + +tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N); + +set_cc_op(s, CC_OP_FLAGS); +} else { +/* The upper 32 bits of the product are discarded, so + muls.l and mulu.l are functionally equivalent. */ +tcg_gen_mul_i32(DREG(ext, 12), src1, DREG(ext, 12)); +gen_logic_cc(s, DREG(ext, 12), OS_LONG); +} } static void gen_link(DisasContext *s, uint16_t insn, int32_t offset) -- 2.7.4
[Qemu-devel] [PATCH v4 2/2] target-m68k: add 680x0 divu/divs variants
Update helper to set the throwing location in case of div-by-0. Cleanup divX.w and add quad word variants of divX.l. Signed-off-by: Laurent Vivier --- linux-user/main.c | 7 ++ target-m68k/cpu.h | 4 -- target-m68k/helper.h| 8 ++- target-m68k/op_helper.c | 182 +--- target-m68k/qregs.def | 2 - target-m68k/translate.c | 84 -- 6 files changed, 217 insertions(+), 70 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index 75b199f..c1d5eb4 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -2864,6 +2864,13 @@ void cpu_loop(CPUM68KState *env) info._sifields._sigfault._addr = env->pc; queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; +case EXCP_DIV0: +info.si_signo = TARGET_SIGFPE; +info.si_errno = 0; +info.si_code = TARGET_FPE_INTDIV; +info._sifields._sigfault._addr = env->pc; +queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); +break; case EXCP_TRAP0: { abi_long ret; diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h index 6dfb54e..0b4ed7b 100644 --- a/target-m68k/cpu.h +++ b/target-m68k/cpu.h @@ -95,10 +95,6 @@ typedef struct CPUM68KState { uint32_t macsr; uint32_t mac_mask; -/* Temporary storage for DIV helpers. */ -uint32_t div1; -uint32_t div2; - /* MMU status. */ struct { uint32_t ar; diff --git a/target-m68k/helper.h b/target-m68k/helper.h index aae01f9..d863e55 100644 --- a/target-m68k/helper.h +++ b/target-m68k/helper.h @@ -1,8 +1,12 @@ DEF_HELPER_1(bitrev, i32, i32) DEF_HELPER_1(ff1, i32, i32) DEF_HELPER_FLAGS_2(sats, TCG_CALL_NO_RWG_SE, i32, i32, i32) -DEF_HELPER_2(divu, void, env, i32) -DEF_HELPER_2(divs, void, env, i32) +DEF_HELPER_3(divuw, void, env, int, i32) +DEF_HELPER_3(divsw, void, env, int, s32) +DEF_HELPER_4(divul, void, env, int, int, i32) +DEF_HELPER_4(divsl, void, env, int, int, s32) +DEF_HELPER_4(divull, void, env, int, int, i32) +DEF_HELPER_4(divsll, void, env, int, int, s32) DEF_HELPER_2(set_sr, void, env, i32) DEF_HELPER_3(movec, void, env, i32, i32) diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c index 48e02e4..a4bfa4e 100644 --- a/target-m68k/op_helper.c +++ b/target-m68k/op_helper.c @@ -166,12 +166,17 @@ bool m68k_cpu_exec_interrupt(CPUState *cs, int interrupt_request) return false; } -static void raise_exception(CPUM68KState *env, int tt) +static void raise_exception_ra(CPUM68KState *env, int tt, uintptr_t raddr) { CPUState *cs = CPU(m68k_env_get_cpu(env)); cs->exception_index = tt; -cpu_loop_exit(cs); +cpu_loop_exit_restore(cs, raddr); +} + +static void raise_exception(CPUM68KState *env, int tt) +{ +raise_exception_ra(env, tt, 0); } void HELPER(raise_exception)(CPUM68KState *env, uint32_t tt) @@ -179,51 +184,178 @@ void HELPER(raise_exception)(CPUM68KState *env, uint32_t tt) raise_exception(env, tt); } -void HELPER(divu)(CPUM68KState *env, uint32_t word) +void HELPER(divuw)(CPUM68KState *env, int destr, uint32_t den) { -uint32_t num; -uint32_t den; -uint32_t quot; -uint32_t rem; +uint32_t num = env->dregs[destr]; +uint32_t quot, rem; -num = env->div1; -den = env->div2; -/* ??? This needs to make sure the throwing location is accurate. */ if (den == 0) { -raise_exception(env, EXCP_DIV0); +raise_exception_ra(env, EXCP_DIV0, GETPC()); } quot = num / den; rem = num % den; -env->cc_v = (word && quot > 0x ? -1 : 0); +env->cc_c = 0; /* always cleared, even if overflow */ +if (quot > 0x) { +env->cc_v = -1; +/* nothing else is modified */ +/* real 68040 keeps Z and N on overflow, + * whereas documentation says "undefined" + */ +return; +} +env->dregs[destr] = deposit32(quot, 16, 16, rem); env->cc_z = quot; env->cc_n = quot; +env->cc_v = 0; +} + +void HELPER(divsw)(CPUM68KState *env, int destr, int32_t den) +{ +int32_t num = env->dregs[destr]; +uint32_t quot, rem; + +if (den == 0) { +raise_exception_ra(env, EXCP_DIV0, GETPC()); +} +quot = num / den; +rem = num % den; + +env->cc_c = 0; /* always cleared, even if overflow */ +if (quot != (int16_t)quot) { +env->cc_v = -1; +/* nothing else is modified */ +/* real 68040 keeps Z and N on overflow, + * whereas documentation says "undefined" + */ +return; +} +env->dregs[destr] = deposit32(quot, 16, 16, rem); +env->cc_z = quot; +env->cc_n = quot; +env->cc_v = 0; +} + +void HELPER(divul)(CPUM68KState *env, int numr, int regr, uint32_t den) +{ +uint32_t num = env->dregs[numr]; +uint32_t quot, rem; + +if (den == 0) { +raise_exception_ra(env, EXCP_DIV0, GETPC()
[Qemu-devel] [PATCH v4 0/2] 680x0 mul and div instructions
This series is another subset of the series I sent in May: https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00501.html It must be applied on top of series: "target-m68k: 680x0 instruction set, part 2" This subset contains reworked patches of mul and div instructions: - "add 64bit mull": correctly set QREG_CC_V, - "inline divu/divs": don't inline divu/divs, but update existing functions to manage 680x0 div instructions I've checked it doesn't break coldfire support: http://wiki.qemu.org/download/coldfire-test-0.1.tar.bz2 but it can't boot a 680x0 processor kernel. v4: - divull/divsll: build correctly the 64bit num value check overflow as it is done in divsw v3: - mull: manage the case where Dl == Dh - divu/divs: move all the mechanic to helpers, and pass the register number to directly set the result v2: - Free "rem" and "quot" in "divl". Laurent Vivier (2): target-m68k: add 64bit mull target-m68k: add 680x0 divu/divs variants linux-user/main.c | 7 ++ target-m68k/cpu.h | 4 -- target-m68k/helper.h| 8 ++- target-m68k/op_helper.c | 182 +--- target-m68k/qregs.def | 2 - target-m68k/translate.c | 146 +- 6 files changed, 267 insertions(+), 82 deletions(-) -- 2.7.4
Re: [Qemu-devel] [PATCH v2 1/3] target-m68k: add cmpm
Le 01/11/2016 à 18:48, Richard Henderson a écrit : > On 10/27/2016 03:43 PM, Laurent Vivier wrote: >> +DISAS_INSN(cmpm) >> +{ >> +int opsize = insn_opsize(insn); >> +TCGv tmp = tcg_temp_new(); >> +TCGv src, dst, addr; >> + >> +src = gen_load(s, opsize, AREG(insn, 0), 1); >> +/* delay the update after the second gen_load() */ >> +tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize)); >> + >> +/* but if the we use the same address register to >> + * read the second value, we must use the updated address >> + */ >> +if (REG(insn, 0) == REG(insn, 9)) { >> +addr = tmp; >> +} else { >> +addr = AREG(insn, 9); >> +} >> + >> +dst = gen_load(s, opsize, addr, 1); >> +tcg_gen_mov_i32(AREG(insn, 0), tmp); >> +tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize)); > > I wonder if we should make this more generic. > I'm thinking of transparently fixing > > case 3: /* Indirect postincrement. */ > reg = AREG(insn, 0); > result = gen_ldst(s, opsize, reg, val, what); > /* ??? This is not exception safe. The instruction may still >fault after this point. */ > if (what == EA_STORE || !addrp) > tcg_gen_addi_i32(reg, reg, opsize_bytes(opsize)); > return result; > > If we add to DisasContext > > unsigned writeback_mask; > TCGv writeback[8]; > > Then > > static TCGv get_areg(DisasContext *s, unsigned regno) > { > if (s->writeback_mask & (1 << regno)) { > return s->writeback[regno]; > } else { > return cpu_aregs[regno]; > } > } > > static void delay_set_areg(DisasContext *s, unsigned regno, >TCGv val, bool give_temp) > { > if (s->writeback_mask & (1 << regno)) { > tcg_gen_mov_i32(s->writeback[regno], val); > if (give_temp) { > tcg_temp_free(val); > } > } else { > s->writeback_mask |= 1 << regno; > if (give_temp) { > s->writeback[regno] = val; > } else { > TCGv tmp = tcg_temp_new(); > tcg_gen_mov_i32(tmp, val); > s->writeback[regno] = tmp; > } > } > } > > static void do_writebacks(DisasContext *s) > { > unsigned mask = s->writeback_mask; > if (mask) { > s->writeback_mask = 0; > do { > unsigned regno = ctz32(mask); > tcg_gen_mov_i32(cpu_aregs[regno], s->writeback[regno]); > tcg_temp_free(s->writeback[regno]); > mask &= mask - 1; > } while (mask); > } > } > > where get_areg is used within the AREG macro, delay_set_areg is used in > those cases where pre- and post-increment are needed, and do_writebacks > is invoked at the end of disas_m68k_insn. > > This all pre-supposes that cmpm is not a special-case within the ISA, > that any time one reuses a register after modification one sees the new > value. Given the existing code within gen_ea, this would seem to be the > case. > > Thoughts? I think it's really a good idea to manage this in a generic way. As you have already written 90% of the code, do you want to provide a patch? I can test this with coldfire kernel and 68020 linux-user container. Thanks, Laurent
Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field
On Tue, 1 Nov 2016, Wei Liu wrote: > Introduce this field to control whether ACPI build is enabled by a > particular machine or accelerator. > > It defaults to true if the machine itself supports ACPI build. Xen > accelerator will disable it because Xen is in charge of building ACPI > tables for the guest. > > Signed-off-by: Wei Liu Reviewed-by: Stefano Stabellini > Cc: Igor Mammedov > Cc: Eduardo Habkost > Cc: Anthony PERARD > Cc: Stefano Stabellini > Cc: Sander Eikelenboom > > v2: > 1. drop acpi-build property > 2. set acpi_build_enabled to acpi_has_build > 3. replace acpi_has_build check in acpi_build() > --- > hw/i386/acpi-build.c | 2 +- > hw/i386/pc.c | 2 ++ > include/hw/i386/pc.h | 2 ++ > xen-common.c | 6 ++ > 4 files changed, 11 insertions(+), 1 deletion(-) > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 5cd1da9..13cbbde 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -2953,7 +2953,7 @@ void acpi_setup(void) > return; > } > > -if (!pcmc->has_acpi_build) { > +if (!pcms->acpi_build_enabled) { > ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); > return; > } > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index f56ea0f..fbd9aed 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -2159,6 +2159,8 @@ static void pc_machine_initfn(Object *obj) > pcms->vmport = ON_OFF_AUTO_AUTO; > /* nvdimm is disabled on default. */ > pcms->acpi_nvdimm_state.is_enabled = false; > +/* acpi build is enabled by default if machine supports it */ > +pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; > } > > static void pc_machine_reset(void) > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 98dc772..8eb517f 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -62,6 +62,8 @@ struct PCMachineState { > > AcpiNVDIMMState acpi_nvdimm_state; > > +bool acpi_build_enabled; > + > /* RAM information (sizes, addresses, configuration): */ > ram_addr_t below_4g_mem_size, above_4g_mem_size; > > diff --git a/xen-common.c b/xen-common.c > index 9099760..bacf962 100644 > --- a/xen-common.c > +++ b/xen-common.c > @@ -9,6 +9,7 @@ > */ > > #include "qemu/osdep.h" > +#include "hw/i386/pc.h" > #include "hw/xen/xen_backend.h" > #include "qmp-commands.h" > #include "sysemu/char.h" > @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int > running, > > static int xen_init(MachineState *ms) > { > +PCMachineState *pcms = PC_MACHINE(ms); > + > +/* Disable ACPI build because Xen handles it */ > +pcms->acpi_build_enabled = false; > + > xen_xc = xc_interface_open(0, 0, 0); > if (xen_xc == NULL) { > xen_pv_printf(NULL, 0, "can't open xen interface\n"); > -- > 2.1.4 >
Re: [Qemu-devel] [PATCH 09/29] target-sparc: hypervisor mode takes over nucleus mode
On 11/01/2016 12:12 PM, Artyom Tarasenko wrote: While playing with your patch set, I discovered that we also need a patch to get_asi for ASI_N et al to retain MMU_HYPV_IDX, and not decrease privilege. This happens *very* early in the prom boot, with the first casx (when casx is implemented inline). Why is the bug not visible with the current master? I wonder if we have a symmetrical bug somewhere. Hmm, I dunno. I assume it has something to do with casx being implemented out of line, and using helper_ld_asi instead of tcg_gen_qemu_ld_tl directly. Actually I don't see where the privilege is decreased: get_asi uses a local mem_idx variable, the dc->mem_idx is retained. What patch do you have in mind? Like this. Anyway, now that PMM has my atomic and sparc patch sets, you should be able to see the problem yourself with your patch set and your rom. r~ From fa75ae10f26b7611f9f36013a11b066766b9faee Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 10 Oct 2016 15:52:49 -0500 Subject: target-sparc: Override ASI_N for hypervisor Signed-off-by: Richard Henderson diff --git a/target-sparc/translate.c b/target-sparc/translate.c index 43f89d4..2b80d79 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -2139,7 +2139,11 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop) case ASI_TWINX_NL: case ASI_NUCLEUS_QUAD_LDD: case ASI_NUCLEUS_QUAD_LDD_L: -mem_idx = MMU_NUCLEUS_IDX; +if (hypervisor(dc)) { +mem_idx = MMU_PHYS_IDX; +} else { +mem_idx = MMU_NUCLEUS_IDX; +} break; case ASI_AIUP: /* As if user primary */ case ASI_AIUPL: /* As if user primary LE */
Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches
On 11/01/2016 12:17 PM, Peter Maydell wrote: On 1 November 2016 at 17:51, Richard Henderson wrote: The usual pain point for me is building for 32-bit on a 64-bit system, where there is no cross-prefix that one can use, and PKG_CONFIG_PATH and --extra-cflags are the only changes. That sounds like a bug in how the 32-bit compilers work on x86 ;-) Ideally you should be able to get a 32-bit compile with i586-linux-gnu-gcc... gcc -m32 worked very well for years, before pkg-config came along and mucked things up by using package-local include and library directories. As long as everything was installed in the toolchain-aware search paths, everything Just Worked. r~
Re: [Qemu-devel] [PATCH] arm-smmu: Fix bug when merging two 32 bit words to form 64 bit word
> -Original Message- > From: Edgar E. Iglesias [mailto:edgar.igles...@xilinx.com] > Sent: Monday, 26 September 2016 7:56 AM > To: Paul Kennedy > Cc: qemu-devel@nongnu.org; Alistair Francis ; qemu- > triv...@nongnu.org > Subject: Re: [PATCH] arm-smmu: Fix bug when merging two 32 bit words to > form 64 bit word > > On Mon, Sep 26, 2016 at 01:52:22PM +, Paul Kennedy wrote: > > From 7bf015d76a5b53cd061c91f91fea4427101b26fd Mon Sep 17 00:00:00 > 2001 > > From: Paul Kennedy > > Date: Mon, 26 Sep 2016 11:59:00 +0100 > > Subject: [PATCH] arm-smmu: Fix bug when merging two 32 bit words to > > form 64 bit word > > > > Fix bug where least significant 32 bits overwrite most significant > > 32 bits of TTBR1 register. > > Hi Paul, > > Thanks for the patch. > > This code is not upstream yet, it's staged in the Xilinx tree. > It probably doesn't makes sense to CC qemu-devel@nongnu.org and qemu- > triv...@nongnu.org yet but there's a g...@xilinx.com for future patches to > staged Xilinx code. > > Reviewed-by: Edgar E. Iglesias > > Alistair, can you merge this to our trees? Yep, applied! Thanks for the patch Paul. Sorry it took so long to apply, we were getting ready for a release. Thanks, Alistair > > Cheers, > Edgar > > > > > Signed-off-by: Paul C Kennedy > > --- > > hw/misc/arm-smmu.c |2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/hw/misc/arm-smmu.c b/hw/misc/arm-smmu.c index > > 7e7acd8..8255f3e 100644 > > --- a/hw/misc/arm-smmu.c > > +++ b/hw/misc/arm-smmu.c > > @@ -6566,7 +6566,7 @@ static bool smmu500_at64(SMMU *s, unsigned > int > > cb, hwaddr va, > > > > req.ttbr[1][1] = s->regs[R_SMMU_CB0_TTBR1_HIGH + cb_offset]; > > req.ttbr[1][1] <<= 32; > > -req.ttbr[1][1] = s->regs[R_SMMU_CB0_TTBR1_LOW + cb_offset]; > > +req.ttbr[1][1] |= s->regs[R_SMMU_CB0_TTBR1_LOW + cb_offset]; > > > > if (req.s2_enabled) { > > req.tcr[2] = s->regs[R_SMMU_CB0_TCR_LPAE + cb2_offset]; > > -- > > 1.7.9.5 > > This email and any attachments are intended for the sole use of the named recipient(s) and contain(s) confidential information that may be proprietary, privileged or copyrighted under applicable law. If you are not the intended recipient, do not read, copy, or forward this email message or any attachments. Delete this email message and any attachments immediately.
Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field
On 2016-11-01 18:44, Wei Liu wrote: Introduce this field to control whether ACPI build is enabled by a particular machine or accelerator. It defaults to true if the machine itself supports ACPI build. Xen accelerator will disable it because Xen is in charge of building ACPI tables for the guest. Signed-off-by: Wei Liu Hi Wei, Just gave it a spin (backporting for xen-unstable wasn't too hard), and this also works for me ! -- Sander --- Cc: Igor Mammedov Cc: Eduardo Habkost Cc: Anthony PERARD Cc: Stefano Stabellini Cc: Sander Eikelenboom v2: 1. drop acpi-build property 2. set acpi_build_enabled to acpi_has_build 3. replace acpi_has_build check in acpi_build() --- hw/i386/acpi-build.c | 2 +- hw/i386/pc.c | 2 ++ include/hw/i386/pc.h | 2 ++ xen-common.c | 6 ++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 5cd1da9..13cbbde 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2953,7 +2953,7 @@ void acpi_setup(void) return; } -if (!pcmc->has_acpi_build) { +if (!pcms->acpi_build_enabled) { ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); return; } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f56ea0f..fbd9aed 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2159,6 +2159,8 @@ static void pc_machine_initfn(Object *obj) pcms->vmport = ON_OFF_AUTO_AUTO; /* nvdimm is disabled on default. */ pcms->acpi_nvdimm_state.is_enabled = false; +/* acpi build is enabled by default if machine supports it */ +pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; } static void pc_machine_reset(void) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 98dc772..8eb517f 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -62,6 +62,8 @@ struct PCMachineState { AcpiNVDIMMState acpi_nvdimm_state; +bool acpi_build_enabled; + /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; diff --git a/xen-common.c b/xen-common.c index 9099760..bacf962 100644 --- a/xen-common.c +++ b/xen-common.c @@ -9,6 +9,7 @@ */ #include "qemu/osdep.h" +#include "hw/i386/pc.h" #include "hw/xen/xen_backend.h" #include "qmp-commands.h" #include "sysemu/char.h" @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int running, static int xen_init(MachineState *ms) { +PCMachineState *pcms = PC_MACHINE(ms); + +/* Disable ACPI build because Xen handles it */ +pcms->acpi_build_enabled = false; + xen_xc = xc_interface_open(0, 0, 0); if (xen_xc == NULL) { xen_pv_printf(NULL, 0, "can't open xen interface\n");
Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches
On 1 November 2016 at 17:51, Richard Henderson wrote: > The usual pain point for me is building for 32-bit on a 64-bit > system, where there is no cross-prefix that one can use, and > PKG_CONFIG_PATH and --extra-cflags are the only changes. That sounds like a bug in how the 32-bit compilers work on x86 ;-) Ideally you should be able to get a 32-bit compile with i586-linux-gnu-gcc... thanks -- PMM
Re: [Qemu-devel] [PATCH 09/29] target-sparc: hypervisor mode takes over nucleus mode
On Wed, Oct 12, 2016 at 3:29 PM, Richard Henderson wrote: > On 10/12/2016 06:33 AM, Artyom Tarasenko wrote: >> >> On Mon, Oct 10, 2016 at 11:41 PM, Richard Henderson >> wrote: >>> >>> On 10/01/2016 05:05 AM, Artyom Tarasenko wrote: Signed-off-by: Artyom Tarasenko --- target-sparc/cpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index 0b5c79f..fbeb8d7 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -699,10 +699,10 @@ static inline int cpu_mmu_index(CPUSPARCState *env1, bool ifetch) #elif !defined(TARGET_SPARC64) return env1->psrs; #else -if (env1->tl > 0) { -return MMU_NUCLEUS_IDX; -} else if (cpu_hypervisor_mode(env1)) { +if (cpu_hypervisor_mode(env1)) { return MMU_HYPV_IDX; +} else if (env1->tl > 0) { +return MMU_NUCLEUS_IDX; } else if (cpu_supervisor_mode(env1)) { return MMU_KERNEL_IDX; } else { >>> >>> While playing with your patch set, I discovered that we also need a patch >>> to >>> get_asi for ASI_N et al to retain MMU_HYPV_IDX, and not decrease >>> privilege. >>> This happens *very* early in the prom boot, with the first casx (when >>> casx >>> is implemented inline). >> >> >> Why is the bug not visible with the current master? I wonder if we >> have a symmetrical bug somewhere. > > > Hmm, I dunno. I assume it has something to do with casx being implemented > out of line, and using helper_ld_asi instead of tcg_gen_qemu_ld_tl directly. > Actually I don't see where the privilege is decreased: get_asi uses a local mem_idx variable, the dc->mem_idx is retained. What patch do you have in mind? -- Regards, Artyom Tarasenko SPARC and PPC PReP under qemu blog: http://tyom.blogspot.com/search/label/qemu
Re: [Qemu-devel] [PATCH v3 4/4] pc: memhp: enable nvdimm device hotplug
On Sat, Oct 29, 2016 at 12:35:40AM +0800, Xiao Guangrong wrote: > _GPE.E04 is dedicated for nvdimm device hotplug > > Signed-off-by: Xiao Guangrong > --- > docs/specs/acpi_mem_hotplug.txt | 3 +++ > hw/acpi/memory_hotplug.c | 31 +++ > hw/i386/acpi-build.c | 7 +++ > hw/i386/pc.c | 12 > hw/mem/nvdimm.c | 4 > include/hw/acpi/acpi_dev_interface.h | 1 + > 6 files changed, 46 insertions(+), 12 deletions(-) Reviewed-by: Stefan Hajnoczi signature.asc Description: PGP signature
Re: [Qemu-devel] [RFC 1/2] linux-headers: update
On 1 November 2016 at 18:13, Alexander Graf wrote: > On 01/11/2016 11:19, Peter Maydell wrote: >> Is there a cover letter email for this series ? > > > I figured that the set is so small that it didn't deserve one :). The usual rule is "one patch: no cover letter; more than one patch: cover letter". Forgetting the cover letter is a good way to cause a patchset to miss my to-review queue, because it's the cover letter that I mark as "to-review" (and gmail doesn't thread patchsets). thanks -- PMM
Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches
On Tue, Nov 01, 2016 at 04:26:57PM +, Peter Maydell wrote: > On 1 November 2016 at 16:20, Daniel P. Berrange wrote: > > On Tue, Nov 01, 2016 at 09:34:24AM -0600, Richard Henderson wrote: > >> On 11/01/2016 06:07 AM, Richard Henderson wrote: > >> > V2, now with more windows workarounds. I'll note that I have not > >> > been 100% successful in actually building a mingw64 image. But > >> > at least the error Peter saw with v1 is fixed. > >> > > >> > I'll report on the other mingw64 failures under separate cover. > >> > >> Bah. I think I've been tripped up by the fact that we fail to preserve > >> PKG_CONFIG_PATH when re-running configure via make. We really should > >> finally fix that -- it's really really annoying when building a non-default > >> config. > > > > I sent a fix for this issue last year, but it never got picked up > > by anyone for merge > > > > https://lists.gnu.org/archive/html/qemu-devel/2015-11/msg04074.html > > Hmm, that's a lot of random environment variables. > I think I prefer the approach of "if you care about anything > in the environment, pass it to both configure and to make". > For instance we don't pass PATH from configure to make, so > with that patch we would have the opposite behaviour, that > make can run configure (which will get the saved PATH from > config.status) but then make itself runs with a different PATH. > That seems awkwardly inconsistent. Yeah, it is akward - I was coming at it from an autotools perspective where the generated Makefile tends to get variables added for each tool used, containing the fully qualified path. eg so 'make' ends up running '/usr/bin/sed' instead of just 'sed', and so 'make' is not dependant on stuff in your env like $PATH - it mostly honours the env that was set at time of configure. Of course QEMU is not using autotools, so this doesn't map exactly hence the inconsistency you point out. Personally the idea that you must explicitly pass the same environment to both configure & make is kind of hard to guarantee - I'll have many terminal windows open when working on QEMU and I can't have any confidence that the env seen by 'make' will exactly match what I used with 'configure' (which may have been run quite some time earlier - hours or even days). It was a particular pain-point for me when doing mingw builds, where I would typically use 'PKG_CONFIG_PATH=/blah ./configure' so that I didn't permanently pollute my shell with mingw32 pkg-config env Regards, Daniel -- |: http://berrange.com -o-http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o-http://search.cpan.org/~danberr/ :|
Re: [Qemu-devel] [RFC 1/2] linux-headers: update
On 01/11/2016 11:19, Peter Maydell wrote: On 29 October 2016 at 22:10, Alexander Graf wrote: This patch updates the Linux headers to include the in-progress user space ARM timer patches. It is explicitly RFC, as the patches are not merged yet. --- Is there a cover letter email for this series ? I figured that the set is so small that it didn't deserve one :). Alex
Re: [Qemu-devel] [PATCH] docs: Fix typos found by codespell
On 1 November 2016 at 17:19, Stefan Weil wrote: > Fix also some indefinite articles. > > Signed-off-by: Stefan Weil > --- > > I still don't understand the comment in docs/colo-proxy.txt. > diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt > index 76767cb..6c8cca5 100644 > --- a/docs/colo-proxy.txt > +++ b/docs/colo-proxy.txt > @@ -158,7 +158,7 @@ secondary. > > == Usage == > > -Here, we use demo ip and port discribe more clearly. > +Here, we use demo ip and port describe more clearly. The problem with this sentence (as you note) is not the spelling but that it is not clear what it is trying to say and needs rewriting. > Primary(ip:3.3.3.3): > -netdev > tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown > -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 thanks -- PMM
[Qemu-devel] when we add EL2 to QEMU TCG ARM emulation and the virt board, should it default on or off?
I'm working on turning on EL2 support in our TCG ARM emulation, and one area I'm not sure about is whether it should default to on or off. We have a few precedents: For EL3 support: * the CPU property is enabled by default but can be disabled by the board * 'virt' defaults it to disabled, with a -machine secure=on property which turns it on (barfing if KVM is enabled) and also does some other stuff like wiring up secure-only memory and devices and making sure the GIC has security enabled * if the user does -cpu has_el3=yes things will probably not go too well and that's unsupported For PMU support: * the CPU property is enabled by default * 'virt' defaults it to enabled (except for virt-2.6 and earlier) * we do expect the user to be able to turn it on and off with a -cpu pmu=yes/no option (and the board model changes behaviour based on whether the CPU claims to have the feature) So what about EL2? Some background facts that are probably relevant: * at the moment KVM can't support it, but eventually machines with the nested virtualization hardware support will appear (this is an ARMv8.3 feature), at which point it ought to work there too * if you just enable EL2 then some currently working setups stop working (basically any code that was written to assume it was entered in EL1); notably this includes kvm-unit-tests (patches pending from Drew that fix that). Linux is fine though as it checks and handles both. Disabled-by-default has the advantage that (a) a command line will work the same for TCG and KVM and (b) nothing that used to work will break. The disadvantage is that anybody who wants to be able to run nested KVM will now have to specify a command line option of some kind. So: (1) should we default on or off? (both at the board level, and for the cpu object itself) (2) directly user-set cpu property, or board property ? thanks -- PMM
Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 03:02:31PM -0200, Eduardo Habkost wrote: > On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote: > [...] > > @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int > > running, > > > > static int xen_init(MachineState *ms) > > { > > +PCMachineState *pcms = PC_MACHINE(ms); > > + > > +/* Disable ACPI build because Xen handles it */ > > +pcms->acpi_build_enabled = false; > > I just noticed that I don't see any code to disable ACPI build in > the case of "-machine pc,accel=xen". I suggest a xen_enabled() Hmm... I think this code snippet does exactly that -- xen_init is the initialization function for Xen accelerator. So this covers -m xenfv (because it sets accelerator to xen) and -m whatever,accel=xen. Did I miss anything? Wei.
Re: [Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31
On 1 November 2016 at 16:29, Paolo Bonzini wrote: > The following changes since commit 39542105bbb19c690219d2f22844d8dfbd9bba05: > > Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging > (2016-11-01 12:48:07 +) > > are available in the git repository at: > > git://github.com/bonzini/qemu.git tags/for-upstream > > for you to fetch changes up to a92e5ebbcea7c1d16f4aa75b08fce298fcd4cdfa: > > main-loop: Suppress I/O thread warning under qtest (2016-11-01 16:07:37 > +0100) > > > * NBD bugfix (Changlong) > * NBD write zeroes support (Eric) > * Memory backend fixes (Haozhong) > * Atomics fix (Alex) > * New AVX512 features (Luwei) > * "make check" logging fix (Paolo) > * Chardev refactoring fallout (Paolo) > * Small checkpatch improvements (Paolo, Jeff) > More format string problems, 32-bit build: /home/petmay01/qemu/exec.c: In function 'file_ram_alloc': /home/petmay01/qemu/exec.c:1327:9: error: format '%llu' expects argument of type 'long long unsigned int', but argument 8 has type 'ram_addr_t' [-Werror=format=] error_setg(errp, "backing store %s size %"PRId64 ^ I did a manual make -k so I think this is the last one. thanks -- PMM
Re: [Qemu-devel] [PATCH v1 1/1] qemu-doc: update gluster protocol usage guide
On 11/01/2016 07:29 AM, Prasanna Kumar Kalever wrote: > Document: > 1. The new debug and logfile options with their usages and > 2. New json format and its usage. > > Signed-off-by: Prasanna Kumar Kalever > --- > qemu-doc.texi | 46 -- > qemu-options.hx | 14 -- > 2 files changed, 44 insertions(+), 16 deletions(-) > > diff --git a/qemu-doc.texi b/qemu-doc.texi > index 023c140..a7c5722 100644 > --- a/qemu-doc.texi > +++ b/qemu-doc.texi > @@ -1041,35 +1041,50 @@ GlusterFS is an user space distributed file system. > > You can boot from the GlusterFS disk image with the command: > @example > -qemu-system-x86_64 -drive > file=gluster[+@var{transport}]://[@var{server}[:@var{port}]]/@var{volname}/@var{image}[?socket=...] > +URI: > +qemu-system-x86_64 -drive > file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path}[?socket=...] > + > +JSON: > +qemu-system-x86_64 > 'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":"N","logfile":"...","server":[@{"type":"tcp","host":"...","port":"..."@},@{"type":"unix","socket":"..."@}]@}@}' "debug":"N" does not match the schema; the parameter is named "debug-level", and it is an integer not a string. The parameter is optional; you could just omit it. But if you are going to include it, give a reasonable example like "debug-level":0. Can you break up this long line for legibility? > @end example > > @var{gluster} is the protocol. > > -@var{transport} specifies the transport type used to connect to gluster > +@var{type} specifies the transport type used to connect to gluster > management daemon (glusterd). Valid transport types are > -tcp, unix and rdma. If a transport type isn't specified, then tcp > -type is assumed. > +tcp, unix. Incase of URI, if a transport type isn't specified, s/tcp, unix/tcp and unix/ s/Incase of URI/In the URI form/ > +then tcp type is assumed. > > -@var{server} specifies the server where the volume file specification for > -the given volume resides. This can be either hostname, ipv4 address > -or ipv6 address. ipv6 address needs to be within square brackets [ ]. > -If transport type is unix, then @var{server} field should not be specified. > +@var{host} specifies the server where the volume file specification for > +the given volume resides. This can be either hostname, ipv4 address. s/hostname, ipv4/a hostname or an ipv4/ > +If transport type is unix, then @var{host} field should not be specified. > Instead @var{socket} field needs to be populated with the path to unix domain > socket. > > @var{port} is the port number on which glusterd is listening. This is > optional > -and if not specified, QEMU will send 0 which will make gluster to use the > -default port. If the transport type is unix, then @var{port} should not be > -specified. > +and if not specified, it default to port 24007. If the transport type is > unix, s/default/defaults/ > +then @var{port} should not be specified. > + > +@var{volume} is the name of the gluster volume which contains the disk image. > + > +@var{path} is the path to the actual disk image that resides on gluster > volume. > + > +@var{debug} is the logging level of the gluster protocol driver. Debug levels Again, the schema for BlockdevOptionsGluster spells this debug-level, so you need to fix this paragraph. > +are 0-9, with 9 being the most verbose, and 0 representing no debugging > output. > +Default is level of 4. The current logging levels defined in the gluster > source s/Default is level of 4/The default level is 4/ > +are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, > +6 - Notice, 7 - Info, 8 - Debug, 9 - Trace > + > +@var{logfile} is a commandline option to mention log file path which helps in > +logging to the specified file and also help in persisting the gfapi logs. The > +default is stderr. > + > > @@ -1082,6 +1097,9 @@ qemu-system-x86_64 -drive > file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir > qemu-system-x86_64 -drive > file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img > qemu-system-x86_64 -drive > file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket > qemu-system-x86_64 -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img > +qemu-system-x86_64 -drive > file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log Should be file.debug-level > +qemu-system-x86_64 > 'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":"9","logfile":"/var/log/qemu-gluster.log","server":[@{"type":"tcp","host":"1.2.3.4","port":24007@},@{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' > +qemu-system-x86_64 -drive > driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log,file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007,f
Re: [Qemu-devel] [PATCH] docs: Fix typos found by codespell
On 11/01/2016 12:19 PM, Stefan Weil wrote: > Fix also some indefinite articles. > > Signed-off-by: Stefan Weil > --- > > I still don't understand the comment in docs/colo-proxy.txt. Me neither. > +++ b/docs/colo-proxy.txt > @@ -158,7 +158,7 @@ secondary. > > == Usage == > > -Here, we use demo ip and port discribe more clearly. > +Here, we use demo ip and port describe more clearly. > Primary(ip:3.3.3.3): > -netdev > tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown > -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 Maybe: Here is an example using demonstration IP and port addresses to more clearly describe the usage. The remaining hunks are clear improvements, so: Reviewed-by: Eric Blake -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH v2 1/3] target-m68k: add cmpm
On 10/27/2016 03:43 PM, Laurent Vivier wrote: +DISAS_INSN(cmpm) +{ +int opsize = insn_opsize(insn); +TCGv tmp = tcg_temp_new(); +TCGv src, dst, addr; + +src = gen_load(s, opsize, AREG(insn, 0), 1); +/* delay the update after the second gen_load() */ +tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize)); + +/* but if the we use the same address register to + * read the second value, we must use the updated address + */ +if (REG(insn, 0) == REG(insn, 9)) { +addr = tmp; +} else { +addr = AREG(insn, 9); +} + +dst = gen_load(s, opsize, addr, 1); +tcg_gen_mov_i32(AREG(insn, 0), tmp); +tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize)); I wonder if we should make this more generic. I'm thinking of transparently fixing case 3: /* Indirect postincrement. */ reg = AREG(insn, 0); result = gen_ldst(s, opsize, reg, val, what); /* ??? This is not exception safe. The instruction may still fault after this point. */ if (what == EA_STORE || !addrp) tcg_gen_addi_i32(reg, reg, opsize_bytes(opsize)); return result; If we add to DisasContext unsigned writeback_mask; TCGv writeback[8]; Then static TCGv get_areg(DisasContext *s, unsigned regno) { if (s->writeback_mask & (1 << regno)) { return s->writeback[regno]; } else { return cpu_aregs[regno]; } } static void delay_set_areg(DisasContext *s, unsigned regno, TCGv val, bool give_temp) { if (s->writeback_mask & (1 << regno)) { tcg_gen_mov_i32(s->writeback[regno], val); if (give_temp) { tcg_temp_free(val); } } else { s->writeback_mask |= 1 << regno; if (give_temp) { s->writeback[regno] = val; } else { TCGv tmp = tcg_temp_new(); tcg_gen_mov_i32(tmp, val); s->writeback[regno] = tmp; } } } static void do_writebacks(DisasContext *s) { unsigned mask = s->writeback_mask; if (mask) { s->writeback_mask = 0; do { unsigned regno = ctz32(mask); tcg_gen_mov_i32(cpu_aregs[regno], s->writeback[regno]); tcg_temp_free(s->writeback[regno]); mask &= mask - 1; } while (mask); } } where get_areg is used within the AREG macro, delay_set_areg is used in those cases where pre- and post-increment are needed, and do_writebacks is invoked at the end of disas_m68k_insn. This all pre-supposes that cmpm is not a special-case within the ISA, that any time one reuses a register after modification one sees the new value. Given the existing code within gen_ea, this would seem to be the case. Thoughts? r~
Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches
On 11/01/2016 11:43 AM, Daniel P. Berrange wrote: It was a particular pain-point for me when doing mingw builds, where I would typically use 'PKG_CONFIG_PATH=/blah ./configure' so that I didn't permanently pollute my shell with mingw32 pkg-config env The usual pain point for me is building for 32-bit on a 64-bit system, where there is no cross-prefix that one can use, and PKG_CONFIG_PATH and --extra-cflags are the only changes. r~
Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote: [...] > @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int > running, > > static int xen_init(MachineState *ms) > { > +PCMachineState *pcms = PC_MACHINE(ms); > + > +/* Disable ACPI build because Xen handles it */ > +pcms->acpi_build_enabled = false; I just noticed that I don't see any code to disable ACPI build in the case of "-machine pc,accel=xen". I suggest a xen_enabled() check in pc_init1() and pc_q35_init(). -- Eduardo
Re: [Qemu-devel] [PATCH v3 3/4] nvdimm acpi: introduce _FIT
On Sat, Oct 29, 2016 at 12:35:39AM +0800, Xiao Guangrong wrote: > +1) Read FIT > + As we only reserved one page for NVDIMM ACPI it is impossible to map the > + whole FIT data to guest's address space. This function is used by _FIT > + method to read a piece of FIT data from QEMU. > + > + Input parameters: > + Arg0 – UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62} > + Arg1 – Revision ID (set to 1) > + Arg2 - Function Index, 0x1 > + Arg3 - A package containing a buffer whose layout is as follows: > + > + > +--+-+-+---+ > + | Filed | Byte Length | Byte Offset | Description > | s/Filed/Field/ The same applies below too. > + > +--+-+-+---+ > + | offset | 4 |0| the offset of FIT buffer > | > + > +--+-+-+---+ s/offset of FIT buffer/offset into FIT buffer/ > + > + Output: > + > +--+-+-+---+ > + | Filed | Byte Length | Byte Offset | Description > | > + > +--+-+-+---+ > + | | | | return status codes > | > + | | | | 0x100 indicates fit has been > | > + | status | 4 |0| updated > | > + | | | | other follows Chapter 3 in DSM > | > + | | | | Spec Rev1 > | > + > +--+-+-+---+ > + | fit data | Varies |4| FIT data > | > + | | | | > | > + > +--+-+-+---+ > + > + The FIT offset is maintained by the caller itself, current offset plugs s/plugs/plus/ > +struct NvdimmFuncReadFITIn { > +uint32_t offset; /* the offset of FIT buffer. */ s/offset of FIT buffer/offset into FIT buffer/ signature.asc Description: PGP signature
Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote: > Introduce this field to control whether ACPI build is enabled by a > particular machine or accelerator. > > It defaults to true so that PC machine has ACPI build by default. Xen > accelerator will disable it because Xen is in charge of building ACPI > tables for the guest. > > Signed-off-by: Wei Liu > --- > Cc: Igor Mammedov > Cc: Eduardo Habkost > Cc: Anthony PERARD > Cc: Stefano Stabellini > Cc: Sander Eikelenboom > > Tested a backport version which only involves trivial code movement. It > worked with both -m xenfv and -m pc,accel=xen. > > Sander, if you want the backported patch please let me know. > --- > hw/i386/acpi-build.c | 2 +- > hw/i386/pc.c | 19 +++ > include/hw/i386/pc.h | 3 +++ > xen-common.c | 6 ++ > 4 files changed, 29 insertions(+), 1 deletion(-) > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 93be96f..a5cd2fd 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -2954,7 +2954,7 @@ void acpi_setup(void) > return; > } > > -if (!pcmc->has_acpi_build) { > +if (!pcmc->has_acpi_build || !pcms->acpi_build_enabled) { > ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); > return; > } > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index f56ea0f..3e7982f 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -2143,6 +2143,20 @@ static bool pc_machine_get_nvdimm(Object *obj, Error > **errp) > return pcms->acpi_nvdimm_state.is_enabled; > } > > +static bool pc_machine_get_acpi_build(Object *obj, Error **errp) > +{ > +PCMachineState *pcms = PC_MACHINE(obj); > + > +return pcms->acpi_build_enabled; > +} > + > +static void pc_machine_set_acpi_build(Object *obj, bool value, Error **errp) > +{ > +PCMachineState *pcms = PC_MACHINE(obj); > + > +pcms->acpi_build_enabled = value; > +} > + > static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) > { > PCMachineState *pcms = PC_MACHINE(obj); > @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj) > pcms->vmport = ON_OFF_AUTO_AUTO; > /* nvdimm is disabled on default. */ > pcms->acpi_nvdimm_state.is_enabled = false; > +/* acpi build is enabled by default. */ > +pcms->acpi_build_enabled = true; If you set: pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; the default value will be more consistent with the actual results (where pc-1.6 and older don't have ACPI build). Then you would probably be able to remove the pcmc->has_acpi_build check from acpi_setup() and only check pcms->acpi_build_enabled. > } > > static void pc_machine_reset(void) > @@ -2319,6 +2335,9 @@ static void pc_machine_class_init(ObjectClass *oc, void > *data) > > object_class_property_add_bool(oc, PC_MACHINE_NVDIMM, > pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); > + > +object_class_property_add_bool(oc, PC_MACHINE_ACPI_BUILD, > +pc_machine_get_acpi_build, pc_machine_set_acpi_build, &error_abort); > } > > static const TypeInfo pc_machine_info = { > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 17fff80..ec8cd0c 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -63,6 +63,8 @@ struct PCMachineState { > > AcpiNVDIMMState acpi_nvdimm_state; > > +bool acpi_build_enabled; > + > /* RAM information (sizes, addresses, configuration): */ > ram_addr_t below_4g_mem_size, above_4g_mem_size; > > @@ -87,6 +89,7 @@ struct PCMachineState { > #define PC_MACHINE_VMPORT "vmport" > #define PC_MACHINE_SMM "smm" > #define PC_MACHINE_NVDIMM "nvdimm" > +#define PC_MACHINE_ACPI_BUILD "acpi-build" > > /** > * PCMachineClass: > diff --git a/xen-common.c b/xen-common.c > index e641ad1..b1858d7 100644 > --- a/xen-common.c > +++ b/xen-common.c > @@ -9,6 +9,7 @@ > */ > > #include "qemu/osdep.h" > +#include "hw/i386/pc.h" > #include "hw/xen/xen_backend.h" > #include "qmp-commands.h" > #include "sysemu/char.h" > @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int > running, > > static int xen_init(MachineState *ms) > { > +PCMachineState *pcms = PC_MACHINE(ms); > + > +/* Disable ACPI build because Xen handles it */ > +pcms->acpi_build_enabled = false; > + > xen_xc = xc_interface_open(0, 0, 0); > if (xen_xc == NULL) { > xen_be_printf(NULL, 0, "can't open xen interface\n"); > -- > 2.1.4 > -- Eduardo
[Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field
Introduce this field to control whether ACPI build is enabled by a particular machine or accelerator. It defaults to true if the machine itself supports ACPI build. Xen accelerator will disable it because Xen is in charge of building ACPI tables for the guest. Signed-off-by: Wei Liu --- Cc: Igor Mammedov Cc: Eduardo Habkost Cc: Anthony PERARD Cc: Stefano Stabellini Cc: Sander Eikelenboom v2: 1. drop acpi-build property 2. set acpi_build_enabled to acpi_has_build 3. replace acpi_has_build check in acpi_build() --- hw/i386/acpi-build.c | 2 +- hw/i386/pc.c | 2 ++ include/hw/i386/pc.h | 2 ++ xen-common.c | 6 ++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 5cd1da9..13cbbde 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2953,7 +2953,7 @@ void acpi_setup(void) return; } -if (!pcmc->has_acpi_build) { +if (!pcms->acpi_build_enabled) { ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); return; } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f56ea0f..fbd9aed 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2159,6 +2159,8 @@ static void pc_machine_initfn(Object *obj) pcms->vmport = ON_OFF_AUTO_AUTO; /* nvdimm is disabled on default. */ pcms->acpi_nvdimm_state.is_enabled = false; +/* acpi build is enabled by default if machine supports it */ +pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; } static void pc_machine_reset(void) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 98dc772..8eb517f 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -62,6 +62,8 @@ struct PCMachineState { AcpiNVDIMMState acpi_nvdimm_state; +bool acpi_build_enabled; + /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; diff --git a/xen-common.c b/xen-common.c index 9099760..bacf962 100644 --- a/xen-common.c +++ b/xen-common.c @@ -9,6 +9,7 @@ */ #include "qemu/osdep.h" +#include "hw/i386/pc.h" #include "hw/xen/xen_backend.h" #include "qmp-commands.h" #include "sysemu/char.h" @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int running, static int xen_init(MachineState *ms) { +PCMachineState *pcms = PC_MACHINE(ms); + +/* Disable ACPI build because Xen handles it */ +pcms->acpi_build_enabled = false; + xen_xc = xc_interface_open(0, 0, 0); if (xen_xc == NULL) { xen_pv_printf(NULL, 0, "can't open xen interface\n"); -- 2.1.4
Re: [Qemu-devel] [PULL v3 for-2.8 0/9] tcg queued patches
On 1 November 2016 at 16:48, Richard Henderson wrote: > Yet another workaround for windows, this time with me actually > being able to produce (and run) a mingw32 binary. Only re-sending > patch 4/9, as that's the only one that changed. > > > r~ > > > > The following changes since commit c46ef897dbb75590af94b6e3dca6a9c5f9a1ea1e: > > Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into > staging (2016-11-01 14:27:05 +) > > are available in the git repository at: > > git://github.com/rth7680/qemu.git tags/pull-tcg-20161101-2 > > for you to fetch changes up to 3ff91d7e85176f8b4b131163d7fd801757a2c949: > > tcg: correct 32-bit tcg_gen_ld8s_i64 sign-extension (2016-11-01 10:30:45 > -0600) > > > tcg queued patches > > Applied, thanks. -- PMM
Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 05:12:37PM +, Wei Liu wrote: > On Tue, Nov 01, 2016 at 03:02:31PM -0200, Eduardo Habkost wrote: > > On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote: > > [...] > > > @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, > > > int running, > > > > > > static int xen_init(MachineState *ms) > > > { > > > +PCMachineState *pcms = PC_MACHINE(ms); > > > + > > > +/* Disable ACPI build because Xen handles it */ > > > +pcms->acpi_build_enabled = false; > > > > I just noticed that I don't see any code to disable ACPI build in > > the case of "-machine pc,accel=xen". I suggest a xen_enabled() > > Hmm... I think this code snippet does exactly that -- xen_init is the > initialization function for Xen accelerator. So this covers -m xenfv > (because it sets accelerator to xen) and -m whatever,accel=xen. > > Did I miss anything? Nevermind, I confused it with pc_xen_hvm_init() (which is specific for xenfv). The patch looks correct. -- Eduardo
Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features
On Tue, Nov 01, 2016 at 03:22:01PM +, Peter Maydell wrote: > On 30 October 2016 at 21:23, Michael S. Tsirkin wrote: > > The following changes since commit 5b2ecabaeabc17f032197246c4846b9ba95ba8a6: > > > > Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20161028-1' > > into staging (2016-10-28 17:59:04 +0100) > > > > are available in the git repository at: > > > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream > > > > for you to fetch changes up to f082ec0225bd15c71e0b4697d2df3af7bad65d7f: > > > > acpi: fix assert failure caused by commit 35c5a52d (2016-10-30 20:06:25 > > +0200) > > > > > > virtio, pc: fixes and features > > > > nvdimm hotplug support > > virtio migration and ioeventfd rework > > virtio crypto device > > ipmi fixes > > > > Signed-off-by: Michael S. Tsirkin > > > > Hi; this fails to build on OSX with format string issues: > > /Users/pm215/src/qemu-for-merges/hw/virtio/virtio-crypto.c:770:20: > error: format specifies type 'unsign > ed short' but the argument has type 'uint32_t' (aka 'unsigned int') > [-Werror,-Wformat] >vcrypto->max_queues, VIRTIO_QUEUE_MAX); > ~~~^ > /Users/pm215/src/qemu-for-merges/include/qapi/error.h:163:35: note: > expanded from macro 'error_setg' > (fmt), ## __VA_ARGS__) > ^ > > Fun fact: in struct vhost_dev, max_queues is a uint64_t; > in struct VirtIONet it is a uint16_t; and in VirtIOCrypto > it is a uint32_t... > > thanks > -- PMM Yes - I really think we should move that to virtio core going forward. Anyway, I fixed that up and pushed. A general question - I have a couple of tweaks to tests in my tree. Didn't push to avoid them blocking the features. I think test tweaks are fair game after freeze - agree? -- MST
[Qemu-devel] [PULL v3 for-2.8 0/9] tcg queued patches
Yet another workaround for windows, this time with me actually being able to produce (and run) a mingw32 binary. Only re-sending patch 4/9, as that's the only one that changed. r~ The following changes since commit c46ef897dbb75590af94b6e3dca6a9c5f9a1ea1e: Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging (2016-11-01 14:27:05 +) are available in the git repository at: git://github.com/rth7680/qemu.git tags/pull-tcg-20161101-2 for you to fetch changes up to 3ff91d7e85176f8b4b131163d7fd801757a2c949: tcg: correct 32-bit tcg_gen_ld8s_i64 sign-extension (2016-11-01 10:30:45 -0600) tcg queued patches Joseph Myers (1): tcg: correct 32-bit tcg_gen_ld8s_i64 sign-extension Peter Maydell (1): tcg/tcg.h: Improve documentation of TCGv_i32 etc types Pranith Kumar (1): MAINTAINERS: Update PPC status and maintainer Richard Henderson (6): target-cris: Do not dump cpu state with -d in_asm target-microblaze: Do not dump cpu state with -d in_asm target-openrisc: Do not dump cpu state with -d in_asm log: Add locking to large logging blocks tcg: Add tcg_gen_mulsu2_{i32,i64,tl} target-microblaze: Cleanup dec_mul MAINTAINERS | 4 +-- cpu-exec.c| 2 ++ exec.c| 2 ++ include/qemu/log.h| 16 ++ include/sysemu/os-posix.h | 12 include/sysemu/os-win32.h | 15 + target-alpha/translate.c | 2 ++ target-arm/translate-a64.c| 2 ++ target-arm/translate.c| 2 ++ target-cris/translate.c | 27 +++- target-i386/translate.c | 4 +++ target-lm32/translate.c | 2 ++ target-m68k/translate.c | 2 ++ target-microblaze/translate.c | 72 --- target-mips/translate.c | 2 ++ target-openrisc/translate.c | 9 +++--- target-ppc/translate.c| 2 ++ target-s390x/translate.c | 2 ++ target-sh4/translate.c| 2 ++ target-sparc/translate.c | 2 ++ target-tilegx/translate.c | 6 +++- target-tricore/translate.c| 2 ++ target-unicore32/translate.c | 2 ++ target-xtensa/translate.c | 2 ++ tcg/tcg-op.c | 45 ++- tcg/tcg-op.h | 4 +++ tcg/tcg.c | 8 + tcg/tcg.h | 38 ++- translate-all.c | 2 ++ 29 files changed, 192 insertions(+), 100 deletions(-)
Re: [Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31
Hi, Your series seems to have some coding style problems. See output below for more information: Type: series Subject: [Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31 Message-id: 1478017783-7703-1-git-send-email-pbonz...@redhat.com === TEST SCRIPT BEGIN === #!/bin/bash BASE=base n=1 total=$(git log --oneline $BASE.. | wc -l) failed=0 # Useful git options git config --local diff.renamelimit 0 git config --local diff.renames True commits="$(git log --format=%H --reverse $BASE..)" for c in $commits; do echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..." if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then failed=1 echo fi n=$((n+1)) done exit $failed === TEST SCRIPT END === Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 From https://github.com/patchew-project/qemu * [new tag] patchew/1478017783-7703-1-git-send-email-pbonz...@redhat.com -> patchew/1478017783-7703-1-git-send-email-pbonz...@redhat.com Switched to a new branch 'test' faf292f main-loop: Suppress I/O thread warning under qtest 3fe8c60 docs/rcu.txt: Fix minor typo d32538f vl: exit qemu on guest panic if -no-shutdown is not set 031d388 checkpatch: allow spaces before parenthesis for 'coroutine_fn' d9ab8bb x86: add AVX512_4VNNIW and AVX512_4FMAPS features 584e354 slirp: fix CharDriver breakage d7e7fa4 qemu-char: do not forward events through the mux until QEMU has started 924fea8 nbd: Implement NBD_CMD_WRITE_ZEROES on client 6b5ee21 nbd: Implement NBD_CMD_WRITE_ZEROES on server f032e48 nbd: Improve server handling of shutdown requests 486ffc6 nbd: Refactor conversion to errno to silence checkpatch f698de1 nbd: Support shorter handshake ec7f3de nbd: Less allocation during NBD_OPT_LIST 71e470c nbd: Let client skip portions of server reply eb8e4b4 nbd: Let server know when client gives up negotiation 4f58cc8 nbd: Share common option-sending code in client c919458 nbd: Send message along with server NBD_REP_ERR errors b76fe64 nbd: Share common reply-sending code in server afa213c nbd: Rename struct nbd_request and nbd_reply 4cb63fc nbd: Rename NbdClientSession to NBDClientSession 3828fb9 nbd: Rename NBDRequest to NBDRequestData 9bf69c9 nbd: Treat flags vs. command type as separate fields a2d3af6 nbd: Add qemu-nbd -D for human-readable description aef3582 exec.c: check memory backend file size with 'size' option 25a5c4b exec.c: do not truncate non-empty memory backend file 3c4f049 exec.c: ensure all AddressSpaceDispatch updates under RCU bb0a1f3 tests: send error_report to test log 0916440 qemu-error: remove dependency of stubs on monitor a6f8da9 nbd: Use CoQueue for free_sema instead of CoMutex c22e47a checkpatch: tweak "struct should normally be const" warning === OUTPUT BEGIN === fatal: unrecognized argument: --no-patch Checking PATCH 1/30: ... WARNING: line over 80 characters #29: FILE: scripts/checkpatch.pl:2502: + ERROR("initializer for struct $1 should normally be const\n" . total: 0 errors, 1 warnings, 10 lines checked Your patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. fatal: unrecognized argument: --no-patch Checking PATCH 2/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 3/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 4/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 5/30: ... WARNING: line over 80 characters #27: FILE: exec.c:496: +AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch); total: 0 errors, 1 warnings, 16 lines checked Your patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. fatal: unrecognized argument: --no-patch Checking PATCH 6/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 7/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 8/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 9/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 10/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 11/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 12/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 13/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 14/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 15/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 16/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 17/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 18/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 19/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 20/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 21/30: ... fatal: unrecognized argument: --no-patch Checking PATCH 22/30: ... fatal: unrecognized argument: --no-patc
Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features
On 1 November 2016 at 17:25, Michael S. Tsirkin wrote: > A general question - I have a couple of tweaks to tests in > my tree. Didn't push to avoid them blocking the features. > I think test tweaks are fair game after freeze - agree? Yes, especially if they're only tweaks, not enormous changes. thanks -- PMM
Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches
On 11/01/2016 08:26 AM, Peter Maydell wrote: i686-w64-mingw32 builds fail to link :-( exec.o: In function `qemu_flockfile': /home/petmay01/linaro/qemu-for-merges/include/sysemu/os-win32.h:110: undefined reference to `_imp___lock_file' exec.o: In function `qemu_funlockfile': /home/petmay01/linaro/qemu-for-merges/include/sysemu/os-win32.h:115: I see the symbols present in libmsvcrt.a. I wonder why it isn't being seen. Oh well. I'm going to simply disable file locking for mingw32 and let someone who cares deal with it later. The situation with logging is no worse than before the patch. r~
Re: [Qemu-devel] [PATCH v5] block/vxhs.c Add support for a new block device type called "vxhs"
On Tue, Nov 01, 2016 at 09:52:59AM -0700, ashish mittal wrote: > Done! > > All builds are fine, but I have no way of knowing if that fixed the > issue! Please let me know if you still see a problem. configure works for me after pulling your libqnio update Regards, Daniel -- |: http://berrange.com -o-http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o-http://search.cpan.org/~danberr/ :|
[Qemu-devel] [PULL 25/30] slirp: fix CharDriver breakage
SLIRP expects a CharBackend as the third argument to slirp_add_exec, but net/slirp.c was passing a CharDriverState. Fix this to restore guestfwd functionality. Reported-by: Dr. David Alan Gilbert Signed-off-by: Paolo Bonzini --- net/slirp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/slirp.c b/net/slirp.c index 64dd325..bcd1c5f 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -763,8 +763,7 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str, return -1; } -if (slirp_add_exec(s->slirp, 3, qemu_chr_fe_get_driver(&fwd->hd), - &server, port) < 0) { +if (slirp_add_exec(s->slirp, 3, &fwd->hd, &server, port) < 0) { error_report("conflicting/invalid host:port in guest forwarding " "rule '%s'", config_str); g_free(fwd); -- 2.7.4
[Qemu-devel] [PATCH] hw/block/m25p80: Fix typo in local macro name
Signed-off-by: Stefan Weil --- hw/block/m25p80.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index d29ff4c..09ea51f 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -121,7 +121,7 @@ typedef struct FlashPartInfo { #define CFG_DUMMY_CLK_LEN 4 #define NVCFG_DUMMY_CLK_POS 12 #define VCFG_DUMMY_CLK_POS 4 -#define EVCFG_OUT_DRIVER_STRENGHT_DEF 7 +#define EVCFG_OUT_DRIVER_STRENGTH_DEF 7 #define EVCFG_VPP_ACCELERATOR (1 << 3) #define EVCFG_RESET_HOLD_ENABLED (1 << 4) #define NVCFG_DUAL_IO_MASK (1 << 2) @@ -700,7 +700,7 @@ static void reset_memory(Flash *s) ); s->enh_volatile_cfg = 0; -s->enh_volatile_cfg |= EVCFG_OUT_DRIVER_STRENGHT_DEF; +s->enh_volatile_cfg |= EVCFG_OUT_DRIVER_STRENGTH_DEF; s->enh_volatile_cfg |= EVCFG_VPP_ACCELERATOR; s->enh_volatile_cfg |= EVCFG_RESET_HOLD_ENABLED; if (s->nonvolatile_cfg & NVCFG_DUAL_IO_MASK) { -- 2.10.1
[Qemu-devel] [PATCH] docs: Fix typos found by codespell
Fix also some indefinite articles. Signed-off-by: Stefan Weil --- I still don't understand the comment in docs/colo-proxy.txt. Stefan docs/COLO-FT.txt| 2 +- docs/colo-proxy.txt | 2 +- qemu-doc.texi | 2 +- qemu-options.hx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 6282938..043b14a 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -102,7 +102,7 @@ Primary side. COLO Proxy: Delivers packets to Primary and Seconday, and then compare the responses from both side. Then decide whether to start a checkpoint according to some rules. -Please refer to docs/colo-proxy.txt for more informations. +Please refer to docs/colo-proxy.txt for more information. Note: HeartBeat has not been implemented yet, so you need to trigger failover process diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt index 76767cb..6c8cca5 100644 --- a/docs/colo-proxy.txt +++ b/docs/colo-proxy.txt @@ -158,7 +158,7 @@ secondary. == Usage == -Here, we use demo ip and port discribe more clearly. +Here, we use demo ip and port describe more clearly. Primary(ip:3.3.3.3): -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 diff --git a/qemu-doc.texi b/qemu-doc.texi index 023c140..691acf9 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1037,7 +1037,7 @@ qemu-system-i386 -iscsi initiator-name=iqn.qemu.test:my-initiator \ @node disk_images_gluster @subsection GlusterFS disk images -GlusterFS is an user space distributed file system. +GlusterFS is a user space distributed file system. You can boot from the GlusterFS disk image with the command: @example diff --git a/qemu-options.hx b/qemu-options.hx index 95332cc..df5e4e7 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2589,7 +2589,7 @@ qemu-system-i386 --drive file=sheepdog://192.0.2.1:3/MyVirtualMachine See also @url{http://http://www.osrg.net/sheepdog/}. @item GlusterFS -GlusterFS is an user space distributed file system. +GlusterFS is a user space distributed file system. QEMU supports the use of GlusterFS volumes for hosting VM disk images using TCP, Unix Domain Sockets and RDMA transport protocols. -- 2.10.1
[Qemu-devel] [PULL 20/30] nbd: Refactor conversion to errno to silence checkpatch
From: Eric Blake Checkpatch complains that 'return EINVAL' is usually wrong (since we tend to favor 'return -EINVAL'). But it is a false positive for nbd_errno_to_system_errno(). Since NBD may add future defined wire values, refactor the code to keep checkpatch happy. Signed-off-by: Eric Blake Message-Id: <1476469998-28592-14-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/client.c | 20 ++-- 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/nbd/client.c b/nbd/client.c index b29963b..7bdce53 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -23,23 +23,31 @@ static int nbd_errno_to_system_errno(int err) { +int ret; switch (err) { case NBD_SUCCESS: -return 0; +ret = 0; +break; case NBD_EPERM: -return EPERM; +ret = EPERM; +break; case NBD_EIO: -return EIO; +ret = EIO; +break; case NBD_ENOMEM: -return ENOMEM; +ret = ENOMEM; +break; case NBD_ENOSPC: -return ENOSPC; +ret = ENOSPC; +break; default: TRACE("Squashing unexpected error %d to EINVAL", err); /* fallthrough */ case NBD_EINVAL: -return EINVAL; +ret = EINVAL; +break; } +return ret; } /* Definitions for opaque data types */ -- 2.7.4
[Qemu-devel] [PULL v3 for-2.8 4/9] log: Add locking to large logging blocks
Reuse the existing locking provided by stdio to keep in_asm, cpu, op, op_opt, op_ind, and out_asm as contiguous blocks. While it isn't possible to interleave e.g. in_asm or op_opt logs because of the TB lock protecting all code generation, it is possible to interleave cpu logs, or to interleave a cpu dump with an out_asm dump. For mingw32, we appear to have no viable solution for this. The locking functions are not properly exported from the system runtime library. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- cpu-exec.c| 2 ++ exec.c| 2 ++ include/qemu/log.h| 16 include/sysemu/os-posix.h | 12 include/sysemu/os-win32.h | 15 +++ target-alpha/translate.c | 2 ++ target-arm/translate-a64.c| 2 ++ target-arm/translate.c| 2 ++ target-cris/translate.c | 2 ++ target-i386/translate.c | 4 target-lm32/translate.c | 2 ++ target-m68k/translate.c | 2 ++ target-microblaze/translate.c | 2 ++ target-mips/translate.c | 2 ++ target-openrisc/translate.c | 2 ++ target-ppc/translate.c| 2 ++ target-s390x/translate.c | 2 ++ target-sh4/translate.c| 2 ++ target-sparc/translate.c | 2 ++ target-tilegx/translate.c | 6 +- target-tricore/translate.c| 2 ++ target-unicore32/translate.c | 2 ++ target-xtensa/translate.c | 2 ++ tcg/tcg.c | 8 translate-all.c | 2 ++ 25 files changed, 98 insertions(+), 1 deletion(-) diff --git a/cpu-exec.c b/cpu-exec.c index 3e40886..4188fed 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -150,11 +150,13 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) #if defined(DEBUG_DISAS) if (qemu_loglevel_mask(CPU_LOG_TB_CPU) && qemu_log_in_addr_range(itb->pc)) { +qemu_log_lock(); #if defined(TARGET_I386) log_cpu_state(cpu, CPU_DUMP_CCOP); #else log_cpu_state(cpu, 0); #endif +qemu_log_unlock(); } #endif /* DEBUG_DISAS */ diff --git a/exec.c b/exec.c index 4d08581..b1094c0 100644 --- a/exec.c +++ b/exec.c @@ -911,11 +911,13 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) fprintf(stderr, "\n"); cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP); if (qemu_log_separate()) { +qemu_log_lock(); qemu_log("qemu: fatal: "); qemu_log_vprintf(fmt, ap2); qemu_log("\n"); log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP); qemu_log_flush(); +qemu_log_unlock(); qemu_log_close(); } va_end(ap2); diff --git a/include/qemu/log.h b/include/qemu/log.h index 00bf37f..a50e994 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -51,6 +51,22 @@ static inline bool qemu_loglevel_mask(int mask) return (qemu_loglevel & mask) != 0; } +/* Lock output for a series of related logs. Since this is not needed + * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we + * assume that qemu_loglevel_mask has already been tested, and that + * qemu_loglevel is never set when qemu_logfile is unset. + */ + +static inline void qemu_log_lock(void) +{ +qemu_flockfile(qemu_logfile); +} + +static inline void qemu_log_unlock(void) +{ +qemu_funlockfile(qemu_logfile); +} + /* Logging functions: */ /* main logging function diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 3cfedbc..b0a6c06 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -87,4 +87,16 @@ void *qemu_alloc_stack(size_t *sz); */ void qemu_free_stack(void *stack, size_t sz); +/* POSIX and Mingw32 differ in the name of the stdio lock functions. */ + +static inline void qemu_flockfile(FILE *f) +{ +flockfile(f); +} + +static inline void qemu_funlockfile(FILE *f) +{ +funlockfile(f); +} + #endif diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index 17aad3b..ff18b23 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -103,6 +103,21 @@ static inline char *realpath(const char *path, char *resolved_path) return resolved_path; } +/* ??? Mingw appears to export _lock_file and _unlock_file as the functions + * with which to lock a stdio handle. But something is wrong in the markup, + * either in the header or the library, such that we get undefined references + * to "_imp___lock_file" etc when linking. Since we seem to have no other + * alternative, and the usage within the logging functions isn't critical, + * ignore FILE locking. + */ + +static inline void qemu_flockfile(FILE *f) +{ +} + +static inline void qemu_funlockfile(FILE *f) +{ +} /* We wrap all the sockets functions so that we can * set errno based on WSAGetLastError() diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 03e4776..114927b 100644 --- a/target-alpha/translate.
[Qemu-devel] [PULL 18/30] nbd: Less allocation during NBD_OPT_LIST
From: Eric Blake Since we know that the maximum name we are willing to accept is small enough to stack-allocate, rework the iteration over NBD_OPT_LIST responses to reuse a stack buffer rather than allocating every time. Furthermore, we don't even have to allocate if we know the server's length doesn't match what we are searching for. Signed-off-by: Eric Blake Message-Id: <1476469998-28592-12-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/client.c | 139 --- 1 file changed, 67 insertions(+), 72 deletions(-) diff --git a/nbd/client.c b/nbd/client.c index 5d94e34..0afb8be 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -254,19 +254,28 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, return result; } -static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) +/* Process another portion of the NBD_OPT_LIST reply. Set *@match if + * the current reply matches @want or if the server does not support + * NBD_OPT_LIST, otherwise leave @match alone. Return 0 if iteration + * is complete, positive if more replies are expected, or negative + * with @errp set if an unrecoverable error occurred. */ +static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, +Error **errp) { nbd_opt_reply reply; uint32_t len; uint32_t namelen; +char name[NBD_MAX_NAME_SIZE + 1]; int error; -*name = NULL; if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) { return -1; } error = nbd_handle_reply_err(ioc, &reply, errp); if (error <= 0) { +/* The server did not support NBD_OPT_LIST, so set *match on + * the assumption that any name will be accepted. */ +*match = true; return error; } len = reply.length; @@ -277,105 +286,91 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) nbd_send_opt_abort(ioc); return -1; } -} else if (reply.type == NBD_REP_SERVER) { -if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { -error_setg(errp, "incorrect option length %" PRIu32, len); -nbd_send_opt_abort(ioc); -return -1; -} -if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { -error_setg(errp, "failed to read option name length"); -nbd_send_opt_abort(ioc); -return -1; -} -namelen = be32_to_cpu(namelen); -len -= sizeof(namelen); -if (len < namelen) { -error_setg(errp, "incorrect option name length"); -nbd_send_opt_abort(ioc); -return -1; -} -if (namelen > NBD_MAX_NAME_SIZE) { -error_setg(errp, "export name length too long %" PRIu32, namelen); -nbd_send_opt_abort(ioc); -return -1; -} +return 0; +} else if (reply.type != NBD_REP_SERVER) { +error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", + reply.type, NBD_REP_SERVER); +nbd_send_opt_abort(ioc); +return -1; +} -*name = g_new0(char, namelen + 1); -if (read_sync(ioc, *name, namelen) != namelen) { -error_setg(errp, "failed to read export name"); -g_free(*name); -*name = NULL; -nbd_send_opt_abort(ioc); -return -1; -} -(*name)[namelen] = '\0'; -len -= namelen; +if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { +error_setg(errp, "incorrect option length %" PRIu32, len); +nbd_send_opt_abort(ioc); +return -1; +} +if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { +error_setg(errp, "failed to read option name length"); +nbd_send_opt_abort(ioc); +return -1; +} +namelen = be32_to_cpu(namelen); +len -= sizeof(namelen); +if (len < namelen) { +error_setg(errp, "incorrect option name length"); +nbd_send_opt_abort(ioc); +return -1; +} +if (namelen != strlen(want)) { if (drop_sync(ioc, len) != len) { -error_setg(errp, "failed to read export description"); -g_free(*name); -*name = NULL; +error_setg(errp, "failed to skip export name with wrong length"); nbd_send_opt_abort(ioc); return -1; } -} else { -error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", - reply.type, NBD_REP_SERVER); +return 1; +} + +assert(namelen < sizeof(name)); +if (read_sync(ioc, name, namelen) != namelen) { +error_setg(errp, "failed to read export name"); +nbd_send_opt_abort(ioc); +return -1; +} +name[namelen] = '\0'; +len -= namelen; +if (drop_sync(ioc, len) != len)
[Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
Introduce this field to control whether ACPI build is enabled by a particular machine or accelerator. It defaults to true so that PC machine has ACPI build by default. Xen accelerator will disable it because Xen is in charge of building ACPI tables for the guest. Signed-off-by: Wei Liu --- Cc: Igor Mammedov Cc: Eduardo Habkost Cc: Anthony PERARD Cc: Stefano Stabellini Cc: Sander Eikelenboom Tested a backport version which only involves trivial code movement. It worked with both -m xenfv and -m pc,accel=xen. Sander, if you want the backported patch please let me know. --- hw/i386/acpi-build.c | 2 +- hw/i386/pc.c | 19 +++ include/hw/i386/pc.h | 3 +++ xen-common.c | 6 ++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 93be96f..a5cd2fd 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2954,7 +2954,7 @@ void acpi_setup(void) return; } -if (!pcmc->has_acpi_build) { +if (!pcmc->has_acpi_build || !pcms->acpi_build_enabled) { ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); return; } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f56ea0f..3e7982f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2143,6 +2143,20 @@ static bool pc_machine_get_nvdimm(Object *obj, Error **errp) return pcms->acpi_nvdimm_state.is_enabled; } +static bool pc_machine_get_acpi_build(Object *obj, Error **errp) +{ +PCMachineState *pcms = PC_MACHINE(obj); + +return pcms->acpi_build_enabled; +} + +static void pc_machine_set_acpi_build(Object *obj, bool value, Error **errp) +{ +PCMachineState *pcms = PC_MACHINE(obj); + +pcms->acpi_build_enabled = value; +} + static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj) pcms->vmport = ON_OFF_AUTO_AUTO; /* nvdimm is disabled on default. */ pcms->acpi_nvdimm_state.is_enabled = false; +/* acpi build is enabled by default. */ +pcms->acpi_build_enabled = true; } static void pc_machine_reset(void) @@ -2319,6 +2335,9 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); + +object_class_property_add_bool(oc, PC_MACHINE_ACPI_BUILD, +pc_machine_get_acpi_build, pc_machine_set_acpi_build, &error_abort); } static const TypeInfo pc_machine_info = { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 17fff80..ec8cd0c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -63,6 +63,8 @@ struct PCMachineState { AcpiNVDIMMState acpi_nvdimm_state; +bool acpi_build_enabled; + /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -87,6 +89,7 @@ struct PCMachineState { #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMM "smm" #define PC_MACHINE_NVDIMM "nvdimm" +#define PC_MACHINE_ACPI_BUILD "acpi-build" /** * PCMachineClass: diff --git a/xen-common.c b/xen-common.c index e641ad1..b1858d7 100644 --- a/xen-common.c +++ b/xen-common.c @@ -9,6 +9,7 @@ */ #include "qemu/osdep.h" +#include "hw/i386/pc.h" #include "hw/xen/xen_backend.h" #include "qmp-commands.h" #include "sysemu/char.h" @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int running, static int xen_init(MachineState *ms) { +PCMachineState *pcms = PC_MACHINE(ms); + +/* Disable ACPI build because Xen handles it */ +pcms->acpi_build_enabled = false; + xen_xc = xc_interface_open(0, 0, 0); if (xen_xc == NULL) { xen_be_printf(NULL, 0, "can't open xen interface\n"); -- 2.1.4
Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 02:59:25PM -0200, Eduardo Habkost wrote: > On Tue, Nov 01, 2016 at 04:53:17PM +, Wei Liu wrote: > > On Tue, Nov 01, 2016 at 02:48:27PM -0200, Eduardo Habkost wrote: > > [...] > > > > static void pc_machine_set_nvdimm(Object *obj, bool value, Error > > > > **errp) > > > > { > > > > PCMachineState *pcms = PC_MACHINE(obj); > > > > @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj) > > > > pcms->vmport = ON_OFF_AUTO_AUTO; > > > > /* nvdimm is disabled on default. */ > > > > pcms->acpi_nvdimm_state.is_enabled = false; > > > > +/* acpi build is enabled by default. */ > > > > +pcms->acpi_build_enabled = true; > > > > > > If you set: > > > pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; > > > the default value will be more consistent with the actual results > > > (where pc-1.6 and older don't have ACPI build). Then you would > > > probably be able to remove the pcmc->has_acpi_build check from > > > acpi_setup() and only check pcms->acpi_build_enabled. > > > > > > > Thank you for your good advice. > > > > I take it that you're ok with the name of the field and the code in > > general? If so I will drop RFC tag in my next submission. > > The rest looks good to me, except that I don't see a reason to > add a "acpi-build" property if it's not used for anything (all > code is using the struct field directly). > OK. I'm fine with deleting that. I was just following existing examples. Wei. > -- > Eduardo
[Qemu-devel] [PULL 30/30] main-loop: Suppress I/O thread warning under qtest
From: Max Reitz We do not want to display the "I/O thread spun" warning for test cases that run under qtest. The first attempt for this (commit 01c22f2cdd4fcf02276ea10f48253850a5fd7259) tested whether qtest_enabled() was true. Commit 21a24302e85024dd7b2a151158adbc1f5dc5c4dd correctly recognized that just testing qtest_enabled() is not sufficient since there are some tests that do not use the qtest accelerator but just the qtest character device, and thus replaced qtest_enabled() by qtest_driver(). However, there are also some tests that only use the qtest accelerator and not the qtest chardev; perhaps most notably the bash iotests. Therefore, we have to check both qtest_enabled() and qtest_driver(). Signed-off-by: Max Reitz Message-Id: <20161017180939.27912-1-mre...@redhat.com> Signed-off-by: Paolo Bonzini --- main-loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main-loop.c b/main-loop.c index 66c4eb6..ad10bca 100644 --- a/main-loop.c +++ b/main-loop.c @@ -234,7 +234,7 @@ static int os_host_main_loop_wait(int64_t timeout) if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) { static bool notified; -if (!notified && !qtest_driver()) { +if (!notified && !qtest_enabled() && !qtest_driver()) { fprintf(stderr, "main-loop: WARNING: I/O thread spun for %d iterations\n", MAX_MAIN_LOOP_SPIN); -- 2.7.4
[Qemu-devel] [PULL 15/30] nbd: Share common option-sending code in client
From: Eric Blake Rather than open-coding each option request, it's easier to have common helper functions do the work. That in turn requires having convenient packed types for handling option requests and replies. Signed-off-by: Eric Blake Message-Id: <1476469998-28592-9-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- include/block/nbd.h | 25 +- nbd/client.c| 255 ++-- nbd/nbd-internal.h | 2 +- 3 files changed, 131 insertions(+), 151 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index a33581b6..b69bf1d 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -26,15 +26,34 @@ #include "io/channel-socket.h" #include "crypto/tlscreds.h" -/* Note: these are _NOT_ the same as the network representation of an NBD +/* Handshake phase structs - this struct is passed on the wire */ + +struct nbd_option { +uint64_t magic; /* NBD_OPTS_MAGIC */ +uint32_t option; /* NBD_OPT_* */ +uint32_t length; +} QEMU_PACKED; +typedef struct nbd_option nbd_option; + +struct nbd_opt_reply { +uint64_t magic; /* NBD_REP_MAGIC */ +uint32_t option; /* NBD_OPT_* */ +uint32_t type; /* NBD_REP_* */ +uint32_t length; +} QEMU_PACKED; +typedef struct nbd_opt_reply nbd_opt_reply; + +/* Transmission phase structs + * + * Note: these are _NOT_ the same as the network representation of an NBD * request and reply! */ struct NBDRequest { uint64_t handle; uint64_t from; uint32_t len; -uint16_t flags; -uint16_t type; +uint16_t flags; /* NBD_CMD_FLAG_* */ +uint16_t type; /* NBD_CMD_* */ }; typedef struct NBDRequest NBDRequest; diff --git a/nbd/client.c b/nbd/client.c index fe24d31..e78000a 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -75,64 +75,128 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ +/* Send an option request. + * + * The request is for option @opt, with @data containing @len bytes of + * additional payload for the request (@len may be -1 to treat @data as + * a C string; and @data may be NULL if @len is 0). + * Return 0 if successful, -1 with errp set if it is impossible to + * continue. */ +static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, + uint32_t len, const char *data, + Error **errp) +{ +nbd_option req; +QEMU_BUILD_BUG_ON(sizeof(req) != 16); + +if (len == -1) { +req.length = len = strlen(data); +} +TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len); + +stq_be_p(&req.magic, NBD_OPTS_MAGIC); +stl_be_p(&req.option, opt); +stl_be_p(&req.length, len); + +if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) { +error_setg(errp, "Failed to send option request header"); +return -1; +} + +if (len && write_sync(ioc, (char *) data, len) != len) { +error_setg(errp, "Failed to send option request data"); +return -1; +} + +return 0; +} + +/* Receive the header of an option reply, which should match the given + * opt. Read through the length field, but NOT the length bytes of + * payload. Return 0 if successful, -1 with errp set if it is + * impossible to continue. */ +static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, +nbd_opt_reply *reply, Error **errp) +{ +QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); +if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) { +error_setg(errp, "failed to read option reply"); +return -1; +} +be64_to_cpus(&reply->magic); +be32_to_cpus(&reply->option); +be32_to_cpus(&reply->type); +be32_to_cpus(&reply->length); + +TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32, + reply->option, reply->type, reply->length); -/* If type represents success, return 1 without further action. - * If type represents an error reply, consume the rest of the packet on ioc. - * Then return 0 for unsupported (so the client can fall back to - * other approaches), or -1 with errp set for other errors. +if (reply->magic != NBD_REP_MAGIC) { +error_setg(errp, "Unexpected option reply magic"); +return -1; +} +if (reply->option != opt) { +error_setg(errp, "Unexpected option type %x expected %x", + reply->option, opt); +return -1; +} +return 0; +} + +/* If reply represents success, return 1 without further action. + * If reply represents an error, consume the optional payload of + * the packet on ioc. Then return 0 for unsupported (so the client + * can fall back to other approaches), or -1 with errp set for other + * errors. */ -static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, +static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, Error **errp)
Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 04:53:17PM +, Wei Liu wrote: > On Tue, Nov 01, 2016 at 02:48:27PM -0200, Eduardo Habkost wrote: > [...] > > > static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) > > > { > > > PCMachineState *pcms = PC_MACHINE(obj); > > > @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj) > > > pcms->vmport = ON_OFF_AUTO_AUTO; > > > /* nvdimm is disabled on default. */ > > > pcms->acpi_nvdimm_state.is_enabled = false; > > > +/* acpi build is enabled by default. */ > > > +pcms->acpi_build_enabled = true; > > > > If you set: > > pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; > > the default value will be more consistent with the actual results > > (where pc-1.6 and older don't have ACPI build). Then you would > > probably be able to remove the pcmc->has_acpi_build check from > > acpi_setup() and only check pcms->acpi_build_enabled. > > > > Thank you for your good advice. > > I take it that you're ok with the name of the field and the code in > general? If so I will drop RFC tag in my next submission. The rest looks good to me, except that I don't see a reason to add a "acpi-build" property if it's not used for anything (all code is using the struct field directly). -- Eduardo
[Qemu-devel] [PULL 28/30] vl: exit qemu on guest panic if -no-shutdown is not set
From: Christian Borntraeger For automated testing purposes it can be helpful to exit qemu (poweroff) when the guest panics. Make this the default unless -no-shutdown is specified. For internal-errors like errors from KVM_RUN the behaviour is not changed, in other words QEMU does not exit to allow debugging in the QEMU monitor. Signed-off-by: Christian Borntraeger Message-Id: <1476775794-108012-1-git-send-email-borntrae...@de.ibm.com> Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- qapi-schema.json | 4 ++-- vl.c | 5 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index 5dc96af..b0b4bf6 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4621,10 +4621,10 @@ # # @pause: system pauses # -# Since: 2.1 +# Since: 2.1 (poweroff since 2.8) ## { 'enum': 'GuestPanicAction', - 'data': [ 'pause' ] } + 'data': [ 'pause', 'poweroff' ] } ## # @rtc-reset-reinjection diff --git a/vl.c b/vl.c index 368510f..319f641 100644 --- a/vl.c +++ b/vl.c @@ -1792,6 +1792,11 @@ void qemu_system_guest_panicked(void) } qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort); vm_stop(RUN_STATE_GUEST_PANICKED); +if (!no_shutdown) { +qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, + &error_abort); +qemu_system_shutdown_request(); +} } void qemu_system_reset_request(void) -- 2.7.4
[Qemu-devel] [PULL 14/30] nbd: Send message along with server NBD_REP_ERR errors
From: Eric Blake The NBD Protocol allows us to send human-readable messages along with any NBD_REP_ERR error during option negotiation; make use of this fact for clients that know what to do with our message. Signed-off-by: Eric Blake Message-Id: <1476469998-28592-8-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 78 +--- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 0f0c68c..fa01e49 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -236,6 +236,38 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) return nbd_negotiate_send_rep_len(ioc, type, opt, 0); } +/* Send an error reply. + * Return -errno on error, 0 on success. */ +static int GCC_FMT_ATTR(4, 5) +nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, + uint32_t opt, const char *fmt, ...) +{ +va_list va; +char *msg; +int ret; +size_t len; + +va_start(va, fmt); +msg = g_strdup_vprintf(fmt, va); +va_end(va); +len = strlen(msg); +assert(len < 4096); +TRACE("sending error message \"%s\"", msg); +ret = nbd_negotiate_send_rep_len(ioc, type, opt, len); +if (ret < 0) { +goto out; +} +if (nbd_negotiate_write(ioc, msg, len) != len) { +LOG("write failed (error message)"); +ret = -EIO; +} else { +ret = 0; +} +out: +g_free(msg); +return ret; +} + /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. * Return -errno on error, 0 on success. */ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) @@ -281,8 +313,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } -return nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, NBD_OPT_LIST); +return nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, NBD_OPT_LIST, + "OPT_LIST should not have length"); } /* For each export, send a NBD_REP_SERVER reply. */ @@ -329,7 +362,8 @@ fail: return rc; } - +/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. */ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, uint32_t length) { @@ -343,7 +377,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, if (nbd_negotiate_drop_sync(ioc, length) != length) { return NULL; } -nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); +nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, + "OPT_STARTTLS should not have length"); return NULL; } @@ -474,13 +509,15 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: -TRACE("Option 0x%" PRIx32 " not permitted before TLS", - clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } -ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, - clientflags); +ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_TLS_REQD, + clientflags, + "Option 0x%" PRIx32 + "not permitted before TLS", + clientflags); if (ret < 0) { return ret; } @@ -506,27 +543,30 @@ static int nbd_negotiate_options(NBDClient *client) return -EIO; } if (client->tlscreds) { -TRACE("TLS already enabled"); -ret = nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, - clientflags); +ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, + clientflags, + "TLS already enabled"); } else { -TRACE("TLS not configured"); -ret = nbd_negotiate_send_rep(client->ioc, -
Re: [Qemu-devel] [RFC PATCH v3 00/18] x86: Secure Encrypted Virtualization (AMD)
On 1 November 2016 at 16:22, wrote: > Hi, > > Your series seems to have some coding style problems. See output below for > more information: Fam, it looks like patchew's test script is assuming git arguments that don't exist on the git version it's trying to use: > === TEST SCRIPT BEGIN === > #!/bin/bash > > BASE=base > n=1 > total=$(git log --oneline $BASE.. | wc -l) > failed=0 > > # Useful git options > git config --local diff.renamelimit 0 > git config --local diff.renames True > > commits="$(git log --format=%H --reverse $BASE..)" > for c in $commits; do > echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..." > if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; > then > failed=1 > echo > fi > n=$((n+1)) > done > > exit $failed > === TEST SCRIPT END === > === OUTPUT BEGIN === > fatal: unrecognized argument: --no-patch > Checking PATCH 1/18: ... > fatal: unrecognized argument: --no-patch > Checking PATCH 2/18: ... > fatal: unrecognized argument: --no-patch [etc] thanks -- PMM
[Qemu-devel] [PULL 22/30] nbd: Implement NBD_CMD_WRITE_ZEROES on server
From: Eric Blake Upstream NBD protocol recently added the ability to efficiently write zeroes without having to send the zeroes over the wire, along with a flag to control whether the client wants to allow a hole. Note that when it comes to requiring full allocation, vs. permitting optimizations, the NBD spec intentionally picked a different sense for the flag; the rules in qemu are: MAY_UNMAP == 0: must write zeroes MAY_UNMAP == 1: may use holes if reads will see zeroes while in NBD, the rules are: FLAG_NO_HOLE == 1: must write zeroes FLAG_NO_HOLE == 0: may use holes if reads will see zeroes In all cases, the 'may use holes' scenario is optional (the server need not use a hole, and must not use a hole if subsequent reads would not see zeroes). Signed-off-by: Eric Blake Message-Id: <1476469998-28592-16-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- include/block/nbd.h | 8 ++-- nbd/server.c| 42 -- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index eea7ef0..3e373f0 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -71,6 +71,7 @@ typedef struct NBDReply NBDReply; #define NBD_FLAG_SEND_FUA (1 << 3)/* Send FUA (Force Unit Access) */ #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5)/* Send TRIM (discard) */ +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ /* New-style handshake (global) flags, sent from server to client, and control what will happen during handshake phase. */ @@ -96,7 +97,8 @@ typedef struct NBDReply NBDReply; #define NBD_REP_ERR_SHUTDOWNNBD_REP_ERR(7) /* Server shutting down */ /* Request flags, sent from client to server during transmission phase */ -#define NBD_CMD_FLAG_FUA(1 << 0) +#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write */ +#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */ /* Supported request types */ enum { @@ -104,7 +106,9 @@ enum { NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, -NBD_CMD_TRIM = 4 +NBD_CMD_TRIM = 4, +/* 5 reserved for failed experiment NBD_CMD_CACHE */ +NBD_CMD_WRITE_ZEROES = 6, }; #define NBD_DEFAULT_PORT 10809 diff --git a/nbd/server.c b/nbd/server.c index 0b50caa..5b76261 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -616,7 +616,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) char buf[8 + 8 + 8 + 128]; int rc; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | - NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); + NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | + NBD_FLAG_SEND_WRITE_ZEROES); bool oldStyle; size_t len; @@ -1146,11 +1147,17 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req, rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; goto out; } -if (request->flags & ~NBD_CMD_FLAG_FUA) { +if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { LOG("unsupported flags (got 0x%x)", request->flags); rc = -EINVAL; goto out; } +if (request->type != NBD_CMD_WRITE_ZEROES && +(request->flags & NBD_CMD_FLAG_NO_HOLE)) { +LOG("unexpected flags (got 0x%x)", request->flags); +rc = -EINVAL; +goto out; +} rc = 0; @@ -1255,6 +1262,37 @@ static void nbd_trip(void *opaque) } break; +case NBD_CMD_WRITE_ZEROES: +TRACE("Request type is WRITE_ZEROES"); + +if (exp->nbdflags & NBD_FLAG_READ_ONLY) { +TRACE("Server is read-only, return error"); +reply.error = EROFS; +goto error_reply; +} + +TRACE("Writing to device"); + +flags = 0; +if (request.flags & NBD_CMD_FLAG_FUA) { +flags |= BDRV_REQ_FUA; +} +if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { +flags |= BDRV_REQ_MAY_UNMAP; +} +ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, +request.len, flags); +if (ret < 0) { +LOG("writing to file failed"); +reply.error = -ret; +goto error_reply; +} + +if (nbd_co_send_reply(req, &reply, 0) < 0) { +goto out; +} +break; + case NBD_CMD_DISC: /* unreachable, thanks to special case in nbd_co_receive_request() */ abort(); -- 2.7.4
[Qemu-devel] [PULL 11/30] nbd: Rename NbdClientSession to NBDClientSession
From: Eric Blake It's better to use consistent capitalization of the namespace used for NBD functions; we have more instances of NBD* than Nbd*. Signed-off-by: Eric Blake Message-Id: <1476469998-28592-5-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- block/nbd-client.c | 26 +- block/nbd-client.h | 6 +++--- block/nbd.c| 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/block/nbd-client.c b/block/nbd-client.c index 8ca5030..e6fe603 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -33,7 +33,7 @@ #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) #define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs)) -static void nbd_recv_coroutines_enter_all(NbdClientSession *s) +static void nbd_recv_coroutines_enter_all(NBDClientSession *s) { int i; @@ -46,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s) static void nbd_teardown_connection(BlockDriverState *bs) { -NbdClientSession *client = nbd_get_client_session(bs); +NBDClientSession *client = nbd_get_client_session(bs); if (!client->ioc) { /* Already closed */ return; @@ -68,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs) static void nbd_reply_ready(void *opaque) { BlockDriverState *bs = opaque; -NbdClientSession *s = nbd_get_client_session(bs); +NBDClientSession *s = nbd_get_client_session(bs); uint64_t i; int ret; @@ -119,7 +119,7 @@ static int nbd_co_send_request(BlockDriverState *bs, struct nbd_request *request, QEMUIOVector *qiov) { -NbdClientSession *s = nbd_get_client_session(bs); +NBDClientSession *s = nbd_get_client_session(bs); AioContext *aio_context; int rc, ret, i; @@ -167,7 +167,7 @@ static int nbd_co_send_request(BlockDriverState *bs, return rc; } -static void nbd_co_receive_reply(NbdClientSession *s, +static void nbd_co_receive_reply(NBDClientSession *s, struct nbd_request *request, struct nbd_reply *reply, QEMUIOVector *qiov) @@ -195,7 +195,7 @@ static void nbd_co_receive_reply(NbdClientSession *s, } } -static void nbd_coroutine_start(NbdClientSession *s, +static void nbd_coroutine_start(NBDClientSession *s, struct nbd_request *request) { /* Poor man semaphore. The free_sema is locked when no other request @@ -209,7 +209,7 @@ static void nbd_coroutine_start(NbdClientSession *s, /* s->recv_coroutine[i] is set as soon as we get the send_lock. */ } -static void nbd_coroutine_end(NbdClientSession *s, +static void nbd_coroutine_end(NBDClientSession *s, struct nbd_request *request) { int i = HANDLE_TO_INDEX(s, request->handle); @@ -222,7 +222,7 @@ static void nbd_coroutine_end(NbdClientSession *s, int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { -NbdClientSession *client = nbd_get_client_session(bs); +NBDClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_READ, .from = offset, @@ -248,7 +248,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { -NbdClientSession *client = nbd_get_client_session(bs); +NBDClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_WRITE, .from = offset, @@ -277,7 +277,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, int nbd_client_co_flush(BlockDriverState *bs) { -NbdClientSession *client = nbd_get_client_session(bs); +NBDClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_FLUSH }; struct nbd_reply reply; ssize_t ret; @@ -302,7 +302,7 @@ int nbd_client_co_flush(BlockDriverState *bs) int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { -NbdClientSession *client = nbd_get_client_session(bs); +NBDClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_TRIM, .from = offset, @@ -343,7 +343,7 @@ void nbd_client_attach_aio_context(BlockDriverState *bs, void nbd_client_close(BlockDriverState *bs) { -NbdClientSession *client = nbd_get_client_session(bs); +NBDClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_DISC }; if (client->ioc == NULL) { @@ -362,7 +362,7 @@ int nbd_client_init(BlockDriverState *bs, const char *hostname, Error **errp) { -NbdClientSession *c
Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field
On Tue, Nov 01, 2016 at 02:48:27PM -0200, Eduardo Habkost wrote: [...] > > static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) > > { > > PCMachineState *pcms = PC_MACHINE(obj); > > @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj) > > pcms->vmport = ON_OFF_AUTO_AUTO; > > /* nvdimm is disabled on default. */ > > pcms->acpi_nvdimm_state.is_enabled = false; > > +/* acpi build is enabled by default. */ > > +pcms->acpi_build_enabled = true; > > If you set: > pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; > the default value will be more consistent with the actual results > (where pc-1.6 and older don't have ACPI build). Then you would > probably be able to remove the pcmc->has_acpi_build check from > acpi_setup() and only check pcms->acpi_build_enabled. > Thank you for your good advice. I take it that you're ok with the name of the field and the code in general? If so I will drop RFC tag in my next submission. Wei.
[Qemu-devel] [PULL 26/30] x86: add AVX512_4VNNIW and AVX512_4FMAPS features
From: Luwei Kang The spec can be found in Intel Software Developer Manual or in Instruction Set Extensions Programming Reference. Signed-off-by: Piotr Luc Signed-off-by: Luwei Kang Message-Id: <1477902446-5932-1-git-send-email-he.c...@linux.intel.com> Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 19 ++- target-i386/cpu.h | 4 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0f8a8fb..14c5186 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -239,6 +239,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE) +#define TCG_7_0_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) @@ -444,6 +445,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid_reg = R_ECX, .tcg_features = TCG_7_0_ECX_FEATURES, }, +[FEAT_7_0_EDX] = { +.feat_names = { +NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +}, +.cpuid_eax = 7, +.cpuid_needs_ecx = true, .cpuid_ecx = 0, +.cpuid_reg = R_EDX, +.tcg_features = TCG_7_0_EDX_FEATURES, +}, [FEAT_8000_0007_EDX] = { .feat_names = { NULL, NULL, NULL, NULL, @@ -2560,7 +2577,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) { *ecx |= CPUID_7_0_ECX_OSPKE; } -*edx = 0; /* Reserved */ +*edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else { *eax = 0; *ebx = 0; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 6303d65..c605724 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -443,6 +443,7 @@ typedef enum FeatureWord { FEAT_1_ECX, /* CPUID[1].ECX */ FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ +FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */ FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ @@ -629,6 +630,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_ECX_OSPKE(1U << 4) #define CPUID_7_0_ECX_RDPID(1U << 22) +#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ +#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1(1U << 2) -- 2.7.4
[Qemu-devel] [PULL 05/30] exec.c: ensure all AddressSpaceDispatch updates under RCU
From: Alex Bennée The memory_dispatch field is meant to be protected by RCU so we should use the correct primitives when accessing it. This race was flagged up by the ThreadSanitizer. Signed-off-by: Alex Bennée Message-Id: <20161021153418.21571-1-alex.ben...@linaro.org> Signed-off-by: Paolo Bonzini --- exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exec.c b/exec.c index 4d08581..a19ed21 100644 --- a/exec.c +++ b/exec.c @@ -493,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen) { MemoryRegionSection *section; -AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch; +AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch); section = address_space_translate_internal(d, addr, xlat, plen, false); @@ -2376,7 +2376,7 @@ static void tcg_commit(MemoryListener *listener) * may have split the RCU critical section. */ d = atomic_rcu_read(&cpuas->as->dispatch); -cpuas->memory_dispatch = d; +atomic_rcu_set(&cpuas->memory_dispatch, d); tlb_flush(cpuas->cpu, 1); } -- 2.7.4
[Qemu-devel] [PULL 21/30] nbd: Improve server handling of shutdown requests
From: Eric Blake NBD commit 6d34500b clarified how clients and servers are supposed to behave before closing a connection. It added NBD_REP_ERR_SHUTDOWN (for the server to announce it is about to go away during option haggling, so the client should quit sending NBD_OPT_* other than NBD_OPT_ABORT) and ESHUTDOWN (for the server to announce it is about to go away during transmission, so the client should quit sending NBD_CMD_* other than NBD_CMD_DISC). It also clarified that NBD_OPT_ABORT gets a reply, while NBD_CMD_DISC does not. This patch merely adds the missing reply to NBD_OPT_ABORT and teaches the client to recognize server errors. Actually teaching the server to send NBD_REP_ERR_SHUTDOWN or ESHUTDOWN would require knowing that the server has been requested to shut down soon (maybe we could do that by installing a SIGINT handler in qemu-nbd, which transitions from RUNNING to a new state that waits for the client to react, rather than just out-right quitting - but that's a bigger task for another day). Signed-off-by: Eric Blake Message-Id: <1476469998-28592-15-git-send-email-ebl...@redhat.com> [Move dummy ESHUTDOWN to include/qemu/osdep.h. - Paolo] Signed-off-by: Paolo Bonzini --- include/block/nbd.h | 13 + include/qemu/osdep.h | 3 +++ nbd/client.c | 18 ++ nbd/nbd-internal.h | 1 + nbd/server.c | 10 ++ 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index d326308..eea7ef0 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -83,12 +83,17 @@ typedef struct NBDReply NBDReply; #define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ /* Reply types. */ +#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value)) + #define NBD_REP_ACK (1) /* Data sending finished. */ #define NBD_REP_SERVER (2) /* Export description. */ -#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */ -#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */ -#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */ -#define NBD_REP_ERR_TLS_REQD((UINT32_C(1) << 31) | 5) /* TLS required */ + +#define NBD_REP_ERR_UNSUP NBD_REP_ERR(1) /* Unknown option */ +#define NBD_REP_ERR_POLICY NBD_REP_ERR(2) /* Server denied */ +#define NBD_REP_ERR_INVALID NBD_REP_ERR(3) /* Invalid length */ +#define NBD_REP_ERR_PLATFORMNBD_REP_ERR(4) /* Not compiled in */ +#define NBD_REP_ERR_TLS_REQDNBD_REP_ERR(5) /* TLS required */ +#define NBD_REP_ERR_SHUTDOWNNBD_REP_ERR(7) /* Server shutting down */ /* Request flags, sent from client to server during transmission phase */ #define NBD_CMD_FLAG_FUA(1 << 0) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 0e3c330..689f253 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -128,6 +128,9 @@ extern int daemon(int, int); #if !defined(EMEDIUMTYPE) #define EMEDIUMTYPE 4098 #endif +#if !defined(ESHUTDOWN) +#define ESHUTDOWN 4099 +#endif #ifndef TIME_MAX #define TIME_MAX LONG_MAX #endif diff --git a/nbd/client.c b/nbd/client.c index 7bdce53..7db4301 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -40,6 +40,9 @@ static int nbd_errno_to_system_errno(int err) case NBD_ENOSPC: ret = ENOSPC; break; +case NBD_ESHUTDOWN: +ret = ESHUTDOWN; +break; default: TRACE("Squashing unexpected error %d to EINVAL", err); /* fallthrough */ @@ -239,11 +242,21 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, reply->option); break; +case NBD_REP_ERR_PLATFORM: +error_setg(errp, "Server lacks support for option %" PRIx32, + reply->option); +break; + case NBD_REP_ERR_TLS_REQD: error_setg(errp, "TLS negotiation required before option %" PRIx32, reply->option); break; +case NBD_REP_ERR_SHUTDOWN: +error_setg(errp, "Server shutting down before option %" PRIx32, + reply->option); +break; + default: error_setg(errp, "Unknown error code when asking for option %" PRIx32, reply->option); @@ -785,6 +798,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) reply->error = nbd_errno_to_system_errno(reply->error); +if (reply->error == ESHUTDOWN) { +/* This works even on mingw which lacks a native ESHUTDOWN */ +LOG("server shutting down"); +return -EINVAL; +} TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }", magic, reply->error, reply->handle); diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index dd57e18..eee20ab 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -92,6 +92,7 @@ #define NBD_ENOMEM 12
Re: [Qemu-devel] [PATCH v5 28/33] cputlb: make tlb_flush_by_mmuidx safe for MTTCG
Pranith Kumar writes: > On Tue, Nov 1, 2016 at 3:45 AM, Alex Bennée wrote: > >> >> Odd. I'll look into it. What was you configure string and host architecture? >> > > It's a plain configure string, nothing special: > > $ ../configure --target-list=aarch64-softmmu > > But I did rebase your patches on master, May be something new in the > tree tripped this? Yeah, I'll look for a fix with dynamic page sizes when I re-base. -- Alex Bennée
[Qemu-devel] [PULL 23/30] nbd: Implement NBD_CMD_WRITE_ZEROES on client
From: Eric Blake Upstream NBD protocol recently added the ability to efficiently write zeroes without having to send the zeroes over the wire, along with a flag to control whether the client wants a hole. The generic block code takes care of falling back to the obvious write of lots of zeroes if we return -ENOTSUP because the server does not have WRITE_ZEROES. Ideally, since NBD_CMD_WRITE_ZEROES does not involve any data over the wire, we want to support transactions that are much larger than the normal 32M limit imposed on NBD_CMD_WRITE. But the server may still have a limit smaller than UINT_MAX, so until experimental NBD protocol additions for advertising various command sizes is finalized (see [1], [2]), for now we just stick to the same limits as normal writes. [1] https://github.com/yoe/nbd/blob/extension-info/doc/proto.md [2] https://sourceforge.net/p/nbd/mailman/message/35081223/ Signed-off-by: Eric Blake Message-Id: <1476469998-28592-17-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- block/nbd-client.c | 35 +++ block/nbd-client.h | 2 ++ block/nbd.c| 4 3 files changed, 41 insertions(+) diff --git a/block/nbd-client.c b/block/nbd-client.c index 3e5f9f5..2a302de 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -275,6 +275,41 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, return -reply.error; } +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, +int count, BdrvRequestFlags flags) +{ +ssize_t ret; +NBDClientSession *client = nbd_get_client_session(bs); +NBDRequest request = { +.type = NBD_CMD_WRITE_ZEROES, +.from = offset, +.len = count, +}; +NBDReply reply; + +if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) { +return -ENOTSUP; +} + +if (flags & BDRV_REQ_FUA) { +assert(client->nbdflags & NBD_FLAG_SEND_FUA); +request.flags |= NBD_CMD_FLAG_FUA; +} +if (!(flags & BDRV_REQ_MAY_UNMAP)) { +request.flags |= NBD_CMD_FLAG_NO_HOLE; +} + +nbd_coroutine_start(client, &request); +ret = nbd_co_send_request(bs, &request, NULL); +if (ret < 0) { +reply.error = -ret; +} else { +nbd_co_receive_reply(client, &request, &reply, NULL); +} +nbd_coroutine_end(client, &request); +return -reply.error; +} + int nbd_client_co_flush(BlockDriverState *bs) { NBDClientSession *client = nbd_get_client_session(bs); diff --git a/block/nbd-client.h b/block/nbd-client.h index 51be419..f8d6006 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, +int count, BdrvRequestFlags flags); int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); diff --git a/block/nbd.c b/block/nbd.c index b281484..9cff839 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -466,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs) static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE; +bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE; bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; } @@ -558,6 +559,7 @@ static BlockDriver bdrv_nbd = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev= nbd_client_co_pwritev, +.bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os= nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -576,6 +578,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev= nbd_client_co_pwritev, +.bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os= nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -594,6 +597,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev= nbd_client_co_pwritev, +.bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os= nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, -- 2.7.4
[Qemu-devel] [PULL 16/30] nbd: Let server know when client gives up negotiation
From: Eric Blake The NBD spec says that a client should send NBD_OPT_ABORT rather than just dropping the connection, if the client doesn't like something the server sent during option negotiation. This is a best-effort attempt only, and can only be done in places where we know the server is still in sync with what we've sent, whether or not we've read everything the server has sent. Technically, the server then has to reply with NBD_REP_ACK, but it's not worth complicating the client to wait around for that reply. Signed-off-by: Eric Blake Message-Id: <1476469998-28592-10-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/client.c | 30 ++ 1 file changed, 30 insertions(+) diff --git a/nbd/client.c b/nbd/client.c index e78000a..651e08c 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -111,6 +111,19 @@ static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, return 0; } +/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are + * not going to attempt further negotiation. */ +static void nbd_send_opt_abort(QIOChannel *ioc) +{ +/* Technically, a compliant server is supposed to reply to us; but + * older servers disconnected instead. At any rate, we're allowed + * to disconnect without waiting for the server reply, so we don't + * even care if the request makes it to the server, let alone + * waiting around for whether the server replies. */ +nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL); +} + + /* Receive the header of an option reply, which should match the given * opt. Read through the length field, but NOT the length bytes of * payload. Return 0 if successful, -1 with errp set if it is @@ -121,6 +134,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) { error_setg(errp, "failed to read option reply"); +nbd_send_opt_abort(ioc); return -1; } be64_to_cpus(&reply->magic); @@ -133,11 +147,13 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, if (reply->magic != NBD_REP_MAGIC) { error_setg(errp, "Unexpected option reply magic"); +nbd_send_opt_abort(ioc); return -1; } if (reply->option != opt) { error_setg(errp, "Unexpected option type %x expected %x", reply->option, opt); +nbd_send_opt_abort(ioc); return -1; } return 0; @@ -206,6 +222,9 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, cleanup: g_free(msg); +if (result < 0) { +nbd_send_opt_abort(ioc); +} return result; } @@ -229,25 +248,30 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) if (reply.type == NBD_REP_ACK) { if (len != 0) { error_setg(errp, "length too long for option end"); +nbd_send_opt_abort(ioc); return -1; } } else if (reply.type == NBD_REP_SERVER) { if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { error_setg(errp, "incorrect option length %" PRIu32, len); +nbd_send_opt_abort(ioc); return -1; } if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { error_setg(errp, "failed to read option name length"); +nbd_send_opt_abort(ioc); return -1; } namelen = be32_to_cpu(namelen); len -= sizeof(namelen); if (len < namelen) { error_setg(errp, "incorrect option name length"); +nbd_send_opt_abort(ioc); return -1; } if (namelen > NBD_MAX_NAME_SIZE) { error_setg(errp, "export name length too long %" PRIu32, namelen); +nbd_send_opt_abort(ioc); return -1; } @@ -256,6 +280,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) error_setg(errp, "failed to read export name"); g_free(*name); *name = NULL; +nbd_send_opt_abort(ioc); return -1; } (*name)[namelen] = '\0'; @@ -267,6 +292,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) g_free(*name); g_free(buf); *name = NULL; +nbd_send_opt_abort(ioc); return -1; } buf[len] = '\0'; @@ -276,6 +302,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) } else { error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", reply.type, NBD_REP_SERVER); +nbd_send_opt_abort(ioc); return -1; } return 1; @@ -325,6 +352,7 @@ static int nbd_receive_query_exports(QIOChannel *ioc,
[Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31
The following changes since commit 39542105bbb19c690219d2f22844d8dfbd9bba05: Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging (2016-11-01 12:48:07 +) are available in the git repository at: git://github.com/bonzini/qemu.git tags/for-upstream for you to fetch changes up to a92e5ebbcea7c1d16f4aa75b08fce298fcd4cdfa: main-loop: Suppress I/O thread warning under qtest (2016-11-01 16:07:37 +0100) * NBD bugfix (Changlong) * NBD write zeroes support (Eric) * Memory backend fixes (Haozhong) * Atomics fix (Alex) * New AVX512 features (Luwei) * "make check" logging fix (Paolo) * Chardev refactoring fallout (Paolo) * Small checkpatch improvements (Paolo, Jeff) Alex Bennée (1): exec.c: ensure all AddressSpaceDispatch updates under RCU Changlong Xie (1): nbd: Use CoQueue for free_sema instead of CoMutex Christian Borntraeger (1): vl: exit qemu on guest panic if -no-shutdown is not set Eric Blake (16): nbd: Add qemu-nbd -D for human-readable description nbd: Treat flags vs. command type as separate fields nbd: Rename NBDRequest to NBDRequestData nbd: Rename NbdClientSession to NBDClientSession nbd: Rename struct nbd_request and nbd_reply nbd: Share common reply-sending code in server nbd: Send message along with server NBD_REP_ERR errors nbd: Share common option-sending code in client nbd: Let server know when client gives up negotiation nbd: Let client skip portions of server reply nbd: Less allocation during NBD_OPT_LIST nbd: Support shorter handshake nbd: Refactor conversion to errno to silence checkpatch nbd: Improve server handling of shutdown requests nbd: Implement NBD_CMD_WRITE_ZEROES on server nbd: Implement NBD_CMD_WRITE_ZEROES on client Haozhong Zhang (2): exec.c: do not truncate non-empty memory backend file exec.c: check memory backend file size with 'size' option Jeff Cody (1): checkpatch: allow spaces before parenthesis for 'coroutine_fn' Luwei Kang (1): x86: add AVX512_4VNNIW and AVX512_4FMAPS features Max Reitz (1): main-loop: Suppress I/O thread warning under qtest Paolo Bonzini (5): checkpatch: tweak "struct should normally be const" warning qemu-error: remove dependency of stubs on monitor tests: send error_report to test log qemu-char: do not forward events through the mux until QEMU has started slirp: fix CharDriver breakage Pranith Kumar (1): docs/rcu.txt: Fix minor typo block/nbd-client.c | 104 + block/nbd-client.h | 12 +- block/nbd.c | 8 +- docs/rcu.txt| 2 +- exec.c | 33 ++- include/block/nbd.h | 73 +-- include/glib-compat.h | 13 ++ include/qemu/error-report.h | 1 + include/qemu/osdep.h| 3 + main-loop.c | 2 +- monitor.c | 21 ++ nbd/client.c| 498 nbd/nbd-internal.h | 12 +- nbd/server.c| 292 ++ net/slirp.c | 3 +- qapi-schema.json| 4 +- qemu-char.c | 8 +- qemu-nbd.c | 12 +- qemu-nbd.texi | 5 +- scripts/checkpatch.pl | 6 +- stubs/Makefile.objs | 2 +- stubs/error-printf.c| 19 ++ stubs/mon-printf.c | 11 - target-i386/cpu.c | 19 +- target-i386/cpu.h | 4 + util/qemu-error.c | 26 +-- vl.c| 5 + 27 files changed, 763 insertions(+), 435 deletions(-) create mode 100644 stubs/error-printf.c delete mode 100644 stubs/mon-printf.c -- 2.7.4
Re: [Qemu-devel] [PATCH v5] block/vxhs.c Add support for a new block device type called "vxhs"
Done! All builds are fine, but I have no way of knowing if that fixed the issue! Please let me know if you still see a problem. [root@rhel72ga-build-upstream qemu] 2016-11-01 09:46:06$ make CC block/vxhs.o LINKqemu-nbd LINKqemu-img LINKqemu-io LINKaarch64-softmmu/qemu-system-aarch64 LINKalpha-softmmu/qemu-system-alpha LINKarm-softmmu/qemu-system-arm LINKcris-softmmu/qemu-system-cris LINKi386-softmmu/qemu-system-i386 LINKlm32-softmmu/qemu-system-lm32 LINKm68k-softmmu/qemu-system-m68k LINKmicroblazeel-softmmu/qemu-system-microblazeel LINKmicroblaze-softmmu/qemu-system-microblaze LINKmips64el-softmmu/qemu-system-mips64el LINKmips64-softmmu/qemu-system-mips64 LINKmipsel-softmmu/qemu-system-mipsel LINKmips-softmmu/qemu-system-mips LINKmoxie-softmmu/qemu-system-moxie LINKor32-softmmu/qemu-system-or32 LINKppc64-softmmu/qemu-system-ppc64 LINKppcemb-softmmu/qemu-system-ppcemb LINKppc-softmmu/qemu-system-ppc LINKs390x-softmmu/qemu-system-s390x LINKsh4eb-softmmu/qemu-system-sh4eb LINKsh4-softmmu/qemu-system-sh4 LINKsparc64-softmmu/qemu-system-sparc64 LINKsparc-softmmu/qemu-system-sparc LINKtricore-softmmu/qemu-system-tricore LINKunicore32-softmmu/qemu-system-unicore32 LINKx86_64-softmmu/qemu-system-x86_64 LINKxtensaeb-softmmu/qemu-system-xtensaeb LINKxtensa-softmmu/qemu-system-xtensa [root@rhel72ga-build-upstream qemu] 2016-11-01 09:48:13$ Thanks! On Tue, Nov 1, 2016 at 9:27 AM, Daniel P. Berrange wrote: > On Tue, Nov 01, 2016 at 09:09:31AM -0700, ashish mittal wrote: >> Hi Daniel, >> >> Thanks for pointing that out. I had done a fresh configure and make >> before submitting the patch. I am somehow not able to reproduce that >> error! >> >> Checked out fresh source just now and tried again, still no luck. >> >> [root@rhel72ga-build-upstream qemu] 2016-11-01 08:51:15$ git apply >> ~/qemu/31Oct2016/0001-block-vxhs.c-Add-support-for-a-new-block-device-type.patch >> [root@rhel72ga-build-upstream qemu] 2016-11-01 08:51:31$ ./configure >> --enable-vxhs >> Install prefix/usr/local >> BIOS directory/usr/local/share/qemu >> binary directory /usr/local/bin >> library directory /usr/local/lib >> module directory /usr/local/lib/qemu >> libexec directory /usr/local/libexec >> include directory /usr/local/include >> config directory /usr/local/etc >> local state directory /usr/local/var >> Manual directory /usr/local/share/man >> ELF interp prefix /usr/gnemul/qemu-%M >> Source path /root/qemu_second_buildarea/qemu >> C compilercc >> Host C compiler cc >> C++ compiler c++ >> Objective-C compiler cc >> ARFLAGS rv >> CFLAGS-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g >> QEMU_CFLAGS -I/usr/include/pixman-1-Werror >> -DHAS_LIBSSH2_SFTP_FSYNC -pthread -I/usr/include/glib-2.0 >> -I/usr/lib64/glib-2.0/include -fPIE -DPIE -m64 -mcx16 >> -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE >> -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wwrite-strings >> -Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv >> -Wendif-labels -Wmissing-include-dirs -Wempty-body -Wnested-externs >> -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers >> -Wold-style-declaration -Wold-style-definition -Wtype-limits >> -fstack-protector-strong -I/usr/include/p11-kit-1 >> -I/usr/include/libpng15 -I/usr/include/spice-server >> -I/usr/include/cacard -I/usr/include/glib-2.0 >> -I/usr/lib64/glib-2.0/include -I/usr/include/pixman-1 >> -I/usr/include/nss3 -I/usr/include/nspr4 -I/usr/include/spice-1 >> -I/usr/include/cacard -I/usr/include/nss3 -I/usr/include/nspr4 >> -I/usr/include/libusb-1.0 >> LDFLAGS -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g >> make make >> install install >> pythonpython -B >> smbd /usr/sbin/smbd >> module supportno >> host CPU x86_64 >> host big endian no >> target listaarch64-softmmu alpha-softmmu arm-softmmu >> cris-softmmu i386-softmmu lm32-softmmu m68k-softmmu >> microblazeel-softmmu microblaze-softmmu mips64el-softmmu >> mips64-softmmu mipsel-softmmu mips-softmmu moxie-softmmu or32-softmmu >> ppc64-softmmu ppcemb-softmmu ppc-softmmu s390x-softmmu sh4eb-softmmu >> sh4-softmmu sparc64-softmmu sparc-softmmu tricore-softmmu >> unicore32-softmmu x86_64-softmmu xtensaeb-softmmu xtensa-softmmu >> aarch64-linux-user alpha-linux-user armeb-linux-user arm-linux-user >> cris-linux-user i386-linux-user m68k-linux-user >> microblazeel-linux-user microblaze-linux-user mips64el-linux-user >> mips64-linux-user mipsel-linux-user mips-linux-user >> mipsn32el-linux-user mipsn32-linux-user or32-linux-user >> ppc64abi32-linux-user ppc64le-linux-user ppc64-linux-user >> ppc-linux-user s390x-linux-user sh4eb-linux-user sh4-linux-user >> sparc32plus-linux-user sparc64-linux-us
[Qemu-devel] [PULL 19/30] nbd: Support shorter handshake
From: Eric Blake The NBD Protocol allows the server and client to mutually agree on a shorter handshake (omit the 124 bytes of reserved 0), via the server advertising NBD_FLAG_NO_ZEROES and the client acknowledging with NBD_FLAG_C_NO_ZEROES (only possible in newstyle, whether or not it is fixed newstyle). It doesn't shave much off the wire, but we might as well implement it. Signed-off-by: Eric Blake Reviewed-by: Alex Bligh Message-Id: <1476469998-28592-13-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- include/block/nbd.h | 6 -- nbd/client.c| 8 +++- nbd/server.c| 15 +++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index b69bf1d..d326308 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -74,11 +74,13 @@ typedef struct NBDReply NBDReply; /* New-style handshake (global) flags, sent from server to client, and control what will happen during handshake phase. */ -#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */ +#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_NO_ZEROES(1 << 1) /* End handshake without zeroes. */ /* New-style client flags, sent from client to server to control what happens during handshake phase. */ -#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */ +#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ /* Reply types. */ #define NBD_REP_ACK (1) /* Data sending finished. */ diff --git a/nbd/client.c b/nbd/client.c index 0afb8be..b29963b 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -440,6 +440,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, char buf[256]; uint64_t magic, s; int rc; +bool zeroes = true; TRACE("Receiving negotiation tlscreds=%p hostname=%s.", tlscreds, hostname ? hostname : ""); @@ -504,6 +505,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, TRACE("Server supports fixed new style"); clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE; } +if (globalflags & NBD_FLAG_NO_ZEROES) { +zeroes = false; +TRACE("Server supports no zeroes"); +clientflags |= NBD_FLAG_C_NO_ZEROES; +} /* client requested flags */ clientflags = cpu_to_be32(clientflags); if (write_sync(ioc, &clientflags, sizeof(clientflags)) != @@ -591,7 +597,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); -if (drop_sync(ioc, 124) != 124) { +if (zeroes && drop_sync(ioc, 124) != 124) { error_setg(errp, "Failed to read reserved block"); goto fail; } diff --git a/nbd/server.c b/nbd/server.c index fa01e49..afc1ec4 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -81,6 +81,7 @@ struct NBDClient { int refcount; void (*close)(NBDClient *client); +bool no_zeroes; NBDExport *exp; QCryptoTLSCreds *tlscreds; char *tlsaclname; @@ -450,6 +451,11 @@ static int nbd_negotiate_options(NBDClient *client) fixedNewstyle = true; flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; } +if (flags & NBD_FLAG_C_NO_ZEROES) { +TRACE("Client supports no zeroes at handshake end"); +client->no_zeroes = true; +flags &= ~NBD_FLAG_C_NO_ZEROES; +} if (flags != 0) { TRACE("Unknown client flags 0x%" PRIx32 " received", flags); return -EIO; @@ -602,6 +608,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); bool oldStyle; +size_t len; /* Old style negotiation header without options [ 0 .. 7] passwd ("NBDMAGIC") @@ -618,7 +625,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) options sent [18 .. 25] size [26 .. 27] export flags -[28 .. 151] reserved (0) +[28 .. 151] reserved (0, omit if no_zeroes) */ qio_channel_set_blocking(client->ioc, false, NULL); @@ -637,7 +644,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) stw_be_p(buf + 26, client->exp->nbdflags | myflags); } else { stq_be_p(buf + 8, NBD_OPTS_MAGIC); -stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); +stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); } if (oldStyle) { @@ -664,8 +671,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) client->exp->size, client->exp->nbdflags | m
[Qemu-devel] [PULL 09/30] nbd: Treat flags vs. command type as separate fields
From: Eric Blake Current upstream NBD documents that requests have a 16-bit flags, followed by a 16-bit type integer; although older versions mentioned only a 32-bit field with masking to find flags. Since the protocol is in network order (big-endian over the wire), the ABI is unchanged; but dealing with the flags as a separate field rather than masking will make it easier to add support for upcoming NBD extensions that increase the number of both flags and commands. Improve some comments in nbd.h based on the current upstream NBD protocol (https://github.com/yoe/nbd/blob/master/doc/proto.md), and touch some nearby code to keep checkpatch.pl happy. Signed-off-by: Eric Blake Message-Id: <1476469998-28592-3-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- block/nbd-client.c | 9 +++-- include/block/nbd.h | 18 -- nbd/client.c| 9 ++--- nbd/nbd-internal.h | 4 ++-- nbd/server.c| 39 +++ 5 files changed, 42 insertions(+), 37 deletions(-) diff --git a/block/nbd-client.c b/block/nbd-client.c index 40b28ab..8ca5030 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -1,6 +1,7 @@ /* * QEMU Block driver for NBD * + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2008 Bull S.A.S. * Author: Laurent Vivier * @@ -258,7 +259,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, if (flags & BDRV_REQ_FUA) { assert(client->nbdflags & NBD_FLAG_SEND_FUA); -request.type |= NBD_CMD_FLAG_FUA; +request.flags |= NBD_CMD_FLAG_FUA; } assert(bytes <= NBD_MAX_BUFFER_SIZE); @@ -343,11 +344,7 @@ void nbd_client_attach_aio_context(BlockDriverState *bs, void nbd_client_close(BlockDriverState *bs) { NbdClientSession *client = nbd_get_client_session(bs); -struct nbd_request request = { -.type = NBD_CMD_DISC, -.from = 0, -.len = 0 -}; +struct nbd_request request = { .type = NBD_CMD_DISC }; if (client->ioc == NULL) { return; diff --git a/include/block/nbd.h b/include/block/nbd.h index fd58390..5fe2670 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori * * Network Block Device @@ -32,7 +33,8 @@ struct nbd_request { uint64_t handle; uint64_t from; uint32_t len; -uint32_t type; +uint16_t flags; +uint16_t type; }; struct nbd_reply { @@ -40,6 +42,8 @@ struct nbd_reply { uint32_t error; }; +/* Transmission (export) flags: sent from server to client during handshake, + but describe what will happen during transmission */ #define NBD_FLAG_HAS_FLAGS (1 << 0)/* Flags are there */ #define NBD_FLAG_READ_ONLY (1 << 1)/* Device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2)/* Send FLUSH */ @@ -47,10 +51,12 @@ struct nbd_reply { #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5)/* Send TRIM (discard) */ -/* New-style global flags. */ +/* New-style handshake (global) flags, sent from server to client, and + control what will happen during handshake phase. */ #define NBD_FLAG_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */ -/* New-style client flags. */ +/* New-style client flags, sent from client to server to control what happens + during handshake phase. */ #define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */ /* Reply types. */ @@ -61,10 +67,10 @@ struct nbd_reply { #define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */ #define NBD_REP_ERR_TLS_REQD((UINT32_C(1) << 31) | 5) /* TLS required */ +/* Request flags, sent from client to server during transmission phase */ +#define NBD_CMD_FLAG_FUA(1 << 0) -#define NBD_CMD_MASK_COMMAND 0x -#define NBD_CMD_FLAG_FUA (1 << 16) - +/* Supported request types */ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, diff --git a/nbd/client.c b/nbd/client.c index f6db836..4620e8d 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori * * Network Block Device Client Side @@ -714,11 +715,13 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) TRACE("Sending request to server: " "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 - ", .type=%" PRIu32 " }", - request->from, request->len, request->handle, request->type); + ", .flags = %" PRIx16 ", .type = %" PRIu16 " }", + request->from, request->len, request->handle, + request->flags, request->type); stl_be_p(buf, NBD_REQUEST_MAGIC); -stl_be_p(buf + 4, request->type); +stw_be_p(buf + 4, request->flags); +stw_be_p(bu
[Qemu-devel] [PULL 02/30] nbd: Use CoQueue for free_sema instead of CoMutex
From: Changlong Xie NBD is using the CoMutex in a way that wasn't anticipated. For example, if there are N(N=26, MAX_NBD_REQUESTS=16) nbd write requests, so we will invoke nbd_client_co_pwritev N times. time request Actions 11 in_flight=1, Coroutine=C1 22 in_flight=2, Coroutine=C2 ... 15 15 in_flight=15, Coroutine=C15 16 16 in_flight=16, Coroutine=C16, free_sema->holder=C16, mutex->locked=true 17 17 in_flight=16, Coroutine=C17, queue C17 into free_sema->queue 18 18 in_flight=16, Coroutine=C18, queue C18 into free_sema->queue ... 26 N in_flight=16, Coroutine=C26, queue C26 into free_sema->queue Once nbd client recieves request No.16' reply, we will re-enter C16. It's ok, because it's equal to 'free_sema->holder'. time request Actions 27 16 in_flight=15, Coroutine=C16, free_sema->holder=C16, mutex->locked=false Then nbd_coroutine_end invokes qemu_co_mutex_unlock what will pop coroutines from free_sema->queue's head and enter C17. More free_sema->holder is C17 now. time request Actions 28 17 in_flight=16, Coroutine=C17, free_sema->holder=C17, mutex->locked=true In above scenario, we only recieves request No.16' reply. As time goes by, nbd client will almostly recieves replies from requests 1 to 15 rather than request 17 who owns C17. In this case, we will encounter assert "mutex->holder == self" failed since Kevin's commit 0e438cdc "coroutine: Let CoMutex remember who holds it". For example, if nbd client recieves request No.15' reply, qemu will stop unexpectedly: time request Actions 29 15(most case) in_flight=15, Coroutine=C15, free_sema->holder=C17, mutex->locked=false Per Paolo's suggestion "The simplest fix is to change it to CoQueue, which is like a condition variable", this patch replaces CoMutex with CoQueue. Cc: Wen Congyang Reported-by: zhanghailiang Suggested-by: Paolo Bonzini Signed-off-by: Changlong Xie Message-Id: <1476267508-19499-1-git-send-email-xiecl.f...@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- block/nbd-client.c | 8 block/nbd-client.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/block/nbd-client.c b/block/nbd-client.c index 2cf3237..40b28ab 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -199,8 +199,8 @@ static void nbd_coroutine_start(NbdClientSession *s, { /* Poor man semaphore. The free_sema is locked when no other request * can be accepted, and unlocked after receiving one reply. */ -if (s->in_flight >= MAX_NBD_REQUESTS - 1) { -qemu_co_mutex_lock(&s->free_sema); +if (s->in_flight == MAX_NBD_REQUESTS) { +qemu_co_queue_wait(&s->free_sema); assert(s->in_flight < MAX_NBD_REQUESTS); } s->in_flight++; @@ -214,7 +214,7 @@ static void nbd_coroutine_end(NbdClientSession *s, int i = HANDLE_TO_INDEX(s, request->handle); s->recv_coroutine[i] = NULL; if (s->in_flight-- == MAX_NBD_REQUESTS) { -qemu_co_mutex_unlock(&s->free_sema); +qemu_co_queue_next(&s->free_sema); } } @@ -386,7 +386,7 @@ int nbd_client_init(BlockDriverState *bs, } qemu_co_mutex_init(&client->send_mutex); -qemu_co_mutex_init(&client->free_sema); +qemu_co_queue_init(&client->free_sema); client->sioc = sioc; object_ref(OBJECT(client->sioc)); diff --git a/block/nbd-client.h b/block/nbd-client.h index 044aca4..307b8b1 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -24,7 +24,7 @@ typedef struct NbdClientSession { off_t size; CoMutex send_mutex; -CoMutex free_sema; +CoQueue free_sema; Coroutine *send_coroutine; int in_flight; -- 2.7.4
[Qemu-devel] [PULL 17/30] nbd: Let client skip portions of server reply
From: Eric Blake The server has a nice helper function nbd_negotiate_drop_sync() which lets it easily ignore fluff from the client (such as the payload to an unknown option request). We can't quite make it common, since it depends on nbd_negotiate_read() which handles coroutine magic, but we can copy the idea into the client where we have places where we want to ignore data (such as the description tacked on the end of NBD_REP_SERVER). Signed-off-by: Eric Blake Message-Id: <1476469998-28592-11-git-send-email-ebl...@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/client.c | 47 +-- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/nbd/client.c b/nbd/client.c index 651e08c..5d94e34 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -75,6 +75,32 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ +/* Discard length bytes from channel. Return -errno on failure, or + * the amount of bytes consumed. */ +static ssize_t drop_sync(QIOChannel *ioc, size_t size) +{ +ssize_t ret, dropped = size; +char small[1024]; +char *buffer; + +buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size)); +while (size > 0) { +ret = read_sync(ioc, buffer, MIN(65536, size)); +if (ret < 0) { +goto cleanup; +} +assert(ret <= size); +size -= ret; +} +ret = dropped; + + cleanup: +if (buffer != small) { +g_free(buffer); +} +return ret; +} + /* Send an option request. * * The request is for option @opt, with @data containing @len bytes of @@ -285,19 +311,12 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) } (*name)[namelen] = '\0'; len -= namelen; -if (len) { -char *buf = g_malloc(len + 1); -if (read_sync(ioc, buf, len) != len) { -error_setg(errp, "failed to read export description"); -g_free(*name); -g_free(buf); -*name = NULL; -nbd_send_opt_abort(ioc); -return -1; -} -buf[len] = '\0'; -TRACE("Ignoring export description: %s", buf); -g_free(buf); +if (drop_sync(ioc, len) != len) { +error_setg(errp, "failed to read export description"); +g_free(*name); +*name = NULL; +nbd_send_opt_abort(ioc); +return -1; } } else { error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", @@ -577,7 +596,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); -if (read_sync(ioc, &buf, 124) != 124) { +if (drop_sync(ioc, 124) != 124) { error_setg(errp, "Failed to read reserved block"); goto fail; } -- 2.7.4
[Qemu-devel] [PULL 29/30] docs/rcu.txt: Fix minor typo
From: Pranith Kumar s/presented/prevented/ Signed-off-by: Pranith Kumar Message-Id: <20161018050418.4912-1-bobby.pr...@gmail.com> Signed-off-by: Paolo Bonzini --- docs/rcu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/rcu.txt b/docs/rcu.txt index a70b72c..c84e7f4 100644 --- a/docs/rcu.txt +++ b/docs/rcu.txt @@ -145,7 +145,7 @@ The core RCU API is small: and then read from there. RCU read-side critical sections must use atomic_rcu_read() to -read data, unless concurrent writes are presented by another +read data, unless concurrent writes are prevented by another synchronization mechanism. Furthermore, RCU read-side critical sections should traverse the -- 2.7.4
[Qemu-devel] [PULL 01/30] checkpatch: tweak "struct should normally be const" warning
Avoid triggering on typedef struct BlockJobDriver BlockJobDriver; or struct BlockJobDriver { Cc: John Snow Signed-off-by: Paolo Bonzini --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3afa19a..1f1c9d3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2498,8 +2498,8 @@ sub process { VMStateDescription| VMStateInfo}x; if ($line !~ /\bconst\b/ && - $line =~ /\b($struct_ops)\b/) { - ERROR("struct $1 should normally be const\n" . + $line =~ /\b($struct_ops)\b.*=/) { + ERROR("initializer for struct $1 should normally be const\n" . $herecurr); } -- 2.7.4