[Qemu-devel] [PATCH kernel v4 7/7] virtio-balloon: tell host vm's unused page info

2016-11-01 Thread Liang Li
Support the request for vm's unused page information, response with
a page bitmap. QEMU can make use of this bitmap and the dirty page
logging mechanism to skip the transportation of some of these unused
pages, this is very helpful to reduce the network traffic and  speed
up the live migration process.

Signed-off-by: Liang Li 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 drivers/virtio/virtio_balloon.c | 128 +---
 1 file changed, 121 insertions(+), 7 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c6c94b6..ba2d37b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -56,7 +56,7 @@
 
 struct virtio_balloon {
struct virtio_device *vdev;
-   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *req_vq;
 
/* The balloon servicing is delegated to a freezable workqueue. */
struct work_struct update_balloon_stats_work;
@@ -83,6 +83,8 @@ struct virtio_balloon {
unsigned int nr_page_bmap;
/* Used to record the processed pfn range */
unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
+   /* Request header */
+   struct virtio_balloon_req_hdr req_hdr;
/*
 * The pages we've told the Host we're not using are enqueued
 * at vb_dev_info->pages list.
@@ -552,6 +554,63 @@ static void update_balloon_stats(struct virtio_balloon *vb)
pages_to_bytes(available));
 }
 
+static void send_unused_pages_info(struct virtio_balloon *vb,
+   unsigned long req_id)
+{
+   struct scatterlist sg_in;
+   unsigned long pfn = 0, bmap_len, pfn_limit, last_pfn, nr_pfn;
+   struct virtqueue *vq = vb->req_vq;
+   struct virtio_balloon_resp_hdr *hdr = vb->resp_hdr;
+   int ret = 1, used_nr_bmap = 0, i;
+
+   if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP) &&
+   vb->nr_page_bmap == 1)
+   extend_page_bitmap(vb);
+
+   pfn_limit = PFNS_PER_BMAP * vb->nr_page_bmap;
+   mutex_lock(&vb->balloon_lock);
+   last_pfn = get_max_pfn();
+
+   while (ret) {
+   clear_page_bitmap(vb);
+   ret = get_unused_pages(pfn, pfn + pfn_limit, vb->page_bitmap,
+PFNS_PER_BMAP, vb->nr_page_bmap);
+   if (ret < 0)
+   break;
+   hdr->cmd = BALLOON_GET_UNUSED_PAGES;
+   hdr->id = req_id;
+   bmap_len = BALLOON_BMAP_SIZE * vb->nr_page_bmap;
+
+   if (!ret) {
+   hdr->flag = BALLOON_FLAG_DONE;
+   nr_pfn = last_pfn - pfn;
+   used_nr_bmap = nr_pfn / PFNS_PER_BMAP;
+   if (nr_pfn % PFNS_PER_BMAP)
+   used_nr_bmap++;
+   bmap_len = nr_pfn / BITS_PER_BYTE;
+   } else {
+   hdr->flag = BALLOON_FLAG_CONT;
+   used_nr_bmap = vb->nr_page_bmap;
+   }
+   for (i = 0; i < used_nr_bmap; i++) {
+   unsigned int bmap_size = BALLOON_BMAP_SIZE;
+
+   if (i + 1 == used_nr_bmap)
+   bmap_size = bmap_len - BALLOON_BMAP_SIZE * i;
+   set_bulk_pages(vb, vq, pfn + i * PFNS_PER_BMAP,
+vb->page_bitmap[i], bmap_size, true);
+   }
+   if (vb->resp_pos > 0)
+   send_resp_data(vb, vq, true);
+   pfn += pfn_limit;
+   }
+
+   mutex_unlock(&vb->balloon_lock);
+   sg_init_one(&sg_in, &vb->req_hdr, sizeof(vb->req_hdr));
+   virtqueue_add_inbuf(vq, &sg_in, 1, &vb->req_hdr, GFP_KERNEL);
+   virtqueue_kick(vq);
+}
+
 /*
  * While most virtqueues communicate guest-initiated requests to the 
hypervisor,
  * the stats queue operates in reverse.  The driver initializes the virtqueue
@@ -686,18 +745,56 @@ static void update_balloon_size_func(struct work_struct 
*work)
queue_work(system_freezable_wq, work);
 }
 
+static void misc_handle_rq(struct virtio_balloon *vb)
+{
+   struct virtio_balloon_req_hdr *ptr_hdr;
+   unsigned int len;
+
+   ptr_hdr = virtqueue_get_buf(vb->req_vq, &len);
+   if (!ptr_hdr || len != sizeof(vb->req_hdr))
+   return;
+
+   switch (ptr_hdr->cmd) {
+   case BALLOON_GET_UNUSED_PAGES:
+   send_unused_pages_info(vb, ptr_hdr->param);
+   break;
+   default:
+   break;
+   }
+}
+
+static void misc_request(struct virtqueue *vq)
+{
+   struct virtio_balloon *vb = vq->vdev->priv;
+
+   misc_handle_rq(vb);
+}
+
 static int init_vqs(struct virtio_balloon *vb)
 {
-   struct virtqueue *vqs[3];
-   vq_callback_t *callback

[Qemu-devel] [PATCH kernel v4 6/7] virtio-balloon: define flags and head for host request vq

2016-11-01 Thread Liang Li
Define the flags and head struct for a new host request virtual
queue. Guest can get requests from host and then responds to them on
this new virtual queue.
Host can make use of this virtual queue to request the guest do some
operations, e.g. drop page cache, synchronize file system, etc.
And the hypervisor can get some of guest's runtime information
through this virtual queue too, e.g. the guest's unused page
information, which can be used for live migration optimization.

Signed-off-by: Liang Li 
Cc: Andrew Morton 
Cc: Mel Gorman 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 include/uapi/linux/virtio_balloon.h | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/include/uapi/linux/virtio_balloon.h 
b/include/uapi/linux/virtio_balloon.h
index bed6f41..c4e34d0 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -35,6 +35,7 @@
 #define VIRTIO_BALLOON_F_STATS_VQ  1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_PAGE_BITMAP   3 /* Send page info with bitmap */
+#define VIRTIO_BALLOON_F_HOST_REQ_VQ   4 /* Host request virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -101,4 +102,25 @@ struct virtio_balloon_bmap_hdr {
__le64 bmap[0];
 };
 
+enum virtio_balloon_req_id {
+   /* Get unused page information */
+   BALLOON_GET_UNUSED_PAGES,
+};
+
+enum virtio_balloon_flag {
+   /* Have more data for a request */
+   BALLOON_FLAG_CONT,
+   /* No more data for a request */
+   BALLOON_FLAG_DONE,
+};
+
+struct virtio_balloon_req_hdr {
+   /* Used to distinguish different requests */
+   __le16 cmd;
+   /* Reserved */
+   __le16 reserved[3];
+   /* Request parameter */
+   __le64 param;
+};
+
 #endif /* _LINUX_VIRTIO_BALLOON_H */
-- 
1.8.3.1




[Qemu-devel] [PATCH kernel v4 5/7] mm: add the related functions to get unused page

2016-11-01 Thread Liang Li
Save the unused page info into a split page bitmap. The virtio
balloon driver will use this new API to get the unused page bitmap
and send the bitmap to hypervisor(QEMU) to speed up live migration.
During sending the bitmap, some the pages may be modified and are
no free anymore, this inaccuracy can be corrected by the dirty
page logging mechanism.

Signed-off-by: Liang Li 
Cc: Andrew Morton 
Cc: Mel Gorman 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 include/linux/mm.h |  2 ++
 mm/page_alloc.c| 85 ++
 2 files changed, 87 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f47862a..7014d8a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1773,6 +1773,8 @@ extern void free_area_init_node(int nid, unsigned long * 
zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
 extern unsigned long get_max_pfn(void);
+extern int get_unused_pages(unsigned long start_pfn, unsigned long end_pfn,
+   unsigned long *bitmap[], unsigned long len, unsigned int nr_bmap);
 
 /*
  * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 12cc8ed..72537cc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4438,6 +4438,91 @@ unsigned long get_max_pfn(void)
 }
 EXPORT_SYMBOL(get_max_pfn);
 
+static void mark_unused_pages_bitmap(struct zone *zone,
+   unsigned long start_pfn, unsigned long end_pfn,
+   unsigned long *bitmap[], unsigned long bits,
+   unsigned int nr_bmap)
+{
+   unsigned long pfn, flags, nr_pg, pos, *bmap;
+   unsigned int order, i, t, bmap_idx;
+   struct list_head *curr;
+
+   if (zone_is_empty(zone))
+   return;
+
+   end_pfn = min(start_pfn + nr_bmap * bits, end_pfn);
+   spin_lock_irqsave(&zone->lock, flags);
+
+   for_each_migratetype_order(order, t) {
+   list_for_each(curr, &zone->free_area[order].free_list[t]) {
+   pfn = page_to_pfn(list_entry(curr, struct page, lru));
+   if (pfn < start_pfn || pfn >= end_pfn)
+   continue;
+   nr_pg = 1UL << order;
+   if (pfn + nr_pg > end_pfn)
+   nr_pg = end_pfn - pfn;
+   bmap_idx = (pfn - start_pfn) / bits;
+   if (bmap_idx == (pfn + nr_pg - start_pfn) / bits) {
+   bmap = bitmap[bmap_idx];
+   pos = (pfn - start_pfn) % bits;
+   bitmap_set(bmap, pos, nr_pg);
+   } else
+   for (i = 0; i < nr_pg; i++) {
+   pos = pfn - start_pfn + i;
+   bmap_idx = pos / bits;
+   bmap = bitmap[bmap_idx];
+   pos = pos % bits;
+   bitmap_set(bmap, pos, 1);
+   }
+   }
+   }
+
+   spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * During live migration, page is always discardable unless it's
+ * content is needed by the system.
+ * get_unused_pages provides an API to get the unused pages, these
+ * unused pages can be discarded if there is no modification since
+ * the request. Some other mechanism, like the dirty page logging
+ * can be used to track the modification.
+ *
+ * This function scans the free page list to get the unused pages
+ * whose pfn are range from start_pfn to end_pfn, and set the
+ * corresponding bit in the bitmap if an unused page is found.
+ *
+ * Allocating a large bitmap may fail because of fragmentation,
+ * instead of using a single bitmap, we use a scatter/gather bitmap.
+ * The 'bitmap' is the start address of an array which contains
+ * 'nr_bmap' separate small bitmaps, each bitmap contains 'bits' bits.
+ *
+ * return -1 if parameters are invalid
+ * return 0 when end_pfn >= max_pfn
+ * return 1 when end_pfn < max_pfn
+ */
+int get_unused_pages(unsigned long start_pfn, unsigned long end_pfn,
+   unsigned long *bitmap[], unsigned long bits, unsigned int nr_bmap)
+{
+   struct zone *zone;
+   int ret = 0;
+
+   if (bitmap == NULL || *bitmap == NULL || nr_bmap == 0 ||
+bits == 0 || start_pfn > end_pfn)
+   return -1;
+   if (end_pfn < max_pfn)
+   ret = 1;
+   if (end_pfn >= max_pfn)
+   ret = 0;
+
+   for_each_populated_zone(zone)
+   mark_unused_pages_bitmap(zone, start_pfn, end_pfn, bitmap,
+bits, nr_bmap);
+
+   return ret;
+}
+EXPORT_SYMBOL(get_unused_pages);
+
 static void zoneref_set_zone(struct zone *zone, struct zon

[Qemu-devel] [PATCH kernel v4 4/7] virtio-balloon: speed up inflate/deflate process

2016-11-01 Thread Liang Li
The implementation of the current virtio-balloon is not very
efficient, the time spends on different stages of inflating
the balloon to 7GB of a 8GB idle guest:

a. allocating pages (6.5%)
b. sending PFNs to host (68.3%)
c. address translation (6.1%)
d. madvise (19%)

It takes about 4126ms for the inflating process to complete.
Debugging shows that the bottle neck are the stage b and stage d.

If using a bitmap to send the page info instead of the PFNs, we
can reduce the overhead in stage b quite a lot. Furthermore, we
can do the address translation and call madvise() with a bulk of
RAM pages, instead of the current page per page way, the overhead
of stage c and stage d can also be reduced a lot.

This patch is the kernel side implementation which is intended to
speed up the inflating & deflating process by adding a new feature
to the virtio-balloon device. With this new feature, inflating the
balloon to 7GB of a 8GB idle guest only takes 590ms, the
performance improvement is about 85%.

TODO: optimize stage a by allocating/freeing a chunk of pages
instead of a single page at a time.

Signed-off-by: Liang Li 
Suggested-by: Michael S. Tsirkin 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 drivers/virtio/virtio_balloon.c | 398 +---
 1 file changed, 369 insertions(+), 29 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 59ffe5a..c6c94b6 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -42,6 +42,10 @@
 #define OOM_VBALLOON_DEFAULT_PAGES 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define BALLOON_BMAP_SIZE  (8 * PAGE_SIZE)
+#define PFNS_PER_BMAP  (BALLOON_BMAP_SIZE * BITS_PER_BYTE)
+#define BALLOON_BMAP_COUNT 32
+
 static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
@@ -67,6 +71,18 @@ struct virtio_balloon {
 
/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
+   /* Pointer to the response header. */
+   void *resp_hdr;
+   /* Pointer to the start address of response data. */
+   unsigned long *resp_data;
+   /* Pointer offset of the response data. */
+   unsigned long resp_pos;
+   /* Bitmap and bitmap count used to tell the host the pages */
+   unsigned long *page_bitmap[BALLOON_BMAP_COUNT];
+   /* Number of split page bitmaps */
+   unsigned int nr_page_bmap;
+   /* Used to record the processed pfn range */
+   unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
/*
 * The pages we've told the Host we're not using are enqueued
 * at vb_dev_info->pages list.
@@ -110,20 +126,227 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(&vb->acked);
 }
 
-static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+static inline void init_bmap_pfn_range(struct virtio_balloon *vb)
 {
-   struct scatterlist sg;
+   vb->min_pfn = ULONG_MAX;
+   vb->max_pfn = 0;
+}
+
+static inline void update_bmap_pfn_range(struct virtio_balloon *vb,
+struct page *page)
+{
+   unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+   vb->min_pfn = min(balloon_pfn, vb->min_pfn);
+   vb->max_pfn = max(balloon_pfn, vb->max_pfn);
+}
+
+static void extend_page_bitmap(struct virtio_balloon *vb)
+{
+   int i, bmap_count;
+   unsigned long bmap_len;
+
+   bmap_len = ALIGN(get_max_pfn(), BITS_PER_LONG) / BITS_PER_BYTE;
+   bmap_len = ALIGN(bmap_len, BALLOON_BMAP_SIZE);
+   bmap_count = min((int)(bmap_len / BALLOON_BMAP_SIZE),
+BALLOON_BMAP_COUNT);
+
+   for (i = 1; i < bmap_count; i++) {
+   vb->page_bitmap[i] = kmalloc(BALLOON_BMAP_SIZE, GFP_KERNEL);
+   if (vb->page_bitmap[i])
+   vb->nr_page_bmap++;
+   else
+   break;
+   }
+}
+
+static void free_extended_page_bitmap(struct virtio_balloon *vb)
+{
+   int i, bmap_count = vb->nr_page_bmap;
+
+
+   for (i = 1; i < bmap_count; i++) {
+   kfree(vb->page_bitmap[i]);
+   vb->page_bitmap[i] = NULL;
+   vb->nr_page_bmap--;
+   }
+}
+
+static void kfree_page_bitmap(struct virtio_balloon *vb)
+{
+   int i;
+
+   for (i = 0; i < vb->nr_page_bmap; i++)
+   kfree(vb->page_bitmap[i]);
+}
+
+static void clear_page_bitmap(struct virtio_balloon *vb)
+{
+   int i;
+
+   for (i = 0; i < vb->nr_page_bmap; i++)
+   memset(vb->page_bitmap[i], 0, BALLOON_BMAP_SIZE);
+}
+
+static unsigned long do_set_resp_bitmap(struct virtio_balloon *vb,
+   unsigned long *bitmap,  unsigned long base_pfn,
+   unsigned long pos, int nr_page)
+
+{
+   struct virtio_balloon_bmap_hdr *hdr;
+   uns

[Qemu-devel] [PATCH kernel v4 3/7] mm: add a function to get the max pfn

2016-11-01 Thread Liang Li
Expose the function to get the max pfn, so it can be used in the
virtio-balloon device driver. Simply include the 'linux/bootmem.h'
is not enough, if the device driver is built to a module, directly
refer the max_pfn lead to build failed.

Signed-off-by: Liang Li 
Cc: Andrew Morton 
Cc: Mel Gorman 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 include/linux/mm.h |  1 +
 mm/page_alloc.c| 10 ++
 2 files changed, 11 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a92c8d7..f47862a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1772,6 +1772,7 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, 
pmd_t *pmd)
 extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
+extern unsigned long get_max_pfn(void);
 
 /*
  * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8fd42aa..12cc8ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4428,6 +4428,16 @@ void show_free_areas(unsigned int filter)
show_swap_cache_info();
 }
 
+/*
+ * The max_pfn can change because of memory hot plug, so it's only good
+ * as a hint. e.g. for sizing data structures.
+ */
+unsigned long get_max_pfn(void)
+{
+   return max_pfn;
+}
+EXPORT_SYMBOL(get_max_pfn);
+
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
zoneref->zone = zone;
-- 
1.8.3.1




[Qemu-devel] [PATCH kernel v4 2/7] virtio-balloon: define new feature bit and head struct

2016-11-01 Thread Liang Li
Add a new feature which supports sending the page information with
a bitmap. The current implementation uses PFNs array, which is not
very efficient. Using bitmap can improve the performance of
inflating/deflating significantly

The page bitmap header will used to tell the host some information
about the page bitmap. e.g. the page size, page bitmap length and
start pfn.

Signed-off-by: Liang Li 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 include/uapi/linux/virtio_balloon.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/uapi/linux/virtio_balloon.h 
b/include/uapi/linux/virtio_balloon.h
index 343d7dd..bed6f41 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,6 +34,7 @@
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST0 /* Tell before reclaiming 
pages */
 #define VIRTIO_BALLOON_F_STATS_VQ  1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_PAGE_BITMAP   3 /* Send page info with bitmap */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -82,4 +83,22 @@ struct virtio_balloon_stat {
__virtio64 val;
 } __attribute__((packed));
 
+/* Response header structure */
+struct virtio_balloon_resp_hdr {
+   __le64 cmd : 8; /* Distinguish different requests type */
+   __le64 flag: 8; /* Mark status for a specific request type */
+   __le64 id : 16; /* Distinguish requests of a specific type */
+   __le64 data_len: 32; /* Length of the following data, in bytes */
+};
+
+/* Page bitmap header structure */
+struct virtio_balloon_bmap_hdr {
+   struct {
+   __le64 start_pfn : 52; /* start pfn for the bitmap */
+   __le64 page_shift : 6; /* page shift width, in bytes */
+   __le64 bmap_len : 6;  /* bitmap length, in bytes */
+   } head;
+   __le64 bmap[0];
+};
+
 #endif /* _LINUX_VIRTIO_BALLOON_H */
-- 
1.8.3.1




[Qemu-devel] [PATCH kernel v4 0/7] Extend virtio-balloon for fast (de)inflating & fast live migration

2016-11-01 Thread Liang Li
This patch set contains two parts of changes to the virtio-balloon.
 
One is the change for speeding up the inflating & deflating process,
the main idea of this optimization is to use bitmap to send the page
information to host instead of the PFNs, to reduce the overhead of
virtio data transmission, address translation and madvise(). This can
help to improve the performance by about 85%.
 
Another change is for speeding up live migration. By skipping process
guest's unused pages in the first round of data copy, to reduce needless
data processing, this can help to save quite a lot of CPU cycles and
network bandwidth. We put guest's unused page information in a bitmap
and send it to host with the virt queue of virtio-balloon. For an idle
guest with 8GB RAM, this can help to shorten the total live migration
time from 2Sec to about 500ms in 10Gbps network environment.
 
Changes from v3 to v4:
* Use the new scheme suggested by Dave Hansen to encode the bitmap.
* Add code which is missed in v3 to handle migrate page. 
* Free the memory for bitmap intime once the operation is done.
* Address some of the comments in v3.

Changes from v2 to v3:
* Change the name of 'free page' to 'unused page'.
* Use the scatter & gather bitmap instead of a 1MB page bitmap.
* Fix overwriting the page bitmap after kicking.
* Some of MST's comments for v2.
 
Changes from v1 to v2:
* Abandon the patch for dropping page cache.
* Put some structures to uapi head file.
* Use a new way to determine the page bitmap size.
* Use a unified way to send the free page information with the bitmap
* Address the issues referred in MST's comments

Liang Li (7):
  virtio-balloon: rework deflate to add page to a list
  virtio-balloon: define new feature bit and head struct
  mm: add a function to get the max pfn
  virtio-balloon: speed up inflate/deflate process
  mm: add the related functions to get unused page
  virtio-balloon: define flags and head for host request vq
  virtio-balloon: tell host vm's unused page info

 drivers/virtio/virtio_balloon.c | 546 
 include/linux/mm.h  |   3 +
 include/uapi/linux/virtio_balloon.h |  41 +++
 mm/page_alloc.c |  95 +++
 4 files changed, 636 insertions(+), 49 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH kernel v4 1/7] virtio-balloon: rework deflate to add page to a list

2016-11-01 Thread Liang Li
When doing the inflating/deflating operation, the current virtio-balloon
implementation uses an array to save 256 PFNS, then send these PFNS to
host through virtio and process each PFN one by one. This way is not
efficient when inflating/deflating a large mount of memory because too
many times of the following operations:

1. Virtio data transmission
2. Page allocate/free
3. Address translation(GPA->HVA)
4. madvise

The over head of these operations will consume a lot of CPU cycles and
will take a long time to complete, it may impact the QoS of the guest as
well as the host. The overhead will be reduced a lot if batch processing
is used. E.g. If there are several pages whose address are physical
contiguous in the guest, these bulk pages can be processed in one
operation.

The main idea for the optimization is to reduce the above operations as
much as possible. And it can be achieved by using a bitmap instead of an
PFN array. Comparing with PFN array, for a specific size buffer, bitmap
can present more pages, which is very important for batch processing.

Using bitmap instead of PFN is not very helpful when inflating/deflating
a small mount of pages, in this case, using PFNs is better. But using
bitmap will not impact the QoS of guest or host heavily because the
operation will be completed very soon for a small mount of pages, and we
will use some methods to make sure the efficiency not drop too much.

This patch saves the deflated pages to a list instead of the PFN array,
which will allow faster notifications using a bitmap down the road.
balloon_pfn_to_page() can be removed because it's useless.

Signed-off-by: Liang Li 
Signed-off-by: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: Cornelia Huck 
Cc: Amit Shah 
Cc: Dave Hansen 
---
 drivers/virtio/virtio_balloon.c | 22 --
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 4e7003d..59ffe5a 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -103,12 +103,6 @@ static u32 page_to_balloon_pfn(struct page *page)
return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
 }
 
-static struct page *balloon_pfn_to_page(u32 pfn)
-{
-   BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE);
-   return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE);
-}
-
 static void balloon_ack(struct virtqueue *vq)
 {
struct virtio_balloon *vb = vq->vdev->priv;
@@ -181,18 +175,16 @@ static unsigned fill_balloon(struct virtio_balloon *vb, 
size_t num)
return num_allocated_pages;
 }
 
-static void release_pages_balloon(struct virtio_balloon *vb)
+static void release_pages_balloon(struct virtio_balloon *vb,
+struct list_head *pages)
 {
-   unsigned int i;
-   struct page *page;
+   struct page *page, *next;
 
-   /* Find pfns pointing at start of each page, get pages and free them. */
-   for (i = 0; i < vb->num_pfns; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
-   page = balloon_pfn_to_page(virtio32_to_cpu(vb->vdev,
-  vb->pfns[i]));
+   list_for_each_entry_safe(page, next, pages, lru) {
if (!virtio_has_feature(vb->vdev,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
adjust_managed_page_count(page, 1);
+   list_del(&page->lru);
put_page(page); /* balloon reference */
}
 }
@@ -202,6 +194,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, 
size_t num)
unsigned num_freed_pages;
struct page *page;
struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
+   LIST_HEAD(pages);
 
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));
@@ -215,6 +208,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, 
size_t num)
if (!page)
break;
set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+   list_add(&page->lru, &pages);
vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
}
 
@@ -226,7 +220,7 @@ static unsigned leak_balloon(struct virtio_balloon *vb, 
size_t num)
 */
if (vb->num_pfns != 0)
tell_host(vb, vb->deflate_vq);
-   release_pages_balloon(vb);
+   release_pages_balloon(vb, &pages);
mutex_unlock(&vb->balloon_lock);
return num_freed_pages;
 }
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH v1] vhost-pci-net: design vhost-pci-net for the transmission of network packets between VMs

2016-11-01 Thread Wang, Wei W
On Monday, October 24, 2016 3:09 PM, Wei Wang wrote:
> To: virtio-comm...@lists.oasis-open.org; qemu-devel@nongnu.org;
> m...@redhat.com; marcandre.lur...@gmail.com; stefa...@redhat.com;
> pbonz...@redhat.com
> Cc: Wang, Wei W 
> Subject: [PATCH v1] vhost-pci-net: design vhost-pci-net for the transmission 
> of
> network packets between VMs


> +\subsection{Device configuration layout}\label{sec:Device Types /
> +Vhost-pci Device / Device configuration layout}
> +  None currently defined.

I thought about it more - I think it would be better to define a device 
specific config field, "rxq_num " , so that the driver can allocate and 
initialize the num of rx virtqs in the initialization (i.e. the probe() )  when 
the controlq pair hasn't been ready to be used for the iteration between the 
device and driver. 

Best,
Wei



Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features

2016-11-01 Thread Michael S. Tsirkin
On Tue, Nov 01, 2016 at 03:22:01PM +, Peter Maydell wrote:
> On 30 October 2016 at 21:23, Michael S. Tsirkin  wrote:
> > The following changes since commit 5b2ecabaeabc17f032197246c4846b9ba95ba8a6:
> >
> >   Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20161028-1' 
> > into staging (2016-10-28 17:59:04 +0100)
> >
> > are available in the git repository at:
> >
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> >
> > for you to fetch changes up to f082ec0225bd15c71e0b4697d2df3af7bad65d7f:
> >
> >   acpi: fix assert failure caused by commit 35c5a52d (2016-10-30 20:06:25 
> > +0200)
> >
> > 
> > virtio, pc: fixes and features
> >
> > nvdimm hotplug support
> > virtio migration and ioeventfd rework
> > virtio crypto device
> > ipmi fixes
> >
> > Signed-off-by: Michael S. Tsirkin 
> >
> 
> Hi; this fails to build on OSX with format string issues:
> 
> /Users/pm215/src/qemu-for-merges/hw/virtio/virtio-crypto.c:770:20:
> error: format specifies type 'unsign
> ed short' but the argument has type 'uint32_t' (aka 'unsigned int')
> [-Werror,-Wformat]
>vcrypto->max_queues, VIRTIO_QUEUE_MAX);
> ~~~^
> /Users/pm215/src/qemu-for-merges/include/qapi/error.h:163:35: note:
> expanded from macro 'error_setg'
> (fmt), ## __VA_ARGS__)
>   ^
> 
> Fun fact: in struct vhost_dev, max_queues is a uint64_t;
> in struct VirtIONet it is a uint16_t; and in VirtIOCrypto
> it is a uint32_t...
> 
> thanks
> -- PMM

Just to make sure : I fixed that and pushed to
same tag. I don't think it makes sense to repost the pull
request - pls take it from
   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

-- 
MST



Re: [Qemu-devel] [PATCH v9 04/12] vfio iommu: Add support for mediated devices

2016-11-01 Thread Alexey Kardashevskiy
On 02/11/16 14:29, Kirti Wankhede wrote:
> 
> 
> On 11/2/2016 6:54 AM, Alexey Kardashevskiy wrote:
>> On 02/11/16 01:01, Kirti Wankhede wrote:
>>>
>>>
>>> On 10/28/2016 7:48 AM, Alexey Kardashevskiy wrote:
 On 27/10/16 23:31, Kirti Wankhede wrote:
>
>
> On 10/27/2016 12:50 PM, Alexey Kardashevskiy wrote:
>> On 18/10/16 08:22, Kirti Wankhede wrote:
>>> VFIO IOMMU drivers are designed for the devices which are IOMMU capable.
>>> Mediated device only uses IOMMU APIs, the underlying hardware can be
>>> managed by an IOMMU domain.
>>>
>>> Aim of this change is:
>>> - To use most of the code of TYPE1 IOMMU driver for mediated devices
>>> - To support direct assigned device and mediated device in single module
>>>
>>> Added two new callback functions to struct vfio_iommu_driver_ops. 
>>> Backend
>>> IOMMU module that supports pining and unpinning pages for mdev devices
>>> should provide these functions.
>>> Added APIs for pining and unpining pages to VFIO module. These calls 
>>> back
>>> into backend iommu module to actually pin and unpin pages.
>>>
>>> This change adds pin and unpin support for mediated device to TYPE1 
>>> IOMMU
>>> backend module. More details:
>>> - When iommu_group of mediated devices is attached, task structure is
>>>   cached which is used later to pin pages and page accounting.
>>
>>
>> For SPAPR TCE IOMMU driver, I ended up caching mm_struct with
>> atomic_inc(&container->mm->mm_count) (patches are on the way) instead of
>> using @current or task as the process might be gone while VFIO container 
>> is
>> still alive and @mm might be needed to do proper cleanup; this might not 
>> be
>> an issue with this patchset now but still you seem to only use @mm from
>> task_struct.
>>
>
> Consider the example of QEMU process which creates VFIO container, QEMU
> in its teardown path would release the container. How could container be
> alive when process is gone?

 do_exit() in kernel/exit.c calls exit_mm() (which sets NULL to tsk->mm)
 first, and then releases open files by calling  exit_files(). So
 container's release() does not have current->mm.

>>>
>>> Incrementing usage count (get_task_struct()) while saving task structure
>>> and decementing it (put_task_struct()) from release() should  work here.
>>> Updating the patch.
>>
>> I cannot see how the task->usage counter prevents do_exit() from performing
>> the exit, can you?
>>
> 
> It will not prevent exit from do_exit(), but that will make sure that we
> don't have stale pointer of task structure. Then we can check whether
> the task is alive and get mm pointer in teardown path as below:


Or you could just reference and use @mm as KVM and others do. Or there is
anything else you need from @current than just @mm?


> 
> {
> struct task_struct *task = domain->external_addr_space->task;
> struct mm_struct *mm = NULL;
> 
> put_pfn(pfn, prot);
> 
> if (pid_alive(task))
> mm = get_task_mm(task);
> 
> if (mm) {
> if (do_accounting)
> vfio_lock_acct(task, -1);
> 
> mmput(mm);
> }
> }



-- 
Alexey



Re: [Qemu-devel] [PATCH v9 04/12] vfio iommu: Add support for mediated devices

2016-11-01 Thread Kirti Wankhede


On 11/2/2016 6:54 AM, Alexey Kardashevskiy wrote:
> On 02/11/16 01:01, Kirti Wankhede wrote:
>>
>>
>> On 10/28/2016 7:48 AM, Alexey Kardashevskiy wrote:
>>> On 27/10/16 23:31, Kirti Wankhede wrote:


 On 10/27/2016 12:50 PM, Alexey Kardashevskiy wrote:
> On 18/10/16 08:22, Kirti Wankhede wrote:
>> VFIO IOMMU drivers are designed for the devices which are IOMMU capable.
>> Mediated device only uses IOMMU APIs, the underlying hardware can be
>> managed by an IOMMU domain.
>>
>> Aim of this change is:
>> - To use most of the code of TYPE1 IOMMU driver for mediated devices
>> - To support direct assigned device and mediated device in single module
>>
>> Added two new callback functions to struct vfio_iommu_driver_ops. Backend
>> IOMMU module that supports pining and unpinning pages for mdev devices
>> should provide these functions.
>> Added APIs for pining and unpining pages to VFIO module. These calls back
>> into backend iommu module to actually pin and unpin pages.
>>
>> This change adds pin and unpin support for mediated device to TYPE1 IOMMU
>> backend module. More details:
>> - When iommu_group of mediated devices is attached, task structure is
>>   cached which is used later to pin pages and page accounting.
>
>
> For SPAPR TCE IOMMU driver, I ended up caching mm_struct with
> atomic_inc(&container->mm->mm_count) (patches are on the way) instead of
> using @current or task as the process might be gone while VFIO container 
> is
> still alive and @mm might be needed to do proper cleanup; this might not 
> be
> an issue with this patchset now but still you seem to only use @mm from
> task_struct.
>

 Consider the example of QEMU process which creates VFIO container, QEMU
 in its teardown path would release the container. How could container be
 alive when process is gone?
>>>
>>> do_exit() in kernel/exit.c calls exit_mm() (which sets NULL to tsk->mm)
>>> first, and then releases open files by calling  exit_files(). So
>>> container's release() does not have current->mm.
>>>
>>
>> Incrementing usage count (get_task_struct()) while saving task structure
>> and decementing it (put_task_struct()) from release() should  work here.
>> Updating the patch.
> 
> I cannot see how the task->usage counter prevents do_exit() from performing
> the exit, can you?
> 

It will not prevent exit from do_exit(), but that will make sure that we
don't have stale pointer of task structure. Then we can check whether
the task is alive and get mm pointer in teardown path as below:

{
struct task_struct *task = domain->external_addr_space->task;
struct mm_struct *mm = NULL;

put_pfn(pfn, prot);

if (pid_alive(task))
mm = get_task_mm(task);

if (mm) {
if (do_accounting)
vfio_lock_acct(task, -1);

mmput(mm);
}
}

Thanks,
Kirti



Re: [Qemu-devel] How to print value of float instruction argument

2016-11-01 Thread Programmingkid

On Oct 31, 2016, at 4:39 AM, Peter Maydell wrote:

> On 31 October 2016 at 03:13, Programmingkid  wrote:
>> I'm trying to print the value of the arguments sent to
>> gen_fmadds() in target-ppc/translate/fp-impl.inc.c. How
>> do I do this? I have tried printf("cpu_fpr[rA(ctx->opcode)] = %d\n",
>> cpu_fpr[rA(ctx->opcode)]), but that always prints the same
>> value of 138 even if I change the values sent to fmadds.
> 
> At translate time we do not know the values in registers:
> those are only available at runtime. The cpu_fpr[] values
> are TCGv_i64 which are opaque datatypes (and just track
> that FP register 5 is a unique thing that's not the same
> as FP register 6, and so on).
> 
> For runtime information you need to turn on the -d
> tracing with suitable flags for what you care about.
> Where generated code calls a helper function you can
> put a breakpoint or tracing in the helper to show
> the arguments it prints, because the helper is called
> at runtime, not translate time.
> 
> thanks
> -- PMM

Thank you very much for your help. I guess I should explain what I want. The 
C99 standard defines functions that are IEEE 754 complaint. I'm hoping to 
replace QEMU's fmadd implementation (for the PowerPC) with the fma() function. 
The fma() function does the exact same thing that fmadd does. I just don't know 
where exactly I can access the values being sent to the emulated fmadd 
instruction. Would you have a suggestion as to where I can place the fma() 
function?


Re: [Qemu-devel] [PATCH 1/1] net: skip virtio-net config of deleted nic's peers

2016-11-01 Thread Jason Wang



On 2016年11月01日 06:42, Michael S. Tsirkin wrote:

On Tue, Nov 01, 2016 at 12:01:17AM +0200, yuri.benditov...@daynix.com wrote:

From: Yuri Benditovich 

https://bugzilla.redhat.com/show_bug.cgi?id=1373816
qemu core dump happens during repetitive unpug-plug
with multiple queues and Windows RSS-capable guest.
If back-end delete requested during virtio-net device
initialization, driver still can try configure the device
for multiple queues. The virtio-net device is expected
to be removed as soon as the initialization is done.

Signed-off-by: Yuri Benditovich 

Reviewed-by: Michael S. Tsirkin 


Applied to -net, thanks.




---
  hw/net/virtio-net.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 06bfe4b..77a4fae 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -508,6 +508,10 @@ static void virtio_net_set_queues(VirtIONet *n)
  int i;
  int r;
  
+if (n->nic->peer_deleted) {

+return;
+}
+
  for (i = 0; i < n->max_queues; i++) {
  if (i < n->curr_queues) {
  r = peer_attach(n, i);
--
1.9.1





Re: [Qemu-devel] [PATCH v2 3/3] hostmem-file: make option 'size' optional

2016-11-01 Thread Haozhong Zhang

On 10/31/16 16:18 -0200, Eduardo Habkost wrote:

On Fri, Oct 28, 2016 at 10:06:40AM +0800, Haozhong Zhang wrote:
[...]

> > diff --git a/exec.c b/exec.c
> > index 264a25f..89065bd 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -1234,7 +1234,7 @@ static int64_t get_file_size(int fd)
> >  }
> >
> >  static void *file_ram_alloc(RAMBlock *block,
> > -ram_addr_t memory,
> > +ram_addr_t *memory,
> >  const char *path,
> >  Error **errp)
> >  {
> > @@ -1245,6 +1245,7 @@ static void *file_ram_alloc(RAMBlock *block,
> >  void *area = MAP_FAILED;
> >  int fd = -1;
> >  int64_t file_size;
> > +ram_addr_t mem_size = *memory;
> >
> >  if (kvm_enabled() && !kvm_has_sync_mmu()) {
> >  error_setg(errp,
> > @@ -1309,21 +1310,27 @@ static void *file_ram_alloc(RAMBlock *block,
> >
> >  file_size = get_file_size(fd);
> >
> > -if (memory < block->page_size) {
> > +if (!mem_size && file_size > 0) {
> > +mem_size = file_size;
>
> Maybe we should set *memory here and not below?
>

Qemu currently sets the memory region size to the file size, and block
length to the aligned file size, so the code here can be changed as below:

   memory_region_set_size(block->mr, mem_size);
   mem_size = HOST_PAGE_ALIGN(mem_size);
   *memory = mem_size;

The second line is necessary because Qemu currently passes the aligned
file size to file_ram_alloc().


That would duplicate the existing HOST_PAGE_ALIGN logic from
qemu_ram_alloc_from_file(), won't it?

I believe that's yet another reason to check file size before
initializing the memory region, instead of initializing it first,
and fixing up its size later.



Agree. In the next version of this patch 3, I'll move file operations
(open/close/get size) from file_ram_alloc() to qemu_ram_alloc_from_file().

Thanks,
Haozhong



Re: [Qemu-devel] [PATCH] docs: Fix typos found by codespell

2016-11-01 Thread Zhang Chen



On 11/02/2016 01:52 AM, Eric Blake wrote:

On 11/01/2016 12:19 PM, Stefan Weil wrote:

Fix also some indefinite articles.

Signed-off-by: Stefan Weil 
---

I still don't understand the comment in docs/colo-proxy.txt.

Me neither.




Which part of the docs/colo-proxy.txt?
Perhaps I can explain it more clearly and make the doc better.


+++ b/docs/colo-proxy.txt
@@ -158,7 +158,7 @@ secondary.
  
  == Usage ==
  
-Here, we use demo ip and port discribe more clearly.

+Here, we use demo ip and port describe more clearly.
  Primary(ip:3.3.3.3):
  -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
  -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66

Maybe:

Here is an example using demonstration IP and port addresses to more
clearly describe the usage.

The remaining hunks are clear improvements, so:
Reviewed-by: Eric Blake 


It's easier to understand.

Reviewed-by: Zhang Chen 



--
Thanks
zhangchen






Re: [Qemu-devel] [PULL 08/27] hostmem-file: make option 'size' optional

2016-11-01 Thread Haozhong Zhang

On 11/01/16 12:16 -0200, Eduardo Habkost wrote:

On Tue, Nov 01, 2016 at 05:32:20PM +0800, Haozhong Zhang wrote:

On 10/31/16 20:22 -0200, Eduardo Habkost wrote:
> On Mon, Oct 31, 2016 at 03:47:53PM -0400, Paolo Bonzini wrote:
> >
> >
> > - Original Message -
> > > From: "Eduardo Habkost" 
> > > To: "Paolo Bonzini" 
> > > Cc: qemu-devel@nongnu.org, "Haozhong Zhang" 
> > > Sent: Monday, October 31, 2016 7:20:10 PM
> > > Subject: Re: [Qemu-devel] [PULL 08/27] hostmem-file: make option 'size' 
optional
> > >
> > > On Mon, Oct 31, 2016 at 03:37:24PM +0100, Paolo Bonzini wrote:
> > > [...]
> > > > @@ -1309,21 +1317,27 @@ static void *file_ram_alloc(RAMBlock *block,
> > > >
> > > >  file_size = get_file_size(fd);
> > > >
> > > > -if (memory < block->page_size) {
> > > > +if (!mem_size && file_size > 0) {
> > > > +mem_size = file_size;
> > > > +memory_region_set_size(block->mr, mem_size);
> > > > +}
> > > > +
> > > > +if (mem_size < block->page_size) {
> > > >  error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal 
to
> > > >  "
> > > > "or larger than page size 0x%zx",
> > > > -   memory, block->page_size);
> > > > +   mem_size, block->page_size);
> > > >  goto error;
> > > >  }
> > > >
> > > > -if (file_size > 0 && file_size < memory) {
> > > > +if (file_size > 0 && file_size < mem_size) {
> > > >  error_setg(errp, "backing store %s size %"PRId64
> > > > " does not match 'size' option %"PRIu64,
> > > > -   path, file_size, memory);
> > > > +   path, file_size, mem_size);
> > > >  goto error;
> > > >  }
> > > >
> > > > -memory = ROUND_UP(memory, block->page_size);
> > > > +mem_size = ROUND_UP(mem_size, block->page_size);
> > > > +*memory = mem_size;
> > >
> > > I suggested not touching *memory unless it was zero, and setting
> > > it to the memory region size, not the rounded-up size. Haozhong
> > > said this was going to be changed.
> > >
> > > This will have the side-effect of setting block->used_length and
> > > block->max_length to the rounded up size in
> > > qemu_ram_alloc_from_file() (instead of the original memory region
> > > size). I don't know what could be the consequences of that.
> > >
> > > This patch also skip HOST_PAGE_ALIGN-ing mem_size after getting
> > > the file size, which would be different from the behavior when
> > > size is specified explicitly. (And I also don't know the
> > > consequences of that)
> > >
> > > Considering that this pull request failed to build, I suggest
> > > waiting for a new version from Haozhong.
> >
> > Yes, I'll drop these three patches.
>
> I believe you can keep the other two (as long as the build error
> is fixed). I was already going to include them in my pull
> request, but removed them.

I'm a little confused. Do I need to send a following patch to fix this
build error? I was going to send a new version of the entire patch
series.


I thought we could fix it while committing, to make sure it gets
in a pull request for soft freeze. But:

* Patch 1/3 ("do not truncate") is a bug fix for nvdimm, so
 eligible post-soft-freeze.
* Patch 2/3 ("check file size") looks like a bug fix too.
* Patch 3/3 ("make size optional") is not a bug fix and is
 riskier, so I believe it won't get into 2.8.

So sending a new series is probably simpler, as patch 1-2 can be
included after soft freeze.



Got it. I'll resend patch 2/3 to fix the build error, and send the new
version of patch 3/3 in another series for 2.9.

Thanks,
Haozhong



Re: [Qemu-devel] [PATCH v9 04/12] vfio iommu: Add support for mediated devices

2016-11-01 Thread Alexey Kardashevskiy
On 02/11/16 01:01, Kirti Wankhede wrote:
> 
> 
> On 10/28/2016 7:48 AM, Alexey Kardashevskiy wrote:
>> On 27/10/16 23:31, Kirti Wankhede wrote:
>>>
>>>
>>> On 10/27/2016 12:50 PM, Alexey Kardashevskiy wrote:
 On 18/10/16 08:22, Kirti Wankhede wrote:
> VFIO IOMMU drivers are designed for the devices which are IOMMU capable.
> Mediated device only uses IOMMU APIs, the underlying hardware can be
> managed by an IOMMU domain.
>
> Aim of this change is:
> - To use most of the code of TYPE1 IOMMU driver for mediated devices
> - To support direct assigned device and mediated device in single module
>
> Added two new callback functions to struct vfio_iommu_driver_ops. Backend
> IOMMU module that supports pining and unpinning pages for mdev devices
> should provide these functions.
> Added APIs for pining and unpining pages to VFIO module. These calls back
> into backend iommu module to actually pin and unpin pages.
>
> This change adds pin and unpin support for mediated device to TYPE1 IOMMU
> backend module. More details:
> - When iommu_group of mediated devices is attached, task structure is
>   cached which is used later to pin pages and page accounting.


 For SPAPR TCE IOMMU driver, I ended up caching mm_struct with
 atomic_inc(&container->mm->mm_count) (patches are on the way) instead of
 using @current or task as the process might be gone while VFIO container is
 still alive and @mm might be needed to do proper cleanup; this might not be
 an issue with this patchset now but still you seem to only use @mm from
 task_struct.

>>>
>>> Consider the example of QEMU process which creates VFIO container, QEMU
>>> in its teardown path would release the container. How could container be
>>> alive when process is gone?
>>
>> do_exit() in kernel/exit.c calls exit_mm() (which sets NULL to tsk->mm)
>> first, and then releases open files by calling  exit_files(). So
>> container's release() does not have current->mm.
>>
> 
> Incrementing usage count (get_task_struct()) while saving task structure
> and decementing it (put_task_struct()) from release() should  work here.
> Updating the patch.

I cannot see how the task->usage counter prevents do_exit() from performing
the exit, can you?



-- 
Alexey



Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features

2016-11-01 Thread Gonglei (Arei)

> -Original Message-
> From: Qemu-devel
> [mailto:qemu-devel-bounces+arei.gonglei=huawei@nongnu.org] On
> Behalf Of Michael S. Tsirkin
> Sent: Wednesday, November 02, 2016 1:26 AM
> To: Peter Maydell
> Cc: QEMU Developers
> Subject: Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features
> 
> On Tue, Nov 01, 2016 at 03:22:01PM +, Peter Maydell wrote:
> > On 30 October 2016 at 21:23, Michael S. Tsirkin  wrote:
> > > The following changes since commit
> 5b2ecabaeabc17f032197246c4846b9ba95ba8a6:
> > >
> > >   Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20161028-1'
> into staging (2016-10-28 17:59:04 +0100)
> > >
> > > are available in the git repository at:
> > >
> > >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> > >
> > > for you to fetch changes up to
> f082ec0225bd15c71e0b4697d2df3af7bad65d7f:
> > >
> > >   acpi: fix assert failure caused by commit 35c5a52d (2016-10-30 20:06:25
> +0200)
> > >
> > > 
> > > virtio, pc: fixes and features
> > >
> > > nvdimm hotplug support
> > > virtio migration and ioeventfd rework
> > > virtio crypto device
> > > ipmi fixes
> > >
> > > Signed-off-by: Michael S. Tsirkin 
> > >
> >
> > Hi; this fails to build on OSX with format string issues:
> >
> > /Users/pm215/src/qemu-for-merges/hw/virtio/virtio-crypto.c:770:20:
> > error: format specifies type 'unsign
> > ed short' but the argument has type 'uint32_t' (aka 'unsigned int')
> > [-Werror,-Wformat]
> >vcrypto->max_queues, VIRTIO_QUEUE_MAX);
> > ~~~^
> > /Users/pm215/src/qemu-for-merges/include/qapi/error.h:163:35: note:
> > expanded from macro 'error_setg'
> > (fmt), ## __VA_ARGS__)
> >   ^
> >
> > Fun fact: in struct vhost_dev, max_queues is a uint64_t;
> > in struct VirtIONet it is a uint16_t; and in VirtIOCrypto
> > it is a uint32_t...
> >
> > thanks
> > -- PMM
> 
> Yes - I really think we should move that to virtio core
> going forward.
> Anyway, I fixed that up and pushed.
> 
Sorry about that. TBH that error log messge is copied from virtio-net.c and
I didn't notice their types are different. For virtio-crypto, it should be 
'PRIu32'.


Regards,
-Gonglei



[Qemu-devel] [RESEND PATCH v2 2/3] exec.c: check memory backend file size with 'size' option

2016-11-01 Thread Haozhong Zhang
If the memory backend file is not large enough to hold the required 'size',
Qemu will report error and exit.

Signed-off-by: Haozhong Zhang 
Message-Id: <20161027042300.5929-3-haozhong.zh...@intel.com>
Reviewed-by: Eduardo Habkost 
---
Changes in RESEND:
* Use format string RAM_ADDR_FMT for variable 'memory'.
  This change is to fix the build error.
* Add Eduardo's r-b.
* Add the message id of this thread.
---
 exec.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/exec.c b/exec.c
index a2b371a..23c21c2 100644
--- a/exec.c
+++ b/exec.c
@@ -1316,6 +1316,13 @@ static void *file_ram_alloc(RAMBlock *block,
 goto error;
 }
 
+if (file_size > 0 && file_size < memory) {
+error_setg(errp, "backing store %s size 0x%" PRIx64
+   " does not match 'size' option 0x" RAM_ADDR_FMT,
+   path, file_size, memory);
+goto error;
+}
+
 memory = ROUND_UP(memory, block->page_size);
 
 /*
-- 
2.10.1




Re: [Qemu-devel] [PATCH v10 00/19] Add Mediated device support

2016-11-01 Thread Jike Song
On 11/01/2016 11:24 PM, Gerd Hoffmann wrote:
>> I rebased KVMGT upon v10, with 2 minor changes:
>>
>>  1, get_user_pages_remote has only 7 args
> 
> Appears to be a 4.9 merge window change.  v10 as-is applies and builds
> fine against 4.8, after rebasing to 4.9-rc3 it stops building due to
> this.
> 
> Can you share the patch?
> 
>>  2, vfio iommu notifier calls vendor callback with iova instead of pfn
> 
> And this one too?
> 
> Also:  github seems to have the v9 kvmgt version still.  Can you push
> the update?

Zhenyu will help to push it to 01org/gvt-linux. Those 2 patches added by
me are trivial, I guess Kirti will have her formal fixes in next version.

> The kvmgt branch apparently depends on alot of unmerged stuff, "git
> describe" says 1669 patches on top of 4.8-rc8.
> 
> Can you outline what this is? Mostly drm-next?
> How much of this landed in the 4.9 merge window?

Yes, mostly drm-next, targeting 4.10 merge window. The overwhelming
part is the device-model under drivers/gpu/drm/i915/gvt/.
 

--
Thanks,
Jike



Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Stefano Stabellini
On Tue, 1 Nov 2016, Eduardo Habkost wrote:
> On Tue, Nov 01, 2016 at 05:44:16PM +, Wei Liu wrote:
> > Introduce this field to control whether ACPI build is enabled by a
> > particular machine or accelerator.
> > 
> > It defaults to true if the machine itself supports ACPI build. Xen
> > accelerator will disable it because Xen is in charge of building ACPI
> > tables for the guest.
> > 
> > Signed-off-by: Wei Liu 
> 
> Reviewed-by: Eduardo Habkost 

Thank you Eduardo. I can queue it up unless you have other stuff in
progress.



Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Eduardo Habkost
On Tue, Nov 01, 2016 at 05:44:16PM +, Wei Liu wrote:
> Introduce this field to control whether ACPI build is enabled by a
> particular machine or accelerator.
> 
> It defaults to true if the machine itself supports ACPI build. Xen
> accelerator will disable it because Xen is in charge of building ACPI
> tables for the guest.
> 
> Signed-off-by: Wei Liu 

Reviewed-by: Eduardo Habkost 

-- 
Eduardo



Re: [Qemu-devel] [PATCH 00/15] optimize Qemu RSS usage

2016-11-01 Thread Michael R. Hines

On 10/31/2016 05:00 PM, Michael R. Hines wrote:

On 10/18/2016 05:47 AM, Peter Lieven wrote:

Am 12.10.2016 um 23:18 schrieb Michael R. Hines:

Peter,

Greetings from DigitalOcean. We're experiencing the same symptoms 
without this patch.
We have, collectively, many gigabytes of un-planned-for RSS being 
used per-hypervisor

that we would like to get rid of =).

Without explicitly trying this patch (will do that ASAP), we 
immediately noticed that the
192MB mentioned immediately melts away (Yay) when we disabled the 
coroutine thread pool explicitly,
with another ~100MB in additional stack usage that would likely also 
go away if we

applied the entirety of your patch.

Is there any chance you have revisited this or have a timeline for it?


Hi Michael,

the current master already includes some of the patches of this 
original series. There are still some changes left, but

what works for me is the current master +

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 5816702..3eaef68 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -25,8 +25,6 @@ enum {
 };

 /** Free list to speed up creation */
-static QSLIST_HEAD(, Coroutine) release_pool = 
QSLIST_HEAD_INITIALIZER(pool);

-static unsigned int release_pool_size;
 static __thread QSLIST_HEAD(, Coroutine) alloc_pool = 
QSLIST_HEAD_INITIALIZER(pool);

 static __thread unsigned int alloc_pool_size;
 static __thread Notifier coroutine_pool_cleanup_notifier;
@@ -49,20 +47,10 @@ Coroutine *qemu_coroutine_create(CoroutineEntry 
*entry)

 if (CONFIG_COROUTINE_POOL) {
 co = QSLIST_FIRST(&alloc_pool);
 if (!co) {
-if (release_pool_size > POOL_BATCH_SIZE) {
-/* Slow path; a good place to register the 
destructor, too.  */

-if (!coroutine_pool_cleanup_notifier.notify) {
-coroutine_pool_cleanup_notifier.notify = 
coroutine_pool_cleanup;

- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
-}
-
-/* This is not exact; there could be a little skew 
between
- * release_pool_size and the actual size of 
release_pool.  But
- * it is just a heuristic, it does not need to be 
perfect.

- */
-alloc_pool_size = atomic_xchg(&release_pool_size, 0);
-QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
-co = QSLIST_FIRST(&alloc_pool);
+/* Slow path; a good place to register the destructor, 
too.  */

+if (!coroutine_pool_cleanup_notifier.notify) {
+coroutine_pool_cleanup_notifier.notify = 
coroutine_pool_cleanup;

+ qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
 }
 }
 if (co) {
@@ -85,11 +73,6 @@ static void coroutine_delete(Coroutine *co)
 co->caller = NULL;

 if (CONFIG_COROUTINE_POOL) {
-if (release_pool_size < POOL_BATCH_SIZE * 2) {
-QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
-atomic_inc(&release_pool_size);
-return;
-}
 if (alloc_pool_size < POOL_BATCH_SIZE) {
 QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
 alloc_pool_size++;

+ invoking qemu with the following environemnet variable set:

MALLOC_MMAP_THRESHOLD_=32768 qemu-system-x86_64 

The last one makes glibc automatically using mmap when the malloced 
memory exceeds 32kByte.




Peter,

I tested the above patch (and the environment variable --- it doesn't 
quite come close to as lean of
an RSS tally as the original patchset  there's still about 
70-80 MB of remaining RSS.


Any chance you could trim the remaining fat before merging this? =)




False alarm! I didn't set the MMAP threshold low enough. Now the results 
are on-par with the other patchset.


Thank you!



[Qemu-devel] [PATCH 4/4] target-m68k: Use gen_ea_mode in cmpm

2016-11-01 Thread Richard Henderson
The writeback infrastructure takes care of ensuring that Ax == Ay
both uses the updated register value from Ay for Ax, and updating
the final register result twice.

??? Maybe squash into previous.

Signed-off-by: Richard Henderson 
---
 target-m68k/translate.c | 28 
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 85cdba5..aefd90c 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2227,26 +2227,14 @@ DISAS_INSN(cmpa)
 DISAS_INSN(cmpm)
 {
 int opsize = insn_opsize(insn);
-TCGv tmp = tcg_temp_new();
-TCGv src, dst, addr;
-
-src = gen_load(s, opsize, AREG(insn, 0), 1);
-/* delay the update after the second gen_load() */
-tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize));
-
-/* but if the we use the same address register to
- * read the second value, we must use the updated address
- */
-if (REG(insn, 0) == REG(insn, 9)) {
-addr = tmp;
-} else {
-addr = AREG(insn, 9);
-}
-
-dst = gen_load(s, opsize, addr, 1);
-tcg_gen_mov_i32(AREG(insn, 0), tmp);
-tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize));
-tcg_temp_free(tmp);
+TCGv src, dst;
+
+/* Post-increment load (mode 3) from Ay.  */
+src = gen_ea_mode(env, s, 3, REG(insn, 0), opsize,
+  NULL_QREG, NULL, EA_LOADS);
+/* Post-increment load (mode 3) from Ax.  */
+dst = gen_ea_mode(env, s, 3, REG(insn, 9), opsize,
+  NULL_QREG, NULL, EA_LOADS);
 
 gen_update_cc_cmp(s, dst, src, opsize);
 }
-- 
2.7.4




[Qemu-devel] [PATCH 3/4] target-m68k: add cmpm

2016-11-01 Thread Richard Henderson
From: Laurent Vivier 

Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
Message-Id: <1477604609-2206-2-git-send-email-laur...@vivier.eu>
Signed-off-by: Richard Henderson 
---
 target-m68k/translate.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 00018b4..85cdba5 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -2224,6 +2224,33 @@ DISAS_INSN(cmpa)
 gen_update_cc_cmp(s, reg, src, opsize);
 }
 
+DISAS_INSN(cmpm)
+{
+int opsize = insn_opsize(insn);
+TCGv tmp = tcg_temp_new();
+TCGv src, dst, addr;
+
+src = gen_load(s, opsize, AREG(insn, 0), 1);
+/* delay the update after the second gen_load() */
+tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize));
+
+/* but if the we use the same address register to
+ * read the second value, we must use the updated address
+ */
+if (REG(insn, 0) == REG(insn, 9)) {
+addr = tmp;
+} else {
+addr = AREG(insn, 9);
+}
+
+dst = gen_load(s, opsize, addr, 1);
+tcg_gen_mov_i32(AREG(insn, 0), tmp);
+tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize));
+tcg_temp_free(tmp);
+
+gen_update_cc_cmp(s, dst, src, opsize);
+}
+
 DISAS_INSN(eor)
 {
 TCGv src;
@@ -3465,6 +3492,7 @@ void register_m68k_insns (CPUM68KState *env)
 INSN(cmpa,  b1c0, f1c0, CF_ISA_A);
 INSN(cmp,   b000, f100, M68000);
 INSN(eor,   b100, f100, M68000);
+INSN(cmpm,  b108, f138, M68000);
 INSN(cmpa,  b0c0, f0c0, M68000);
 INSN(eor,   b180, f1c0, CF_ISA_A);
 BASE(and,   c000, f000);
-- 
2.7.4




[Qemu-devel] [PATCH 1/4] target-m68k: Delay autoinc writeback

2016-11-01 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target-m68k/translate.c | 83 +
 1 file changed, 63 insertions(+), 20 deletions(-)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 9ad974f..f812c4b 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -59,12 +59,12 @@ static TCGv cpu_aregs[8];
 static TCGv_i64 cpu_fregs[8];
 static TCGv_i64 cpu_macc[4];
 
-#define REG(insn, pos) (((insn) >> (pos)) & 7)
+#define REG(insn, pos)  (((insn) >> (pos)) & 7)
 #define DREG(insn, pos) cpu_dregs[REG(insn, pos)]
-#define AREG(insn, pos) cpu_aregs[REG(insn, pos)]
+#define AREG(insn, pos) get_areg(s, REG(insn, pos))
 #define FREG(insn, pos) cpu_fregs[REG(insn, pos)]
-#define MACREG(acc) cpu_macc[acc]
-#define QREG_SP cpu_aregs[7]
+#define MACREG(acc) cpu_macc[acc]
+#define QREG_SP get_areg(s, 7)
 
 static TCGv NULL_QREG;
 #define IS_NULL_QREG(t) (TCGV_EQUAL(t, NULL_QREG))
@@ -141,8 +141,55 @@ typedef struct DisasContext {
 int singlestep_enabled;
 TCGv_i64 mactmp;
 int done_mac;
+int writeback_mask;
+TCGv writeback[8];
 } DisasContext;
 
+static TCGv get_areg(DisasContext *s, unsigned regno)
+{
+if (s->writeback_mask & (1 << regno)) {
+return s->writeback[regno];
+} else {
+return cpu_aregs[regno];
+}
+}
+
+static void delay_set_areg(DisasContext *s, unsigned regno,
+   TCGv val, bool give_temp)
+{
+if (s->writeback_mask & (1 << regno)) {
+if (give_temp) {
+tcg_temp_free(s->writeback[regno]);
+s->writeback[regno] = val;
+} else {
+tcg_gen_mov_i32(s->writeback[regno], val);
+}
+} else {
+s->writeback_mask |= 1 << regno;
+if (give_temp) {
+s->writeback[regno] = val;
+} else {
+TCGv tmp = tcg_temp_new();
+s->writeback[regno] = tmp;
+tcg_gen_mov_i32(tmp, val);
+}
+}
+}
+
+static void do_writebacks(DisasContext *s)
+{
+unsigned mask = s->writeback_mask;
+if (mask) {
+s->writeback_mask = 0;
+do {
+unsigned regno = ctz32(mask);
+tcg_gen_mov_i32(cpu_aregs[regno], s->writeback[regno]);
+tcg_temp_free(s->writeback[regno]);
+mask &= mask - 1;
+} while (mask);
+}
+}
+
 #define DISAS_JUMP_NEXT 4
 
 #if defined(CONFIG_USER_ONLY)
@@ -331,7 +378,7 @@ static inline uint32_t read_im32(CPUM68KState *env, 
DisasContext *s)
 }
 
 /* Calculate and address index.  */
-static TCGv gen_addr_index(uint16_t ext, TCGv tmp)
+static TCGv gen_addr_index(DisasContext *s, uint16_t ext, TCGv tmp)
 {
 TCGv add;
 int scale;
@@ -388,7 +435,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext 
*s, TCGv base)
 tmp = tcg_temp_new();
 if ((ext & 0x44) == 0) {
 /* pre-index */
-add = gen_addr_index(ext, tmp);
+add = gen_addr_index(s, ext, tmp);
 } else {
 add = NULL_QREG;
 }
@@ -417,7 +464,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext 
*s, TCGv base)
 /* memory indirect */
 base = gen_load(s, OS_LONG, add, 0);
 if ((ext & 0x44) == 4) {
-add = gen_addr_index(ext, tmp);
+add = gen_addr_index(s, ext, tmp);
 tcg_gen_add_i32(tmp, add, base);
 add = tmp;
 } else {
@@ -441,7 +488,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext 
*s, TCGv base)
 } else {
 /* brief extension word format */
 tmp = tcg_temp_new();
-add = gen_addr_index(ext, tmp);
+add = gen_addr_index(s, ext, tmp);
 if (!IS_NULL_QREG(base)) {
 tcg_gen_add_i32(tmp, add, base);
 if ((int8_t)ext)
@@ -755,10 +802,11 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, 
uint16_t insn,
 case 3: /* Indirect postincrement.  */
 reg = AREG(insn, 0);
 result = gen_ldst(s, opsize, reg, val, what);
-/* ??? This is not exception safe.  The instruction may still
-   fault after this point.  */
-if (what == EA_STORE || !addrp)
-tcg_gen_addi_i32(reg, reg, opsize_bytes(opsize));
+if (what == EA_STORE || !addrp) {
+TCGv tmp = tcg_temp_new();
+tcg_gen_addi_i32(tmp, reg, opsize_bytes(opsize));
+delay_set_areg(s, REG(insn, 0), tmp, true);
+}
 return result;
 case 4: /* Indirect predecrememnt.  */
 {
@@ -773,11 +821,8 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, 
uint16_t insn,
 *addrp = tmp;
 }
 result = gen_ldst(s, opsize, tmp, val, what);
-/* ??? This is not exception safe.  The instruction may still
-   fault after this point.  */
 if (what == EA_STORE || !addrp) {
-reg = AREG(insn, 0);
-  

[Qemu-devel] [PATCH 0/4] target-m68k areg writeback

2016-11-01 Thread Richard Henderson
Here's the patch I almost wrote in the email, followed by
a cleanup that allows cmpm to be written "nicely".

I can test this to some extent with the coldfire kernel,
but of course coldfire can't excersise any of the tricky
edge cases that m68000 can.

I'm particularly interested in edge cases like

mov.b   a0@+, a0@+
movea   a0@+, a0
movea   a0, a0@-

The first two are not really useful and likely not show up
in normal code.  The third may well do so; I think our
current code gets it wrong, but this will get it right.


r~


Laurent Vivier (1):
  target-m68k: add cmpm

Richard Henderson (3):
  target-m68k: Delay autoinc writeback
  target-m68k: Split gen_lea and gen_ea
  target-m68k: Use gen_ea_mode in cmpm

 target-m68k/translate.c | 207 +++-
 1 file changed, 136 insertions(+), 71 deletions(-)

-- 
2.7.4




[Qemu-devel] [PATCH 2/4] target-m68k: Split gen_lea and gen_ea

2016-11-01 Thread Richard Henderson
Provide gen_lea_mode and gen_ea_mode, where the mode can be
specified manually, rather than taken from the instruction.

Signed-off-by: Richard Henderson 
---
 target-m68k/translate.c | 112 +---
 1 file changed, 59 insertions(+), 53 deletions(-)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index f812c4b..00018b4 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -697,37 +697,37 @@ static void gen_partset_reg(int opsize, TCGv reg, TCGv 
val)
 
 /* Generate code for an "effective address".  Does not adjust the base
register for autoincrement addressing modes.  */
-static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn,
-int opsize)
+static TCGv gen_lea_mode(CPUM68KState *env, DisasContext *s,
+ int mode, int reg0, int opsize)
 {
 TCGv reg;
 TCGv tmp;
 uint16_t ext;
 uint32_t offset;
 
-switch ((insn >> 3) & 7) {
+switch (mode) {
 case 0: /* Data register direct.  */
 case 1: /* Address register direct.  */
 return NULL_QREG;
 case 2: /* Indirect register */
 case 3: /* Indirect postincrement.  */
-return AREG(insn, 0);
+return get_areg(s, reg0);
 case 4: /* Indirect predecrememnt.  */
-reg = AREG(insn, 0);
+reg = get_areg(s, reg0);
 tmp = tcg_temp_new();
 tcg_gen_subi_i32(tmp, reg, opsize_bytes(opsize));
 return tmp;
 case 5: /* Indirect displacement.  */
-reg = AREG(insn, 0);
+reg = get_areg(s, reg0);
 tmp = tcg_temp_new();
 ext = read_im16(env, s);
 tcg_gen_addi_i32(tmp, reg, (int16_t)ext);
 return tmp;
 case 6: /* Indirect index + displacement.  */
-reg = AREG(insn, 0);
+reg = get_areg(s, reg0);
 return gen_lea_indexed(env, s, reg);
 case 7: /* Other */
-switch (insn & 7) {
+switch (reg0) {
 case 0: /* Absolute short.  */
 offset = (int16_t)read_im16(env, s);
 return tcg_const_i32(offset);
@@ -749,39 +749,26 @@ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, 
uint16_t insn,
 return NULL_QREG;
 }
 
-/* Helper function for gen_ea. Reuse the computed address between the
-   for read/write operands.  */
-static inline TCGv gen_ea_once(CPUM68KState *env, DisasContext *s,
-   uint16_t insn, int opsize, TCGv val,
-   TCGv *addrp, ea_what what)
+static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn,
+int opsize)
 {
-TCGv tmp;
-
-if (addrp && what == EA_STORE) {
-tmp = *addrp;
-} else {
-tmp = gen_lea(env, s, insn, opsize);
-if (IS_NULL_QREG(tmp))
-return tmp;
-if (addrp)
-*addrp = tmp;
-}
-return gen_ldst(s, opsize, tmp, val, what);
+int mode = extract32(insn, 3, 3);
+int reg0 = REG(insn, 0);
+return gen_lea_mode(env, s, mode, reg0, opsize);
 }
 
-/* Generate code to load/store a value from/into an EA.  If VAL > 0 this is
+/* Generate code to load/store a value from/into an EA.  If WHAT > 0 this is
a write otherwise it is a read (0 == sign extend, -1 == zero extend).
ADDRP is non-null for readwrite operands.  */
-static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn,
-   int opsize, TCGv val, TCGv *addrp, ea_what what)
+static TCGv gen_ea_mode(CPUM68KState *env, DisasContext *s, int mode, int reg0,
+int opsize, TCGv val, TCGv *addrp, ea_what what)
 {
-TCGv reg;
-TCGv result;
-uint32_t offset;
+TCGv reg, tmp, result;
+int32_t offset;
 
-switch ((insn >> 3) & 7) {
+switch (mode) {
 case 0: /* Data register direct.  */
-reg = DREG(insn, 0);
+reg = cpu_dregs[reg0];
 if (what == EA_STORE) {
 gen_partset_reg(opsize, reg, val);
 return store_dummy;
@@ -789,7 +776,7 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, 
uint16_t insn,
 return gen_extend(reg, opsize, what == EA_LOADS);
 }
 case 1: /* Address register direct.  */
-reg = AREG(insn, 0);
+reg = get_areg(s, reg0);
 if (what == EA_STORE) {
 tcg_gen_mov_i32(reg, val);
 return store_dummy;
@@ -797,45 +784,56 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, 
uint16_t insn,
 return gen_extend(reg, opsize, what == EA_LOADS);
 }
 case 2: /* Indirect register */
-reg = AREG(insn, 0);
+reg = get_areg(s, reg0);
 return gen_ldst(s, opsize, reg, val, what);
 case 3: /* Indirect postincrement.  */
-reg = AREG(insn, 0);
+reg = get_areg(s, reg0);
 result = gen_ldst(s, opsize, reg, val, what);
 if (what == EA_STORE || !addrp) {
 TCGv tmp = tcg_temp_new();
 tcg_gen_addi_i32(tmp, re

Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 19:21, Richard Henderson  wrote:
> On 11/01/2016 12:17 PM, Peter Maydell wrote:
>> On 1 November 2016 at 17:51, Richard Henderson  wrote:
>>>
>>> The usual pain point for me is building for 32-bit on a 64-bit
>>> system, where there is no cross-prefix that one can use, and
>>> PKG_CONFIG_PATH and --extra-cflags are the only changes.
>>
>>
>> That sounds like a bug in how the 32-bit compilers work on
>> x86 ;-) Ideally you should be able to get a 32-bit compile
>> with i586-linux-gnu-gcc...

> gcc -m32 worked very well for years, before pkg-config
> came along and mucked things up by using package-local
> include and library directories.  As long as everything
> was installed in the toolchain-aware search paths, everything
> Just Worked.

-m32 doesn't extend to anything other than the 32-bit/64-bit
special case, though, and pkg-config does more than just
tell you include paths.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v4 2/2] target-m68k: add 680x0 divu/divs variants

2016-11-01 Thread Richard Henderson

On 11/01/2016 02:03 PM, Laurent Vivier wrote:

Update helper to set the throwing location in case of div-by-0.
Cleanup divX.w and add quad word variants of divX.l.

Signed-off-by: Laurent Vivier 
---
 linux-user/main.c   |   7 ++
 target-m68k/cpu.h   |   4 --
 target-m68k/helper.h|   8 ++-
 target-m68k/op_helper.c | 182 +---
 target-m68k/qregs.def   |   2 -
 target-m68k/translate.c |  84 --
 6 files changed, 217 insertions(+), 70 deletions(-)


Reviewed-by: Richard Henderson 


r~



Re: [Qemu-devel] [PATCH] atomic.h: Use __atomic_load_n() primitive

2016-11-01 Thread no-reply
Hi,

Your series seems to have some coding style problems. See output below for
more information:

Type: series
Subject: [Qemu-devel] [PATCH] atomic.h: Use __atomic_load_n() primitive
Message-id: 20161101203953.18065-1-bobby.pr...@gmail.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

# Useful git options
git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 - [tag update]  
patchew/1478022256-7089-1-git-send-email-wei.l...@citrix.com -> 
patchew/1478022256-7089-1-git-send-email-wei.l...@citrix.com
 - [tag update]  patchew/20161101171927.25684-1...@weilnetz.de -> 
patchew/20161101171927.25684-1...@weilnetz.de
 * [new tag] patchew/20161101203953.18065-1-bobby.pr...@gmail.com -> 
patchew/20161101203953.18065-1-bobby.pr...@gmail.com
Switched to a new branch 'test'
587647b atomic.h: Use __atomic_load_n() primitive

=== OUTPUT BEGIN ===
fatal: unrecognized argument: --no-patch
Checking PATCH 1/1: ...
ERROR: memory barrier without comment
#32: FILE: include/qemu/atomic.h:130:
+smp_read_barrier_depends();   \

total: 1 errors, 0 warnings, 40 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

[Qemu-devel] [PATCH] atomic.h: Use __atomic_load_n() primitive

2016-11-01 Thread Pranith Kumar
Use __atomic_load_n() primitive saving a load and store to a local
variable.

Signed-off-by: Pranith Kumar 
---
 include/qemu/atomic.h | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 878fa07..be44094 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -120,20 +120,22 @@
  * same, but this slows down atomic_rcu_read unnecessarily.
  */
 #ifdef __SANITIZE_THREAD__
-#define atomic_rcu_read__nocheck(ptr, valptr)   \
-__atomic_load(ptr, valptr, __ATOMIC_CONSUME);
+#define atomic_rcu_read__nocheck(ptr) \
+__atomic_load_n(ptr, __ATOMIC_CONSUME);
 #else
-#define atomic_rcu_read__nocheck(ptr, valptr)   \
-__atomic_load(ptr, valptr, __ATOMIC_RELAXED);   \
-smp_read_barrier_depends();
+#define atomic_rcu_read__nocheck(ptr) \
+({\
+typeof_strip_qual(*ptr) _val; \
+__atomic_load(ptr, &_val, __ATOMIC_RELAXED);  \
+smp_read_barrier_depends();   \
+_val; \
+})
 #endif
 
 #define atomic_rcu_read(ptr)  \
 ({\
 QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
-typeof_strip_qual(*ptr) _val; \
-atomic_rcu_read__nocheck(ptr, &_val); \
-_val; \
+atomic_rcu_read__nocheck(ptr);\
 })
 
 #define atomic_rcu_set(ptr, i) do {   \
@@ -144,9 +146,7 @@
 #define atomic_load_acquire(ptr)\
 ({  \
 QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));   \
-typeof_strip_qual(*ptr) _val;   \
-__atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);\
-_val;   \
+__atomic_load_n(ptr, __ATOMIC_ACQUIRE); \
 })
 
 #define atomic_store_release(ptr, i)  do {  \
-- 
2.10.2




[Qemu-devel] [Bug 1629618] Re: QEMU causes host hang / reset on PPC64EL

2016-11-01 Thread Timothy Pearson
Unfortunately, the machine just crashed again.  It seems related to the
use of 4K pages instead of the more typical 64K pages; the machine is
rock solid with 64K pages.

** Changed in: qemu
   Status: Fix Released => New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1629618

Title:
  QEMU causes host hang / reset on PPC64EL

Status in QEMU:
  New

Bug description:
  QEMU causes a host hang / reset on PPC64EL when used in KVM + HV mode
  (kvm_hv module).

  After a random amount of uptime, starting new QEMU virtual machines
  will cause the host to experience a soft CPU lockup.  Depending on
  configuration and other random factors the host will either checkstop
  and reboot, or hang indefinitely.  The following stacktrace was pulled
  from an instance where the host simply hung after starting a fourth
  virtual machine.

  Command line:

  qemu-system-ppc64 --enable-kvm -M pseries -cpu host -smp
  14,cores=14,threads=1,sockets=1 -m 64G -realtime mlock=on -kernel
  vmlinux-4.7.0-1-powerpc64le -initrd initrd.img-4.7.0-1-powerpc64le

  Lockup trace:

  [  527.393933] KVM guest htab at c03ae400 (order 29), LPID 4
  [  574.637695] INFO: rcu_sched self-detected stall on CPU
  [  574.637799]112-...: (5249 ticks this GP) 
idle=699/141/0 softirq=5358/5382 fqs=5072
  [  574.637877] (t=5250 jiffies g=19853 c=19852 q=64401)
  [  574.637947] Task dump for CPU 112:
  [  574.637982] qemu-system-ppc R  running task0 12037  11828 
0x00040004
  [  574.638051] Call Trace:
  [  574.638081] [c01c1cddb430] [c00f2710] 
sched_show_task+0xe0/0x180 (unreliable)
  [  574.638164] [c01c1cddb4a0] [c01326f4] 
rcu_dump_cpu_stacks+0xe4/0x150
  [  574.638246] [c01c1cddb4f0] [c0137a04] 
rcu_check_callbacks+0x6b4/0x9c0
  [  574.638328] [c01c1cddb610] [c013f7c4] 
update_process_times+0x54/0xa0
  [  574.638409] [c01c1cddb640] [c0156c28] 
tick_sched_handle.isra.5+0x48/0xe0
  [  574.638489] [c01c1cddb680] [c0156d24] 
tick_sched_timer+0x64/0xd0
  [  574.638602] [c01c1cddb6c0] [c0140274] 
__hrtimer_run_queues+0x124/0x420
  [  574.638683] [c01c1cddb750] [c014123c] 
hrtimer_interrupt+0xec/0x2c0
  [  574.638765] [c01c1cddb810] [c001fe5c] 
__timer_interrupt+0x8c/0x270
  [  574.638847] [c01c1cddb860] [c002053c] timer_interrupt+0x9c/0xe0
  [  574.638915] [c01c1cddb890] [c0002750] 
decrementer_common+0x150/0x180
  [  574.639001] --- interrupt: 901 at kvmppc_hv_get_dirty_log+0x1c4/0x570 
[kvm_hv]
  [  574.639001] LR = kvmppc_hv_get_dirty_log+0x1f8/0x570 [kvm_hv]
  [  574.639114] [c01c1cddbc30] [d0001a524980] 
kvm_vm_ioctl_get_dirty_log_hv+0xd0/0x170 [kvm_hv]
  [  574.639209] [c01c1cddbc80] [d0001a4d4140] 
kvm_vm_ioctl_get_dirty_log+0x40/0x60 [kvm]
  [  574.639291] [c01c1cddbcb0] [d0001a4ca3cc] kvm_vm_ioctl+0x3fc/0x760 
[kvm]
  [  574.639372] [c01c1cddbd40] [c02d9e18] do_vfs_ioctl+0xd8/0x8e0
  [  574.639442] [c01c1cddbde0] [c02da6f4] SyS_ioctl+0xd4/0xf0
  [  574.639512] [c01c1cddbe30] [c0009260] system_call+0x38/0x108
  [  580.601573] NMI watchdog: BUG: soft lockup - CPU#112 stuck for 22s! 
[qemu-system-ppc:12037]
  [  580.601655] Modules linked in: xt_tcpudp(E) rpcsec_gss_krb5(E) nfsv4(E) 
dns_resolver(E) ext4(E) ecb(E) crc16(E) jbd2(E) mbcache(E) tun(E) btrfs(E) 
crc32c_generic(E) raid6_pq(E) xor(E) dm_crypt(E) xts(E) gf128mul(E) 
algif_skcipher(E) af_alg(E) dm_mod(E) bonding(E) cpufreq_stats(E) 
iptable_filter(E) ip_tables(E) x_tables(E) bridge(E) stp(E) llc(E) 
ipmi_devintf(E) ipmi_msghandler(E) i2c_dev(E) fuse(E) raid1(E) md_mod(E) ses(E) 
sd_mod(E) enclosure(E) sg(E) binfmt_misc(E) radeon(E) ttm(E) drm_kms_helper(E) 
snd_hda_codec_hdmi(E) snd_hda_intel(E) drm(E) snd_hda_codec(E) snd_hda_core(E) 
snd_hwdep(E) snd_pcm(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) 
fb_sys_fops(E) snd_timer(E) evdev(E) i2c_algo_bit(E) snd(E) soundcore(E) 
at24(E) ahci(E) mpt3sas(E) nvmem_core(E) libahci(E) raid_class(E) 
scsi_transport_sas(E) powernv_rng(E) rng_core(E) uinput(E) kvm_hv(E) kvm(E) 
ib_srp(E) scsi_transport_srp(E) ofpart(E) powernv_flash(E) mtd(E) nfsd(E) 
opal_prd(E) auth_rpcgss(E) parport_pc(E) lp(E) parport(E) autofs4(E) nfsv3(E) 
nfs_acl(E) nfs(E) lockd(E) grace(E) sunrpc(E) fscache(E) ib_ipoib(E) ib_umad(E) 
rdma_ucm(E) ib_uverbs(E) rdma_cm(E) iw_cm(E) ib_cm(E) ib_sa(E) configfs(E) 
hid_generic(E) usbhid(E) hid(E) xhci_pci(E) xhci_hcd(E) usbcore(E) tg3(E) 
usb_common(E) ptp(E) pps_core(E) libphy(E) ib_mthca(E) ib_mad(E) ib_core(E) 
ib_addr(E)
  [  580.603295] CPU: 112 PID: 12037 Comm: qemu-system-ppc Tainted: G   
 E   4.6.0-2-powerpc64le #1 Debian 4.6.3-1
  [  580.603386] task: c01f706f0180 ti: c01c1cdd8000 task.ti: 
c01c1cdd8000
  [  580.603456] NIP: d0001a52cb54 LR: d0001a52cb88 CTR: 

[Qemu-devel] [PATCH v4 1/2] target-m68k: add 64bit mull

2016-11-01 Thread Laurent Vivier
Signed-off-by: Laurent Vivier 
Reviewed-by: Richard Henderson 
---
 target-m68k/translate.c | 62 +++--
 1 file changed, 50 insertions(+), 12 deletions(-)

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 8433fa0..61986cc 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1812,24 +1812,62 @@ DISAS_INSN(tas)
 DISAS_INSN(mull)
 {
 uint16_t ext;
-TCGv reg;
 TCGv src1;
-TCGv dest;
+int sign;
 
-/* The upper 32 bits of the product are discarded, so
-   muls.l and mulu.l are functionally equivalent.  */
 ext = read_im16(env, s);
-if (ext & 0x87ff) {
-gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
+
+sign = ext & 0x800;
+
+if (ext & 0x400) {
+if (!m68k_feature(s->env, M68K_FEATURE_QUAD_MULDIV)) {
+gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
+return;
+}
+
+SRC_EA(env, src1, OS_LONG, 0, NULL);
+
+if (sign) {
+tcg_gen_muls2_i32(QREG_CC_Z, QREG_CC_N, src1, DREG(ext, 12));
+} else {
+tcg_gen_mulu2_i32(QREG_CC_Z, QREG_CC_N, src1, DREG(ext, 12));
+}
+/* if Dl == Dh, 68040 returns low word */
+tcg_gen_mov_i32(DREG(ext, 0), QREG_CC_N);
+tcg_gen_mov_i32(DREG(ext, 12), QREG_CC_Z);
+tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_N);
+
+tcg_gen_movi_i32(QREG_CC_V, 0);
+tcg_gen_movi_i32(QREG_CC_C, 0);
+
+set_cc_op(s, CC_OP_FLAGS);
 return;
 }
-reg = DREG(ext, 12);
 SRC_EA(env, src1, OS_LONG, 0, NULL);
-dest = tcg_temp_new();
-tcg_gen_mul_i32(dest, src1, reg);
-tcg_gen_mov_i32(reg, dest);
-/* Unlike m68k, coldfire always clears the overflow bit.  */
-gen_logic_cc(s, dest, OS_LONG);
+if (m68k_feature(s->env, M68K_FEATURE_M68000)) {
+tcg_gen_movi_i32(QREG_CC_C, 0);
+if (sign) {
+tcg_gen_muls2_i32(QREG_CC_N, QREG_CC_V, src1, DREG(ext, 12));
+/* QREG_CC_V is -(QREG_CC_V != (QREG_CC_N >> 31)) */
+tcg_gen_sari_i32(QREG_CC_Z, QREG_CC_N, 31);
+tcg_gen_setcond_i32(TCG_COND_NE, QREG_CC_V, QREG_CC_V, QREG_CC_Z);
+} else {
+tcg_gen_mulu2_i32(QREG_CC_N, QREG_CC_V, src1, DREG(ext, 12));
+/* QREG_CC_V is -(QREG_CC_V != 0), use QREG_CC_C as 0 */
+tcg_gen_setcond_i32(TCG_COND_NE, QREG_CC_V, QREG_CC_V, QREG_CC_C);
+}
+tcg_gen_neg_i32(QREG_CC_V, QREG_CC_V);
+tcg_gen_mov_i32(DREG(ext, 12), QREG_CC_N);
+
+tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N);
+
+set_cc_op(s, CC_OP_FLAGS);
+} else {
+/* The upper 32 bits of the product are discarded, so
+   muls.l and mulu.l are functionally equivalent.  */
+tcg_gen_mul_i32(DREG(ext, 12), src1, DREG(ext, 12));
+gen_logic_cc(s, DREG(ext, 12), OS_LONG);
+}
 }
 
 static void gen_link(DisasContext *s, uint16_t insn, int32_t offset)
-- 
2.7.4




[Qemu-devel] [PATCH v4 2/2] target-m68k: add 680x0 divu/divs variants

2016-11-01 Thread Laurent Vivier
Update helper to set the throwing location in case of div-by-0.
Cleanup divX.w and add quad word variants of divX.l.

Signed-off-by: Laurent Vivier 
---
 linux-user/main.c   |   7 ++
 target-m68k/cpu.h   |   4 --
 target-m68k/helper.h|   8 ++-
 target-m68k/op_helper.c | 182 +---
 target-m68k/qregs.def   |   2 -
 target-m68k/translate.c |  84 --
 6 files changed, 217 insertions(+), 70 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 75b199f..c1d5eb4 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2864,6 +2864,13 @@ void cpu_loop(CPUM68KState *env)
 info._sifields._sigfault._addr = env->pc;
 queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
 break;
+case EXCP_DIV0:
+info.si_signo = TARGET_SIGFPE;
+info.si_errno = 0;
+info.si_code = TARGET_FPE_INTDIV;
+info._sifields._sigfault._addr = env->pc;
+queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+break;
 case EXCP_TRAP0:
 {
 abi_long ret;
diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
index 6dfb54e..0b4ed7b 100644
--- a/target-m68k/cpu.h
+++ b/target-m68k/cpu.h
@@ -95,10 +95,6 @@ typedef struct CPUM68KState {
 uint32_t macsr;
 uint32_t mac_mask;
 
-/* Temporary storage for DIV helpers.  */
-uint32_t div1;
-uint32_t div2;
-
 /* MMU status.  */
 struct {
 uint32_t ar;
diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index aae01f9..d863e55 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -1,8 +1,12 @@
 DEF_HELPER_1(bitrev, i32, i32)
 DEF_HELPER_1(ff1, i32, i32)
 DEF_HELPER_FLAGS_2(sats, TCG_CALL_NO_RWG_SE, i32, i32, i32)
-DEF_HELPER_2(divu, void, env, i32)
-DEF_HELPER_2(divs, void, env, i32)
+DEF_HELPER_3(divuw, void, env, int, i32)
+DEF_HELPER_3(divsw, void, env, int, s32)
+DEF_HELPER_4(divul, void, env, int, int, i32)
+DEF_HELPER_4(divsl, void, env, int, int, s32)
+DEF_HELPER_4(divull, void, env, int, int, i32)
+DEF_HELPER_4(divsll, void, env, int, int, s32)
 DEF_HELPER_2(set_sr, void, env, i32)
 DEF_HELPER_3(movec, void, env, i32, i32)
 
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index 48e02e4..a4bfa4e 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -166,12 +166,17 @@ bool m68k_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 return false;
 }
 
-static void raise_exception(CPUM68KState *env, int tt)
+static void raise_exception_ra(CPUM68KState *env, int tt, uintptr_t raddr)
 {
 CPUState *cs = CPU(m68k_env_get_cpu(env));
 
 cs->exception_index = tt;
-cpu_loop_exit(cs);
+cpu_loop_exit_restore(cs, raddr);
+}
+
+static void raise_exception(CPUM68KState *env, int tt)
+{
+raise_exception_ra(env, tt, 0);
 }
 
 void HELPER(raise_exception)(CPUM68KState *env, uint32_t tt)
@@ -179,51 +184,178 @@ void HELPER(raise_exception)(CPUM68KState *env, uint32_t 
tt)
 raise_exception(env, tt);
 }
 
-void HELPER(divu)(CPUM68KState *env, uint32_t word)
+void HELPER(divuw)(CPUM68KState *env, int destr, uint32_t den)
 {
-uint32_t num;
-uint32_t den;
-uint32_t quot;
-uint32_t rem;
+uint32_t num = env->dregs[destr];
+uint32_t quot, rem;
 
-num = env->div1;
-den = env->div2;
-/* ??? This needs to make sure the throwing location is accurate.  */
 if (den == 0) {
-raise_exception(env, EXCP_DIV0);
+raise_exception_ra(env, EXCP_DIV0, GETPC());
 }
 quot = num / den;
 rem = num % den;
 
-env->cc_v = (word && quot > 0x ? -1 : 0);
+env->cc_c = 0; /* always cleared, even if overflow */
+if (quot > 0x) {
+env->cc_v = -1;
+/* nothing else is modified */
+/* real 68040 keeps Z and N on overflow,
+ * whereas documentation says "undefined"
+ */
+return;
+}
+env->dregs[destr] = deposit32(quot, 16, 16, rem);
 env->cc_z = quot;
 env->cc_n = quot;
+env->cc_v = 0;
+}
+
+void HELPER(divsw)(CPUM68KState *env, int destr, int32_t den)
+{
+int32_t num = env->dregs[destr];
+uint32_t quot, rem;
+
+if (den == 0) {
+raise_exception_ra(env, EXCP_DIV0, GETPC());
+}
+quot = num / den;
+rem = num % den;
+
+env->cc_c = 0; /* always cleared, even if overflow */
+if (quot != (int16_t)quot) {
+env->cc_v = -1;
+/* nothing else is modified */
+/* real 68040 keeps Z and N on overflow,
+ * whereas documentation says "undefined"
+ */
+return;
+}
+env->dregs[destr] = deposit32(quot, 16, 16, rem);
+env->cc_z = quot;
+env->cc_n = quot;
+env->cc_v = 0;
+}
+
+void HELPER(divul)(CPUM68KState *env, int numr, int regr, uint32_t den)
+{
+uint32_t num = env->dregs[numr];
+uint32_t quot, rem;
+
+if (den == 0) {
+raise_exception_ra(env, EXCP_DIV0, GETPC()

[Qemu-devel] [PATCH v4 0/2] 680x0 mul and div instructions

2016-11-01 Thread Laurent Vivier
This series is another subset of the series I sent in May:
https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00501.html

It must be applied on top of series:
"target-m68k: 680x0 instruction set, part 2"

This subset contains reworked patches of mul and div instructions:
- "add 64bit mull": correctly set QREG_CC_V,
- "inline divu/divs": don't inline divu/divs, but update
   existing functions to manage 680x0 div instructions

I've checked it doesn't break coldfire support:
http://wiki.qemu.org/download/coldfire-test-0.1.tar.bz2
but it can't boot a 680x0 processor kernel.

v4:
- divull/divsll: build correctly the 64bit num value
 check overflow as it is done in divsw

v3:
- mull: manage the case where Dl == Dh
- divu/divs: move all the mechanic to helpers,
  and pass the register number to directly set
  the result
v2:
- Free "rem" and "quot" in "divl".

Laurent Vivier (2):
  target-m68k: add 64bit mull
  target-m68k: add 680x0 divu/divs variants

 linux-user/main.c   |   7 ++
 target-m68k/cpu.h   |   4 --
 target-m68k/helper.h|   8 ++-
 target-m68k/op_helper.c | 182 +---
 target-m68k/qregs.def   |   2 -
 target-m68k/translate.c | 146 +-
 6 files changed, 267 insertions(+), 82 deletions(-)

-- 
2.7.4




Re: [Qemu-devel] [PATCH v2 1/3] target-m68k: add cmpm

2016-11-01 Thread Laurent Vivier


Le 01/11/2016 à 18:48, Richard Henderson a écrit :
> On 10/27/2016 03:43 PM, Laurent Vivier wrote:
>> +DISAS_INSN(cmpm)
>> +{
>> +int opsize = insn_opsize(insn);
>> +TCGv tmp = tcg_temp_new();
>> +TCGv src, dst, addr;
>> +
>> +src = gen_load(s, opsize, AREG(insn, 0), 1);
>> +/* delay the update after the second gen_load() */
>> +tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize));
>> +
>> +/* but if the we use the same address register to
>> + * read the second value, we must use the updated address
>> + */
>> +if (REG(insn, 0) == REG(insn, 9)) {
>> +addr = tmp;
>> +} else {
>> +addr = AREG(insn, 9);
>> +}
>> +
>> +dst = gen_load(s, opsize, addr, 1);
>> +tcg_gen_mov_i32(AREG(insn, 0), tmp);
>> +tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize));
> 
> I wonder if we should make this more generic.
> I'm thinking of transparently fixing
> 
> case 3: /* Indirect postincrement.  */
> reg = AREG(insn, 0);
> result = gen_ldst(s, opsize, reg, val, what);
> /* ??? This is not exception safe.  The instruction may still
>fault after this point.  */
> if (what == EA_STORE || !addrp)
> tcg_gen_addi_i32(reg, reg, opsize_bytes(opsize));
> return result;
> 
> If we add to DisasContext
> 
> unsigned writeback_mask;
> TCGv writeback[8];
> 
> Then
> 
> static TCGv get_areg(DisasContext *s, unsigned regno)
> {
> if (s->writeback_mask & (1 << regno)) {
> return s->writeback[regno];
> } else {
> return cpu_aregs[regno];
> }
> }
> 
> static void delay_set_areg(DisasContext *s, unsigned regno,
>TCGv val, bool give_temp)
> {
> if (s->writeback_mask & (1 << regno)) {
> tcg_gen_mov_i32(s->writeback[regno], val);
> if (give_temp) {
> tcg_temp_free(val);
> }
> } else {
> s->writeback_mask |= 1 << regno;
> if (give_temp) {
> s->writeback[regno] = val;
> } else {
> TCGv tmp = tcg_temp_new();
> tcg_gen_mov_i32(tmp, val);
> s->writeback[regno] = tmp;
> }
> }
> }
> 
> static void do_writebacks(DisasContext *s)
> {
> unsigned mask = s->writeback_mask;
> if (mask) {
> s->writeback_mask = 0;
> do {
> unsigned regno = ctz32(mask);
> tcg_gen_mov_i32(cpu_aregs[regno], s->writeback[regno]);
> tcg_temp_free(s->writeback[regno]);
> mask &= mask - 1;
> } while (mask);
> }
> }
> 
> where get_areg is used within the AREG macro, delay_set_areg is used in
> those cases where pre- and post-increment are needed, and do_writebacks
> is invoked at the end of disas_m68k_insn.
> 
> This all pre-supposes that cmpm is not a special-case within the ISA,
> that any time one reuses a register after modification one sees the new
> value.  Given the existing code within gen_ea, this would seem to be the
> case.
> 
> Thoughts?

I think it's really a good idea to manage this in a generic way.
As you have already written 90% of the code, do you want to provide a
patch? I can test this with coldfire kernel and 68020 linux-user container.

Thanks,
Laurent



Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Stefano Stabellini
On Tue, 1 Nov 2016, Wei Liu wrote:
> Introduce this field to control whether ACPI build is enabled by a
> particular machine or accelerator.
> 
> It defaults to true if the machine itself supports ACPI build. Xen
> accelerator will disable it because Xen is in charge of building ACPI
> tables for the guest.
> 
> Signed-off-by: Wei Liu 

Reviewed-by: Stefano Stabellini 


> Cc: Igor Mammedov 
> Cc: Eduardo Habkost 
> Cc: Anthony PERARD 
> Cc: Stefano Stabellini 
> Cc: Sander Eikelenboom 
> 
> v2:
> 1. drop acpi-build property
> 2. set acpi_build_enabled to acpi_has_build
> 3. replace acpi_has_build check in acpi_build()
> ---
>  hw/i386/acpi-build.c | 2 +-
>  hw/i386/pc.c | 2 ++
>  include/hw/i386/pc.h | 2 ++
>  xen-common.c | 6 ++
>  4 files changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 5cd1da9..13cbbde 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2953,7 +2953,7 @@ void acpi_setup(void)
>  return;
>  }
>  
> -if (!pcmc->has_acpi_build) {
> +if (!pcms->acpi_build_enabled) {
>  ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n");
>  return;
>  }
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index f56ea0f..fbd9aed 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -2159,6 +2159,8 @@ static void pc_machine_initfn(Object *obj)
>  pcms->vmport = ON_OFF_AUTO_AUTO;
>  /* nvdimm is disabled on default. */
>  pcms->acpi_nvdimm_state.is_enabled = false;
> +/* acpi build is enabled by default if machine supports it */
> +pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
>  }
>  
>  static void pc_machine_reset(void)
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 98dc772..8eb517f 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -62,6 +62,8 @@ struct PCMachineState {
>  
>  AcpiNVDIMMState acpi_nvdimm_state;
>  
> +bool acpi_build_enabled;
> +
>  /* RAM information (sizes, addresses, configuration): */
>  ram_addr_t below_4g_mem_size, above_4g_mem_size;
>  
> diff --git a/xen-common.c b/xen-common.c
> index 9099760..bacf962 100644
> --- a/xen-common.c
> +++ b/xen-common.c
> @@ -9,6 +9,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "hw/i386/pc.h"
>  #include "hw/xen/xen_backend.h"
>  #include "qmp-commands.h"
>  #include "sysemu/char.h"
> @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int 
> running,
>  
>  static int xen_init(MachineState *ms)
>  {
> +PCMachineState *pcms = PC_MACHINE(ms);
> +
> +/* Disable ACPI build because Xen handles it */
> +pcms->acpi_build_enabled = false;
> +
>  xen_xc = xc_interface_open(0, 0, 0);
>  if (xen_xc == NULL) {
>  xen_pv_printf(NULL, 0, "can't open xen interface\n");
> -- 
> 2.1.4
> 



Re: [Qemu-devel] [PATCH 09/29] target-sparc: hypervisor mode takes over nucleus mode

2016-11-01 Thread Richard Henderson

On 11/01/2016 12:12 PM, Artyom Tarasenko wrote:

While playing with your patch set, I discovered that we also need a
patch to get_asi for ASI_N et al to retain MMU_HYPV_IDX, and not
decrease privilege. This happens *very* early in the prom boot, with
the first casx (when casx is implemented inline).


Why is the bug not visible with the current master? I wonder if we
have a symmetrical bug somewhere.



Hmm, I dunno.  I assume it has something to do with casx being implemented
out of line, and using helper_ld_asi instead of tcg_gen_qemu_ld_tl directly.


Actually I don't see where the  privilege is decreased: get_asi uses a
local mem_idx variable, the dc->mem_idx is retained.
What patch do you have in mind?


Like this.  Anyway, now that PMM has my atomic and sparc patch sets, you should 
be able to see the problem yourself with your patch set and your rom.



r~
From fa75ae10f26b7611f9f36013a11b066766b9faee Mon Sep 17 00:00:00 2001
From: Richard Henderson 
Date: Mon, 10 Oct 2016 15:52:49 -0500
Subject: target-sparc: Override ASI_N for hypervisor

Signed-off-by: Richard Henderson 

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 43f89d4..2b80d79 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2139,7 +2139,11 @@ static DisasASI get_asi(DisasContext *dc, int insn, 
TCGMemOp memop)
 case ASI_TWINX_NL:
 case ASI_NUCLEUS_QUAD_LDD:
 case ASI_NUCLEUS_QUAD_LDD_L:
-mem_idx = MMU_NUCLEUS_IDX;
+if (hypervisor(dc)) {
+mem_idx = MMU_PHYS_IDX;
+} else {
+mem_idx = MMU_NUCLEUS_IDX;
+}
 break;
 case ASI_AIUP:  /* As if user primary */
 case ASI_AIUPL: /* As if user primary LE */


Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Richard Henderson

On 11/01/2016 12:17 PM, Peter Maydell wrote:

On 1 November 2016 at 17:51, Richard Henderson  wrote:

The usual pain point for me is building for 32-bit on a 64-bit
system, where there is no cross-prefix that one can use, and
PKG_CONFIG_PATH and --extra-cflags are the only changes.


That sounds like a bug in how the 32-bit compilers work on
x86 ;-) Ideally you should be able to get a 32-bit compile
with i586-linux-gnu-gcc...


gcc -m32 worked very well for years, before pkg-config came along and mucked 
things up by using package-local include and library directories.  As long as 
everything was installed in the toolchain-aware search paths, everything Just 
Worked.



r~




Re: [Qemu-devel] [PATCH] arm-smmu: Fix bug when merging two 32 bit words to form 64 bit word

2016-11-01 Thread Alistair Francis
> -Original Message-
> From: Edgar E. Iglesias [mailto:edgar.igles...@xilinx.com]
> Sent: Monday, 26 September 2016 7:56 AM
> To: Paul Kennedy 
> Cc: qemu-devel@nongnu.org; Alistair Francis ; qemu-
> triv...@nongnu.org
> Subject: Re: [PATCH] arm-smmu: Fix bug when merging two 32 bit words to
> form 64 bit word
>
> On Mon, Sep 26, 2016 at 01:52:22PM +, Paul Kennedy wrote:
> > From 7bf015d76a5b53cd061c91f91fea4427101b26fd Mon Sep 17 00:00:00
> 2001
> > From: Paul Kennedy 
> > Date: Mon, 26 Sep 2016 11:59:00 +0100
> > Subject: [PATCH] arm-smmu: Fix bug when merging two 32 bit words to
> > form 64  bit word
> >
> > Fix bug where least significant 32 bits overwrite most significant
> > 32 bits of TTBR1 register.
>
> Hi Paul,
>
> Thanks for the patch.
>
> This code is not upstream yet, it's staged in the Xilinx tree.
> It probably doesn't makes sense to CC qemu-devel@nongnu.org and qemu-
> triv...@nongnu.org yet but there's a g...@xilinx.com for future patches to
> staged Xilinx code.
>
> Reviewed-by: Edgar E. Iglesias 
>
> Alistair, can you merge this to our trees?

Yep, applied!

Thanks for the patch Paul.

Sorry it took so long to apply, we were getting ready for a release.

Thanks,

Alistair

>
> Cheers,
> Edgar
>
> >
> > Signed-off-by: Paul C Kennedy 
> > ---
> >  hw/misc/arm-smmu.c |2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/hw/misc/arm-smmu.c b/hw/misc/arm-smmu.c index
> > 7e7acd8..8255f3e 100644
> > --- a/hw/misc/arm-smmu.c
> > +++ b/hw/misc/arm-smmu.c
> > @@ -6566,7 +6566,7 @@ static bool smmu500_at64(SMMU *s, unsigned
> int
> > cb, hwaddr va,
> >
> >  req.ttbr[1][1] = s->regs[R_SMMU_CB0_TTBR1_HIGH + cb_offset];
> >  req.ttbr[1][1] <<= 32;
> > -req.ttbr[1][1] = s->regs[R_SMMU_CB0_TTBR1_LOW + cb_offset];
> > +req.ttbr[1][1] |= s->regs[R_SMMU_CB0_TTBR1_LOW + cb_offset];
> >
> >  if (req.s2_enabled) {
> >  req.tcr[2] = s->regs[R_SMMU_CB0_TCR_LPAE + cb2_offset];
> > --
> > 1.7.9.5
> >


This email and any attachments are intended for the sole use of the named 
recipient(s) and contain(s) confidential information that may be proprietary, 
privileged or copyrighted under applicable law. If you are not the intended 
recipient, do not read, copy, or forward this email message or any attachments. 
Delete this email message and any attachments immediately.




Re: [Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Sander Eikelenboom

On 2016-11-01 18:44, Wei Liu wrote:

Introduce this field to control whether ACPI build is enabled by a
particular machine or accelerator.

It defaults to true if the machine itself supports ACPI build. Xen
accelerator will disable it because Xen is in charge of building ACPI
tables for the guest.

Signed-off-by: Wei Liu 


Hi Wei,

Just gave it a spin (backporting for xen-unstable wasn't too hard),
and this also works for me !

--
Sander


---
Cc: Igor Mammedov 
Cc: Eduardo Habkost 
Cc: Anthony PERARD 
Cc: Stefano Stabellini 
Cc: Sander Eikelenboom 

v2:
1. drop acpi-build property
2. set acpi_build_enabled to acpi_has_build
3. replace acpi_has_build check in acpi_build()
---
 hw/i386/acpi-build.c | 2 +-
 hw/i386/pc.c | 2 ++
 include/hw/i386/pc.h | 2 ++
 xen-common.c | 6 ++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 5cd1da9..13cbbde 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2953,7 +2953,7 @@ void acpi_setup(void)
 return;
 }

-if (!pcmc->has_acpi_build) {
+if (!pcms->acpi_build_enabled) {
 ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n");
 return;
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f56ea0f..fbd9aed 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2159,6 +2159,8 @@ static void pc_machine_initfn(Object *obj)
 pcms->vmport = ON_OFF_AUTO_AUTO;
 /* nvdimm is disabled on default. */
 pcms->acpi_nvdimm_state.is_enabled = false;
+/* acpi build is enabled by default if machine supports it */
+pcms->acpi_build_enabled = 
PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;

 }

 static void pc_machine_reset(void)
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 98dc772..8eb517f 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -62,6 +62,8 @@ struct PCMachineState {

 AcpiNVDIMMState acpi_nvdimm_state;

+bool acpi_build_enabled;
+
 /* RAM information (sizes, addresses, configuration): */
 ram_addr_t below_4g_mem_size, above_4g_mem_size;

diff --git a/xen-common.c b/xen-common.c
index 9099760..bacf962 100644
--- a/xen-common.c
+++ b/xen-common.c
@@ -9,6 +9,7 @@
  */

 #include "qemu/osdep.h"
+#include "hw/i386/pc.h"
 #include "hw/xen/xen_backend.h"
 #include "qmp-commands.h"
 #include "sysemu/char.h"
@@ -114,6 +115,11 @@ static void xen_change_state_handler(void
*opaque, int running,

 static int xen_init(MachineState *ms)
 {
+PCMachineState *pcms = PC_MACHINE(ms);
+
+/* Disable ACPI build because Xen handles it */
+pcms->acpi_build_enabled = false;
+
 xen_xc = xc_interface_open(0, 0, 0);
 if (xen_xc == NULL) {
 xen_pv_printf(NULL, 0, "can't open xen interface\n");




Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 17:51, Richard Henderson  wrote:
> The usual pain point for me is building for 32-bit on a 64-bit
> system, where there is no cross-prefix that one can use, and
> PKG_CONFIG_PATH and --extra-cflags are the only changes.

That sounds like a bug in how the 32-bit compilers work on
x86 ;-) Ideally you should be able to get a 32-bit compile
with i586-linux-gnu-gcc...

thanks
-- PMM



Re: [Qemu-devel] [PATCH 09/29] target-sparc: hypervisor mode takes over nucleus mode

2016-11-01 Thread Artyom Tarasenko
On Wed, Oct 12, 2016 at 3:29 PM, Richard Henderson  wrote:
> On 10/12/2016 06:33 AM, Artyom Tarasenko wrote:
>>
>> On Mon, Oct 10, 2016 at 11:41 PM, Richard Henderson 
>> wrote:
>>>
>>> On 10/01/2016 05:05 AM, Artyom Tarasenko wrote:


 Signed-off-by: Artyom Tarasenko 
 ---
  target-sparc/cpu.h | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

 diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
 index 0b5c79f..fbeb8d7 100644
 --- a/target-sparc/cpu.h
 +++ b/target-sparc/cpu.h
 @@ -699,10 +699,10 @@ static inline int cpu_mmu_index(CPUSPARCState
 *env1,
 bool ifetch)
  #elif !defined(TARGET_SPARC64)
  return env1->psrs;
  #else
 -if (env1->tl > 0) {
 -return MMU_NUCLEUS_IDX;
 -} else if (cpu_hypervisor_mode(env1)) {
 +if (cpu_hypervisor_mode(env1)) {
  return MMU_HYPV_IDX;
 +} else if (env1->tl > 0) {
 +return MMU_NUCLEUS_IDX;
  } else if (cpu_supervisor_mode(env1)) {
  return MMU_KERNEL_IDX;
  } else {

>>>
>>> While playing with your patch set, I discovered that we also need a patch
>>> to
>>> get_asi for ASI_N et al to retain MMU_HYPV_IDX, and not decrease
>>> privilege.
>>> This happens *very* early in the prom boot, with the first casx (when
>>> casx
>>> is implemented inline).
>>
>>
>> Why is the bug not visible with the current master? I wonder if we
>> have a symmetrical bug somewhere.
>
>
> Hmm, I dunno.  I assume it has something to do with casx being implemented
> out of line, and using helper_ld_asi instead of tcg_gen_qemu_ld_tl directly.
>

Actually I don't see where the  privilege is decreased: get_asi uses a
local mem_idx variable, the dc->mem_idx is retained.
What patch do you have in mind?


-- 
Regards,
Artyom Tarasenko

SPARC and PPC PReP under qemu blog: http://tyom.blogspot.com/search/label/qemu



Re: [Qemu-devel] [PATCH v3 4/4] pc: memhp: enable nvdimm device hotplug

2016-11-01 Thread Stefan Hajnoczi
On Sat, Oct 29, 2016 at 12:35:40AM +0800, Xiao Guangrong wrote:
> _GPE.E04 is dedicated for nvdimm device hotplug
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  docs/specs/acpi_mem_hotplug.txt  |  3 +++
>  hw/acpi/memory_hotplug.c | 31 +++
>  hw/i386/acpi-build.c |  7 +++
>  hw/i386/pc.c | 12 
>  hw/mem/nvdimm.c  |  4 
>  include/hw/acpi/acpi_dev_interface.h |  1 +
>  6 files changed, 46 insertions(+), 12 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC 1/2] linux-headers: update

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 18:13, Alexander Graf  wrote:
> On 01/11/2016 11:19, Peter Maydell wrote:
>> Is there a cover letter email for this series ?
>
>
> I figured that the set is so small that it didn't deserve one :).

The usual rule is "one patch: no cover letter; more than one
patch: cover letter". Forgetting the cover letter is a good
way to cause a patchset to miss my to-review queue, because
it's the cover letter that I mark as "to-review" (and gmail
doesn't thread patchsets).

thanks
-- PMM



Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Daniel P. Berrange
On Tue, Nov 01, 2016 at 04:26:57PM +, Peter Maydell wrote:
> On 1 November 2016 at 16:20, Daniel P. Berrange  wrote:
> > On Tue, Nov 01, 2016 at 09:34:24AM -0600, Richard Henderson wrote:
> >> On 11/01/2016 06:07 AM, Richard Henderson wrote:
> >> > V2, now with more windows workarounds.  I'll note that I have not
> >> > been 100% successful in actually building a mingw64 image.  But
> >> > at least the error Peter saw with v1 is fixed.
> >> >
> >> > I'll report on the other mingw64 failures under separate cover.
> >>
> >> Bah.  I think I've been tripped up by the fact that we fail to preserve
> >> PKG_CONFIG_PATH when re-running configure via make.  We really should
> >> finally fix that -- it's really really annoying when building a non-default
> >> config.
> >
> > I sent a fix for this issue last year, but it never got picked up
> > by anyone for merge
> >
> > https://lists.gnu.org/archive/html/qemu-devel/2015-11/msg04074.html
> 
> Hmm, that's a lot of random environment variables.
> I think I prefer the approach of "if you care about anything
> in the environment, pass it to both configure and to make".
> For instance we don't pass PATH from configure to make, so
> with that patch we would have the opposite behaviour, that
> make can run configure (which will get the saved PATH from
> config.status) but then make itself runs with a different PATH.
> That seems awkwardly inconsistent.

Yeah, it is akward - I was coming at it from an autotools perspective
where the generated Makefile tends to get variables added for each
tool used, containing the fully qualified path. eg so 'make' ends
up running '/usr/bin/sed' instead of just 'sed', and so 'make' is
not dependant on stuff in your env like $PATH - it mostly honours the
env that was set at time of configure.

Of course QEMU is not using autotools, so this doesn't map exactly
hence the inconsistency you point out.

Personally the idea that you must explicitly pass the same environment
to both configure & make is kind of hard to guarantee - I'll have
many terminal windows open when working on QEMU and I can't have any
confidence that the env seen by 'make' will exactly match what I used
with 'configure' (which may have been run quite some time earlier - hours
or even days).

It was a particular pain-point for me when doing mingw builds, where
I would typically use 'PKG_CONFIG_PATH=/blah ./configure' so that I
didn't permanently pollute my shell with mingw32 pkg-config env

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://entangle-photo.org   -o-http://search.cpan.org/~danberr/ :|



Re: [Qemu-devel] [RFC 1/2] linux-headers: update

2016-11-01 Thread Alexander Graf



On 01/11/2016 11:19, Peter Maydell wrote:

On 29 October 2016 at 22:10, Alexander Graf  wrote:

This patch updates the Linux headers to include the in-progress user
space ARM timer patches. It is explicitly RFC, as the patches are not
merged yet.
---


Is there a cover letter email for this series ?


I figured that the set is so small that it didn't deserve one :).


Alex



Re: [Qemu-devel] [PATCH] docs: Fix typos found by codespell

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 17:19, Stefan Weil  wrote:
> Fix also some indefinite articles.
>
> Signed-off-by: Stefan Weil 
> ---
>
> I still don't understand the comment in docs/colo-proxy.txt.

> diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
> index 76767cb..6c8cca5 100644
> --- a/docs/colo-proxy.txt
> +++ b/docs/colo-proxy.txt
> @@ -158,7 +158,7 @@ secondary.
>
>  == Usage ==
>
> -Here, we use demo ip and port discribe more clearly.
> +Here, we use demo ip and port describe more clearly.

The problem with this sentence (as you note) is not the spelling
but that it is not clear what it is trying to say and needs
rewriting.

>  Primary(ip:3.3.3.3):
>  -netdev 
> tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
>  -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66

thanks
-- PMM



[Qemu-devel] when we add EL2 to QEMU TCG ARM emulation and the virt board, should it default on or off?

2016-11-01 Thread Peter Maydell
I'm working on turning on EL2 support in our TCG ARM emulation,
and one area I'm not sure about is whether it should default to
on or off.

We have a few precedents:

For EL3 support:
 * the CPU property is enabled by default but can be disabled by the board
 * 'virt' defaults it to disabled, with a -machine secure=on property
   which turns it on (barfing if KVM is enabled) and also does some other
   stuff like wiring up secure-only memory and devices and making sure
   the GIC has security enabled
 * if the user does -cpu has_el3=yes things will probably not go
   too well and that's unsupported

For PMU support:
 * the CPU property is enabled by default
 * 'virt' defaults it to enabled (except for virt-2.6 and earlier)
 * we do expect the user to be able to turn it on and off with
   a -cpu pmu=yes/no option (and the board model changes behaviour
   based on whether the CPU claims to have the feature)

So what about EL2? Some background facts that are probably relevant:
 * at the moment KVM can't support it, but eventually machines with
   the nested virtualization hardware support will appear (this is
   an ARMv8.3 feature), at which point it ought to work there too
 * if you just enable EL2 then some currently working setups stop
   working (basically any code that was written to assume it was
   entered in EL1); notably this includes kvm-unit-tests (patches
   pending from Drew that fix that). Linux is fine though as it
   checks and handles both.

Disabled-by-default has the advantage that (a) a command line
will work the same for TCG and KVM and (b) nothing that used to
work will break. The disadvantage is that anybody who wants to
be able to run nested KVM will now have to specify a command line
option of some kind.

So:
 (1) should we default on or off? (both at the board level,
 and for the cpu object itself)
 (2) directly user-set cpu property, or board property ?

thanks
-- PMM



Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Wei Liu
On Tue, Nov 01, 2016 at 03:02:31PM -0200, Eduardo Habkost wrote:
> On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote:
> [...]
> > @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int 
> > running,
> >  
> >  static int xen_init(MachineState *ms)
> >  {
> > +PCMachineState *pcms = PC_MACHINE(ms);
> > +
> > +/* Disable ACPI build because Xen handles it */
> > +pcms->acpi_build_enabled = false;
> 
> I just noticed that I don't see any code to disable ACPI build in
> the case of "-machine pc,accel=xen". I suggest a xen_enabled()

Hmm... I think this code snippet does exactly that -- xen_init is the
initialization function for Xen accelerator. So this covers -m xenfv
(because it sets accelerator to xen) and -m whatever,accel=xen.

Did I miss anything?

Wei.



Re: [Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 16:29, Paolo Bonzini  wrote:
> The following changes since commit 39542105bbb19c690219d2f22844d8dfbd9bba05:
>
>   Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging 
> (2016-11-01 12:48:07 +)
>
> are available in the git repository at:
>
>   git://github.com/bonzini/qemu.git tags/for-upstream
>
> for you to fetch changes up to a92e5ebbcea7c1d16f4aa75b08fce298fcd4cdfa:
>
>   main-loop: Suppress I/O thread warning under qtest (2016-11-01 16:07:37 
> +0100)
>
> 
> * NBD bugfix (Changlong)
> * NBD write zeroes support (Eric)
> * Memory backend fixes (Haozhong)
> * Atomics fix (Alex)
> * New AVX512 features (Luwei)
> * "make check" logging fix (Paolo)
> * Chardev refactoring fallout (Paolo)
> * Small checkpatch improvements (Paolo, Jeff)
>

More format string problems, 32-bit build:

/home/petmay01/qemu/exec.c: In function 'file_ram_alloc':
/home/petmay01/qemu/exec.c:1327:9: error: format '%llu' expects
argument of type 'long long unsigned int', but argument 8 has type
'ram_addr_t' [-Werror=format=]
 error_setg(errp, "backing store %s size %"PRId64
 ^

I did a manual make -k so I think this is the last one.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v1 1/1] qemu-doc: update gluster protocol usage guide

2016-11-01 Thread Eric Blake
On 11/01/2016 07:29 AM, Prasanna Kumar Kalever wrote:
> Document:
> 1. The new debug and logfile options with their usages and
> 2. New json format and its usage.
> 
> Signed-off-by: Prasanna Kumar Kalever 
> ---
>  qemu-doc.texi   | 46 --
>  qemu-options.hx | 14 --
>  2 files changed, 44 insertions(+), 16 deletions(-)
> 
> diff --git a/qemu-doc.texi b/qemu-doc.texi
> index 023c140..a7c5722 100644
> --- a/qemu-doc.texi
> +++ b/qemu-doc.texi
> @@ -1041,35 +1041,50 @@ GlusterFS is an user space distributed file system.
>  
>  You can boot from the GlusterFS disk image with the command:
>  @example
> -qemu-system-x86_64 -drive 
> file=gluster[+@var{transport}]://[@var{server}[:@var{port}]]/@var{volname}/@var{image}[?socket=...]
> +URI:
> +qemu-system-x86_64 -drive 
> file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path}[?socket=...]
> +
> +JSON:
> +qemu-system-x86_64 
> 'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":"N","logfile":"...","server":[@{"type":"tcp","host":"...","port":"..."@},@{"type":"unix","socket":"..."@}]@}@}'

"debug":"N" does not match the schema; the parameter is named
"debug-level", and it is an integer not a string.  The parameter is
optional; you could just omit it. But if you are going to include it,
give a reasonable example like "debug-level":0.

Can you break up this long line for legibility?

>  @end example
>  
>  @var{gluster} is the protocol.
>  
> -@var{transport} specifies the transport type used to connect to gluster
> +@var{type} specifies the transport type used to connect to gluster
>  management daemon (glusterd). Valid transport types are
> -tcp, unix and rdma. If a transport type isn't specified, then tcp
> -type is assumed.
> +tcp, unix. Incase of URI, if a transport type isn't specified,

s/tcp, unix/tcp and unix/

s/Incase of URI/In the URI form/

> +then tcp type is assumed.
>  
> -@var{server} specifies the server where the volume file specification for
> -the given volume resides. This can be either hostname, ipv4 address
> -or ipv6 address. ipv6 address needs to be within square brackets [ ].
> -If transport type is unix, then @var{server} field should not be specified.
> +@var{host} specifies the server where the volume file specification for
> +the given volume resides. This can be either hostname, ipv4 address.

s/hostname, ipv4/a hostname or an ipv4/

> +If transport type is unix, then @var{host} field should not be specified.
>  Instead @var{socket} field needs to be populated with the path to unix domain
>  socket.
>  
>  @var{port} is the port number on which glusterd is listening. This is 
> optional
> -and if not specified, QEMU will send 0 which will make gluster to use the
> -default port. If the transport type is unix, then @var{port} should not be
> -specified.
> +and if not specified, it default to port 24007. If the transport type is 
> unix,

s/default/defaults/

> +then @var{port} should not be specified.
> +
> +@var{volume} is the name of the gluster volume which contains the disk image.
> +
> +@var{path} is the path to the actual disk image that resides on gluster 
> volume.
> +
> +@var{debug} is the logging level of the gluster protocol driver. Debug levels

Again, the schema for BlockdevOptionsGluster spells this debug-level, so
you need to fix this paragraph.

> +are 0-9, with 9 being the most verbose, and 0 representing no debugging 
> output.
> +Default is level of 4.  The current logging levels defined in the gluster 
> source

s/Default is level of 4/The default level is 4/

> +are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning,
> +6 - Notice, 7 - Info, 8 - Debug, 9 - Trace
> +
> +@var{logfile} is a commandline option to mention log file path which helps in
> +logging to the specified file and also help in persisting the gfapi logs. The
> +default is stderr.
> +
>  
> @@ -1082,6 +1097,9 @@ qemu-system-x86_64 -drive 
> file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir
>  qemu-system-x86_64 -drive 
> file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
>  qemu-system-x86_64 -drive 
> file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
>  qemu-system-x86_64 -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img
> +qemu-system-x86_64 -drive 
> file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log

Should be file.debug-level

> +qemu-system-x86_64 
> 'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":"9","logfile":"/var/log/qemu-gluster.log","server":[@{"type":"tcp","host":"1.2.3.4","port":24007@},@{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}'
> +qemu-system-x86_64 -drive 
> driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log,file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007,f

Re: [Qemu-devel] [PATCH] docs: Fix typos found by codespell

2016-11-01 Thread Eric Blake
On 11/01/2016 12:19 PM, Stefan Weil wrote:
> Fix also some indefinite articles.
> 
> Signed-off-by: Stefan Weil 
> ---
> 
> I still don't understand the comment in docs/colo-proxy.txt.

Me neither.


> +++ b/docs/colo-proxy.txt
> @@ -158,7 +158,7 @@ secondary.
>  
>  == Usage ==
>  
> -Here, we use demo ip and port discribe more clearly.
> +Here, we use demo ip and port describe more clearly.
>  Primary(ip:3.3.3.3):
>  -netdev 
> tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
>  -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66

Maybe:

Here is an example using demonstration IP and port addresses to more
clearly describe the usage.

The remaining hunks are clear improvements, so:
Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2 1/3] target-m68k: add cmpm

2016-11-01 Thread Richard Henderson

On 10/27/2016 03:43 PM, Laurent Vivier wrote:

+DISAS_INSN(cmpm)
+{
+int opsize = insn_opsize(insn);
+TCGv tmp = tcg_temp_new();
+TCGv src, dst, addr;
+
+src = gen_load(s, opsize, AREG(insn, 0), 1);
+/* delay the update after the second gen_load() */
+tcg_gen_addi_i32(tmp, AREG(insn, 0), opsize_bytes(opsize));
+
+/* but if the we use the same address register to
+ * read the second value, we must use the updated address
+ */
+if (REG(insn, 0) == REG(insn, 9)) {
+addr = tmp;
+} else {
+addr = AREG(insn, 9);
+}
+
+dst = gen_load(s, opsize, addr, 1);
+tcg_gen_mov_i32(AREG(insn, 0), tmp);
+tcg_gen_addi_i32(AREG(insn, 9), addr, opsize_bytes(opsize));


I wonder if we should make this more generic.
I'm thinking of transparently fixing

case 3: /* Indirect postincrement.  */
reg = AREG(insn, 0);
result = gen_ldst(s, opsize, reg, val, what);
/* ??? This is not exception safe.  The instruction may still
   fault after this point.  */
if (what == EA_STORE || !addrp)
tcg_gen_addi_i32(reg, reg, opsize_bytes(opsize));
return result;

If we add to DisasContext

unsigned writeback_mask;
TCGv writeback[8];

Then

static TCGv get_areg(DisasContext *s, unsigned regno)
{
if (s->writeback_mask & (1 << regno)) {
return s->writeback[regno];
} else {
return cpu_aregs[regno];
}
}

static void delay_set_areg(DisasContext *s, unsigned regno,
   TCGv val, bool give_temp)
{
if (s->writeback_mask & (1 << regno)) {
tcg_gen_mov_i32(s->writeback[regno], val);
if (give_temp) {
tcg_temp_free(val);
}
} else {
s->writeback_mask |= 1 << regno;
if (give_temp) {
s->writeback[regno] = val;
} else {
TCGv tmp = tcg_temp_new();
tcg_gen_mov_i32(tmp, val);
s->writeback[regno] = tmp;
}
}
}

static void do_writebacks(DisasContext *s)
{
unsigned mask = s->writeback_mask;
if (mask) {
s->writeback_mask = 0;
do {
unsigned regno = ctz32(mask);
tcg_gen_mov_i32(cpu_aregs[regno], s->writeback[regno]);
tcg_temp_free(s->writeback[regno]);
mask &= mask - 1;
} while (mask);
}
}

where get_areg is used within the AREG macro, delay_set_areg is used in those 
cases where pre- and post-increment are needed, and do_writebacks is invoked at 
the end of disas_m68k_insn.


This all pre-supposes that cmpm is not a special-case within the ISA, that any 
time one reuses a register after modification one sees the new value.  Given 
the existing code within gen_ea, this would seem to be the case.


Thoughts?


r~



Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Richard Henderson

On 11/01/2016 11:43 AM, Daniel P. Berrange wrote:

It was a particular pain-point for me when doing mingw builds, where
I would typically use 'PKG_CONFIG_PATH=/blah ./configure' so that I
didn't permanently pollute my shell with mingw32 pkg-config env


The usual pain point for me is building for 32-bit on a 64-bit system, where 
there is no cross-prefix that one can use, and PKG_CONFIG_PATH and 
--extra-cflags are the only changes.



r~



Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Eduardo Habkost
On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote:
[...]
> @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int 
> running,
>  
>  static int xen_init(MachineState *ms)
>  {
> +PCMachineState *pcms = PC_MACHINE(ms);
> +
> +/* Disable ACPI build because Xen handles it */
> +pcms->acpi_build_enabled = false;

I just noticed that I don't see any code to disable ACPI build in
the case of "-machine pc,accel=xen". I suggest a xen_enabled()
check in pc_init1() and pc_q35_init().

-- 
Eduardo



Re: [Qemu-devel] [PATCH v3 3/4] nvdimm acpi: introduce _FIT

2016-11-01 Thread Stefan Hajnoczi
On Sat, Oct 29, 2016 at 12:35:39AM +0800, Xiao Guangrong wrote:
> +1) Read FIT
> +   As we only reserved one page for NVDIMM ACPI it is impossible to map the
> +   whole FIT data to guest's address space. This function is used by _FIT
> +   method to read a piece of FIT data from QEMU.
> +
> +   Input parameters:
> +   Arg0 – UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62}
> +   Arg1 – Revision ID (set to 1)
> +   Arg2 - Function Index, 0x1
> +   Arg3 - A package containing a buffer whose layout is as follows:
> +
> +   
> +--+-+-+---+
> +   |  Filed   | Byte Length | Byte Offset | Description  
>  |

s/Filed/Field/

The same applies below too.

> +   
> +--+-+-+---+
> +   | offset   | 4   |0| the offset of FIT buffer 
>  |
> +   
> +--+-+-+---+

s/offset of FIT buffer/offset into FIT buffer/

> +
> +   Output:
> +   
> +--+-+-+---+
> +   |  Filed   | Byte Length | Byte Offset | Description  
>  |
> +   
> +--+-+-+---+
> +   |  | | | return status codes  
>  |
> +   |  | | |   0x100 indicates fit has been   
>  |
> +   | status   | 4   |0|   updated
>  |
> +   |  | | | other follows Chapter 3 in DSM   
>  |
> +   |  | | | Spec Rev1
>  |
> +   
> +--+-+-+---+
> +   | fit data |  Varies |4| FIT data 
>  |
> +   |  | | |  
>  |
> +   
> +--+-+-+---+
> +
> +   The FIT offset is maintained by the caller itself, current offset plugs

s/plugs/plus/

> +struct NvdimmFuncReadFITIn {
> +uint32_t offset; /* the offset of FIT buffer. */

s/offset of FIT buffer/offset into FIT buffer/


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Eduardo Habkost
On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote:
> Introduce this field to control whether ACPI build is enabled by a
> particular machine or accelerator.
> 
> It defaults to true so that PC machine has ACPI build by default. Xen
> accelerator will disable it because Xen is in charge of building ACPI
> tables for the guest.
> 
> Signed-off-by: Wei Liu 
> ---
> Cc: Igor Mammedov 
> Cc: Eduardo Habkost 
> Cc: Anthony PERARD 
> Cc: Stefano Stabellini 
> Cc: Sander Eikelenboom 
> 
> Tested a backport version which only involves trivial code movement. It
> worked with both -m xenfv and -m pc,accel=xen.
> 
> Sander, if you want the backported patch please let me know.
> ---
>  hw/i386/acpi-build.c |  2 +-
>  hw/i386/pc.c | 19 +++
>  include/hw/i386/pc.h |  3 +++
>  xen-common.c |  6 ++
>  4 files changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 93be96f..a5cd2fd 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2954,7 +2954,7 @@ void acpi_setup(void)
>  return;
>  }
>  
> -if (!pcmc->has_acpi_build) {
> +if (!pcmc->has_acpi_build || !pcms->acpi_build_enabled) {
>  ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n");
>  return;
>  }
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index f56ea0f..3e7982f 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -2143,6 +2143,20 @@ static bool pc_machine_get_nvdimm(Object *obj, Error 
> **errp)
>  return pcms->acpi_nvdimm_state.is_enabled;
>  }
>  
> +static bool pc_machine_get_acpi_build(Object *obj, Error **errp)
> +{
> +PCMachineState *pcms = PC_MACHINE(obj);
> +
> +return pcms->acpi_build_enabled;
> +}
> +
> +static void pc_machine_set_acpi_build(Object *obj, bool value, Error **errp)
> +{
> +PCMachineState *pcms = PC_MACHINE(obj);
> +
> +pcms->acpi_build_enabled = value;
> +}
> +
>  static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
>  {
>  PCMachineState *pcms = PC_MACHINE(obj);
> @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj)
>  pcms->vmport = ON_OFF_AUTO_AUTO;
>  /* nvdimm is disabled on default. */
>  pcms->acpi_nvdimm_state.is_enabled = false;
> +/* acpi build is enabled by default. */
> +pcms->acpi_build_enabled = true;

If you set:
  pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
the default value will be more consistent with the actual results
(where pc-1.6 and older don't have ACPI build). Then you would
probably be able to remove the pcmc->has_acpi_build check from
acpi_setup() and only check pcms->acpi_build_enabled.

>  }
>  
>  static void pc_machine_reset(void)
> @@ -2319,6 +2335,9 @@ static void pc_machine_class_init(ObjectClass *oc, void 
> *data)
>  
>  object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
>  pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
> +
> +object_class_property_add_bool(oc, PC_MACHINE_ACPI_BUILD,
> +pc_machine_get_acpi_build, pc_machine_set_acpi_build, &error_abort);
>  }
>  
>  static const TypeInfo pc_machine_info = {
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 17fff80..ec8cd0c 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -63,6 +63,8 @@ struct PCMachineState {
>  
>  AcpiNVDIMMState acpi_nvdimm_state;
>  
> +bool acpi_build_enabled;
> +
>  /* RAM information (sizes, addresses, configuration): */
>  ram_addr_t below_4g_mem_size, above_4g_mem_size;
>  
> @@ -87,6 +89,7 @@ struct PCMachineState {
>  #define PC_MACHINE_VMPORT   "vmport"
>  #define PC_MACHINE_SMM  "smm"
>  #define PC_MACHINE_NVDIMM   "nvdimm"
> +#define PC_MACHINE_ACPI_BUILD   "acpi-build"
>  
>  /**
>   * PCMachineClass:
> diff --git a/xen-common.c b/xen-common.c
> index e641ad1..b1858d7 100644
> --- a/xen-common.c
> +++ b/xen-common.c
> @@ -9,6 +9,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "hw/i386/pc.h"
>  #include "hw/xen/xen_backend.h"
>  #include "qmp-commands.h"
>  #include "sysemu/char.h"
> @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int 
> running,
>  
>  static int xen_init(MachineState *ms)
>  {
> +PCMachineState *pcms = PC_MACHINE(ms);
> +
> +/* Disable ACPI build because Xen handles it */
> +pcms->acpi_build_enabled = false;
> +
>  xen_xc = xc_interface_open(0, 0, 0);
>  if (xen_xc == NULL) {
>  xen_be_printf(NULL, 0, "can't open xen interface\n");
> -- 
> 2.1.4
> 

-- 
Eduardo



[Qemu-devel] [PATCH v2] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Wei Liu
Introduce this field to control whether ACPI build is enabled by a
particular machine or accelerator.

It defaults to true if the machine itself supports ACPI build. Xen
accelerator will disable it because Xen is in charge of building ACPI
tables for the guest.

Signed-off-by: Wei Liu 
---
Cc: Igor Mammedov 
Cc: Eduardo Habkost 
Cc: Anthony PERARD 
Cc: Stefano Stabellini 
Cc: Sander Eikelenboom 

v2:
1. drop acpi-build property
2. set acpi_build_enabled to acpi_has_build
3. replace acpi_has_build check in acpi_build()
---
 hw/i386/acpi-build.c | 2 +-
 hw/i386/pc.c | 2 ++
 include/hw/i386/pc.h | 2 ++
 xen-common.c | 6 ++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 5cd1da9..13cbbde 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2953,7 +2953,7 @@ void acpi_setup(void)
 return;
 }
 
-if (!pcmc->has_acpi_build) {
+if (!pcms->acpi_build_enabled) {
 ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n");
 return;
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f56ea0f..fbd9aed 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2159,6 +2159,8 @@ static void pc_machine_initfn(Object *obj)
 pcms->vmport = ON_OFF_AUTO_AUTO;
 /* nvdimm is disabled on default. */
 pcms->acpi_nvdimm_state.is_enabled = false;
+/* acpi build is enabled by default if machine supports it */
+pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
 }
 
 static void pc_machine_reset(void)
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 98dc772..8eb517f 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -62,6 +62,8 @@ struct PCMachineState {
 
 AcpiNVDIMMState acpi_nvdimm_state;
 
+bool acpi_build_enabled;
+
 /* RAM information (sizes, addresses, configuration): */
 ram_addr_t below_4g_mem_size, above_4g_mem_size;
 
diff --git a/xen-common.c b/xen-common.c
index 9099760..bacf962 100644
--- a/xen-common.c
+++ b/xen-common.c
@@ -9,6 +9,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/i386/pc.h"
 #include "hw/xen/xen_backend.h"
 #include "qmp-commands.h"
 #include "sysemu/char.h"
@@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int 
running,
 
 static int xen_init(MachineState *ms)
 {
+PCMachineState *pcms = PC_MACHINE(ms);
+
+/* Disable ACPI build because Xen handles it */
+pcms->acpi_build_enabled = false;
+
 xen_xc = xc_interface_open(0, 0, 0);
 if (xen_xc == NULL) {
 xen_pv_printf(NULL, 0, "can't open xen interface\n");
-- 
2.1.4




Re: [Qemu-devel] [PULL v3 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 16:48, Richard Henderson  wrote:
> Yet another workaround for windows, this time with me actually
> being able to produce (and run) a mingw32 binary.  Only re-sending
> patch 4/9, as that's the only one that changed.
>
>
> r~
>
>
>
> The following changes since commit c46ef897dbb75590af94b6e3dca6a9c5f9a1ea1e:
>
>   Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into 
> staging (2016-11-01 14:27:05 +)
>
> are available in the git repository at:
>
>   git://github.com/rth7680/qemu.git tags/pull-tcg-20161101-2
>
> for you to fetch changes up to 3ff91d7e85176f8b4b131163d7fd801757a2c949:
>
>   tcg: correct 32-bit tcg_gen_ld8s_i64 sign-extension (2016-11-01 10:30:45 
> -0600)
>
> 
> tcg queued patches
>
> 

Applied, thanks.

-- PMM



Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Eduardo Habkost
On Tue, Nov 01, 2016 at 05:12:37PM +, Wei Liu wrote:
> On Tue, Nov 01, 2016 at 03:02:31PM -0200, Eduardo Habkost wrote:
> > On Tue, Nov 01, 2016 at 04:43:11PM +, Wei Liu wrote:
> > [...]
> > > @@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, 
> > > int running,
> > >  
> > >  static int xen_init(MachineState *ms)
> > >  {
> > > +PCMachineState *pcms = PC_MACHINE(ms);
> > > +
> > > +/* Disable ACPI build because Xen handles it */
> > > +pcms->acpi_build_enabled = false;
> > 
> > I just noticed that I don't see any code to disable ACPI build in
> > the case of "-machine pc,accel=xen". I suggest a xen_enabled()
> 
> Hmm... I think this code snippet does exactly that -- xen_init is the
> initialization function for Xen accelerator. So this covers -m xenfv
> (because it sets accelerator to xen) and -m whatever,accel=xen.
> 
> Did I miss anything?

Nevermind, I confused it with pc_xen_hvm_init() (which is
specific for xenfv). The patch looks correct.

-- 
Eduardo



Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features

2016-11-01 Thread Michael S. Tsirkin
On Tue, Nov 01, 2016 at 03:22:01PM +, Peter Maydell wrote:
> On 30 October 2016 at 21:23, Michael S. Tsirkin  wrote:
> > The following changes since commit 5b2ecabaeabc17f032197246c4846b9ba95ba8a6:
> >
> >   Merge remote-tracking branch 'remotes/kraxel/tags/pull-ui-20161028-1' 
> > into staging (2016-10-28 17:59:04 +0100)
> >
> > are available in the git repository at:
> >
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> >
> > for you to fetch changes up to f082ec0225bd15c71e0b4697d2df3af7bad65d7f:
> >
> >   acpi: fix assert failure caused by commit 35c5a52d (2016-10-30 20:06:25 
> > +0200)
> >
> > 
> > virtio, pc: fixes and features
> >
> > nvdimm hotplug support
> > virtio migration and ioeventfd rework
> > virtio crypto device
> > ipmi fixes
> >
> > Signed-off-by: Michael S. Tsirkin 
> >
> 
> Hi; this fails to build on OSX with format string issues:
> 
> /Users/pm215/src/qemu-for-merges/hw/virtio/virtio-crypto.c:770:20:
> error: format specifies type 'unsign
> ed short' but the argument has type 'uint32_t' (aka 'unsigned int')
> [-Werror,-Wformat]
>vcrypto->max_queues, VIRTIO_QUEUE_MAX);
> ~~~^
> /Users/pm215/src/qemu-for-merges/include/qapi/error.h:163:35: note:
> expanded from macro 'error_setg'
> (fmt), ## __VA_ARGS__)
>   ^
> 
> Fun fact: in struct vhost_dev, max_queues is a uint64_t;
> in struct VirtIONet it is a uint16_t; and in VirtIOCrypto
> it is a uint32_t...
> 
> thanks
> -- PMM

Yes - I really think we should move that to virtio core
going forward.
Anyway, I fixed that up and pushed.

A general question - I have a couple of tweaks to tests in
my tree. Didn't push to avoid them blocking the features.
I think test tweaks are fair game after freeze - agree?

-- 
MST



[Qemu-devel] [PULL v3 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Richard Henderson
Yet another workaround for windows, this time with me actually
being able to produce (and run) a mingw32 binary.  Only re-sending
patch 4/9, as that's the only one that changed.


r~



The following changes since commit c46ef897dbb75590af94b6e3dca6a9c5f9a1ea1e:

  Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into 
staging (2016-11-01 14:27:05 +)

are available in the git repository at:

  git://github.com/rth7680/qemu.git tags/pull-tcg-20161101-2

for you to fetch changes up to 3ff91d7e85176f8b4b131163d7fd801757a2c949:

  tcg: correct 32-bit tcg_gen_ld8s_i64 sign-extension (2016-11-01 10:30:45 
-0600)


tcg queued patches


Joseph Myers (1):
  tcg: correct 32-bit tcg_gen_ld8s_i64 sign-extension

Peter Maydell (1):
  tcg/tcg.h: Improve documentation of TCGv_i32 etc types

Pranith Kumar (1):
  MAINTAINERS: Update PPC status and maintainer

Richard Henderson (6):
  target-cris: Do not dump cpu state with -d in_asm
  target-microblaze: Do not dump cpu state with -d in_asm
  target-openrisc: Do not dump cpu state with -d in_asm
  log: Add locking to large logging blocks
  tcg: Add tcg_gen_mulsu2_{i32,i64,tl}
  target-microblaze: Cleanup dec_mul

 MAINTAINERS   |  4 +--
 cpu-exec.c|  2 ++
 exec.c|  2 ++
 include/qemu/log.h| 16 ++
 include/sysemu/os-posix.h | 12 
 include/sysemu/os-win32.h | 15 +
 target-alpha/translate.c  |  2 ++
 target-arm/translate-a64.c|  2 ++
 target-arm/translate.c|  2 ++
 target-cris/translate.c   | 27 +++-
 target-i386/translate.c   |  4 +++
 target-lm32/translate.c   |  2 ++
 target-m68k/translate.c   |  2 ++
 target-microblaze/translate.c | 72 ---
 target-mips/translate.c   |  2 ++
 target-openrisc/translate.c   |  9 +++---
 target-ppc/translate.c|  2 ++
 target-s390x/translate.c  |  2 ++
 target-sh4/translate.c|  2 ++
 target-sparc/translate.c  |  2 ++
 target-tilegx/translate.c |  6 +++-
 target-tricore/translate.c|  2 ++
 target-unicore32/translate.c  |  2 ++
 target-xtensa/translate.c |  2 ++
 tcg/tcg-op.c  | 45 ++-
 tcg/tcg-op.h  |  4 +++
 tcg/tcg.c |  8 +
 tcg/tcg.h | 38 ++-
 translate-all.c   |  2 ++
 29 files changed, 192 insertions(+), 100 deletions(-)



Re: [Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31

2016-11-01 Thread no-reply
Hi,

Your series seems to have some coding style problems. See output below for
more information:

Type: series
Subject: [Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31
Message-id: 1478017783-7703-1-git-send-email-pbonz...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

# Useful git options
git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] 
patchew/1478017783-7703-1-git-send-email-pbonz...@redhat.com -> 
patchew/1478017783-7703-1-git-send-email-pbonz...@redhat.com
Switched to a new branch 'test'
faf292f main-loop: Suppress I/O thread warning under qtest
3fe8c60 docs/rcu.txt: Fix minor typo
d32538f vl: exit qemu on guest panic if -no-shutdown is not set
031d388 checkpatch: allow spaces before parenthesis for 'coroutine_fn'
d9ab8bb x86: add AVX512_4VNNIW and AVX512_4FMAPS features
584e354 slirp: fix CharDriver breakage
d7e7fa4 qemu-char: do not forward events through the mux until QEMU has started
924fea8 nbd: Implement NBD_CMD_WRITE_ZEROES on client
6b5ee21 nbd: Implement NBD_CMD_WRITE_ZEROES on server
f032e48 nbd: Improve server handling of shutdown requests
486ffc6 nbd: Refactor conversion to errno to silence checkpatch
f698de1 nbd: Support shorter handshake
ec7f3de nbd: Less allocation during NBD_OPT_LIST
71e470c nbd: Let client skip portions of server reply
eb8e4b4 nbd: Let server know when client gives up negotiation
4f58cc8 nbd: Share common option-sending code in client
c919458 nbd: Send message along with server NBD_REP_ERR errors
b76fe64 nbd: Share common reply-sending code in server
afa213c nbd: Rename struct nbd_request and nbd_reply
4cb63fc nbd: Rename NbdClientSession to NBDClientSession
3828fb9 nbd: Rename NBDRequest to NBDRequestData
9bf69c9 nbd: Treat flags vs. command type as separate fields
a2d3af6 nbd: Add qemu-nbd -D for human-readable description
aef3582 exec.c: check memory backend file size with 'size' option
25a5c4b exec.c: do not truncate non-empty memory backend file
3c4f049 exec.c: ensure all AddressSpaceDispatch updates under RCU
bb0a1f3 tests: send error_report to test log
0916440 qemu-error: remove dependency of stubs on monitor
a6f8da9 nbd: Use CoQueue for free_sema instead of CoMutex
c22e47a checkpatch: tweak "struct should normally be const" warning

=== OUTPUT BEGIN ===
fatal: unrecognized argument: --no-patch
Checking PATCH 1/30: ...
WARNING: line over 80 characters
#29: FILE: scripts/checkpatch.pl:2502:
+   ERROR("initializer for struct $1 should normally be 
const\n" .

total: 0 errors, 1 warnings, 10 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
fatal: unrecognized argument: --no-patch
Checking PATCH 2/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 3/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 4/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 5/30: ...
WARNING: line over 80 characters
#27: FILE: exec.c:496:
+AddressSpaceDispatch *d = 
atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

total: 0 errors, 1 warnings, 16 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
fatal: unrecognized argument: --no-patch
Checking PATCH 6/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 7/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 8/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 9/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 10/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 11/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 12/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 13/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 14/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 15/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 16/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 17/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 18/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 19/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 20/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 21/30: ...
fatal: unrecognized argument: --no-patch
Checking PATCH 22/30: ...
fatal: unrecognized argument: --no-patc

Re: [Qemu-devel] [PULL 00/47] virtio, pc: fixes and features

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 17:25, Michael S. Tsirkin  wrote:
> A general question - I have a couple of tweaks to tests in
> my tree. Didn't push to avoid them blocking the features.
> I think test tweaks are fair game after freeze - agree?

Yes, especially if they're only tweaks, not enormous
changes.

thanks
-- PMM



Re: [Qemu-devel] [PULL v2 for-2.8 0/9] tcg queued patches

2016-11-01 Thread Richard Henderson

On 11/01/2016 08:26 AM, Peter Maydell wrote:

i686-w64-mingw32 builds fail to link :-(

exec.o: In function `qemu_flockfile':
/home/petmay01/linaro/qemu-for-merges/include/sysemu/os-win32.h:110:
undefined reference to `_imp___lock_file'
exec.o: In function `qemu_funlockfile':
/home/petmay01/linaro/qemu-for-merges/include/sysemu/os-win32.h:115:


I see the symbols present in libmsvcrt.a.  I wonder why it isn't being seen.

Oh well.  I'm going to simply disable file locking for mingw32 and let someone 
who cares deal with it later.  The situation with logging is no worse than 
before the patch.



r~




Re: [Qemu-devel] [PATCH v5] block/vxhs.c Add support for a new block device type called "vxhs"

2016-11-01 Thread Daniel P. Berrange
On Tue, Nov 01, 2016 at 09:52:59AM -0700, ashish mittal wrote:
> Done!
> 
> All builds are fine, but I have no way of knowing if that fixed the
> issue! Please let me know if you still see a problem.

configure works for me after pulling  your libqnio update


Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://entangle-photo.org   -o-http://search.cpan.org/~danberr/ :|



[Qemu-devel] [PULL 25/30] slirp: fix CharDriver breakage

2016-11-01 Thread Paolo Bonzini
SLIRP expects a CharBackend as the third argument to slirp_add_exec,
but net/slirp.c was passing a CharDriverState.  Fix this to restore
guestfwd functionality.

Reported-by: Dr. David Alan Gilbert 
Signed-off-by: Paolo Bonzini 
---
 net/slirp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/slirp.c b/net/slirp.c
index 64dd325..bcd1c5f 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -763,8 +763,7 @@ static int slirp_guestfwd(SlirpState *s, const char 
*config_str,
 return -1;
 }
 
-if (slirp_add_exec(s->slirp, 3, qemu_chr_fe_get_driver(&fwd->hd),
-   &server, port) < 0) {
+if (slirp_add_exec(s->slirp, 3, &fwd->hd, &server, port) < 0) {
 error_report("conflicting/invalid host:port in guest forwarding "
  "rule '%s'", config_str);
 g_free(fwd);
-- 
2.7.4





[Qemu-devel] [PATCH] hw/block/m25p80: Fix typo in local macro name

2016-11-01 Thread Stefan Weil
Signed-off-by: Stefan Weil 
---
 hw/block/m25p80.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index d29ff4c..09ea51f 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -121,7 +121,7 @@ typedef struct FlashPartInfo {
 #define CFG_DUMMY_CLK_LEN 4
 #define NVCFG_DUMMY_CLK_POS 12
 #define VCFG_DUMMY_CLK_POS 4
-#define EVCFG_OUT_DRIVER_STRENGHT_DEF 7
+#define EVCFG_OUT_DRIVER_STRENGTH_DEF 7
 #define EVCFG_VPP_ACCELERATOR (1 << 3)
 #define EVCFG_RESET_HOLD_ENABLED (1 << 4)
 #define NVCFG_DUAL_IO_MASK (1 << 2)
@@ -700,7 +700,7 @@ static void reset_memory(Flash *s)
 );
 
 s->enh_volatile_cfg = 0;
-s->enh_volatile_cfg |= EVCFG_OUT_DRIVER_STRENGHT_DEF;
+s->enh_volatile_cfg |= EVCFG_OUT_DRIVER_STRENGTH_DEF;
 s->enh_volatile_cfg |= EVCFG_VPP_ACCELERATOR;
 s->enh_volatile_cfg |= EVCFG_RESET_HOLD_ENABLED;
 if (s->nonvolatile_cfg & NVCFG_DUAL_IO_MASK) {
-- 
2.10.1




[Qemu-devel] [PATCH] docs: Fix typos found by codespell

2016-11-01 Thread Stefan Weil
Fix also some indefinite articles.

Signed-off-by: Stefan Weil 
---

I still don't understand the comment in docs/colo-proxy.txt.

Stefan

 docs/COLO-FT.txt| 2 +-
 docs/colo-proxy.txt | 2 +-
 qemu-doc.texi   | 2 +-
 qemu-options.hx | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
index 6282938..043b14a 100644
--- a/docs/COLO-FT.txt
+++ b/docs/COLO-FT.txt
@@ -102,7 +102,7 @@ Primary side.
 COLO Proxy:
 Delivers packets to Primary and Seconday, and then compare the responses from
 both side. Then decide whether to start a checkpoint according to some rules.
-Please refer to docs/colo-proxy.txt for more informations.
+Please refer to docs/colo-proxy.txt for more information.
 
 Note:
 HeartBeat has not been implemented yet, so you need to trigger failover process
diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
index 76767cb..6c8cca5 100644
--- a/docs/colo-proxy.txt
+++ b/docs/colo-proxy.txt
@@ -158,7 +158,7 @@ secondary.
 
 == Usage ==
 
-Here, we use demo ip and port discribe more clearly.
+Here, we use demo ip and port describe more clearly.
 Primary(ip:3.3.3.3):
 -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
 -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 023c140..691acf9 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -1037,7 +1037,7 @@ qemu-system-i386 -iscsi 
initiator-name=iqn.qemu.test:my-initiator \
 @node disk_images_gluster
 @subsection GlusterFS disk images
 
-GlusterFS is an user space distributed file system.
+GlusterFS is a user space distributed file system.
 
 You can boot from the GlusterFS disk image with the command:
 @example
diff --git a/qemu-options.hx b/qemu-options.hx
index 95332cc..df5e4e7 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2589,7 +2589,7 @@ qemu-system-i386 --drive 
file=sheepdog://192.0.2.1:3/MyVirtualMachine
 See also @url{http://http://www.osrg.net/sheepdog/}.
 
 @item GlusterFS
-GlusterFS is an user space distributed file system.
+GlusterFS is a user space distributed file system.
 QEMU supports the use of GlusterFS volumes for hosting VM disk images using
 TCP, Unix Domain Sockets and RDMA transport protocols.
 
-- 
2.10.1




[Qemu-devel] [PULL 20/30] nbd: Refactor conversion to errno to silence checkpatch

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

Checkpatch complains that 'return EINVAL' is usually wrong
(since we tend to favor 'return -EINVAL').  But it is a
false positive for nbd_errno_to_system_errno().  Since NBD
may add future defined wire values, refactor the code to
keep checkpatch happy.

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-14-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/client.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index b29963b..7bdce53 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -23,23 +23,31 @@
 
 static int nbd_errno_to_system_errno(int err)
 {
+int ret;
 switch (err) {
 case NBD_SUCCESS:
-return 0;
+ret = 0;
+break;
 case NBD_EPERM:
-return EPERM;
+ret = EPERM;
+break;
 case NBD_EIO:
-return EIO;
+ret = EIO;
+break;
 case NBD_ENOMEM:
-return ENOMEM;
+ret = ENOMEM;
+break;
 case NBD_ENOSPC:
-return ENOSPC;
+ret = ENOSPC;
+break;
 default:
 TRACE("Squashing unexpected error %d to EINVAL", err);
 /* fallthrough */
 case NBD_EINVAL:
-return EINVAL;
+ret = EINVAL;
+break;
 }
+return ret;
 }
 
 /* Definitions for opaque data types */
-- 
2.7.4





[Qemu-devel] [PULL v3 for-2.8 4/9] log: Add locking to large logging blocks

2016-11-01 Thread Richard Henderson
Reuse the existing locking provided by stdio to keep in_asm, cpu,
op, op_opt, op_ind, and out_asm as contiguous blocks.

While it isn't possible to interleave e.g. in_asm or op_opt logs
because of the TB lock protecting all code generation, it is
possible to interleave cpu logs, or to interleave a cpu dump with
an out_asm dump.

For mingw32, we appear to have no viable solution for this.  The locking
functions are not properly exported from the system runtime library.

Reviewed-by: Paolo Bonzini 
Signed-off-by: Richard Henderson 
---
 cpu-exec.c|  2 ++
 exec.c|  2 ++
 include/qemu/log.h| 16 
 include/sysemu/os-posix.h | 12 
 include/sysemu/os-win32.h | 15 +++
 target-alpha/translate.c  |  2 ++
 target-arm/translate-a64.c|  2 ++
 target-arm/translate.c|  2 ++
 target-cris/translate.c   |  2 ++
 target-i386/translate.c   |  4 
 target-lm32/translate.c   |  2 ++
 target-m68k/translate.c   |  2 ++
 target-microblaze/translate.c |  2 ++
 target-mips/translate.c   |  2 ++
 target-openrisc/translate.c   |  2 ++
 target-ppc/translate.c|  2 ++
 target-s390x/translate.c  |  2 ++
 target-sh4/translate.c|  2 ++
 target-sparc/translate.c  |  2 ++
 target-tilegx/translate.c |  6 +-
 target-tricore/translate.c|  2 ++
 target-unicore32/translate.c  |  2 ++
 target-xtensa/translate.c |  2 ++
 tcg/tcg.c |  8 
 translate-all.c   |  2 ++
 25 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 3e40886..4188fed 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -150,11 +150,13 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, 
TranslationBlock *itb)
 #if defined(DEBUG_DISAS)
 if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
 && qemu_log_in_addr_range(itb->pc)) {
+qemu_log_lock();
 #if defined(TARGET_I386)
 log_cpu_state(cpu, CPU_DUMP_CCOP);
 #else
 log_cpu_state(cpu, 0);
 #endif
+qemu_log_unlock();
 }
 #endif /* DEBUG_DISAS */
 
diff --git a/exec.c b/exec.c
index 4d08581..b1094c0 100644
--- a/exec.c
+++ b/exec.c
@@ -911,11 +911,13 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...)
 fprintf(stderr, "\n");
 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 if (qemu_log_separate()) {
+qemu_log_lock();
 qemu_log("qemu: fatal: ");
 qemu_log_vprintf(fmt, ap2);
 qemu_log("\n");
 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
 qemu_log_flush();
+qemu_log_unlock();
 qemu_log_close();
 }
 va_end(ap2);
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 00bf37f..a50e994 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -51,6 +51,22 @@ static inline bool qemu_loglevel_mask(int mask)
 return (qemu_loglevel & mask) != 0;
 }
 
+/* Lock output for a series of related logs.  Since this is not needed
+ * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
+ * assume that qemu_loglevel_mask has already been tested, and that
+ * qemu_loglevel is never set when qemu_logfile is unset.
+ */
+
+static inline void qemu_log_lock(void)
+{
+qemu_flockfile(qemu_logfile);
+}
+
+static inline void qemu_log_unlock(void)
+{
+qemu_funlockfile(qemu_logfile);
+}
+
 /* Logging functions: */
 
 /* main logging function
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 3cfedbc..b0a6c06 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -87,4 +87,16 @@ void *qemu_alloc_stack(size_t *sz);
  */
 void qemu_free_stack(void *stack, size_t sz);
 
+/* POSIX and Mingw32 differ in the name of the stdio lock functions.  */
+
+static inline void qemu_flockfile(FILE *f)
+{
+flockfile(f);
+}
+
+static inline void qemu_funlockfile(FILE *f)
+{
+funlockfile(f);
+}
+
 #endif
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 17aad3b..ff18b23 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -103,6 +103,21 @@ static inline char *realpath(const char *path, char 
*resolved_path)
 return resolved_path;
 }
 
+/* ??? Mingw appears to export _lock_file and _unlock_file as the functions
+ * with which to lock a stdio handle.  But something is wrong in the markup,
+ * either in the header or the library, such that we get undefined references
+ * to "_imp___lock_file" etc when linking.  Since we seem to have no other
+ * alternative, and the usage within the logging functions isn't critical,
+ * ignore FILE locking.
+ */
+
+static inline void qemu_flockfile(FILE *f)
+{
+}
+
+static inline void qemu_funlockfile(FILE *f)
+{
+}
 
 /* We wrap all the sockets functions so that we can
  * set errno based on WSAGetLastError()
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 03e4776..114927b 100644
--- a/target-alpha/translate.

[Qemu-devel] [PULL 18/30] nbd: Less allocation during NBD_OPT_LIST

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

Since we know that the maximum name we are willing to accept
is small enough to stack-allocate, rework the iteration over
NBD_OPT_LIST responses to reuse a stack buffer rather than
allocating every time.  Furthermore, we don't even have to
allocate if we know the server's length doesn't match what
we are searching for.

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-12-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/client.c | 139 ---
 1 file changed, 67 insertions(+), 72 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 5d94e34..0afb8be 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -254,19 +254,28 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,
 return result;
 }
 
-static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
+/* Process another portion of the NBD_OPT_LIST reply.  Set *@match if
+ * the current reply matches @want or if the server does not support
+ * NBD_OPT_LIST, otherwise leave @match alone.  Return 0 if iteration
+ * is complete, positive if more replies are expected, or negative
+ * with @errp set if an unrecoverable error occurred. */
+static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
+Error **errp)
 {
 nbd_opt_reply reply;
 uint32_t len;
 uint32_t namelen;
+char name[NBD_MAX_NAME_SIZE + 1];
 int error;
 
-*name = NULL;
 if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
 return -1;
 }
 error = nbd_handle_reply_err(ioc, &reply, errp);
 if (error <= 0) {
+/* The server did not support NBD_OPT_LIST, so set *match on
+ * the assumption that any name will be accepted.  */
+*match = true;
 return error;
 }
 len = reply.length;
@@ -277,105 +286,91 @@ static int nbd_receive_list(QIOChannel *ioc, char 
**name, Error **errp)
 nbd_send_opt_abort(ioc);
 return -1;
 }
-} else if (reply.type == NBD_REP_SERVER) {
-if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
-error_setg(errp, "incorrect option length %" PRIu32, len);
-nbd_send_opt_abort(ioc);
-return -1;
-}
-if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
-error_setg(errp, "failed to read option name length");
-nbd_send_opt_abort(ioc);
-return -1;
-}
-namelen = be32_to_cpu(namelen);
-len -= sizeof(namelen);
-if (len < namelen) {
-error_setg(errp, "incorrect option name length");
-nbd_send_opt_abort(ioc);
-return -1;
-}
-if (namelen > NBD_MAX_NAME_SIZE) {
-error_setg(errp, "export name length too long %" PRIu32, namelen);
-nbd_send_opt_abort(ioc);
-return -1;
-}
+return 0;
+} else if (reply.type != NBD_REP_SERVER) {
+error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
+   reply.type, NBD_REP_SERVER);
+nbd_send_opt_abort(ioc);
+return -1;
+}
 
-*name = g_new0(char, namelen + 1);
-if (read_sync(ioc, *name, namelen) != namelen) {
-error_setg(errp, "failed to read export name");
-g_free(*name);
-*name = NULL;
-nbd_send_opt_abort(ioc);
-return -1;
-}
-(*name)[namelen] = '\0';
-len -= namelen;
+if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
+error_setg(errp, "incorrect option length %" PRIu32, len);
+nbd_send_opt_abort(ioc);
+return -1;
+}
+if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
+error_setg(errp, "failed to read option name length");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+namelen = be32_to_cpu(namelen);
+len -= sizeof(namelen);
+if (len < namelen) {
+error_setg(errp, "incorrect option name length");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+if (namelen != strlen(want)) {
 if (drop_sync(ioc, len) != len) {
-error_setg(errp, "failed to read export description");
-g_free(*name);
-*name = NULL;
+error_setg(errp, "failed to skip export name with wrong length");
 nbd_send_opt_abort(ioc);
 return -1;
 }
-} else {
-error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
-   reply.type, NBD_REP_SERVER);
+return 1;
+}
+
+assert(namelen < sizeof(name));
+if (read_sync(ioc, name, namelen) != namelen) {
+error_setg(errp, "failed to read export name");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+name[namelen] = '\0';
+len -= namelen;
+if (drop_sync(ioc, len) != len)

[Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Wei Liu
Introduce this field to control whether ACPI build is enabled by a
particular machine or accelerator.

It defaults to true so that PC machine has ACPI build by default. Xen
accelerator will disable it because Xen is in charge of building ACPI
tables for the guest.

Signed-off-by: Wei Liu 
---
Cc: Igor Mammedov 
Cc: Eduardo Habkost 
Cc: Anthony PERARD 
Cc: Stefano Stabellini 
Cc: Sander Eikelenboom 

Tested a backport version which only involves trivial code movement. It
worked with both -m xenfv and -m pc,accel=xen.

Sander, if you want the backported patch please let me know.
---
 hw/i386/acpi-build.c |  2 +-
 hw/i386/pc.c | 19 +++
 include/hw/i386/pc.h |  3 +++
 xen-common.c |  6 ++
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 93be96f..a5cd2fd 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2954,7 +2954,7 @@ void acpi_setup(void)
 return;
 }
 
-if (!pcmc->has_acpi_build) {
+if (!pcmc->has_acpi_build || !pcms->acpi_build_enabled) {
 ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n");
 return;
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f56ea0f..3e7982f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2143,6 +2143,20 @@ static bool pc_machine_get_nvdimm(Object *obj, Error 
**errp)
 return pcms->acpi_nvdimm_state.is_enabled;
 }
 
+static bool pc_machine_get_acpi_build(Object *obj, Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+
+return pcms->acpi_build_enabled;
+}
+
+static void pc_machine_set_acpi_build(Object *obj, bool value, Error **errp)
+{
+PCMachineState *pcms = PC_MACHINE(obj);
+
+pcms->acpi_build_enabled = value;
+}
+
 static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
 {
 PCMachineState *pcms = PC_MACHINE(obj);
@@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj)
 pcms->vmport = ON_OFF_AUTO_AUTO;
 /* nvdimm is disabled on default. */
 pcms->acpi_nvdimm_state.is_enabled = false;
+/* acpi build is enabled by default. */
+pcms->acpi_build_enabled = true;
 }
 
 static void pc_machine_reset(void)
@@ -2319,6 +2335,9 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 
 object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
 pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
+
+object_class_property_add_bool(oc, PC_MACHINE_ACPI_BUILD,
+pc_machine_get_acpi_build, pc_machine_set_acpi_build, &error_abort);
 }
 
 static const TypeInfo pc_machine_info = {
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 17fff80..ec8cd0c 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -63,6 +63,8 @@ struct PCMachineState {
 
 AcpiNVDIMMState acpi_nvdimm_state;
 
+bool acpi_build_enabled;
+
 /* RAM information (sizes, addresses, configuration): */
 ram_addr_t below_4g_mem_size, above_4g_mem_size;
 
@@ -87,6 +89,7 @@ struct PCMachineState {
 #define PC_MACHINE_VMPORT   "vmport"
 #define PC_MACHINE_SMM  "smm"
 #define PC_MACHINE_NVDIMM   "nvdimm"
+#define PC_MACHINE_ACPI_BUILD   "acpi-build"
 
 /**
  * PCMachineClass:
diff --git a/xen-common.c b/xen-common.c
index e641ad1..b1858d7 100644
--- a/xen-common.c
+++ b/xen-common.c
@@ -9,6 +9,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/i386/pc.h"
 #include "hw/xen/xen_backend.h"
 #include "qmp-commands.h"
 #include "sysemu/char.h"
@@ -114,6 +115,11 @@ static void xen_change_state_handler(void *opaque, int 
running,
 
 static int xen_init(MachineState *ms)
 {
+PCMachineState *pcms = PC_MACHINE(ms);
+
+/* Disable ACPI build because Xen handles it */
+pcms->acpi_build_enabled = false;
+
 xen_xc = xc_interface_open(0, 0, 0);
 if (xen_xc == NULL) {
 xen_be_printf(NULL, 0, "can't open xen interface\n");
-- 
2.1.4




Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Wei Liu
On Tue, Nov 01, 2016 at 02:59:25PM -0200, Eduardo Habkost wrote:
> On Tue, Nov 01, 2016 at 04:53:17PM +, Wei Liu wrote:
> > On Tue, Nov 01, 2016 at 02:48:27PM -0200, Eduardo Habkost wrote:
> > [...]
> > > >  static void pc_machine_set_nvdimm(Object *obj, bool value, Error 
> > > > **errp)
> > > >  {
> > > >  PCMachineState *pcms = PC_MACHINE(obj);
> > > > @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj)
> > > >  pcms->vmport = ON_OFF_AUTO_AUTO;
> > > >  /* nvdimm is disabled on default. */
> > > >  pcms->acpi_nvdimm_state.is_enabled = false;
> > > > +/* acpi build is enabled by default. */
> > > > +pcms->acpi_build_enabled = true;
> > > 
> > > If you set:
> > >   pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
> > > the default value will be more consistent with the actual results
> > > (where pc-1.6 and older don't have ACPI build). Then you would
> > > probably be able to remove the pcmc->has_acpi_build check from
> > > acpi_setup() and only check pcms->acpi_build_enabled.
> > > 
> > 
> > Thank you for your good advice.
> > 
> > I take it that you're ok with the name of the field and the code in
> > general? If so I will drop RFC tag in my next submission.
> 
> The rest looks good to me, except that I don't see a reason to
> add a "acpi-build" property if it's not used for anything (all
> code is using the struct field directly).
> 

OK. I'm fine with deleting that. I was just following existing examples.

Wei.

> -- 
> Eduardo



[Qemu-devel] [PULL 30/30] main-loop: Suppress I/O thread warning under qtest

2016-11-01 Thread Paolo Bonzini
From: Max Reitz 

We do not want to display the "I/O thread spun" warning for test cases
that run under qtest. The first attempt for this (commit
01c22f2cdd4fcf02276ea10f48253850a5fd7259) tested whether qtest_enabled()
was true.

Commit 21a24302e85024dd7b2a151158adbc1f5dc5c4dd correctly recognized
that just testing qtest_enabled() is not sufficient since there are some
tests that do not use the qtest accelerator but just the qtest character
device, and thus replaced qtest_enabled() by qtest_driver().

However, there are also some tests that only use the qtest accelerator
and not the qtest chardev; perhaps most notably the bash iotests.
Therefore, we have to check both qtest_enabled() and qtest_driver().

Signed-off-by: Max Reitz 
Message-Id: <20161017180939.27912-1-mre...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 main-loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main-loop.c b/main-loop.c
index 66c4eb6..ad10bca 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -234,7 +234,7 @@ static int os_host_main_loop_wait(int64_t timeout)
 if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
 static bool notified;
 
-if (!notified && !qtest_driver()) {
+if (!notified && !qtest_enabled() && !qtest_driver()) {
 fprintf(stderr,
 "main-loop: WARNING: I/O thread spun for %d iterations\n",
 MAX_MAIN_LOOP_SPIN);
-- 
2.7.4




[Qemu-devel] [PULL 15/30] nbd: Share common option-sending code in client

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

Rather than open-coding each option request, it's easier to
have common helper functions do the work.  That in turn requires
having convenient packed types for handling option requests
and replies.

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-9-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 include/block/nbd.h |  25 +-
 nbd/client.c| 255 ++--
 nbd/nbd-internal.h  |   2 +-
 3 files changed, 131 insertions(+), 151 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index a33581b6..b69bf1d 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -26,15 +26,34 @@
 #include "io/channel-socket.h"
 #include "crypto/tlscreds.h"
 
-/* Note: these are _NOT_ the same as the network representation of an NBD
+/* Handshake phase structs - this struct is passed on the wire */
+
+struct nbd_option {
+uint64_t magic; /* NBD_OPTS_MAGIC */
+uint32_t option; /* NBD_OPT_* */
+uint32_t length;
+} QEMU_PACKED;
+typedef struct nbd_option nbd_option;
+
+struct nbd_opt_reply {
+uint64_t magic; /* NBD_REP_MAGIC */
+uint32_t option; /* NBD_OPT_* */
+uint32_t type; /* NBD_REP_* */
+uint32_t length;
+} QEMU_PACKED;
+typedef struct nbd_opt_reply nbd_opt_reply;
+
+/* Transmission phase structs
+ *
+ * Note: these are _NOT_ the same as the network representation of an NBD
  * request and reply!
  */
 struct NBDRequest {
 uint64_t handle;
 uint64_t from;
 uint32_t len;
-uint16_t flags;
-uint16_t type;
+uint16_t flags; /* NBD_CMD_FLAG_* */
+uint16_t type; /* NBD_CMD_* */
 };
 typedef struct NBDRequest NBDRequest;
 
diff --git a/nbd/client.c b/nbd/client.c
index fe24d31..e78000a 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -75,64 +75,128 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);
 
 */
 
+/* Send an option request.
+ *
+ * The request is for option @opt, with @data containing @len bytes of
+ * additional payload for the request (@len may be -1 to treat @data as
+ * a C string; and @data may be NULL if @len is 0).
+ * Return 0 if successful, -1 with errp set if it is impossible to
+ * continue. */
+static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
+   uint32_t len, const char *data,
+   Error **errp)
+{
+nbd_option req;
+QEMU_BUILD_BUG_ON(sizeof(req) != 16);
+
+if (len == -1) {
+req.length = len = strlen(data);
+}
+TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len);
+
+stq_be_p(&req.magic, NBD_OPTS_MAGIC);
+stl_be_p(&req.option, opt);
+stl_be_p(&req.length, len);
+
+if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) {
+error_setg(errp, "Failed to send option request header");
+return -1;
+}
+
+if (len && write_sync(ioc, (char *) data, len) != len) {
+error_setg(errp, "Failed to send option request data");
+return -1;
+}
+
+return 0;
+}
+
+/* Receive the header of an option reply, which should match the given
+ * opt.  Read through the length field, but NOT the length bytes of
+ * payload. Return 0 if successful, -1 with errp set if it is
+ * impossible to continue. */
+static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
+nbd_opt_reply *reply, Error **errp)
+{
+QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
+if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) {
+error_setg(errp, "failed to read option reply");
+return -1;
+}
+be64_to_cpus(&reply->magic);
+be32_to_cpus(&reply->option);
+be32_to_cpus(&reply->type);
+be32_to_cpus(&reply->length);
+
+TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32,
+  reply->option, reply->type, reply->length);
 
-/* If type represents success, return 1 without further action.
- * If type represents an error reply, consume the rest of the packet on ioc.
- * Then return 0 for unsupported (so the client can fall back to
- * other approaches), or -1 with errp set for other errors.
+if (reply->magic != NBD_REP_MAGIC) {
+error_setg(errp, "Unexpected option reply magic");
+return -1;
+}
+if (reply->option != opt) {
+error_setg(errp, "Unexpected option type %x expected %x",
+   reply->option, opt);
+return -1;
+}
+return 0;
+}
+
+/* If reply represents success, return 1 without further action.
+ * If reply represents an error, consume the optional payload of
+ * the packet on ioc.  Then return 0 for unsupported (so the client
+ * can fall back to other approaches), or -1 with errp set for other
+ * errors.
  */
-static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type,
+static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
 Error **errp)
 

Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Eduardo Habkost
On Tue, Nov 01, 2016 at 04:53:17PM +, Wei Liu wrote:
> On Tue, Nov 01, 2016 at 02:48:27PM -0200, Eduardo Habkost wrote:
> [...]
> > >  static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
> > >  {
> > >  PCMachineState *pcms = PC_MACHINE(obj);
> > > @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj)
> > >  pcms->vmport = ON_OFF_AUTO_AUTO;
> > >  /* nvdimm is disabled on default. */
> > >  pcms->acpi_nvdimm_state.is_enabled = false;
> > > +/* acpi build is enabled by default. */
> > > +pcms->acpi_build_enabled = true;
> > 
> > If you set:
> >   pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
> > the default value will be more consistent with the actual results
> > (where pc-1.6 and older don't have ACPI build). Then you would
> > probably be able to remove the pcmc->has_acpi_build check from
> > acpi_setup() and only check pcms->acpi_build_enabled.
> > 
> 
> Thank you for your good advice.
> 
> I take it that you're ok with the name of the field and the code in
> general? If so I will drop RFC tag in my next submission.

The rest looks good to me, except that I don't see a reason to
add a "acpi-build" property if it's not used for anything (all
code is using the struct field directly).

-- 
Eduardo



[Qemu-devel] [PULL 28/30] vl: exit qemu on guest panic if -no-shutdown is not set

2016-11-01 Thread Paolo Bonzini
From: Christian Borntraeger 

For automated testing purposes it can be helpful to exit qemu
(poweroff) when the guest panics. Make this the default unless
-no-shutdown is specified.

For internal-errors like errors from KVM_RUN the behaviour is
not changed, in other words QEMU does not exit to allow debugging
in the QEMU monitor.

Signed-off-by: Christian Borntraeger 
Message-Id: <1476775794-108012-1-git-send-email-borntrae...@de.ibm.com>
Reviewed-by: Eric Blake 
Signed-off-by: Paolo Bonzini 
---
 qapi-schema.json | 4 ++--
 vl.c | 5 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 5dc96af..b0b4bf6 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4621,10 +4621,10 @@
 #
 # @pause: system pauses
 #
-# Since: 2.1
+# Since: 2.1 (poweroff since 2.8)
 ##
 { 'enum': 'GuestPanicAction',
-  'data': [ 'pause' ] }
+  'data': [ 'pause', 'poweroff' ] }
 
 ##
 # @rtc-reset-reinjection
diff --git a/vl.c b/vl.c
index 368510f..319f641 100644
--- a/vl.c
+++ b/vl.c
@@ -1792,6 +1792,11 @@ void qemu_system_guest_panicked(void)
 }
 qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
 vm_stop(RUN_STATE_GUEST_PANICKED);
+if (!no_shutdown) {
+qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF,
+   &error_abort);
+qemu_system_shutdown_request();
+}
 }
 
 void qemu_system_reset_request(void)
-- 
2.7.4





[Qemu-devel] [PULL 14/30] nbd: Send message along with server NBD_REP_ERR errors

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

The NBD Protocol allows us to send human-readable messages
along with any NBD_REP_ERR error during option negotiation;
make use of this fact for clients that know what to do with
our message.

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-8-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/server.c | 78 +---
 1 file changed, 59 insertions(+), 19 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 0f0c68c..fa01e49 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -236,6 +236,38 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, 
uint32_t type, uint32_t opt)
 return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
 }
 
+/* Send an error reply.
+ * Return -errno on error, 0 on success. */
+static int GCC_FMT_ATTR(4, 5)
+nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
+   uint32_t opt, const char *fmt, ...)
+{
+va_list va;
+char *msg;
+int ret;
+size_t len;
+
+va_start(va, fmt);
+msg = g_strdup_vprintf(fmt, va);
+va_end(va);
+len = strlen(msg);
+assert(len < 4096);
+TRACE("sending error message \"%s\"", msg);
+ret = nbd_negotiate_send_rep_len(ioc, type, opt, len);
+if (ret < 0) {
+goto out;
+}
+if (nbd_negotiate_write(ioc, msg, len) != len) {
+LOG("write failed (error message)");
+ret = -EIO;
+} else {
+ret = 0;
+}
+out:
+g_free(msg);
+return ret;
+}
+
 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
  * Return -errno on error, 0 on success. */
 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
@@ -281,8 +313,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, 
uint32_t length)
 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
 return -EIO;
 }
-return nbd_negotiate_send_rep(client->ioc,
-  NBD_REP_ERR_INVALID, NBD_OPT_LIST);
+return nbd_negotiate_send_rep_err(client->ioc,
+  NBD_REP_ERR_INVALID, NBD_OPT_LIST,
+  "OPT_LIST should not have length");
 }
 
 /* For each export, send a NBD_REP_SERVER reply. */
@@ -329,7 +362,8 @@ fail:
 return rc;
 }
 
-
+/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
+ * new channel for all further (now-encrypted) communication. */
 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
  uint32_t length)
 {
@@ -343,7 +377,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,
 if (nbd_negotiate_drop_sync(ioc, length) != length) {
 return NULL;
 }
-nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS);
+nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
+   "OPT_STARTTLS should not have length");
 return NULL;
 }
 
@@ -474,13 +509,15 @@ static int nbd_negotiate_options(NBDClient *client)
 return -EINVAL;
 
 default:
-TRACE("Option 0x%" PRIx32 " not permitted before TLS",
-  clientflags);
 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
 return -EIO;
 }
-ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
- clientflags);
+ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_TLS_REQD,
+ clientflags,
+ "Option 0x%" PRIx32
+ "not permitted before TLS",
+ clientflags);
 if (ret < 0) {
 return ret;
 }
@@ -506,27 +543,30 @@ static int nbd_negotiate_options(NBDClient *client)
 return -EIO;
 }
 if (client->tlscreds) {
-TRACE("TLS already enabled");
-ret = nbd_negotiate_send_rep(client->ioc,
- NBD_REP_ERR_INVALID,
- clientflags);
+ret = nbd_negotiate_send_rep_err(client->ioc,
+ NBD_REP_ERR_INVALID,
+ clientflags,
+ "TLS already enabled");
 } else {
-TRACE("TLS not configured");
-ret = nbd_negotiate_send_rep(client->ioc,
-  

Re: [Qemu-devel] [RFC PATCH v3 00/18] x86: Secure Encrypted Virtualization (AMD)

2016-11-01 Thread Peter Maydell
On 1 November 2016 at 16:22,   wrote:
> Hi,
>
> Your series seems to have some coding style problems. See output below for
> more information:

Fam, it looks like patchew's test script is assuming git arguments
that don't exist on the git version it's trying to use:

> === TEST SCRIPT BEGIN ===
> #!/bin/bash
>
> BASE=base
> n=1
> total=$(git log --oneline $BASE.. | wc -l)
> failed=0
>
> # Useful git options
> git config --local diff.renamelimit 0
> git config --local diff.renames True
>
> commits="$(git log --format=%H --reverse $BASE..)"
> for c in $commits; do
> echo "Checking PATCH $n/$total: $(git show --no-patch --format=%s $c)..."
> if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; 
> then
> failed=1
> echo
> fi
> n=$((n+1))
> done
>
> exit $failed
> === TEST SCRIPT END ===

> === OUTPUT BEGIN ===
> fatal: unrecognized argument: --no-patch
> Checking PATCH 1/18: ...
> fatal: unrecognized argument: --no-patch
> Checking PATCH 2/18: ...
> fatal: unrecognized argument: --no-patch
[etc]

thanks
-- PMM



[Qemu-devel] [PULL 22/30] nbd: Implement NBD_CMD_WRITE_ZEROES on server

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

Upstream NBD protocol recently added the ability to efficiently
write zeroes without having to send the zeroes over the wire,
along with a flag to control whether the client wants to allow
a hole.

Note that when it comes to requiring full allocation, vs.
permitting optimizations, the NBD spec intentionally picked a
different sense for the flag; the rules in qemu are:
MAY_UNMAP == 0: must write zeroes
MAY_UNMAP == 1: may use holes if reads will see zeroes

while in NBD, the rules are:
FLAG_NO_HOLE == 1: must write zeroes
FLAG_NO_HOLE == 0: may use holes if reads will see zeroes

In all cases, the 'may use holes' scenario is optional (the
server need not use a hole, and must not use a hole if
subsequent reads would not see zeroes).

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-16-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 include/block/nbd.h |  8 ++--
 nbd/server.c| 42 --
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index eea7ef0..3e373f0 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -71,6 +71,7 @@ typedef struct NBDReply NBDReply;
 #define NBD_FLAG_SEND_FUA   (1 << 3)/* Send FUA (Force Unit 
Access) */
 #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - 
rotational media */
 #define NBD_FLAG_SEND_TRIM  (1 << 5)/* Send TRIM (discard) */
+#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
 
 /* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
@@ -96,7 +97,8 @@ typedef struct NBDReply NBDReply;
 #define NBD_REP_ERR_SHUTDOWNNBD_REP_ERR(7)  /* Server shutting down */
 
 /* Request flags, sent from client to server during transmission phase */
-#define NBD_CMD_FLAG_FUA(1 << 0)
+#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write */
+#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */
 
 /* Supported request types */
 enum {
@@ -104,7 +106,9 @@ enum {
 NBD_CMD_WRITE = 1,
 NBD_CMD_DISC = 2,
 NBD_CMD_FLUSH = 3,
-NBD_CMD_TRIM = 4
+NBD_CMD_TRIM = 4,
+/* 5 reserved for failed experiment NBD_CMD_CACHE */
+NBD_CMD_WRITE_ZEROES = 6,
 };
 
 #define NBD_DEFAULT_PORT   10809
diff --git a/nbd/server.c b/nbd/server.c
index 0b50caa..5b76261 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -616,7 +616,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 char buf[8 + 8 + 8 + 128];
 int rc;
 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
-  NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
+  NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
+  NBD_FLAG_SEND_WRITE_ZEROES);
 bool oldStyle;
 size_t len;
 
@@ -1146,11 +1147,17 @@ static ssize_t nbd_co_receive_request(NBDRequestData 
*req,
 rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
 goto out;
 }
-if (request->flags & ~NBD_CMD_FLAG_FUA) {
+if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
 LOG("unsupported flags (got 0x%x)", request->flags);
 rc = -EINVAL;
 goto out;
 }
+if (request->type != NBD_CMD_WRITE_ZEROES &&
+(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
+LOG("unexpected flags (got 0x%x)", request->flags);
+rc = -EINVAL;
+goto out;
+}
 
 rc = 0;
 
@@ -1255,6 +1262,37 @@ static void nbd_trip(void *opaque)
 }
 break;
 
+case NBD_CMD_WRITE_ZEROES:
+TRACE("Request type is WRITE_ZEROES");
+
+if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+TRACE("Server is read-only, return error");
+reply.error = EROFS;
+goto error_reply;
+}
+
+TRACE("Writing to device");
+
+flags = 0;
+if (request.flags & NBD_CMD_FLAG_FUA) {
+flags |= BDRV_REQ_FUA;
+}
+if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
+flags |= BDRV_REQ_MAY_UNMAP;
+}
+ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
+request.len, flags);
+if (ret < 0) {
+LOG("writing to file failed");
+reply.error = -ret;
+goto error_reply;
+}
+
+if (nbd_co_send_reply(req, &reply, 0) < 0) {
+goto out;
+}
+break;
+
 case NBD_CMD_DISC:
 /* unreachable, thanks to special case in nbd_co_receive_request() */
 abort();
-- 
2.7.4





[Qemu-devel] [PULL 11/30] nbd: Rename NbdClientSession to NBDClientSession

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

It's better to use consistent capitalization of the namespace
used for NBD functions; we have more instances of NBD* than
Nbd*.

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-5-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 block/nbd-client.c | 26 +-
 block/nbd-client.h |  6 +++---
 block/nbd.c|  4 ++--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 8ca5030..e6fe603 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -33,7 +33,7 @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
 
-static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
+static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
 {
 int i;
 
@@ -46,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
 
 static void nbd_teardown_connection(BlockDriverState *bs)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 
 if (!client->ioc) { /* Already closed */
 return;
@@ -68,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 static void nbd_reply_ready(void *opaque)
 {
 BlockDriverState *bs = opaque;
-NbdClientSession *s = nbd_get_client_session(bs);
+NBDClientSession *s = nbd_get_client_session(bs);
 uint64_t i;
 int ret;
 
@@ -119,7 +119,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
struct nbd_request *request,
QEMUIOVector *qiov)
 {
-NbdClientSession *s = nbd_get_client_session(bs);
+NBDClientSession *s = nbd_get_client_session(bs);
 AioContext *aio_context;
 int rc, ret, i;
 
@@ -167,7 +167,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
 return rc;
 }
 
-static void nbd_co_receive_reply(NbdClientSession *s,
+static void nbd_co_receive_reply(NBDClientSession *s,
  struct nbd_request *request,
  struct nbd_reply *reply,
  QEMUIOVector *qiov)
@@ -195,7 +195,7 @@ static void nbd_co_receive_reply(NbdClientSession *s,
 }
 }
 
-static void nbd_coroutine_start(NbdClientSession *s,
+static void nbd_coroutine_start(NBDClientSession *s,
struct nbd_request *request)
 {
 /* Poor man semaphore.  The free_sema is locked when no other request
@@ -209,7 +209,7 @@ static void nbd_coroutine_start(NbdClientSession *s,
 /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
 }
 
-static void nbd_coroutine_end(NbdClientSession *s,
+static void nbd_coroutine_end(NBDClientSession *s,
 struct nbd_request *request)
 {
 int i = HANDLE_TO_INDEX(s, request->handle);
@@ -222,7 +222,7 @@ static void nbd_coroutine_end(NbdClientSession *s,
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
  uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = {
 .type = NBD_CMD_READ,
 .from = offset,
@@ -248,7 +248,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
   uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = {
 .type = NBD_CMD_WRITE,
 .from = offset,
@@ -277,7 +277,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 
 int nbd_client_co_flush(BlockDriverState *bs)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = { .type = NBD_CMD_FLUSH };
 struct nbd_reply reply;
 ssize_t ret;
@@ -302,7 +302,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
 
 int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = {
 .type = NBD_CMD_TRIM,
 .from = offset,
@@ -343,7 +343,7 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,
 
 void nbd_client_close(BlockDriverState *bs)
 {
-NbdClientSession *client = nbd_get_client_session(bs);
+NBDClientSession *client = nbd_get_client_session(bs);
 struct nbd_request request = { .type = NBD_CMD_DISC };
 
 if (client->ioc == NULL) {
@@ -362,7 +362,7 @@ int nbd_client_init(BlockDriverState *bs,
 const char *hostname,
 Error **errp)
 {
-NbdClientSession *c

Re: [Qemu-devel] [PATCH RFC] PCMachineState: introduce acpi_build_enabled field

2016-11-01 Thread Wei Liu
On Tue, Nov 01, 2016 at 02:48:27PM -0200, Eduardo Habkost wrote:
[...]
> >  static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
> >  {
> >  PCMachineState *pcms = PC_MACHINE(obj);
> > @@ -2159,6 +2173,8 @@ static void pc_machine_initfn(Object *obj)
> >  pcms->vmport = ON_OFF_AUTO_AUTO;
> >  /* nvdimm is disabled on default. */
> >  pcms->acpi_nvdimm_state.is_enabled = false;
> > +/* acpi build is enabled by default. */
> > +pcms->acpi_build_enabled = true;
> 
> If you set:
>   pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
> the default value will be more consistent with the actual results
> (where pc-1.6 and older don't have ACPI build). Then you would
> probably be able to remove the pcmc->has_acpi_build check from
> acpi_setup() and only check pcms->acpi_build_enabled.
> 

Thank you for your good advice.

I take it that you're ok with the name of the field and the code in
general? If so I will drop RFC tag in my next submission.

Wei.



[Qemu-devel] [PULL 26/30] x86: add AVX512_4VNNIW and AVX512_4FMAPS features

2016-11-01 Thread Paolo Bonzini
From: Luwei Kang 

The spec can be found in Intel Software Developer Manual or in
Instruction Set Extensions Programming Reference.

Signed-off-by: Piotr Luc 
Signed-off-by: Luwei Kang 
Message-Id: <1477902446-5932-1-git-send-email-he.c...@linux.intel.com>
Signed-off-by: Paolo Bonzini 
---
 target-i386/cpu.c | 19 ++-
 target-i386/cpu.h |  4 
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0f8a8fb..14c5186 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -239,6 +239,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t 
vendor1,
   CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
   CPUID_7_0_EBX_RDSEED */
 #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
+#define TCG_7_0_EDX_FEATURES 0
 #define TCG_APM_FEATURES 0
 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
 #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
@@ -444,6 +445,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
 .cpuid_reg = R_ECX,
 .tcg_features = TCG_7_0_ECX_FEATURES,
 },
+[FEAT_7_0_EDX] = {
+.feat_names = {
+NULL, NULL, "avx512-4vnniw", "avx512-4fmaps",
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL,
+},
+.cpuid_eax = 7,
+.cpuid_needs_ecx = true, .cpuid_ecx = 0,
+.cpuid_reg = R_EDX,
+.tcg_features = TCG_7_0_EDX_FEATURES,
+},
 [FEAT_8000_0007_EDX] = {
 .feat_names = {
 NULL, NULL, NULL, NULL,
@@ -2560,7 +2577,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) {
 *ecx |= CPUID_7_0_ECX_OSPKE;
 }
-*edx = 0; /* Reserved */
+*edx = env->features[FEAT_7_0_EDX]; /* Feature flags */
 } else {
 *eax = 0;
 *ebx = 0;
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 6303d65..c605724 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -443,6 +443,7 @@ typedef enum FeatureWord {
 FEAT_1_ECX, /* CPUID[1].ECX */
 FEAT_7_0_EBX,   /* CPUID[EAX=7,ECX=0].EBX */
 FEAT_7_0_ECX,   /* CPUID[EAX=7,ECX=0].ECX */
+FEAT_7_0_EDX,   /* CPUID[EAX=7,ECX=0].EDX */
 FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */
 FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
 FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
@@ -629,6 +630,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_ECX_OSPKE(1U << 4)
 #define CPUID_7_0_ECX_RDPID(1U << 22)
 
+#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network 
Instructions */
+#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation 
Single Precision */
+
 #define CPUID_XSAVE_XSAVEOPT   (1U << 0)
 #define CPUID_XSAVE_XSAVEC (1U << 1)
 #define CPUID_XSAVE_XGETBV1(1U << 2)
-- 
2.7.4





[Qemu-devel] [PULL 05/30] exec.c: ensure all AddressSpaceDispatch updates under RCU

2016-11-01 Thread Paolo Bonzini
From: Alex Bennée 

The memory_dispatch field is meant to be protected by RCU so we should
use the correct primitives when accessing it. This race was flagged up
by the ThreadSanitizer.

Signed-off-by: Alex Bennée 
Message-Id: <20161021153418.21571-1-alex.ben...@linaro.org>
Signed-off-by: Paolo Bonzini 
---
 exec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index 4d08581..a19ed21 100644
--- a/exec.c
+++ b/exec.c
@@ -493,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, 
hwaddr addr,
   hwaddr *xlat, hwaddr *plen)
 {
 MemoryRegionSection *section;
-AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
+AddressSpaceDispatch *d = 
atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 
 section = address_space_translate_internal(d, addr, xlat, plen, false);
 
@@ -2376,7 +2376,7 @@ static void tcg_commit(MemoryListener *listener)
  * may have split the RCU critical section.
  */
 d = atomic_rcu_read(&cpuas->as->dispatch);
-cpuas->memory_dispatch = d;
+atomic_rcu_set(&cpuas->memory_dispatch, d);
 tlb_flush(cpuas->cpu, 1);
 }
 
-- 
2.7.4





[Qemu-devel] [PULL 21/30] nbd: Improve server handling of shutdown requests

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

NBD commit 6d34500b clarified how clients and servers are supposed
to behave before closing a connection. It added NBD_REP_ERR_SHUTDOWN
(for the server to announce it is about to go away during option
haggling, so the client should quit sending NBD_OPT_* other than
NBD_OPT_ABORT) and ESHUTDOWN (for the server to announce it is about
to go away during transmission, so the client should quit sending
NBD_CMD_* other than NBD_CMD_DISC).  It also clarified that
NBD_OPT_ABORT gets a reply, while NBD_CMD_DISC does not.

This patch merely adds the missing reply to NBD_OPT_ABORT and teaches
the client to recognize server errors.  Actually teaching the server
to send NBD_REP_ERR_SHUTDOWN or ESHUTDOWN would require knowing that
the server has been requested to shut down soon (maybe we could do
that by installing a SIGINT handler in qemu-nbd, which transitions
from RUNNING to a new state that waits for the client to react,
rather than just out-right quitting - but that's a bigger task for
another day).

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-15-git-send-email-ebl...@redhat.com>
[Move dummy ESHUTDOWN to include/qemu/osdep.h. - Paolo]
Signed-off-by: Paolo Bonzini 
---
 include/block/nbd.h  | 13 +
 include/qemu/osdep.h |  3 +++
 nbd/client.c | 18 ++
 nbd/nbd-internal.h   |  1 +
 nbd/server.c | 10 ++
 5 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index d326308..eea7ef0 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -83,12 +83,17 @@ typedef struct NBDReply NBDReply;
 #define NBD_FLAG_C_NO_ZEROES  (1 << 1) /* End handshake without zeroes. */
 
 /* Reply types. */
+#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
+
 #define NBD_REP_ACK (1) /* Data sending finished. */
 #define NBD_REP_SERVER  (2) /* Export description. */
-#define NBD_REP_ERR_UNSUP   ((UINT32_C(1) << 31) | 1) /* Unknown option. */
-#define NBD_REP_ERR_POLICY  ((UINT32_C(1) << 31) | 2) /* Server denied */
-#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
-#define NBD_REP_ERR_TLS_REQD((UINT32_C(1) << 31) | 5) /* TLS required */
+
+#define NBD_REP_ERR_UNSUP   NBD_REP_ERR(1)  /* Unknown option */
+#define NBD_REP_ERR_POLICY  NBD_REP_ERR(2)  /* Server denied */
+#define NBD_REP_ERR_INVALID NBD_REP_ERR(3)  /* Invalid length */
+#define NBD_REP_ERR_PLATFORMNBD_REP_ERR(4)  /* Not compiled in */
+#define NBD_REP_ERR_TLS_REQDNBD_REP_ERR(5)  /* TLS required */
+#define NBD_REP_ERR_SHUTDOWNNBD_REP_ERR(7)  /* Server shutting down */
 
 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA(1 << 0)
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 0e3c330..689f253 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -128,6 +128,9 @@ extern int daemon(int, int);
 #if !defined(EMEDIUMTYPE)
 #define EMEDIUMTYPE 4098
 #endif
+#if !defined(ESHUTDOWN)
+#define ESHUTDOWN 4099
+#endif
 #ifndef TIME_MAX
 #define TIME_MAX LONG_MAX
 #endif
diff --git a/nbd/client.c b/nbd/client.c
index 7bdce53..7db4301 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -40,6 +40,9 @@ static int nbd_errno_to_system_errno(int err)
 case NBD_ENOSPC:
 ret = ENOSPC;
 break;
+case NBD_ESHUTDOWN:
+ret = ESHUTDOWN;
+break;
 default:
 TRACE("Squashing unexpected error %d to EINVAL", err);
 /* fallthrough */
@@ -239,11 +242,21 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,
reply->option);
 break;
 
+case NBD_REP_ERR_PLATFORM:
+error_setg(errp, "Server lacks support for option %" PRIx32,
+   reply->option);
+break;
+
 case NBD_REP_ERR_TLS_REQD:
 error_setg(errp, "TLS negotiation required before option %" PRIx32,
reply->option);
 break;
 
+case NBD_REP_ERR_SHUTDOWN:
+error_setg(errp, "Server shutting down before option %" PRIx32,
+   reply->option);
+break;
+
 default:
 error_setg(errp, "Unknown error code when asking for option %" PRIx32,
reply->option);
@@ -785,6 +798,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
 
 reply->error = nbd_errno_to_system_errno(reply->error);
 
+if (reply->error == ESHUTDOWN) {
+/* This works even on mingw which lacks a native ESHUTDOWN */
+LOG("server shutting down");
+return -EINVAL;
+}
 TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32
   ", handle = %" PRIu64" }",
   magic, reply->error, reply->handle);
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index dd57e18..eee20ab 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -92,6 +92,7 @@
 #define NBD_ENOMEM 12

Re: [Qemu-devel] [PATCH v5 28/33] cputlb: make tlb_flush_by_mmuidx safe for MTTCG

2016-11-01 Thread Alex Bennée

Pranith Kumar  writes:

> On Tue, Nov 1, 2016 at 3:45 AM, Alex Bennée  wrote:
>
>>
>> Odd. I'll look into it. What was you configure string and host architecture?
>>
>
> It's a plain configure string, nothing special:
>
> $ ../configure --target-list=aarch64-softmmu
>
> But I did rebase your patches on master, May be something new in the
> tree tripped this?

Yeah, I'll look for a fix with dynamic page sizes when I re-base.

--
Alex Bennée



[Qemu-devel] [PULL 23/30] nbd: Implement NBD_CMD_WRITE_ZEROES on client

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

Upstream NBD protocol recently added the ability to efficiently
write zeroes without having to send the zeroes over the wire,
along with a flag to control whether the client wants a hole.

The generic block code takes care of falling back to the obvious
write of lots of zeroes if we return -ENOTSUP because the server
does not have WRITE_ZEROES.

Ideally, since NBD_CMD_WRITE_ZEROES does not involve any data
over the wire, we want to support transactions that are much
larger than the normal 32M limit imposed on NBD_CMD_WRITE.  But
the server may still have a limit smaller than UINT_MAX, so
until experimental NBD protocol additions for advertising various
command sizes is finalized (see [1], [2]), for now we just stick to
the same limits as normal writes.

[1] https://github.com/yoe/nbd/blob/extension-info/doc/proto.md
[2] https://sourceforge.net/p/nbd/mailman/message/35081223/

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-17-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 block/nbd-client.c | 35 +++
 block/nbd-client.h |  2 ++
 block/nbd.c|  4 
 3 files changed, 41 insertions(+)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 3e5f9f5..2a302de 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -275,6 +275,41 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 return -reply.error;
 }
 
+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+int count, BdrvRequestFlags flags)
+{
+ssize_t ret;
+NBDClientSession *client = nbd_get_client_session(bs);
+NBDRequest request = {
+.type = NBD_CMD_WRITE_ZEROES,
+.from = offset,
+.len = count,
+};
+NBDReply reply;
+
+if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+return -ENOTSUP;
+}
+
+if (flags & BDRV_REQ_FUA) {
+assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+request.flags |= NBD_CMD_FLAG_FUA;
+}
+if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+request.flags |= NBD_CMD_FLAG_NO_HOLE;
+}
+
+nbd_coroutine_start(client, &request);
+ret = nbd_co_send_request(bs, &request, NULL);
+if (ret < 0) {
+reply.error = -ret;
+} else {
+nbd_co_receive_reply(client, &request, &reply, NULL);
+}
+nbd_coroutine_end(client, &request);
+return -reply.error;
+}
+
 int nbd_client_co_flush(BlockDriverState *bs)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 51be419..f8d6006 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t 
offset, int count);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
   uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+int count, BdrvRequestFlags flags);
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
  uint64_t bytes, QEMUIOVector *qiov, int flags);
 
diff --git a/block/nbd.c b/block/nbd.c
index b281484..9cff839 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -466,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs)
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
 bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
 }
 
@@ -558,6 +559,7 @@ static BlockDriver bdrv_nbd = {
 .bdrv_file_open = nbd_open,
 .bdrv_co_preadv = nbd_client_co_preadv,
 .bdrv_co_pwritev= nbd_client_co_pwritev,
+.bdrv_co_pwrite_zeroes  = nbd_client_co_pwrite_zeroes,
 .bdrv_close = nbd_close,
 .bdrv_co_flush_to_os= nbd_co_flush,
 .bdrv_co_pdiscard   = nbd_client_co_pdiscard,
@@ -576,6 +578,7 @@ static BlockDriver bdrv_nbd_tcp = {
 .bdrv_file_open = nbd_open,
 .bdrv_co_preadv = nbd_client_co_preadv,
 .bdrv_co_pwritev= nbd_client_co_pwritev,
+.bdrv_co_pwrite_zeroes  = nbd_client_co_pwrite_zeroes,
 .bdrv_close = nbd_close,
 .bdrv_co_flush_to_os= nbd_co_flush,
 .bdrv_co_pdiscard   = nbd_client_co_pdiscard,
@@ -594,6 +597,7 @@ static BlockDriver bdrv_nbd_unix = {
 .bdrv_file_open = nbd_open,
 .bdrv_co_preadv = nbd_client_co_preadv,
 .bdrv_co_pwritev= nbd_client_co_pwritev,
+.bdrv_co_pwrite_zeroes  = nbd_client_co_pwrite_zeroes,
 .bdrv_close = nbd_close,
 .bdrv_co_flush_to_os= nbd_co_flush,
 .bdrv_co_pdiscard   = nbd_client_co_pdiscard,
-- 
2.7.4





[Qemu-devel] [PULL 16/30] nbd: Let server know when client gives up negotiation

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

The NBD spec says that a client should send NBD_OPT_ABORT
rather than just dropping the connection, if the client doesn't
like something the server sent during option negotiation.  This
is a best-effort attempt only, and can only be done in places
where we know the server is still in sync with what we've sent,
whether or not we've read everything the server has sent.
Technically, the server then has to reply with NBD_REP_ACK, but
it's not worth complicating the client to wait around for that
reply.

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-10-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/client.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/nbd/client.c b/nbd/client.c
index e78000a..651e08c 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -111,6 +111,19 @@ static int nbd_send_option_request(QIOChannel *ioc, 
uint32_t opt,
 return 0;
 }
 
+/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
+ * not going to attempt further negotiation. */
+static void nbd_send_opt_abort(QIOChannel *ioc)
+{
+/* Technically, a compliant server is supposed to reply to us; but
+ * older servers disconnected instead. At any rate, we're allowed
+ * to disconnect without waiting for the server reply, so we don't
+ * even care if the request makes it to the server, let alone
+ * waiting around for whether the server replies. */
+nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
+}
+
+
 /* Receive the header of an option reply, which should match the given
  * opt.  Read through the length field, but NOT the length bytes of
  * payload. Return 0 if successful, -1 with errp set if it is
@@ -121,6 +134,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, 
uint32_t opt,
 QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
 if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) {
 error_setg(errp, "failed to read option reply");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 be64_to_cpus(&reply->magic);
@@ -133,11 +147,13 @@ static int nbd_receive_option_reply(QIOChannel *ioc, 
uint32_t opt,
 
 if (reply->magic != NBD_REP_MAGIC) {
 error_setg(errp, "Unexpected option reply magic");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 if (reply->option != opt) {
 error_setg(errp, "Unexpected option type %x expected %x",
reply->option, opt);
+nbd_send_opt_abort(ioc);
 return -1;
 }
 return 0;
@@ -206,6 +222,9 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
nbd_opt_reply *reply,
 
  cleanup:
 g_free(msg);
+if (result < 0) {
+nbd_send_opt_abort(ioc);
+}
 return result;
 }
 
@@ -229,25 +248,30 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 if (reply.type == NBD_REP_ACK) {
 if (len != 0) {
 error_setg(errp, "length too long for option end");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 } else if (reply.type == NBD_REP_SERVER) {
 if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
 error_setg(errp, "incorrect option length %" PRIu32, len);
+nbd_send_opt_abort(ioc);
 return -1;
 }
 if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
 error_setg(errp, "failed to read option name length");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 namelen = be32_to_cpu(namelen);
 len -= sizeof(namelen);
 if (len < namelen) {
 error_setg(errp, "incorrect option name length");
+nbd_send_opt_abort(ioc);
 return -1;
 }
 if (namelen > NBD_MAX_NAME_SIZE) {
 error_setg(errp, "export name length too long %" PRIu32, namelen);
+nbd_send_opt_abort(ioc);
 return -1;
 }
 
@@ -256,6 +280,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 error_setg(errp, "failed to read export name");
 g_free(*name);
 *name = NULL;
+nbd_send_opt_abort(ioc);
 return -1;
 }
 (*name)[namelen] = '\0';
@@ -267,6 +292,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 g_free(*name);
 g_free(buf);
 *name = NULL;
+nbd_send_opt_abort(ioc);
 return -1;
 }
 buf[len] = '\0';
@@ -276,6 +302,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 } else {
 error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
reply.type, NBD_REP_SERVER);
+nbd_send_opt_abort(ioc);
 return -1;
 }
 return 1;
@@ -325,6 +352,7 @@ static int nbd_receive_query_exports(QIOChannel *ioc,
 

[Qemu-devel] [PULL v2 00/30] Misc patches for 2016-10-31

2016-11-01 Thread Paolo Bonzini
The following changes since commit 39542105bbb19c690219d2f22844d8dfbd9bba05:

  Merge remote-tracking branch 'remotes/gkurz/tags/for-upstream' into staging 
(2016-11-01 12:48:07 +)

are available in the git repository at:

  git://github.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to a92e5ebbcea7c1d16f4aa75b08fce298fcd4cdfa:

  main-loop: Suppress I/O thread warning under qtest (2016-11-01 16:07:37 +0100)


* NBD bugfix (Changlong)
* NBD write zeroes support (Eric)
* Memory backend fixes (Haozhong)
* Atomics fix (Alex)
* New AVX512 features (Luwei)
* "make check" logging fix (Paolo)
* Chardev refactoring fallout (Paolo)
* Small checkpatch improvements (Paolo, Jeff)


Alex Bennée (1):
  exec.c: ensure all AddressSpaceDispatch updates under RCU

Changlong Xie (1):
  nbd: Use CoQueue for free_sema instead of CoMutex

Christian Borntraeger (1):
  vl: exit qemu on guest panic if -no-shutdown is not set

Eric Blake (16):
  nbd: Add qemu-nbd -D for human-readable description
  nbd: Treat flags vs. command type as separate fields
  nbd: Rename NBDRequest to NBDRequestData
  nbd: Rename NbdClientSession to NBDClientSession
  nbd: Rename struct nbd_request and nbd_reply
  nbd: Share common reply-sending code in server
  nbd: Send message along with server NBD_REP_ERR errors
  nbd: Share common option-sending code in client
  nbd: Let server know when client gives up negotiation
  nbd: Let client skip portions of server reply
  nbd: Less allocation during NBD_OPT_LIST
  nbd: Support shorter handshake
  nbd: Refactor conversion to errno to silence checkpatch
  nbd: Improve server handling of shutdown requests
  nbd: Implement NBD_CMD_WRITE_ZEROES on server
  nbd: Implement NBD_CMD_WRITE_ZEROES on client

Haozhong Zhang (2):
  exec.c: do not truncate non-empty memory backend file
  exec.c: check memory backend file size with 'size' option

Jeff Cody (1):
  checkpatch: allow spaces before parenthesis for 'coroutine_fn'

Luwei Kang (1):
  x86: add AVX512_4VNNIW and AVX512_4FMAPS features

Max Reitz (1):
  main-loop: Suppress I/O thread warning under qtest

Paolo Bonzini (5):
  checkpatch: tweak "struct should normally be const" warning
  qemu-error: remove dependency of stubs on monitor
  tests: send error_report to test log
  qemu-char: do not forward events through the mux until QEMU has started
  slirp: fix CharDriver breakage

Pranith Kumar (1):
  docs/rcu.txt: Fix minor typo

 block/nbd-client.c  | 104 +
 block/nbd-client.h  |  12 +-
 block/nbd.c |   8 +-
 docs/rcu.txt|   2 +-
 exec.c  |  33 ++-
 include/block/nbd.h |  73 +--
 include/glib-compat.h   |  13 ++
 include/qemu/error-report.h |   1 +
 include/qemu/osdep.h|   3 +
 main-loop.c |   2 +-
 monitor.c   |  21 ++
 nbd/client.c| 498 
 nbd/nbd-internal.h  |  12 +-
 nbd/server.c| 292 ++
 net/slirp.c |   3 +-
 qapi-schema.json|   4 +-
 qemu-char.c |   8 +-
 qemu-nbd.c  |  12 +-
 qemu-nbd.texi   |   5 +-
 scripts/checkpatch.pl   |   6 +-
 stubs/Makefile.objs |   2 +-
 stubs/error-printf.c|  19 ++
 stubs/mon-printf.c  |  11 -
 target-i386/cpu.c   |  19 +-
 target-i386/cpu.h   |   4 +
 util/qemu-error.c   |  26 +--
 vl.c|   5 +
 27 files changed, 763 insertions(+), 435 deletions(-)
 create mode 100644 stubs/error-printf.c
 delete mode 100644 stubs/mon-printf.c
-- 
2.7.4




Re: [Qemu-devel] [PATCH v5] block/vxhs.c Add support for a new block device type called "vxhs"

2016-11-01 Thread ashish mittal
Done!

All builds are fine, but I have no way of knowing if that fixed the
issue! Please let me know if you still see a problem.

[root@rhel72ga-build-upstream qemu] 2016-11-01 09:46:06$ make
  CC  block/vxhs.o
  LINKqemu-nbd
  LINKqemu-img
  LINKqemu-io
  LINKaarch64-softmmu/qemu-system-aarch64
  LINKalpha-softmmu/qemu-system-alpha
  LINKarm-softmmu/qemu-system-arm
  LINKcris-softmmu/qemu-system-cris
  LINKi386-softmmu/qemu-system-i386
  LINKlm32-softmmu/qemu-system-lm32
  LINKm68k-softmmu/qemu-system-m68k
  LINKmicroblazeel-softmmu/qemu-system-microblazeel
  LINKmicroblaze-softmmu/qemu-system-microblaze
  LINKmips64el-softmmu/qemu-system-mips64el
  LINKmips64-softmmu/qemu-system-mips64
  LINKmipsel-softmmu/qemu-system-mipsel
  LINKmips-softmmu/qemu-system-mips
  LINKmoxie-softmmu/qemu-system-moxie
  LINKor32-softmmu/qemu-system-or32
  LINKppc64-softmmu/qemu-system-ppc64
  LINKppcemb-softmmu/qemu-system-ppcemb
  LINKppc-softmmu/qemu-system-ppc
  LINKs390x-softmmu/qemu-system-s390x
  LINKsh4eb-softmmu/qemu-system-sh4eb
  LINKsh4-softmmu/qemu-system-sh4
  LINKsparc64-softmmu/qemu-system-sparc64
  LINKsparc-softmmu/qemu-system-sparc
  LINKtricore-softmmu/qemu-system-tricore
  LINKunicore32-softmmu/qemu-system-unicore32
  LINKx86_64-softmmu/qemu-system-x86_64
  LINKxtensaeb-softmmu/qemu-system-xtensaeb
  LINKxtensa-softmmu/qemu-system-xtensa
[root@rhel72ga-build-upstream qemu] 2016-11-01 09:48:13$

Thanks!

On Tue, Nov 1, 2016 at 9:27 AM, Daniel P. Berrange  wrote:
> On Tue, Nov 01, 2016 at 09:09:31AM -0700, ashish mittal wrote:
>> Hi Daniel,
>>
>> Thanks for pointing that out. I had done a fresh configure and make
>> before submitting the patch. I am somehow not able to reproduce that
>> error!
>>
>> Checked out fresh source just now and tried again, still no luck.
>>
>> [root@rhel72ga-build-upstream qemu] 2016-11-01 08:51:15$ git apply
>> ~/qemu/31Oct2016/0001-block-vxhs.c-Add-support-for-a-new-block-device-type.patch
>> [root@rhel72ga-build-upstream qemu] 2016-11-01 08:51:31$ ./configure
>> --enable-vxhs
>> Install prefix/usr/local
>> BIOS directory/usr/local/share/qemu
>> binary directory  /usr/local/bin
>> library directory /usr/local/lib
>> module directory  /usr/local/lib/qemu
>> libexec directory /usr/local/libexec
>> include directory /usr/local/include
>> config directory  /usr/local/etc
>> local state directory   /usr/local/var
>> Manual directory  /usr/local/share/man
>> ELF interp prefix /usr/gnemul/qemu-%M
>> Source path   /root/qemu_second_buildarea/qemu
>> C compilercc
>> Host C compiler   cc
>> C++ compiler  c++
>> Objective-C compiler cc
>> ARFLAGS   rv
>> CFLAGS-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g
>> QEMU_CFLAGS   -I/usr/include/pixman-1-Werror
>> -DHAS_LIBSSH2_SFTP_FSYNC -pthread -I/usr/include/glib-2.0
>> -I/usr/lib64/glib-2.0/include -fPIE -DPIE -m64 -mcx16
>> -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
>> -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wwrite-strings
>> -Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv
>> -Wendif-labels -Wmissing-include-dirs -Wempty-body -Wnested-externs
>> -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers
>> -Wold-style-declaration -Wold-style-definition -Wtype-limits
>> -fstack-protector-strong   -I/usr/include/p11-kit-1
>> -I/usr/include/libpng15   -I/usr/include/spice-server
>> -I/usr/include/cacard -I/usr/include/glib-2.0
>> -I/usr/lib64/glib-2.0/include -I/usr/include/pixman-1
>> -I/usr/include/nss3 -I/usr/include/nspr4 -I/usr/include/spice-1
>> -I/usr/include/cacard -I/usr/include/nss3 -I/usr/include/nspr4
>> -I/usr/include/libusb-1.0
>> LDFLAGS   -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g
>> make  make
>> install   install
>> pythonpython -B
>> smbd  /usr/sbin/smbd
>> module supportno
>> host CPU  x86_64
>> host big endian   no
>> target listaarch64-softmmu alpha-softmmu arm-softmmu
>> cris-softmmu i386-softmmu lm32-softmmu m68k-softmmu
>> microblazeel-softmmu microblaze-softmmu mips64el-softmmu
>> mips64-softmmu mipsel-softmmu mips-softmmu moxie-softmmu or32-softmmu
>> ppc64-softmmu ppcemb-softmmu ppc-softmmu s390x-softmmu sh4eb-softmmu
>> sh4-softmmu sparc64-softmmu sparc-softmmu tricore-softmmu
>> unicore32-softmmu x86_64-softmmu xtensaeb-softmmu xtensa-softmmu
>> aarch64-linux-user alpha-linux-user armeb-linux-user arm-linux-user
>> cris-linux-user i386-linux-user m68k-linux-user
>> microblazeel-linux-user microblaze-linux-user mips64el-linux-user
>> mips64-linux-user mipsel-linux-user mips-linux-user
>> mipsn32el-linux-user mipsn32-linux-user or32-linux-user
>> ppc64abi32-linux-user ppc64le-linux-user ppc64-linux-user
>> ppc-linux-user s390x-linux-user sh4eb-linux-user sh4-linux-user
>> sparc32plus-linux-user sparc64-linux-us

[Qemu-devel] [PULL 19/30] nbd: Support shorter handshake

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

The NBD Protocol allows the server and client to mutually agree
on a shorter handshake (omit the 124 bytes of reserved 0), via
the server advertising NBD_FLAG_NO_ZEROES and the client
acknowledging with NBD_FLAG_C_NO_ZEROES (only possible in
newstyle, whether or not it is fixed newstyle).  It doesn't
shave much off the wire, but we might as well implement it.

Signed-off-by: Eric Blake 
Reviewed-by: Alex Bligh 
Message-Id: <1476469998-28592-13-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 include/block/nbd.h |  6 --
 nbd/client.c|  8 +++-
 nbd/server.c| 15 +++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index b69bf1d..d326308 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -74,11 +74,13 @@ typedef struct NBDReply NBDReply;
 
 /* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
-#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */
+#define NBD_FLAG_FIXED_NEWSTYLE   (1 << 0) /* Fixed newstyle protocol. */
+#define NBD_FLAG_NO_ZEROES(1 << 1) /* End handshake without zeroes. */
 
 /* New-style client flags, sent from client to server to control what happens
during handshake phase. */
-#define NBD_FLAG_C_FIXED_NEWSTYLE   (1 << 0)/* Fixed newstyle protocol. */
+#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+#define NBD_FLAG_C_NO_ZEROES  (1 << 1) /* End handshake without zeroes. */
 
 /* Reply types. */
 #define NBD_REP_ACK (1) /* Data sending finished. */
diff --git a/nbd/client.c b/nbd/client.c
index 0afb8be..b29963b 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -440,6 +440,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 char buf[256];
 uint64_t magic, s;
 int rc;
+bool zeroes = true;
 
 TRACE("Receiving negotiation tlscreds=%p hostname=%s.",
   tlscreds, hostname ? hostname : "");
@@ -504,6 +505,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 TRACE("Server supports fixed new style");
 clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
 }
+if (globalflags & NBD_FLAG_NO_ZEROES) {
+zeroes = false;
+TRACE("Server supports no zeroes");
+clientflags |= NBD_FLAG_C_NO_ZEROES;
+}
 /* client requested flags */
 clientflags = cpu_to_be32(clientflags);
 if (write_sync(ioc, &clientflags, sizeof(clientflags)) !=
@@ -591,7 +597,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 }
 
 TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
-if (drop_sync(ioc, 124) != 124) {
+if (zeroes && drop_sync(ioc, 124) != 124) {
 error_setg(errp, "Failed to read reserved block");
 goto fail;
 }
diff --git a/nbd/server.c b/nbd/server.c
index fa01e49..afc1ec4 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -81,6 +81,7 @@ struct NBDClient {
 int refcount;
 void (*close)(NBDClient *client);
 
+bool no_zeroes;
 NBDExport *exp;
 QCryptoTLSCreds *tlscreds;
 char *tlsaclname;
@@ -450,6 +451,11 @@ static int nbd_negotiate_options(NBDClient *client)
 fixedNewstyle = true;
 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
 }
+if (flags & NBD_FLAG_C_NO_ZEROES) {
+TRACE("Client supports no zeroes at handshake end");
+client->no_zeroes = true;
+flags &= ~NBD_FLAG_C_NO_ZEROES;
+}
 if (flags != 0) {
 TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
 return -EIO;
@@ -602,6 +608,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
   NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
 bool oldStyle;
+size_t len;
 
 /* Old style negotiation header without options
 [ 0 ..   7]   passwd   ("NBDMAGIC")
@@ -618,7 +625,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 options sent
 [18 ..  25]   size
 [26 ..  27]   export flags
-[28 .. 151]   reserved (0)
+[28 .. 151]   reserved (0, omit if no_zeroes)
  */
 
 qio_channel_set_blocking(client->ioc, false, NULL);
@@ -637,7 +644,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
 } else {
 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
-stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE);
+stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
 }
 
 if (oldStyle) {
@@ -664,8 +671,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
   client->exp->size, client->exp->nbdflags | m

[Qemu-devel] [PULL 09/30] nbd: Treat flags vs. command type as separate fields

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

Current upstream NBD documents that requests have a 16-bit flags,
followed by a 16-bit type integer; although older versions mentioned
only a 32-bit field with masking to find flags.  Since the protocol
is in network order (big-endian over the wire), the ABI is unchanged;
but dealing with the flags as a separate field rather than masking
will make it easier to add support for upcoming NBD extensions that
increase the number of both flags and commands.

Improve some comments in nbd.h based on the current upstream
NBD protocol (https://github.com/yoe/nbd/blob/master/doc/proto.md),
and touch some nearby code to keep checkpatch.pl happy.

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-3-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 block/nbd-client.c  |  9 +++--
 include/block/nbd.h | 18 --
 nbd/client.c|  9 ++---
 nbd/nbd-internal.h  |  4 ++--
 nbd/server.c| 39 +++
 5 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 40b28ab..8ca5030 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1,6 +1,7 @@
 /*
  * QEMU Block driver for  NBD
  *
+ * Copyright (C) 2016 Red Hat, Inc.
  * Copyright (C) 2008 Bull S.A.S.
  * Author: Laurent Vivier 
  *
@@ -258,7 +259,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 
 if (flags & BDRV_REQ_FUA) {
 assert(client->nbdflags & NBD_FLAG_SEND_FUA);
-request.type |= NBD_CMD_FLAG_FUA;
+request.flags |= NBD_CMD_FLAG_FUA;
 }
 
 assert(bytes <= NBD_MAX_BUFFER_SIZE);
@@ -343,11 +344,7 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,
 void nbd_client_close(BlockDriverState *bs)
 {
 NbdClientSession *client = nbd_get_client_session(bs);
-struct nbd_request request = {
-.type = NBD_CMD_DISC,
-.from = 0,
-.len = 0
-};
+struct nbd_request request = { .type = NBD_CMD_DISC };
 
 if (client->ioc == NULL) {
 return;
diff --git a/include/block/nbd.h b/include/block/nbd.h
index fd58390..5fe2670 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -1,4 +1,5 @@
 /*
+ *  Copyright (C) 2016 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori 
  *
  *  Network Block Device
@@ -32,7 +33,8 @@ struct nbd_request {
 uint64_t handle;
 uint64_t from;
 uint32_t len;
-uint32_t type;
+uint16_t flags;
+uint16_t type;
 };
 
 struct nbd_reply {
@@ -40,6 +42,8 @@ struct nbd_reply {
 uint32_t error;
 };
 
+/* Transmission (export) flags: sent from server to client during handshake,
+   but describe what will happen during transmission */
 #define NBD_FLAG_HAS_FLAGS  (1 << 0)/* Flags are there */
 #define NBD_FLAG_READ_ONLY  (1 << 1)/* Device is read-only */
 #define NBD_FLAG_SEND_FLUSH (1 << 2)/* Send FLUSH */
@@ -47,10 +51,12 @@ struct nbd_reply {
 #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - 
rotational media */
 #define NBD_FLAG_SEND_TRIM  (1 << 5)/* Send TRIM (discard) */
 
-/* New-style global flags. */
+/* New-style handshake (global) flags, sent from server to client, and
+   control what will happen during handshake phase. */
 #define NBD_FLAG_FIXED_NEWSTYLE (1 << 0)/* Fixed newstyle protocol. */
 
-/* New-style client flags. */
+/* New-style client flags, sent from client to server to control what happens
+   during handshake phase. */
 #define NBD_FLAG_C_FIXED_NEWSTYLE   (1 << 0)/* Fixed newstyle protocol. */
 
 /* Reply types. */
@@ -61,10 +67,10 @@ struct nbd_reply {
 #define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
 #define NBD_REP_ERR_TLS_REQD((UINT32_C(1) << 31) | 5) /* TLS required */
 
+/* Request flags, sent from client to server during transmission phase */
+#define NBD_CMD_FLAG_FUA(1 << 0)
 
-#define NBD_CMD_MASK_COMMAND   0x
-#define NBD_CMD_FLAG_FUA   (1 << 16)
-
+/* Supported request types */
 enum {
 NBD_CMD_READ = 0,
 NBD_CMD_WRITE = 1,
diff --git a/nbd/client.c b/nbd/client.c
index f6db836..4620e8d 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -1,4 +1,5 @@
 /*
+ *  Copyright (C) 2016 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori 
  *
  *  Network Block Device Client Side
@@ -714,11 +715,13 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct 
nbd_request *request)
 
 TRACE("Sending request to server: "
   "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64
-  ", .type=%" PRIu32 " }",
-  request->from, request->len, request->handle, request->type);
+  ", .flags = %" PRIx16 ", .type = %" PRIu16 " }",
+  request->from, request->len, request->handle,
+  request->flags, request->type);
 
 stl_be_p(buf, NBD_REQUEST_MAGIC);
-stl_be_p(buf + 4, request->type);
+stw_be_p(buf + 4, request->flags);
+stw_be_p(bu

[Qemu-devel] [PULL 02/30] nbd: Use CoQueue for free_sema instead of CoMutex

2016-11-01 Thread Paolo Bonzini
From: Changlong Xie 

NBD is using the CoMutex in a way that wasn't anticipated. For example, if 
there are
N(N=26, MAX_NBD_REQUESTS=16) nbd write requests, so we will invoke 
nbd_client_co_pwritev
N times.

time request Actions
11   in_flight=1, Coroutine=C1
22   in_flight=2, Coroutine=C2
...
15   15  in_flight=15, Coroutine=C15
16   16  in_flight=16, Coroutine=C16, free_sema->holder=C16, 
mutex->locked=true
17   17  in_flight=16, Coroutine=C17, queue C17 into free_sema->queue
18   18  in_flight=16, Coroutine=C18, queue C18 into free_sema->queue
...
26   N   in_flight=16, Coroutine=C26, queue C26 into free_sema->queue


Once nbd client recieves request No.16' reply, we will re-enter C16. It's ok, 
because
it's equal to 'free_sema->holder'.

time request Actions
27   16  in_flight=15, Coroutine=C16, free_sema->holder=C16, 
mutex->locked=false


Then nbd_coroutine_end invokes qemu_co_mutex_unlock what will pop coroutines 
from
free_sema->queue's head and enter C17. More free_sema->holder is C17 now.

time request Actions
28   17  in_flight=16, Coroutine=C17, free_sema->holder=C17, 
mutex->locked=true


In above scenario, we only recieves request No.16' reply. As time goes by, nbd 
client will
almostly recieves replies from requests 1 to 15 rather than request 17 who owns 
C17. In this
case, we will encounter assert "mutex->holder == self" failed since Kevin's 
commit 0e438cdc
"coroutine: Let CoMutex remember who holds it". For example, if nbd client 
recieves request
No.15' reply, qemu will stop unexpectedly:

time request   Actions
29   15(most case) in_flight=15, Coroutine=C15, free_sema->holder=C17, 
mutex->locked=false


Per Paolo's suggestion "The simplest fix is to change it to CoQueue, which is 
like a condition
variable", this patch replaces CoMutex with CoQueue.

Cc: Wen Congyang 
Reported-by: zhanghailiang 
Suggested-by: Paolo Bonzini 
Signed-off-by: Changlong Xie 
Message-Id: <1476267508-19499-1-git-send-email-xiecl.f...@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini 
---
 block/nbd-client.c | 8 
 block/nbd-client.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 2cf3237..40b28ab 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -199,8 +199,8 @@ static void nbd_coroutine_start(NbdClientSession *s,
 {
 /* Poor man semaphore.  The free_sema is locked when no other request
  * can be accepted, and unlocked after receiving one reply.  */
-if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
-qemu_co_mutex_lock(&s->free_sema);
+if (s->in_flight == MAX_NBD_REQUESTS) {
+qemu_co_queue_wait(&s->free_sema);
 assert(s->in_flight < MAX_NBD_REQUESTS);
 }
 s->in_flight++;
@@ -214,7 +214,7 @@ static void nbd_coroutine_end(NbdClientSession *s,
 int i = HANDLE_TO_INDEX(s, request->handle);
 s->recv_coroutine[i] = NULL;
 if (s->in_flight-- == MAX_NBD_REQUESTS) {
-qemu_co_mutex_unlock(&s->free_sema);
+qemu_co_queue_next(&s->free_sema);
 }
 }
 
@@ -386,7 +386,7 @@ int nbd_client_init(BlockDriverState *bs,
 }
 
 qemu_co_mutex_init(&client->send_mutex);
-qemu_co_mutex_init(&client->free_sema);
+qemu_co_queue_init(&client->free_sema);
 client->sioc = sioc;
 object_ref(OBJECT(client->sioc));
 
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 044aca4..307b8b1 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -24,7 +24,7 @@ typedef struct NbdClientSession {
 off_t size;
 
 CoMutex send_mutex;
-CoMutex free_sema;
+CoQueue free_sema;
 Coroutine *send_coroutine;
 int in_flight;
 
-- 
2.7.4





[Qemu-devel] [PULL 17/30] nbd: Let client skip portions of server reply

2016-11-01 Thread Paolo Bonzini
From: Eric Blake 

The server has a nice helper function nbd_negotiate_drop_sync()
which lets it easily ignore fluff from the client (such as the
payload to an unknown option request).  We can't quite make it
common, since it depends on nbd_negotiate_read() which handles
coroutine magic, but we can copy the idea into the client where
we have places where we want to ignore data (such as the
description tacked on the end of NBD_REP_SERVER).

Signed-off-by: Eric Blake 
Message-Id: <1476469998-28592-11-git-send-email-ebl...@redhat.com>
Signed-off-by: Paolo Bonzini 
---
 nbd/client.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 651e08c..5d94e34 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -75,6 +75,32 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);
 
 */
 
+/* Discard length bytes from channel.  Return -errno on failure, or
+ * the amount of bytes consumed. */
+static ssize_t drop_sync(QIOChannel *ioc, size_t size)
+{
+ssize_t ret, dropped = size;
+char small[1024];
+char *buffer;
+
+buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size));
+while (size > 0) {
+ret = read_sync(ioc, buffer, MIN(65536, size));
+if (ret < 0) {
+goto cleanup;
+}
+assert(ret <= size);
+size -= ret;
+}
+ret = dropped;
+
+ cleanup:
+if (buffer != small) {
+g_free(buffer);
+}
+return ret;
+}
+
 /* Send an option request.
  *
  * The request is for option @opt, with @data containing @len bytes of
@@ -285,19 +311,12 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, 
Error **errp)
 }
 (*name)[namelen] = '\0';
 len -= namelen;
-if (len) {
-char *buf = g_malloc(len + 1);
-if (read_sync(ioc, buf, len) != len) {
-error_setg(errp, "failed to read export description");
-g_free(*name);
-g_free(buf);
-*name = NULL;
-nbd_send_opt_abort(ioc);
-return -1;
-}
-buf[len] = '\0';
-TRACE("Ignoring export description: %s", buf);
-g_free(buf);
+if (drop_sync(ioc, len) != len) {
+error_setg(errp, "failed to read export description");
+g_free(*name);
+*name = NULL;
+nbd_send_opt_abort(ioc);
+return -1;
 }
 } else {
 error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
@@ -577,7 +596,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint16_t *flags,
 }
 
 TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
-if (read_sync(ioc, &buf, 124) != 124) {
+if (drop_sync(ioc, 124) != 124) {
 error_setg(errp, "Failed to read reserved block");
 goto fail;
 }
-- 
2.7.4





[Qemu-devel] [PULL 29/30] docs/rcu.txt: Fix minor typo

2016-11-01 Thread Paolo Bonzini
From: Pranith Kumar 

s/presented/prevented/

Signed-off-by: Pranith Kumar 
Message-Id: <20161018050418.4912-1-bobby.pr...@gmail.com>
Signed-off-by: Paolo Bonzini 
---
 docs/rcu.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/rcu.txt b/docs/rcu.txt
index a70b72c..c84e7f4 100644
--- a/docs/rcu.txt
+++ b/docs/rcu.txt
@@ -145,7 +145,7 @@ The core RCU API is small:
 and then read from there.
 
 RCU read-side critical sections must use atomic_rcu_read() to
-read data, unless concurrent writes are presented by another
+read data, unless concurrent writes are prevented by another
 synchronization mechanism.
 
 Furthermore, RCU read-side critical sections should traverse the
-- 
2.7.4





[Qemu-devel] [PULL 01/30] checkpatch: tweak "struct should normally be const" warning

2016-11-01 Thread Paolo Bonzini
Avoid triggering on

typedef struct BlockJobDriver BlockJobDriver;

or

struct BlockJobDriver {

Cc: John Snow 
Signed-off-by: Paolo Bonzini 
---
 scripts/checkpatch.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3afa19a..1f1c9d3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2498,8 +2498,8 @@ sub process {
VMStateDescription|
VMStateInfo}x;
if ($line !~ /\bconst\b/ &&
-   $line =~ /\b($struct_ops)\b/) {
-   ERROR("struct $1 should normally be const\n" .
+   $line =~ /\b($struct_ops)\b.*=/) {
+   ERROR("initializer for struct $1 should normally be 
const\n" .
$herecurr);
}
 
-- 
2.7.4





  1   2   3   >