[Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU

2016-04-23 Thread Emilio G. Cota
[ Applies on top of bennee/mttcg/enable-mttcg-for-armv7-v1 after
reverting "translate-all: introduces tb_flush_safe". A trivial
conflict must be solved after applying. ]

This is a first attempt at making tb_flush not have to stop all CPUs.
There are issues as pointed out below, but this could be a good start.

Context:
  https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04658.html
  https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg06942.html

Changes from v1:
- When a static buffer is used, split it in two instead of using
  a second buffer.

Known issues:
- Fails Alex' unit test with low enough -tb-size, see
  https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03465.html
  Seems to work in MTTCG, although I've only tested with tb_lock
  always being taken in tb_find_fast.
- Windows; not even compile-tested!

Signed-off-by: Emilio G. Cota 
---
 translate-all.c | 146 +++-
 1 file changed, 133 insertions(+), 13 deletions(-)

diff --git a/translate-all.c b/translate-all.c
index 8e70583..6830371 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -535,6 +535,9 @@ static inline void *split_cross_256mb(void *buf1, size_t 
size1)
 #ifdef USE_STATIC_CODE_GEN_BUFFER
 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 __attribute__((aligned(CODE_GEN_ALIGN)));
+static int static_buf_mask = 1;
+static void *static_buf1;
+static void *static_buf2;
 
 # ifdef _WIN32
 static inline void do_protect(void *addr, long size, int prot)
@@ -577,6 +580,13 @@ static inline void map_none(void *addr, long size)
 }
 # endif /* WIN32 */
 
+static void map_static_code_gen_buffer(void *buf, size_t size)
+{
+map_exec(buf, size);
+map_none(buf + size, qemu_real_host_page_size);
+qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+}
+
 static inline void *alloc_code_gen_buffer(void)
 {
 void *buf = static_code_gen_buffer;
@@ -586,28 +596,41 @@ static inline void *alloc_code_gen_buffer(void)
 full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
  & qemu_real_host_page_mask) - (uintptr_t)buf;
 
-/* Reserve a guard page.  */
-size = full_size - qemu_real_host_page_size;
+/*
+ * Reserve two guard pages, one after each of the two buffers:
+ * | buf1 |g1|buf2|g2|
+ */
+size = full_size - 2 * qemu_real_host_page_size;
 
 /* Honor a command-line option limiting the size of the buffer.  */
 if (size > tcg_ctx.code_gen_buffer_size) {
 size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
 & qemu_real_host_page_mask) - (uintptr_t)buf;
 }
-tcg_ctx.code_gen_buffer_size = size;
 
 #ifdef __mips__
-if (cross_256mb(buf, size)) {
-buf = split_cross_256mb(buf, size);
-size = tcg_ctx.code_gen_buffer_size;
+/*
+ * Pass 'size + page_size', since we want 'buf1 | guard1 | buf2' to be
+ * within the boundary.
+ */
+if (cross_256mb(buf, size + qemu_real_host_page_size)) {
+buf = split_cross_256mb(buf, size + qemu_real_host_page_size);
+size = tcg_ctx.code_gen_buffer_size - qemu_real_host_page_size;
 }
 #endif
 
-map_exec(buf, size);
-map_none(buf + size, qemu_real_host_page_size);
-qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+/* split the buffer in two */
+size /= 2;
+tcg_ctx.code_gen_buffer_size = size;
 
-return buf;
+static_buf1 = buf;
+static_buf2 = buf + size + qemu_real_host_page_mask;
+
+map_static_code_gen_buffer(static_buf1, size);
+map_static_code_gen_buffer(static_buf2, size);
+
+assert(static_buf_mask == 1);
+return static_buf1;
 }
 #elif defined(_WIN32)
 static inline void *alloc_code_gen_buffer(void)
@@ -825,10 +848,100 @@ static void page_flush_tb(void)
 }
 }
 
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+
+struct code_gen_desc {
+struct rcu_head rcu;
+int clear_bit;
+};
+
+static void code_gen_buffer_clear(struct rcu_head *rcu)
+{
+struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
+
+tb_lock();
+static_buf_mask &= ~desc->clear_bit;
+tb_unlock();
+g_free(desc);
+}
+
+static void *code_gen_buffer_replace(void)
+{
+struct code_gen_desc *desc = g_malloc0(sizeof(*desc));
+
+/*
+ * If both bits are set, we're having two concurrent flushes. This
+ * can easily happen if the buffers are heavily undersized.
+ */
+assert(static_buf_mask == 1 || static_buf_mask == 2);
+
+desc->clear_bit = static_buf_mask;
+call_rcu1(&desc->rcu, code_gen_buffer_clear);
+
+if (static_buf_mask == 1) {
+static_buf_mask |= 2;
+return static_buf2;
+}
+static_buf_mask |= 1;
+return static_buf1;
+}
+
+#elif defined(_WIN32)
+
+struct code_gen_desc {
+struct rcu_head rcu;
+void *buf;
+};
+
+static void code_gen_buffer_vfree(struct rcu_head *rcu)
+{
+struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, 

Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU

2016-04-23 Thread Emilio G. Cota
On Fri, Apr 22, 2016 at 15:41:13 +0100, Alex Bennée wrote:
> Emilio G. Cota  writes:
(snip)
> > Known issues:
> > - Basically compile-tested only, since I've only run this with
> >   single-threaded TCG; I also tried running it with linux-user,
> >   but in order to trigger tb_flush I had to make code_gen_buffer
> >   so small that the CPU calling tb_flush would immediately fill
> >   the 2nd buffer, triggering the assert. If you have a working
> >   multi-threaded workload that would be good to test this, please
> >   let me know.
> 
> With my latest mttcg unit tests:
> 
> ./arm-softmmu/qemu-system-arm -machine virt,accel=tcg -cpu cortex-a15 \
>   -device virtio-serial-device -device virtconsole,chardev=ctd \
>   -chardev testdev,id=ctd -display none -serial stdio \
>   -kernel arm/tcg-test.flat -smp 4 -tcg mttcg=on \
>   -append "tight smc irq mod=1 rounds=10"  -name arm,debug-threads=on

This is useful. Never mind the need for testing linux-user, I can test
both code paths (i.e. dynamic allocation and static buf) with qemu-system
by simply defining USE_STATIC_CODE_GEN_BUFFER.

After applying a modified version of this patch (that I'll send in
a jiffy) to your enable-mttcg-for-armv7-v1 branch (reverting first
"translate-all: introduces tb_flush_safe"), I can easily trigger
this error when setting a low enough TB size, e.g. -tb-size 32:

 CPU1: online and setting up with pattern 0xa0b78cbf
 CPU2: online and setting up with pattern 0x22287c45
 CPU3: online and setting up with pattern 0x6262c5c5
 CPU0: online and setting up with pattern 0xa65e7ad6
 qemu: flush code_size=10622184 nb_tbs=83886 avg_tb_size=126
 qemu: flush code_size=10469016 nb_tbs=83886 avg_tb_size=124
 qemu: flush code_size=10492920 nb_tbs=83886 avg_tb_size=125
 qemu: flush code_size=10477464 nb_tbs=83886 avg_tb_size=124
 qemu: flush code_size=10495800 nb_tbs=83886 avg_tb_size=125
 PASS: smc: irq: 0 errors, IRQs not checked
 Unhandled exception 3 (pabt)
 Exception frame registers:
 pc : []lr : [<40010700>]psr: a153
 sp : 400ac5c0  ip : 400ab4e8  fp : 40032ca8
 r10:   r9 :   r8 : 
 r7 :   r6 :   r5 :   r4 : 
 r3 :   r2 :   r1 : e59f2028  r0 : 
 Flags: NzCv  IRQs on  FIQs off  Mode SVC_32
 Control: 00c5107d  Table: 4006  DAC: 
 IFAR: e59f2028IFSR: 0205

Any input on where to look would be appreciated. Thanks,

Emilio



Re: [Qemu-devel] ARM PC-relative Loads, and TBs in soft MMU

2016-04-23 Thread Peter Maydell
On 22 April 2016 at 16:06, Tom Spink  wrote:
> So, my question is, how can a TB for a particular block containing a
> constant folded *virtual* PC work, if the MMU mappings change and a
> different virtual address is used to access the same physical address?
>
> E.g. assume we have an instruction such as: ldr r0, [pc, #8]
>
> If this is compiled in a block that begins at virtual address 0x1, then
> the load will be emitted as a constant load from address 0x10008 (and will
> be subject to the usual TLB lookup code).  But, if the MMU mappings change,
> and the block is entered from virtual address 0x2 (because 0x2 now
> points to the same physical page), then the load will be incorrect, as it
> would still be accessing address 0x1, but should actually be accessing
> address 0x20008.

TBs are looked up by (virtual) PC + flags + physical address, so if
the same lump of code is mapped at two different virtual addresses
we'll translate it twice. (More precisely, tb_find_fast() checks
only the virtual address, but it does so in a cache which is
invalidated when the guest does a TLB invalidate operation; if
the cache misses we fall back to tb_find_slow() which also checks
physical address.)

thanks
-- PMM



[Qemu-devel] [PATCH for 2.7 1/1] qcow2: improve qcow2_co_write_zeroes()

2016-04-23 Thread Denis V. Lunev
Unfortunately, the Linux kernel can send non-aligned requests to qemu-nbd
if the caller is using O_DIRECT and does not align its in-memory data to a
page boundary. As a result, qemu-nbd calls the block layer with non-aligned
requests.

qcow2_co_write_zeroes requires the caller to supply block-aligned
requests. Otherwise it rejects the request with ENOTSUP, which is properly
handled at the upper level. The problem is that this grows the image.

This could be optimized a bit:
- particular request could be split to block aligned part and head/tail,
  which could be handled separately
- writes could be omitted when we do know that the image already contains
  zeroes at the offsets being written

Signed-off-by: Denis V. Lunev 
CC: Kevin Wolf 
CC: Max Reitz 
---
 block/qcow2.c | 58 +-
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 470734b..9bdaa15 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2411,21 +2411,69 @@ finish:
 return ret;
 }
 
+static int write_zeroes_chunk(BlockDriverState *bs, int64_t sector_num, int nr)
+{
+int ret, count;
+BlockDriverState *file;
+uint8_t *buf;
+struct iovec iov;
+QEMUIOVector local_qiov;
+
+ret = bdrv_get_block_status_above(bs, NULL, sector_num, nr, &count, &file);
+if (ret > 0 && (ret & BDRV_BLOCK_ZERO) && count == nr) {
+/* Nothing to do. The area is zeroed already.
+   Worth to check to avoid image expansion for non-aligned reqs. */
+return 0;
+}
+
+buf = qemu_blockalign0(bs, nr << BDRV_SECTOR_BITS);
+iov = (struct iovec) {
+.iov_base   = buf,
+.iov_len= nr << BDRV_SECTOR_BITS,
+};
+qemu_iovec_init_external(&local_qiov, &iov, 1);
+
+return qcow2_co_writev(bs, sector_num, nr, &local_qiov);
+}
+
 static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
 {
 int ret;
 BDRVQcow2State *s = bs->opaque;
 
-/* Emulate misaligned zero writes */
-if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
-return -ENOTSUP;
+int nr = sector_num % s->cluster_sectors;
+if (nr != 0) {
+nr = MIN(s->cluster_sectors - nr, nb_sectors);
+
+ret = write_zeroes_chunk(bs, sector_num, nr);
+if (ret < 0) {
+return ret;
+}
+
+sector_num += nr;
+nb_sectors -= nr;
+if (nb_sectors == 0) {
+return 0;
+}
+}
+
+nr = nb_sectors % s->cluster_sectors;
+if (nr != 0) {
+ret = write_zeroes_chunk(bs, sector_num + nb_sectors - nr, nr);
+if (ret < 0) {
+return ret;
+}
+
+nb_sectors -= nr;
+if (nb_sectors == 0) {
+return 0;
+}
 }
 
 /* Whatever is left can use real zero clusters */
 qemu_co_mutex_lock(&s->lock);
-ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
-nb_sectors);
+ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, nb_sectors);
 qemu_co_mutex_unlock(&s->lock);
 
 return ret;
-- 
2.5.0




[Qemu-devel] [PATCH v2 2/3] Add ENET/Gbps Ethernet support to FEC device

2016-04-23 Thread Jean-Christophe Dubois
The ENET device (present in i.MX6) is "derived" from FEC and backward
compatible with it.

This patch adds the necessary support for the features added in the ENET
device, allowing Linux to use it (on supported processors).

Signed-off-by: Jean-Christophe Dubois 
---
 hw/arm/fsl-imx25.c   |   3 +
 hw/net/imx_fec.c | 984 +++
 include/hw/net/imx_fec.h | 243 +---
 3 files changed, 929 insertions(+), 301 deletions(-)

diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 2f878b9..ddb2b22 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -191,6 +191,9 @@ static void fsl_imx25_realize(DeviceState *dev, Error 
**errp)
 }
 
 qdev_set_nic_properties(DEVICE(&s->fec), &nd_table[0]);
+
+object_property_set_bool(OBJECT(&s->fec), true, "is-fec", &error_abort);
+
 object_property_set_bool(OBJECT(&s->fec), true, "realized", &err);
 if (err) {
 error_propagate(errp, err);
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
index e60e338..28713ac 100644
--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c
@@ -24,6 +24,8 @@
 #include "qemu/osdep.h"
 #include "hw/net/imx_fec.h"
 #include "sysemu/dma.h"
+#include "net/checksum.h"
+#include "net/eth.h"
 
 /* For crc32 */
 #include <zlib.h>
@@ -52,36 +54,162 @@
 } \
 } while (0)
 
-static const VMStateDescription vmstate_imx_fec = {
+static const char *imx_default_reg_name(IMXFECState *s, uint32_t index)
+{
+static char tmp[20];
+sprintf(tmp, "index %d", index);
+return tmp;
+}
+
+static const char *imx_fec_reg_name(IMXFECState *s, uint32_t index)
+{
+switch (index) {
+case ENET_FRBR:
+return "FRBR";
+case ENET_FRSR:
+return "FRSR";
+case ENET_MIIGSK_CFGR:
+return "MIIGSK_CFGR";
+case ENET_MIIGSK_ENR:
+return "MIIGSK_ENR";
+default:
+return imx_default_reg_name(s, index);
+}
+}
+
+static const char *imx_enet_reg_name(IMXFECState *s, uint32_t index)
+{
+switch (index) {
+case ENET_RSFL:
+return "RSFL";
+case ENET_RSEM:
+return "RSEM";
+case ENET_RAEM:
+return "RAEM";
+case ENET_RAFL:
+return "RAFL";
+case ENET_TSEM:
+return "TSEM";
+case ENET_TAEM:
+return "TAEM";
+case ENET_TAFL:
+return "TAFL";
+case ENET_TIPG:
+return "TIPG";
+case ENET_FTRL:
+return "FTRL";
+case ENET_TACC:
+return "TACC";
+case ENET_RACC:
+return "RACC";
+case ENET_ATCR:
+return "ATCR";
+case ENET_ATVR:
+return "ATVR";
+case ENET_ATOFF:
+return "ATOFF";
+case ENET_ATPER:
+return "ATPER";
+case ENET_ATCOR:
+return "ATCOR";
+case ENET_ATINC:
+return "ATINC";
+case ENET_ATSTMP:
+return "ATSTMP";
+case ENET_TGSR:
+return "TGSR";
+case ENET_TCSR0:
+return "TCSR0";
+case ENET_TCCR0:
+return "TCCR0";
+case ENET_TCSR1:
+return "TCSR1";
+case ENET_TCCR1:
+return "TCCR1";
+case ENET_TCSR2:
+return "TCSR2";
+case ENET_TCCR2:
+return "TCCR2";
+case ENET_TCSR3:
+return "TCSR3";
+case ENET_TCCR3:
+return "TCCR3";
+default:
+return imx_default_reg_name(s, index);
+}
+}
+
+static const char *imx_eth_reg_name(IMXFECState *s, uint32_t index)
+{
+switch (index) {
+case ENET_EIR:
+return "EIR";
+case ENET_EIMR:
+return "EIMR";
+case ENET_RDAR:
+return "RDAR";
+case ENET_TDAR:
+return "TDAR";
+case ENET_ECR:
+return "ECR";
+case ENET_MMFR:
+return "MMFR";
+case ENET_MSCR:
+return "MSCR";
+case ENET_MIBC:
+return "MIBC";
+case ENET_RCR:
+return "RCR";
+case ENET_TCR:
+return "TCR";
+case ENET_PALR:
+return "PALR";
+case ENET_PAUR:
+return "PAUR";
+case ENET_OPD:
+return "OPD";
+case ENET_IAUR:
+return "IAUR";
+case ENET_IALR:
+return "IALR";
+case ENET_GAUR:
+return "GAUR";
+case ENET_GALR:
+return "GALR";
+case ENET_TFWR:
+return "TFWR";
+case ENET_RDSR:
+return "RDSR";
+case ENET_TDSR:
+return "TDSR";
+case ENET_MRBR:
+return "MRBR";
+default:
+if (s->is_fec) {
+return imx_fec_reg_name(s, index);
+} else {
+return imx_enet_reg_name(s, index);
+}
+}
+}
+
+static const VMStateDescription vmstate_imx_eth = {
 .name = TYPE_IMX_FEC,
-.version_id = 1,
-.minimum_version_id = 1,
+.version_id = 2,
+.minimum_version_id = 2,
 .fields = (VMStateField[]) {
-VMSTATE_UINT32(irq_state, IMXFECState),
-VMSTATE_UINT32(eir, IMXFECState),
-VMSTATE_UINT32(eimr, IMXFECState),
-VMSTATE_UINT32(rx_enabled, IMXFECState),
+VMSTATE_UINT32_ARRAY(regs, IMXFECState, ENET_MAX),
  

[Qemu-devel] [PATCH v2 3/3] Add ENET device to i.MX6 SOC.

2016-04-23 Thread Jean-Christophe Dubois
This adds the ENET device to the i.MX6 SOC.

This was tested by booting Linux on a QEMU i.MX6 instance and accessing
the internet from the Linux guest.

Signed-off-by: Jean-Christophe Dubois 
---
 hw/arm/fsl-imx6.c | 17 +
 include/hw/arm/fsl-imx6.h |  2 ++
 2 files changed, 19 insertions(+)

diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index a5331bf..c08222c 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -105,6 +105,10 @@ static void fsl_imx6_init(Object *obj)
 snprintf(name, NAME_SIZE, "spi%d", i + 1);
 object_property_add_child(obj, name, OBJECT(&s->spi[i]), NULL);
 }
+
+object_initialize(&s->eth, sizeof(s->eth), TYPE_IMX_FEC);
+qdev_set_parent_bus(DEVICE(&s->eth), sysbus_get_default());
+object_property_add_child(obj, "eth", OBJECT(&s->eth), NULL);
 }
 
 static void fsl_imx6_realize(DeviceState *dev, Error **errp)
@@ -381,6 +385,19 @@ static void fsl_imx6_realize(DeviceState *dev, Error 
**errp)
 spi_table[i].irq));
 }
 
+object_property_set_bool(OBJECT(&s->eth), true, "realized", &err);
+if (err) {
+error_propagate(errp, err);
+return;
+}
+sysbus_mmio_map(SYS_BUS_DEVICE(&s->eth), 0, FSL_IMX6_ENET_ADDR);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->eth), 0,
+   qdev_get_gpio_in(DEVICE(&s->a9mpcore),
+FSL_IMX6_ENET_MAC_IRQ));
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->eth), 1,
+   qdev_get_gpio_in(DEVICE(&s->a9mpcore),
+FSL_IMX6_ENET_MAC_1588_IRQ));
+
 /* ROM memory */
 memory_region_init_rom_device(&s->rom, NULL, NULL, NULL, "imx6.rom",
   FSL_IMX6_ROM_SIZE, &err);
diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h
index d24aaee..98b7599 100644
--- a/include/hw/arm/fsl-imx6.h
+++ b/include/hw/arm/fsl-imx6.h
@@ -28,6 +28,7 @@
 #include "hw/gpio/imx_gpio.h"
 #include "hw/sd/sdhci.h"
 #include "hw/ssi/imx_spi.h"
+#include "hw/net/imx_fec.h"
 #include "exec/memory.h"
 
 #define TYPE_FSL_IMX6 "fsl,imx6"
@@ -57,6 +58,7 @@ typedef struct FslIMX6State {
 IMXGPIOState   gpio[FSL_IMX6_NUM_GPIOS];
 SDHCIState esdhc[FSL_IMX6_NUM_ESDHCS];
 IMXSPIStatespi[FSL_IMX6_NUM_ECSPIS];
+IMXFECStateeth;
 MemoryRegion   rom;
 MemoryRegion   caam;
 MemoryRegion   ocram;
-- 
2.7.4




[Qemu-devel] [PATCH v2 0/3] Add Ethernet device for i.MX6 SOC

2016-04-23 Thread Jean-Christophe Dubois
This patch series adds Gb ENET Ethernet device to the i.MX6 SOC.

The ENET device is an evolution of the FEC device present on the i.MX25 SOC
and is backward compatible with it.

Therefore the ENET support has been added to the actual Qemu FEC device.

The Patch has been tested by:
 * Booting linux on i.MX25 PDK board emulation and accessing internet
 * Booting linux on i.MX6 Sabrelite board emulation and accessing internet

Jean-Christophe Dubois (3):
  net: improve UDP/TCP checksum computation.
  Add ENET/Gbps Ethernet support to FEC device
  Add ENET device to i.MX6 SOC.

 hw/arm/fsl-imx25.c|   3 +
 hw/arm/fsl-imx6.c |  17 +
 hw/net/imx_fec.c  | 984 ++
 include/hw/arm/fsl-imx6.h |   2 +
 include/hw/net/imx_fec.h  | 243 +---
 net/checksum.c|  83 ++--
 6 files changed, 1005 insertions(+), 327 deletions(-)

-- 
2.7.4



[Qemu-devel] [PATCH v2 1/3] net: improve UDP/TCP checksum computation.

2016-04-23 Thread Jean-Christophe Dubois
This patch reworks the checksum computation to:
 * use the Eth, UDP, TCP structs present in eth.h instead of hardcoded indexes.
 * use the various macros present in eth.h.
 * account for an optional VLAN header.

Signed-off-by: Jean-Christophe Dubois 
---
 net/checksum.c | 83 --
 1 file changed, 57 insertions(+), 26 deletions(-)

diff --git a/net/checksum.c b/net/checksum.c
index d0fa424..fd25209 100644
--- a/net/checksum.c
+++ b/net/checksum.c
@@ -18,9 +18,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "net/checksum.h"
-
-#define PROTO_TCP  6
-#define PROTO_UDP 17
+#include "net/eth.h"
 
 uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq)
 {
@@ -57,40 +55,73 @@ uint16_t net_checksum_tcpudp(uint16_t length, uint16_t 
proto,
 
 void net_checksum_calculate(uint8_t *data, int length)
 {
-int hlen, plen, proto, csum_offset;
-uint16_t csum;
+int plen;
+struct ip_header *ip;
+
+/* Ensure we have at least a Eth header */
+if (length < sizeof(struct eth_header)) {
+return;
+}
 
-/* Ensure data has complete L2 & L3 headers. */
-if (length < 14 + 20) {
+/* Now check we have an IP header (with an optional VLAN header) */
+if (length < eth_get_l2_hdr_length(data) + sizeof(struct ip_header)) {
 return;
 }
 
-if ((data[14] & 0xf0) != 0x40)
+ip = PKT_GET_IP_HDR(data);
+
+if (IP_HEADER_VERSION(ip) != IP_HEADER_VERSION_4) {
return; /* not IPv4 */
-hlen  = (data[14] & 0x0f) * 4;
-plen  = (data[16] << 8 | data[17]) - hlen;
-proto = data[23];
+}
+
+/* Last, check that we have enough data for the IP frame */
+if (length < eth_get_l2_hdr_length(data) + be16_to_cpu(ip->ip_len)) {
+return;
+}
+
+plen  = be16_to_cpu(ip->ip_len) - IP_HDR_GET_LEN(ip);
+
+switch (ip->ip_p) {
+case IP_PROTO_TCP:
+{
+uint16_t csum;
+tcp_header *tcp = (tcp_header *)(ip + 1);
+
+if (plen < sizeof(tcp_header)) {
+return;
+}
 
-switch (proto) {
-case PROTO_TCP:
-   csum_offset = 16;
+tcp->th_sum = 0;
+
+csum = net_checksum_tcpudp(plen, ip->ip_p,
+   (uint8_t *)&ip->ip_src,
+   (uint8_t *)tcp);
+
+tcp->th_sum = cpu_to_be16(csum);
+}
break;
-case PROTO_UDP:
-   csum_offset = 6;
+case IP_PROTO_UDP:
+{
+uint16_t csum;
+udp_header *udp = (udp_header *)(ip + 1);
+
+if (plen < sizeof(udp_header)) {
+return;
+}
+
+udp->uh_sum = 0;
+
+csum = net_checksum_tcpudp(plen, ip->ip_p,
+   (uint8_t *)&ip->ip_src,
+   (uint8_t *)udp);
+
+udp->uh_sum = cpu_to_be16(csum);
+}
break;
 default:
+/* Can't handle any other protocol */
return;
 }
-
-if (plen < csum_offset + 2 || 14 + hlen + plen > length) {
-return;
-}
-
-data[14+hlen+csum_offset]   = 0;
-data[14+hlen+csum_offset+1] = 0;
-csum = net_checksum_tcpudp(plen, proto, data+14+12, data+14+hlen);
-data[14+hlen+csum_offset]   = csum >> 8;
-data[14+hlen+csum_offset+1] = csum & 0xff;
 }
 
 uint32_t
-- 
2.7.4




Re: [Qemu-devel] [PATCH v3 41/44] nbd: Implement NBD_CMD_WRITE_ZEROES on server

2016-04-23 Thread Pavel Borzenkov
On Fri, Apr 22, 2016 at 05:40:49PM -0600, Eric Blake wrote:
> Upstream NBD protocol recently added the ability to efficiently
> write zeroes without having to send the zeroes over the wire,
> along with a flag to control whether the client wants a hole.
> 
> Signed-off-by: Eric Blake 
> 
> ---
> v3: abandon NBD_CMD_CLOSE extension, rebase to use blk_pwrite_zeroes
> ---
>  include/block/nbd.h |  7 +--
>  nbd/server.c| 42 --
>  2 files changed, 45 insertions(+), 4 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 05c0e48..1072d9e 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
>  #define NBD_FLAG_SEND_FUA   (1 << 3)/* Send FUA (Force Unit 
> Access) */
>  #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - 
> rotational media */
>  #define NBD_FLAG_SEND_TRIM  (1 << 5)/* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
> 
>  /* New-style handshake (global) flags, sent from server to client, and
> control what will happen during handshake phase. */
> @@ -102,7 +103,8 @@ typedef struct nbd_reply nbd_reply;
>  #define NBD_INFO_DESCRIPTION2
> 
>  /* Request flags, sent from client to server during transmission phase */
> -#define NBD_CMD_FLAG_FUA(1 << 0)
> +#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write 
> */
> +#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */
> 
>  /* Supported request types */
>  enum {
> @@ -110,7 +112,8 @@ enum {
>  NBD_CMD_WRITE = 1,
>  NBD_CMD_DISC = 2,
>  NBD_CMD_FLUSH = 3,
> -NBD_CMD_TRIM = 4
> +NBD_CMD_TRIM = 4,
> +NBD_CMD_WRITE_ZEROES = 5,

It's defined to 6 by the spec.

>  };
> 
>  #define NBD_DEFAULT_PORT 10809
> diff --git a/nbd/server.c b/nbd/server.c
> index 1edb5f3..563afb2 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -689,7 +689,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
> *data)
>  char buf[8 + 8 + 8 + 128];
>  int rc;
>  const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
> -  NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
> +  NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> +  NBD_FLAG_SEND_WRITE_ZEROES);
>  bool oldStyle;
>  size_t len;
> 
> @@ -1199,11 +1200,17 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
>  rc = -EINVAL;
>  goto out;
>  }
> -if (request->flags & ~NBD_CMD_FLAG_FUA) {
> +if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
>  LOG("unsupported flags (got 0x%x)", request->flags);
>  rc = -EINVAL;
>  goto out;
>  }
> +if (request->type != NBD_CMD_WRITE_ZEROES &&
> +(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
> +LOG("unexpected flags (got 0x%x)", request->flags);
> +rc = -EINVAL;
> +goto out;
> +}
> 
>  rc = 0;
> 
> @@ -1308,6 +1315,37 @@ static void nbd_trip(void *opaque)
>  }
>  break;
> 
> +case NBD_CMD_WRITE_ZEROES:
> +TRACE("Request type is WRITE_ZEROES");
> +
> +if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
> +TRACE("Server is read-only, return error");
> +reply.error = EROFS;
> +goto error_reply;
> +}
> +
> +TRACE("Writing to device");
> +
> +flags = 0;
> +if (request.flags & NBD_CMD_FLAG_FUA) {
> +flags |= BDRV_REQ_FUA;
> +}
> +if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
> +flags |= BDRV_REQ_MAY_UNMAP;
> +}
> +ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
> +request.len, flags);
> +if (ret < 0) {
> +LOG("writing to file failed");
> +reply.error = -ret;
> +goto error_reply;
> +}
> +
> +if (nbd_co_send_reply(req, &reply, 0) < 0) {
> +goto out;
> +}
> +break;
> +
>  case NBD_CMD_DISC:
>  /* unreachable, thanks to special case in nbd_co_receive_request() */
>  abort();
> -- 
> 2.5.5
> 
> 



[Qemu-devel] [PATCH v2 0/3] Add guest PMU in machine virt

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

KVM-ARM64 now supports a guest PMU. This series adds that support to the
virt machine so that guests can use the PMU.

Changes since v1:
* rebase on master
* Address Andrew's comments, add a macro PPI, fix code style, add
  cpu_to_le32()

Shannon Zhao (3):
  target-arm: kvm64: set guest PMUv3 feature bit if supported
  hw/arm/virt: Add PMU node for virt machine
  hw/arm/virt-acpi-build: Add PMU IRQ number in ACPI table

 hw/arm/virt-acpi-build.c |  3 +++
 hw/arm/virt.c| 31 +++
 include/hw/arm/virt.h|  4 
 include/sysemu/kvm.h |  1 +
 stubs/kvm.c  |  5 +
 target-arm/cpu-qom.h |  2 ++
 target-arm/kvm64.c   | 44 
 7 files changed, 90 insertions(+)

-- 
2.0.4





[Qemu-devel] [PATCH v2 1/3] target-arm: kvm64: set guest PMUv3 feature bit if supported

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

Check if kvm supports guest PMUv3. If so, set the corresponding feature
bit for vcpu.

Signed-off-by: Shannon Zhao 
---
 target-arm/cpu-qom.h | 2 ++
 target-arm/kvm64.c   | 5 +
 2 files changed, 7 insertions(+)

diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 1061c08..93aa6a4 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -105,6 +105,8 @@ typedef struct ARMCPU {
 bool powered_off;
 /* CPU has security extension */
 bool has_el3;
+/* CPU has PMU (Performance Monitor Unit) */
+bool has_pmu;
 
 /* CPU has memory protection unit */
 bool has_mpu;
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index e8527bf..b364789 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -461,6 +461,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
 if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
 }
+if (kvm_irqchip_in_kernel() &&
+kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
+cpu->has_pmu = true;
+cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
+}
 
 /* Do KVM_ARM_VCPU_INIT ioctl */
 ret = kvm_arm_vcpu_init(cs);
-- 
2.0.4





[Qemu-devel] [PATCH v2 3/3] hw/arm/virt-acpi-build: Add PMU IRQ number in ACPI table

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

Add PMU IRQ number in ACPI table, then we can use PMU in guest through
ACPI.

Signed-off-by: Shannon Zhao 
---
 hw/arm/virt-acpi-build.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index f51fe39..5031232 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -491,6 +491,9 @@ build_madt(GArray *table_data, GArray *linker, 
VirtGuestInfo *guest_info)
 gicc->arm_mpidr = armcpu->mp_affinity;
 gicc->uid = i;
 gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED);
+
+if (armcpu->has_pmu)
+gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ));
 }
 
 if (guest_info->gic_version == 3) {
-- 
2.0.4





[Qemu-devel] [PATCH v2 2/3] hw/arm/virt: Add PMU node for virt machine

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

Add a virtual PMU device to the virt machine, using PPI 7 as the PMU
overflow interrupt number.

Signed-off-by: Shannon Zhao 
---
 hw/arm/virt.c | 31 +++
 include/hw/arm/virt.h |  4 
 include/sysemu/kvm.h  |  1 +
 stubs/kvm.c   |  5 +
 target-arm/kvm64.c| 39 +++
 5 files changed, 80 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 56d35c7..d77b314 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -428,6 +428,35 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi, int type)
 qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
 }
 
+static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi)
+{
+CPUState *cpu;
+ARMCPU *armcpu;
+uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
+
+CPU_FOREACH(cpu) {
+armcpu = ARM_CPU(cpu);
+if (!armcpu->has_pmu) {
+return;
+}
+
+kvm_arm_pmu_create(cpu, PPI(VIRTUAL_PMU_IRQ));
+}
+
+irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
+ GIC_FDT_IRQ_PPI_CPU_WIDTH, (1 << vbi->smp_cpus) - 1);
+
+armcpu = ARM_CPU(qemu_get_cpu(0));
+qemu_fdt_add_subnode(vbi->fdt, "/pmu");
+if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
+const char compat[] = "arm,armv8-pmuv3";
+qemu_fdt_setprop(vbi->fdt, "/pmu", "compatible",
+ compat, sizeof(compat));
+qemu_fdt_setprop_cells(vbi->fdt, "/pmu", "interrupts",
+   GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, 
irqflags);
+}
+}
+
 static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
 {
 int i;
@@ -1246,6 +1275,8 @@ static void machvirt_init(MachineState *machine)
 
 create_gic(vbi, pic, gic_version, vms->secure);
 
+fdt_add_pmu_nodes(vbi);
+
 create_uart(vbi, pic, VIRT_UART, sysmem);
 
 if (vms->secure) {
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index ecd8589..b50f095 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -40,6 +40,10 @@
 #define ARCH_TIMER_NS_EL1_IRQ 14
 #define ARCH_TIMER_NS_EL2_IRQ 10
 
+#define VIRTUAL_PMU_IRQ 7
+
+#define PPI(irq) ((irq) + 16)
+
 enum {
 VIRT_FLASH,
 VIRT_MEM,
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 0e18f15..90c2c54 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -523,4 +523,5 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void 
*source);
  * Returns: 0 on success, or a negative errno on failure.
  */
 int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
+void kvm_arm_pmu_create(CPUState *cs, int irq);
 #endif
diff --git a/stubs/kvm.c b/stubs/kvm.c
index ddd6204..58a348a 100644
--- a/stubs/kvm.c
+++ b/stubs/kvm.c
@@ -6,3 +6,8 @@ int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 {
 return 0;
 }
+
+void kvm_arm_pmu_create(CPUState *cs, int irq)
+{
+return;
+}
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index b364789..faec4fa 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -382,6 +382,45 @@ static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, 
target_ulong addr)
 return NULL;
 }
 
+static bool kvm_arm_pmu_support_ctrl(CPUState *cs, struct kvm_device_attr 
*attr)
+{
+return kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr) == 0;
+}
+
+void kvm_arm_pmu_create(CPUState *cs, int irq)
+{
+int err;
+
+struct kvm_device_attr attr = {
+.group = KVM_ARM_VCPU_PMU_V3_CTRL,
+.addr = (intptr_t)&irq,
+.attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+.flags = 0,
+};
+
+if (!kvm_arm_pmu_support_ctrl(cs, &attr)) {
+return;
+}
+
+err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, &attr);
+if (err < 0) {
+fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
+strerror(-err));
+abort();
+}
+
+attr.group = KVM_ARM_VCPU_PMU_V3_CTRL;
+attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
+attr.addr = 0;
+attr.flags = 0;
+
+err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, &attr);
+if (err < 0) {
+fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
+strerror(-err));
+abort();
+}
+}
 
 static inline void set_feature(uint64_t *features, int feature)
 {
-- 
2.0.4





[Qemu-devel] [PATCH v6 4/5] ACPI: move acpi_build_srat_memory to common place

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

Move acpi_build_srat_memory to common place so that it could be reused
by ARM.

Cc: Michael S. Tsirkin 
Cc: Igor Mammedov 
Signed-off-by: Shannon Zhao 
---
 hw/acpi/aml-build.c | 12 
 hw/i386/acpi-build.c| 20 
 include/hw/acpi/aml-build.h | 10 ++
 3 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ab89ca6..d167003 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1563,3 +1563,15 @@ build_rsdt(GArray *table_data, GArray *linker, GArray 
*table_offsets,
 build_header(linker, table_data,
  (void *)rsdt, "RSDT", rsdt_len, 1, oem_id, oem_table_id);
 }
+
+void acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
+uint64_t len, int node, MemoryAffinityFlags flags)
+{
+numamem->type = ACPI_SRAT_MEMORY;
+numamem->length = sizeof(*numamem);
+memset(numamem->proximity, 0, 4);
+numamem->proximity[0] = node;
+numamem->flags = cpu_to_le32(flags);
+numamem->base_addr = cpu_to_le64(base);
+numamem->range_length = cpu_to_le64(len);
+}
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9ae4c0d..cd93825 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2427,26 +2427,6 @@ build_tpm2(GArray *table_data, GArray *linker)
  (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL);
 }
 
-typedef enum {
-MEM_AFFINITY_NOFLAGS  = 0,
-MEM_AFFINITY_ENABLED  = (1 << 0),
-MEM_AFFINITY_HOTPLUGGABLE = (1 << 1),
-MEM_AFFINITY_NON_VOLATILE = (1 << 2),
-} MemoryAffinityFlags;
-
-static void
-acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
-   uint64_t len, int node, MemoryAffinityFlags flags)
-{
-numamem->type = ACPI_SRAT_MEMORY;
-numamem->length = sizeof(*numamem);
-memset(numamem->proximity, 0, 4);
-numamem->proximity[0] = node;
-numamem->flags = cpu_to_le32(flags);
-numamem->base_addr = cpu_to_le64(base);
-numamem->range_length = cpu_to_le64(len);
-}
-
 static void
 build_srat(GArray *table_data, GArray *linker, MachineState *machine)
 {
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 2c994b3..d8f9fca 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -198,6 +198,13 @@ typedef enum {
 AML_PULL_NONE = 3,
 } AmlPinConfig;
 
+typedef enum {
+MEM_AFFINITY_NOFLAGS  = 0,
+MEM_AFFINITY_ENABLED  = (1 << 0),
+MEM_AFFINITY_HOTPLUGGABLE = (1 << 1),
+MEM_AFFINITY_NON_VOLATILE = (1 << 2),
+} MemoryAffinityFlags;
+
 typedef
 struct AcpiBuildTables {
 GArray *table_data;
@@ -372,4 +379,7 @@ int
 build_append_named_dword(GArray *array, const char *name_format, ...)
 GCC_FMT_ATTR(2, 3);
 
+void acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
+uint64_t len, int node, MemoryAffinityFlags flags);
+
 #endif
-- 
2.0.4





[Qemu-devel] [PATCH v6 3/5] ACPI: Add GICC Affinity Structure

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

Cc: Michael S. Tsirkin 
Cc: Igor Mammedov 
Signed-off-by: Shannon Zhao 
Reviewed-by: Andrew Jones 
---
 hw/i386/acpi-build.c|  2 +-
 include/hw/acpi/acpi-defs.h | 15 ++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 6477003..9ae4c0d 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2474,7 +2474,7 @@ build_srat(GArray *table_data, GArray *linker, 
MachineState *machine)
 int apic_id = apic_ids->cpus[i].arch_id;
 
 core = acpi_data_push(table_data, sizeof *core);
-core->type = ACPI_SRAT_PROCESSOR;
+core->type = ACPI_SRAT_PROCESSOR_APIC;
 core->length = sizeof(*core);
 core->local_apic_id = apic_id;
 curnode = pcms->node_cpu[apic_id];
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index c7a03d4..bcf5c3f 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -455,8 +455,10 @@ struct AcpiSystemResourceAffinityTable
 } QEMU_PACKED;
 typedef struct AcpiSystemResourceAffinityTable AcpiSystemResourceAffinityTable;
 
-#define ACPI_SRAT_PROCESSOR  0
+#define ACPI_SRAT_PROCESSOR_APIC 0
 #define ACPI_SRAT_MEMORY 1
+#define ACPI_SRAT_PROCESSOR_x2APIC   2
+#define ACPI_SRAT_PROCESSOR_GICC 3
 
 struct AcpiSratProcessorAffinity
 {
@@ -483,6 +485,17 @@ struct AcpiSratMemoryAffinity
 } QEMU_PACKED;
 typedef struct AcpiSratMemoryAffinity AcpiSratMemoryAffinity;
 
+struct AcpiSratProcessorGiccAffinity
+{
+ACPI_SUB_HEADER_DEF
+uint32_tproximity;
+uint32_tacpi_processor_uid;
+uint32_tflags;
+uint32_tclock_domain;
+} QEMU_PACKED;
+
+typedef struct AcpiSratProcessorGiccAffinity AcpiSratProcessorGiccAffinity;
+
 /* PCI fw r3.0 MCFG table. */
 /* Subtable */
 struct AcpiMcfgAllocation {
-- 
2.0.4





[Qemu-devel] [PATCH v6 1/5] ARM: Virt: Set numa-node-id for CPUs

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

Add a numa-node-id property to specify NUMA information for CPUs.

Signed-off-by: Shannon Zhao 
Reviewed-by: Andrew Jones 
---
 hw/arm/virt.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 56d35c7..fe6b11d 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -38,6 +38,7 @@
 #include "net/net.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/device_tree.h"
+#include "sysemu/numa.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "hw/boards.h"
@@ -329,6 +330,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
 {
 int cpu;
 int addr_cells = 1;
+unsigned int i;
 
 /*
  * From Documentation/devicetree/bindings/arm/cpus.txt
@@ -378,6 +380,12 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
   armcpu->mp_affinity);
 }
 
+for (i = 0; i < nb_numa_nodes; i++) {
+if (test_bit(cpu, numa_info[i].node_cpu)) {
+qemu_fdt_setprop_cell(vbi->fdt, nodename, "numa-node-id", i);
+}
+}
+
 g_free(nodename);
 }
 }
-- 
2.0.4





[Qemu-devel] [PATCH v6 2/5] ARM: Add numa-node-id for /memory node

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

When specifying NUMA for ARM machine, generate /memory node according to
NUMA topology.

Signed-off-by: Shannon Zhao 
---
 hw/arm/boot.c | 43 +--
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 5975fbf..cbc65a7 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -14,6 +14,7 @@
 #include "hw/arm/linux-boot-if.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/numa.h"
 #include "hw/boards.h"
 #include "hw/loader.h"
 #include "elf.h"
@@ -405,6 +406,9 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info 
*binfo,
 void *fdt = NULL;
 int size, rc;
 uint32_t acells, scells;
+char *nodename;
+unsigned int i;
+hwaddr mem_base, mem_len;
 
 if (binfo->dtb_filename) {
 char *filename;
@@ -456,12 +460,39 @@ static int load_dtb(hwaddr addr, const struct 
arm_boot_info *binfo,
 goto fail;
 }
 
-rc = qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg",
-  acells, binfo->loader_start,
-  scells, binfo->ram_size);
-if (rc < 0) {
-fprintf(stderr, "couldn't set /memory/reg\n");
-goto fail;
+if (nb_numa_nodes > 0) {
+/*
+ * Turn the /memory node created before into a NOP node, then create
+ * /memory@addr nodes for all numa nodes respectively.
+ */
+qemu_fdt_nop_node(fdt, "/memory");
+mem_base = binfo->loader_start;
+for (i = 0; i < nb_numa_nodes; i++) {
+mem_len = numa_info[i].node_mem;
+nodename = g_strdup_printf("/memory@%" PRIx64, mem_base);
+qemu_fdt_add_subnode(fdt, nodename);
+qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
+rc = qemu_fdt_setprop_sized_cells(fdt, nodename, "reg",
+  acells, mem_base,
+  scells, mem_len);
+if (rc < 0) {
+fprintf(stderr, "couldn't set %s/reg for node %d\n", nodename,
+i);
+goto fail;
+}
+
+qemu_fdt_setprop_cell(fdt, nodename, "numa-node-id", i);
+mem_base += mem_len;
+g_free(nodename);
+}
+} else {
+rc = qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg",
+  acells, binfo->loader_start,
+  scells, binfo->ram_size);
+if (rc < 0) {
+fprintf(stderr, "couldn't set /memory/reg\n");
+goto fail;
+}
 }
 
 if (binfo->kernel_cmdline && *binfo->kernel_cmdline) {
-- 
2.0.4





[Qemu-devel] [PATCH v6 0/5] ARM: Add NUMA support for machine virt

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

Add NUMA support for machine virt. Tested successfully running a guest
Linux kernel with the following patches applied:

- [PATCH v16 0/6] arm64, numa: Add numa support for arm64 platforms
https://lkml.org/lkml/2016/4/8/571
- [PATCH v5 00/14] ACPI NUMA support for ARM64
https://lkml.org/lkml/2016/4/19/852

Example qemu command line:
qemu-system-aarch64 \
-enable-kvm -smp 4\
-kernel Image \
-m 512 -machine virt,kernel_irqchip=on \
-initrd guestfs.cpio.gz \
-cpu host -nographic \
-numa node,mem=256M,cpus=0-1,nodeid=0 \
-numa node,mem=256M,cpus=2-3,nodeid=1 \
-append "console=ttyAMA0 root=/dev/ram"

Changes since v5:
* don't generate /distance-map node since it's optional
* improve the /memory node name
* move acpi_build_srat_memory to common place then reuse it to generate
  SRAT table

Changes since v4:
* rebased on new kernel driver and device bindings, especially the
  compatible string "numa-distance-map-v1" of /distance-map node
* set the numa-node-id for first /memory node

Changes since v3:
* based on new kernel driver and device bindings
* add ACPI part

Changes since v2:
* update to use NUMA node property arm,associativity.

Changes since v1:
Take into account Peter's comments:
* rename virt_memory_init to arm_generate_memory_dtb
* move arm_generate_memory_dtb to boot.c and make it a common func
* use a struct numa_map to generate numa dtb

Shannon Zhao (5):
  ARM: Virt: Set numa-node-id for CPUs
  ARM: Add numa-node-id for /memory node
  ACPI: Add GICC Affinity Structure
  ACPI: move acpi_build_srat_memory to common place
  ACPI: Virt: Generate SRAT table

 hw/acpi/aml-build.c | 12 +++
 hw/arm/boot.c   | 43 +++--
 hw/arm/virt-acpi-build.c| 52 +
 hw/arm/virt.c   |  8 +++
 hw/i386/acpi-build.c| 22 +--
 include/hw/acpi/acpi-defs.h | 15 -
 include/hw/acpi/aml-build.h | 10 +
 7 files changed, 134 insertions(+), 28 deletions(-)

-- 
2.0.4





[Qemu-devel] [PATCH v6 5/5] ACPI: Virt: Generate SRAT table

2016-04-23 Thread Shannon Zhao
From: Shannon Zhao 

To support NUMA, an SRAT ACPI table needs to be generated.

Signed-off-by: Shannon Zhao 
---
 hw/arm/virt-acpi-build.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index f51fe39..e0e90d4 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -43,6 +43,7 @@
 #include "hw/acpi/aml-build.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
+#include "sysemu/numa.h"
 
 #define ARM_SPI_BASE 32
 #define ACPI_POWER_BUTTON_DEVICE "PWRB"
@@ -414,6 +415,52 @@ build_spcr(GArray *table_data, GArray *linker, 
VirtGuestInfo *guest_info)
 }
 
 static void
+build_srat(GArray *table_data, GArray *linker, VirtGuestInfo *guest_info)
+{
+AcpiSystemResourceAffinityTable *srat;
+AcpiSratProcessorGiccAffinity *core;
+AcpiSratMemoryAffinity *numamem;
+int i, j, srat_start;
+uint64_t mem_base;
+uint32_t *cpu_node = g_malloc0(guest_info->smp_cpus * sizeof(uint32_t));
+
+for (i = 0; i < guest_info->smp_cpus; i++) {
+for (j = 0; j < nb_numa_nodes; j++) {
+if (test_bit(i, numa_info[j].node_cpu)) {
+cpu_node[i] = j;
+break;
+}
+}
+}
+
+srat_start = table_data->len;
+srat = acpi_data_push(table_data, sizeof(*srat));
+srat->reserved1 = cpu_to_le32(1);
+
+for (i = 0; i < guest_info->smp_cpus; ++i) {
+core = acpi_data_push(table_data, sizeof(*core));
+core->type = ACPI_SRAT_PROCESSOR_GICC;
+core->length = sizeof(*core);
+core->proximity = cpu_to_le32(cpu_node[i]);
+core->acpi_processor_uid = cpu_to_le32(i);
+core->flags = cpu_to_le32(1);
+}
+g_free(cpu_node);
+
+mem_base = guest_info->memmap[VIRT_MEM].base;
+for (i = 0; i < nb_numa_nodes; ++i) {
+numamem = acpi_data_push(table_data, sizeof(*numamem));
+acpi_build_srat_memory(numamem, mem_base, numa_info[i].node_mem, i,
+   MEM_AFFINITY_ENABLED);
+mem_base += numa_info[i].node_mem;
+}
+
+build_header(linker, table_data,
+ (void *)(table_data->data + srat_start), "SRAT",
+ table_data->len - srat_start, 3, NULL, NULL);
+}
+
+static void
 build_mcfg(GArray *table_data, GArray *linker, VirtGuestInfo *guest_info)
 {
 AcpiTableMcfg *mcfg;
@@ -638,6 +685,11 @@ void virt_acpi_build(VirtGuestInfo *guest_info, 
AcpiBuildTables *tables)
 acpi_add_table(table_offsets, tables_blob);
 build_spcr(tables_blob, tables->linker, guest_info);
 
+if (nb_numa_nodes > 0) {
+acpi_add_table(table_offsets, tables_blob);
+build_srat(tables_blob, tables->linker, guest_info);
+}
+
 /* RSDT is pointed to by RSDP */
 rsdt = tables_blob->len;
 build_rsdt(tables_blob, tables->linker, table_offsets, NULL, NULL);
-- 
2.0.4





[Qemu-devel] [PATCH 1/2] drive-backup: added support for data compression

2016-04-23 Thread Denis V. Lunev
From: Pavel Butsykin 

The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

The patch adds a flag to the qmp/hmp drive-backup command which enables
block compression. Compression should be implemented in the format driver
to enable this feature.

There are some limitations of the format driver to allow compressed writes.
We can write data only once. Though for backup this is perfectly fine.
These limitations are maintained by the driver and the error will be
reported if we are doing something wrong.

Signed-off-by: Pavel Butsykin 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
---
 block/backup.c| 13 +
 blockdev.c| 12 ++--
 hmp-commands.hx   |  8 +---
 hmp.c |  3 ++-
 include/block/block_int.h |  1 +
 qapi/block-core.json  |  2 +-
 qmp-commands.hx   |  4 +++-
 7 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 491fd14..cad0439 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
 uint64_t sectors_read;
 unsigned long *done_bitmap;
 int64_t cluster_size;
+bool compress;
 QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
 
@@ -157,6 +158,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
 ret = bdrv_co_write_zeroes(job->target,
start * sectors_per_cluster,
n, BDRV_REQ_MAY_UNMAP);
+} else if (job->compress) {
+ret = bdrv_write_compressed(job->target,
+start * sectors_per_cluster,
+iov.iov_base, n);
 } else {
 ret = bdrv_co_writev(job->target,
  start * sectors_per_cluster, n,
@@ -497,6 +502,7 @@ static void coroutine_fn backup_run(void *opaque)
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
   int64_t speed, MirrorSyncMode sync_mode,
   BdrvDirtyBitmap *sync_bitmap,
+  bool compress,
   BlockdevOnError on_source_error,
   BlockdevOnError on_target_error,
   BlockCompletionFunc *cb, void *opaque,
@@ -534,6 +540,12 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 return;
 }
 
+if (compress && target->drv->bdrv_write_compressed == NULL) {
+error_setg(errp, "Compression is not supported for this drive %s",
+   bdrv_get_device_name(target));
+return;
+}
+
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
 return;
 }
@@ -580,6 +592,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 job->sync_mode = sync_mode;
 job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
+job->compress = compress;
 
 /* If there is no backing file on the target, we cannot rely on COW if our
  * backup cluster size is smaller than the target cluster size. Even for
diff --git a/blockdev.c b/blockdev.c
index f1f520a..ef72f19 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1856,6 +1856,7 @@ static void do_drive_backup(const char *device, const 
char *target,
 bool has_mode, enum NewImageMode mode,
 bool has_speed, int64_t speed,
 bool has_bitmap, const char *bitmap,
+bool has_compress, bool compress,
 bool has_on_source_error,
 BlockdevOnError on_source_error,
 bool has_on_target_error,
@@ -1896,6 +1897,7 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 backup->has_mode, backup->mode,
 backup->has_speed, backup->speed,
 backup->has_bitmap, backup->bitmap,
+backup->has_compress, backup->compress,
 backup->has_on_source_error, backup->on_source_error,
 backup->has_on_target_error, backup->on_target_error,
 common->block_job_txn, &local_err);
@@ -3170,6 +3172,7 @@ static void do_drive_backup(const char *device, const 
char *target,
 bool has_mode, enum NewImageMode mode,
 bool has_speed, int64_t speed,
 bool has_bitmap, const char *bitmap,
+bool has_compress, bool compress,
 bool 

[Qemu-devel] [PATCH for 2.7 0/2] backup compression

2016-04-23 Thread Denis V. Lunev
The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

These patches add the ability to compress data during backup. This
functionality is implemented by means of adding options to the qmp/hmp
commands(drive-backup, blockdev-backup). The implementation is quite
simple, because the responsibility for data compression is placed on the
format driver.

Signed-off-by: Pavel Butsykin 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 

Pavel Butsykin (2):
  drive-backup: added support for data compression
  blockdev-backup: added support for data compression

 block/backup.c| 13 +
 blockdev.c| 20 ++--
 hmp-commands.hx   |  8 +---
 hmp.c |  3 ++-
 include/block/block_int.h |  1 +
 qapi/block-core.json  |  3 ++-
 qmp-commands.hx   |  7 +--
 7 files changed, 46 insertions(+), 9 deletions(-)

-- 
2.1.4




[Qemu-devel] [PATCH 2/2] blockdev-backup: added support for data compression

2016-04-23 Thread Denis V. Lunev
From: Pavel Butsykin 

The idea is simple - backup is "written-once" data. It is written block
by block and it is large enough. It would be nice to save storage
space and compress it.

Signed-off-by: Pavel Butsykin 
Signed-off-by: Denis V. Lunev 
CC: Jeff Cody 
CC: Markus Armbruster 
CC: Eric Blake 
CC: John Snow 
---
 blockdev.c   | 10 +-
 qapi/block-core.json |  1 +
 qmp-commands.hx  |  3 ++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index ef72f19..353c1c8 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1940,6 +1940,7 @@ typedef struct BlockdevBackupState {
 static void do_blockdev_backup(const char *device, const char *target,
enum MirrorSyncMode sync,
bool has_speed, int64_t speed,
+   bool has_compress, bool compress,
bool has_on_source_error,
BlockdevOnError on_source_error,
bool has_on_target_error,
@@ -1987,6 +1988,7 @@ static void blockdev_backup_prepare(BlkActionState 
*common, Error **errp)
 do_blockdev_backup(backup->device, backup->target,
backup->sync,
backup->has_speed, backup->speed,
+   backup->has_compress, backup->compress,
backup->has_on_source_error, backup->on_source_error,
backup->has_on_target_error, backup->on_target_error,
 common->block_job_txn, &local_err);
@@ -3335,6 +3337,7 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error 
**errp)
 void do_blockdev_backup(const char *device, const char *target,
  enum MirrorSyncMode sync,
  bool has_speed, int64_t speed,
+ bool has_compress, bool compress,
  bool has_on_source_error,
  BlockdevOnError on_source_error,
  bool has_on_target_error,
@@ -3356,6 +3359,9 @@ void do_blockdev_backup(const char *device, const char 
*target,
 if (!has_on_target_error) {
 on_target_error = BLOCKDEV_ON_ERROR_REPORT;
 }
+if (!has_compress) {
+compress = false;
+}
 
 blk = blk_by_name(device);
 if (!blk) {
@@ -3386,7 +3392,7 @@ void do_blockdev_backup(const char *device, const char 
*target,
 
 bdrv_ref(target_bs);
 bdrv_set_aio_context(target_bs, aio_context);
-backup_start(bs, target_bs, speed, sync, NULL, false, on_source_error,
+backup_start(bs, target_bs, speed, sync, NULL, compress, on_source_error,
 on_target_error, block_job_cb, bs, txn, &local_err);
 if (local_err != NULL) {
 bdrv_unref(target_bs);
@@ -3399,6 +3405,7 @@ out:
 void qmp_blockdev_backup(const char *device, const char *target,
  enum MirrorSyncMode sync,
  bool has_speed, int64_t speed,
+ bool has_compress, bool compress,
  bool has_on_source_error,
  BlockdevOnError on_source_error,
  bool has_on_target_error,
@@ -3406,6 +3413,7 @@ void qmp_blockdev_backup(const char *device, const char 
*target,
  Error **errp)
 {
 do_blockdev_backup(device, target, sync, has_speed, speed,
+   has_compress, compress,
has_on_source_error, on_source_error,
has_on_target_error, on_target_error,
NULL, errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ebedf0d..8e47e57 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -941,6 +941,7 @@
   'data': { 'device': 'str', 'target': 'str',
 'sync': 'MirrorSyncMode',
 '*speed': 'int',
+'*compress': 'bool',
 '*on-source-error': 'BlockdevOnError',
 '*on-target-error': 'BlockdevOnError' } }
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ce36518..ff9e491 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1241,7 +1241,7 @@ EQMP
 
 {
 .name   = "blockdev-backup",
-.args_type  = "sync:s,device:B,target:B,speed:i?,"
+.args_type  = "sync:s,device:B,target:B,speed:i?,compress:b?,"
   "on-source-error:s?,on-target-error:s?",
 .mhandler.cmd_new = qmp_marshal_blockdev_backup,
 },
@@ -1263,6 +1263,7 @@ Arguments:
   sectors allocated in the topmost image, or "none" to only replicate
   new I/O (MirrorSyncMode).
 - "speed": the maximum speed, in bytes per second (json-int, optional)
+- "compress": compress data blocks (if the target format supports it).
 - "on-source-error": the action to 

[Qemu-devel] emulation details of qemu

2016-04-23 Thread tutu sky
Hi everybody.
I would like to know whether it is possible to access the registers or
micro-architectural parts of a core/CPU in QEMU at run time.
If it is not possible, how can we hotplug a core in this emulator?

thanks a lot.


Re: [Qemu-devel] [PATCH v3 21/44] block: Switch blk_write_zeroes() to byte interface

2016-04-23 Thread Denis V. Lunev

On 04/23/2016 02:40 AM, Eric Blake wrote:

Sector-based blk_write() should die; convert the one-off
variant blk_write_zeroes().

Signed-off-by: Eric Blake 
---
  include/sysemu/block-backend.h | 4 ++--
  block/block-backend.c  | 8 
  block/parallels.c  | 3 ++-
  qemu-img.c | 3 ++-
  4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 662a106..1246699 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -96,8 +96,8 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, 
uint8_t *buf,
int count);
  int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
int nb_sectors);
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags);
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags);
  BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
   int nb_sectors, BdrvRequestFlags flags,
   BlockCompletionFunc *cb, void *opaque);
diff --git a/block/block-backend.c b/block/block-backend.c
index 5513b6f..ae08bd2 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -816,11 +816,11 @@ int blk_write(BlockBackend *blk, int64_t sector_num, 
const uint8_t *buf,
blk_write_entry, 0);
  }

-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+  int count, BdrvRequestFlags flags)
  {
-return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
-  flags | BDRV_REQ_ZERO_WRITE);
+return blk_prw(blk, offset, NULL, count, blk_write_entry,
+   flags | BDRV_REQ_ZERO_WRITE);
  }

  static void error_callback_bh(void *opaque)
diff --git a/block/parallels.c b/block/parallels.c
index 2d8bc87..95bfc32 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -516,7 +516,8 @@ static int parallels_create(const char *filename, QemuOpts 
*opts, Error **errp)
  if (ret < 0) {
  goto exit;
  }
-ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
+ret = blk_pwrite_zeroes(file, BDRV_SECTOR_SIZE,
+(bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
  if (ret < 0) {
  goto exit;
  }
diff --git a/qemu-img.c b/qemu-img.c
index 2e4646e..376107c 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1601,7 +1601,8 @@ static int convert_write(ImgConvertState *s, int64_t 
sector_num, int nb_sectors,
  if (s->has_zero_init) {
  break;
  }
-ret = blk_write_zeroes(s->target, sector_num, n, 0);
+ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
+n << BDRV_SECTOR_BITS, 0);
  if (ret < 0) {
  return ret;
  }

Acked-by: Denis V. Lunev 



Re: [Qemu-devel] [PATCH v3 09/44] block: Allow BDRV_REQ_FUA through blk_pwrite()

2016-04-23 Thread Denis V. Lunev

On 04/23/2016 02:40 AM, Eric Blake wrote:

We have several block drivers that understand BDRV_REQ_FUA,
and emulate it in the block layer for the rest by a full flush.
But without a way to actually request BDRV_REQ_FUA during a
pass-through blk_pwrite(), FUA-aware block drivers like NBD are
forced to repeat the emulation logic of a full flush regardless
of whether the backend they are writing to could do it more
efficiently.

This patch just wires up a flags argument; a followup patch
will actually make use of it in the NBD driver and in qemu-io.

Signed-off-by: Eric Blake 
---
  include/sysemu/block-backend.h |  3 ++-
  block/block-backend.c  |  6 --
  block/crypto.c |  2 +-
  block/parallels.c  |  2 +-
  block/qcow.c   |  8 
  block/qcow2.c  |  4 ++--
  block/qed.c|  6 +++---
  block/sheepdog.c   |  2 +-
  block/vdi.c|  4 ++--
  block/vhdx.c   |  5 +++--
  block/vmdk.c   | 10 +-
  block/vpc.c| 10 +-
  hw/nvram/spapr_nvram.c |  4 ++--
  nbd/server.c   |  2 +-
  qemu-io-cmds.c |  2 +-
  15 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index c62b6fe..6991b26 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -102,7 +102,8 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t 
sector_num,
   int nb_sectors, BdrvRequestFlags flags,
   BlockCompletionFunc *cb, void *opaque);
  int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count);
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count);
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+   BdrvRequestFlags flags);
  int64_t blk_getlength(BlockBackend *blk);
  void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
  int64_t blk_nb_sectors(BlockBackend *blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index 16c9d5e..4551865 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -955,9 +955,11 @@ int blk_pread(BlockBackend *blk, int64_t offset, void 
*buf, int count)
  return count;
  }

-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+   BdrvRequestFlags flags)
  {
-int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
+int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+  flags);
  if (ret < 0) {
  return ret;
  }
diff --git a/block/crypto.c b/block/crypto.c
index 1903e84..32ba17c 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -91,7 +91,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
  struct BlockCryptoCreateData *data = opaque;
  ssize_t ret;

-ret = blk_pwrite(data->blk, offset, buf, buflen);
+ret = blk_pwrite(data->blk, offset, buf, buflen, 0);
  if (ret < 0) {
  error_setg_errno(errp, -ret, "Could not write encryption header");
  return ret;
diff --git a/block/parallels.c b/block/parallels.c
index 324ed43..2d8bc87 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -512,7 +512,7 @@ static int parallels_create(const char *filename, QemuOpts 
*opts, Error **errp)
  memset(tmp, 0, sizeof(tmp));
  memcpy(tmp, &header, sizeof(header));

-ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE, 0);
  if (ret < 0) {
  goto exit;
  }
diff --git a/block/qcow.c b/block/qcow.c
index 60ddb12..d6dc1b0 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -853,14 +853,14 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
  }

  /* write all the data */
-ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
+ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0);
  if (ret != sizeof(header)) {
  goto exit;
  }

  if (backing_file) {
  ret = blk_pwrite(qcow_blk, sizeof(header),
-backing_file, backing_filename_len);
+ backing_file, backing_filename_len, 0);
  if (ret != backing_filename_len) {
  goto exit;
  }
@@ -869,8 +869,8 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
  tmp = g_malloc0(BDRV_SECTOR_SIZE);
  for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
  BDRV_SECTOR_SIZE); i++) {
-ret = blk_pwrite(qcow_blk, header_size +
-BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
+ tmp, 

Re: [Qemu-devel] [PATCH v5 3/5] ARM: Add numa-node-id for /memory node

2016-04-23 Thread Shannon Zhao


On 2016/4/23 15:45, Andrew Jones wrote:
 @@ -456,14 +460,39 @@ static int load_dtb(hwaddr addr, const struct 
 arm_boot_info *binfo,
 > >>  goto fail;
 > >>  }
 > >>  
 > >> +mem_len = (nb_numa_nodes > 0) ? numa_info[0].node_mem : 
 > >> binfo->ram_size;
 > >>  rc = qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg",
>>> > > 
>>> > > So node0's memory node will still be called '/memory' instead of
>>> > > '/memory@addr' like the other nodes? Shouldn't we change it too?
>>> > > 
>> > Previously I deleted the /memory node creation codes in virt.c and
>> > create here, but that will cause other boards booting fail since
>> > load_dtb() is a common function. So to avoid more changes to other
>> > files, I just use current way. So is there any way to change the node
>> > name after it's created in qemu?
> I'm not sure if that's possible, but we could maybe use qemu_fdt_nop_node
> to turn /memory into a NOP node, and then add a new one?
This would be a good solution, I think. I'll update it using
qemu_fdt_nop_node.

Thanks,
-- 
Shannon




Re: [Qemu-devel] [PATCH 2/3] hw/arm/virt: Add PMU node for virt machine

2016-04-23 Thread Andrew Jones
On Sat, Apr 23, 2016 at 09:01:01AM +0800, Shannon Zhao wrote:
> 
> 
> On 2016/4/22 22:32, Andrew Jones wrote:
> > On Fri, Mar 25, 2016 at 05:46:20PM +0800, Shannon Zhao wrote:
> >> From: Shannon Zhao 
> >>
> >> Add a virtual PMU device for the virt machine, using PPI 7 as the PMU
> >> overflow interrupt number.
> >>
> >> Signed-off-by: Shannon Zhao 
> >> ---
> >>  hw/arm/virt.c | 31 +++
> >>  include/hw/arm/virt.h |  2 ++
> >>  include/sysemu/kvm.h  |  1 +
> >>  stubs/kvm.c   |  5 +
> >>  target-arm/kvm64.c| 51 
> >> +++
> >>  5 files changed, 90 insertions(+)
> >>
> >> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> >> index 95331a5..94c2beb 100644
> >> --- a/hw/arm/virt.c
> >> +++ b/hw/arm/virt.c
> >> @@ -427,6 +427,35 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi, int 
> >> type)
> >>  qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
> >>  }
> >>  
> >> +static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi)
> >> +{
> >> +CPUState *cpu;
> >> +ARMCPU *armcpu;
> >> +uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
> >> +
> >> +CPU_FOREACH(cpu) {
> >> +armcpu = ARM_CPU(cpu);
> >> +  if (!armcpu->has_pmu) {
> >> +  return;
> > 
> > funny indentation here
> > 
> >> +  }
> >> +
> >> +kvm_arm_pmu_create(cpu, VIRTUAL_PMU_IRQ + 16);
> > 
> > I think we should have a PPI(irq) ((irq) + 16) type of macro.
> > 
> >> +}
> >> +
> >> +irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
> >> + GIC_FDT_IRQ_PPI_CPU_WIDTH, (1 << vbi->smp_cpus) 
> >> - 1);
> >> +
> >> +armcpu = ARM_CPU(qemu_get_cpu(0));
> >> +qemu_fdt_add_subnode(vbi->fdt, "/pmu");
> >> +if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
> >> +const char compat[] = "arm,armv8-pmuv3";
> >> +qemu_fdt_setprop(vbi->fdt, "/pmu", "compatible",
> >> + compat, sizeof(compat));
> >> +qemu_fdt_setprop_cells(vbi->fdt, "/pmu", "interrupts",
> >> +   GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, 
> >> irqflags);
> >> +}
> > 
> > else what? I guess it's not possible to have has_pmu and !ARM_FEATURE_V8
> > at the same time right now, but it seems strange to create a /pmu node,
> > but then only conditionally populate it.
> > 
> Yeah, currently kvm only supports guest PMU for ARMv8, but maybe in the
> future it will support ARMv7.
> 
> >> +}
> >> +
> >>  static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
> >>  {
> >>  int i;
> >> @@ -1242,6 +1271,8 @@ static void machvirt_init(MachineState *machine)
> >>  
> >>  create_gic(vbi, pic, gic_version, vms->secure);
> >>  
> >> +fdt_add_pmu_nodes(vbi);
> >> +
> >>  create_uart(vbi, pic, VIRT_UART, sysmem);
> >>  
> >>  if (vms->secure) {
> >> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> >> index ecd8589..864eb49 100644
> >> --- a/include/hw/arm/virt.h
> >> +++ b/include/hw/arm/virt.h
> >> @@ -40,6 +40,8 @@
> >>  #define ARCH_TIMER_NS_EL1_IRQ 14
> >>  #define ARCH_TIMER_NS_EL2_IRQ 10
> >>  
> >> +#define VIRTUAL_PMU_IRQ 7
> > 
> > Can we find a way to make this configurable? a cpu property?
> > 
> Of course we can. But as we are the maker of the virt machine board, we
> can decide the design of the hardware. In addition, what's the purpose
> for making it configurable?

Yeah, nevermind. I can't think of any good reason right now. I was only
thinking about it because your KVM interface allows for either SPI or
PPI. But, even considering that, I guess we still don't need to allow
the number(s) to be configurable, just the type. For SPI we'd need to
reserve a range of numbers though, since each cpu needs their own.

> 
> >> +
> >>  enum {
> >>  VIRT_FLASH,
> >>  VIRT_MEM,
> >> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> >> index 6695fa7..80b6cb3 100644
> >> --- a/include/sysemu/kvm.h
> >> +++ b/include/sysemu/kvm.h
> >> @@ -514,4 +514,5 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void 
> >> *source);
> >>   * Returns: 0 on success, or a negative errno on failure.
> >>   */
> >>  int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
> >> +void kvm_arm_pmu_create(CPUState *cs, int irq);
> >>  #endif
> >> diff --git a/stubs/kvm.c b/stubs/kvm.c
> >> index ddd6204..58a348a 100644
> >> --- a/stubs/kvm.c
> >> +++ b/stubs/kvm.c
> >> @@ -6,3 +6,8 @@ int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
> >>  {
> >>  return 0;
> >>  }
> >> +
> >> +void kvm_arm_pmu_create(CPUState *cs, int irq)
> >> +{
> >> +return;
> >> +}
> >> diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
> >> index b364789..b97b9ef 100644
> >> --- a/target-arm/kvm64.c
> >> +++ b/target-arm/kvm64.c
> >> @@ -382,6 +382,57 @@ static CPUWatchpoint *find_hw_watchpoint(CPUState 
> >> *cpu, target_ulong addr)
> >>  return NULL;
> >>  }
> >>  
> >> +static bool 

Re: [Qemu-devel] [PATCH v5 3/5] ARM: Add numa-node-id for /memory node

2016-04-23 Thread Andrew Jones
On Sat, Apr 23, 2016 at 09:16:11AM +0800, Shannon Zhao wrote:
> 
> 
> On 2016/4/22 20:48, Andrew Jones wrote:
> > On Thu, Apr 21, 2016 at 02:23:52PM +0800, Shannon Zhao wrote:
> >> From: Shannon Zhao 
> >>
> >> When specifying NUMA for ARM machine, generate /memory node according to
> >> NUMA topology.
> >>
> >> Signed-off-by: Shannon Zhao 
> >> ---
> >>  hw/arm/boot.c | 31 ++-
> >>  1 file changed, 30 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> >> index 5975fbf..3770235 100644
> >> --- a/hw/arm/boot.c
> >> +++ b/hw/arm/boot.c
> >> @@ -14,6 +14,7 @@
> >>  #include "hw/arm/linux-boot-if.h"
> >>  #include "sysemu/kvm.h"
> >>  #include "sysemu/sysemu.h"
> >> +#include "sysemu/numa.h"
> >>  #include "hw/boards.h"
> >>  #include "hw/loader.h"
> >>  #include "elf.h"
> >> @@ -405,6 +406,9 @@ static int load_dtb(hwaddr addr, const struct 
> >> arm_boot_info *binfo,
> >>  void *fdt = NULL;
> >>  int size, rc;
> >>  uint32_t acells, scells;
> >> +char *nodename;
> >> +unsigned int i;
> >> +hwaddr mem_base, mem_len;
> >>  
> >>  if (binfo->dtb_filename) {
> >>  char *filename;
> >> @@ -456,14 +460,39 @@ static int load_dtb(hwaddr addr, const struct 
> >> arm_boot_info *binfo,
> >>  goto fail;
> >>  }
> >>  
> >> +mem_len = (nb_numa_nodes > 0) ? numa_info[0].node_mem : 
> >> binfo->ram_size;
> >>  rc = qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg",
> > 
> > So node0's memory node will still be called '/memory' instead of
> > '/memory@addr' like the other nodes? Shouldn't we change it too?
> > 
> Previously I deleted the /memory node creation code in virt.c and
> created the node here, but that causes other boards to fail to boot,
> since load_dtb() is a common function. So, to avoid more changes to
> other files, I kept the current approach. Is there any way to change
> the node name after it's created in qemu?

I'm not sure if that's possible, but we could maybe use qemu_fdt_nop_node
to turn /memory into a NOP node, and then add a new one?

drew



Re: [Qemu-devel] [PATCH v5 1/5] ARM: Virt: Add /distance-map node for NUMA

2016-04-23 Thread Shannon Zhao


On 2016/4/23 15:03, Andrew Jones wrote:
> On Sat, Apr 23, 2016 at 09:17:25AM +0800, Shannon Zhao wrote:
>> > 
>> > 
>> > On 2016/4/22 20:25, Andrew Jones wrote:
>>> > > On Thu, Apr 21, 2016 at 02:23:50PM +0800, Shannon Zhao wrote:
> > >> > From: Shannon Zhao 
> > >> > 
> > >> > This /distance-map node is used to describe the accessing distance
> > >> > between NUMA nodes.
> > >> > 
> > >> > Signed-off-by: Shannon Zhao 
> > >> > ---
> > >> >  hw/arm/virt.c | 30 ++
> > >> >  1 file changed, 30 insertions(+)
> > >> > 
> > >> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > >> > index 56d35c7..814a1eb 100644
> > >> > --- a/hw/arm/virt.c
> > >> > +++ b/hw/arm/virt.c
> > >> > @@ -40,6 +40,7 @@
> > >> >  #include "sysemu/device_tree.h"
> > >> >  #include "sysemu/sysemu.h"
> > >> >  #include "sysemu/kvm.h"
> > >> > +#include "sysemu/numa.h"
> > >> >  #include "hw/boards.h"
> > >> >  #include "hw/loader.h"
> > >> >  #include "exec/address-spaces.h"
> > >> > @@ -203,6 +204,9 @@ static VirtBoardInfo *find_machine_info(const 
> > >> > char *cpu)
> > >> >  
> > >> >  static void create_fdt(VirtBoardInfo *vbi)
> > >> >  {
> > >> > +unsigned int i, j, number, count;
>>> > > s/count/index/ ?
>>> > > 
> > >> > +uint64_t *matrix;
> > >> > +
> > >> >  void *fdt = create_device_tree(&vbi->fdt_size);
> > >> >  
> > >> >  if (!fdt) {
> > >> > @@ -239,6 +243,32 @@ static void create_fdt(VirtBoardInfo *vbi)
> > >> >  "clk24mhz");
> > >> >  qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", 
> > >> > vbi->clock_phandle);
> > >> >  
> > >> > +if (nb_numa_nodes <= 0) {
> > >> > +return;
> > >> > +}
> > >> > +
> > >> > +/* Add /distance-map node for NUMA */
> > >> > +qemu_fdt_add_subnode(fdt, "/distance-map");
> > >> > +qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> > >> > +"numa-distance-map-v1");
> > >> > +
> > >> > +number = nb_numa_nodes * nb_numa_nodes * 6;
> > >> > +matrix = g_malloc0(number * sizeof(uint64_t));
> > >> > +for (i = 0; i < nb_numa_nodes; i++) {
> > >> > +for (j = 0; j < nb_numa_nodes; j++) {
> > >> > +count = (i * nb_numa_nodes + j) * 6;
> > >> > +matrix[count++] = 1;
> > >> > +matrix[count++] = i;
> > >> > +matrix[count++] = 1;
> > >> > +matrix[count++] = j;
> > >> > +matrix[count++] = 1;
> > >> > +matrix[count++] = (i == j) ? 10 : 20;
> > >> > +}
> > >> > +}
> > >> > +qemu_fdt_setprop_sized_cells_from_array(fdt, "/distance-map",
> > >> > +"distance-matrix", 
> > >> > number / 2,
> > >> > +matrix);
>>> > > I had to read qemu_fdt_setprop_sized_cells_from_array to understand why
>>> > > above we're using 6 instead of 3, and then placing all the 1's in every
>>> > > other slot, and then dividing number by 2 here. Is using this function
>>> > > worth the confusion?
>>> > > 
>>> > > I think the following would greatly improve reviewability, and shave off
>>> > > a bit of boot time (by not having to alloc more mem and copy the 
>>> > > matrix).
>>> > > 
>>> > > uint32_t *matrix;
>>> > > 
>>> > > number = nb_numa_nodes * nb_numa_nodes * 3;
>>> > > matrix = g_malloc0(number * sizeof(uint32_t));
>>> > > for (i = 0; i < nb_numa_nodes; i++) {
>>> > > for (j = 0; j < nb_numa_nodes; j++) {
>>> > > count = (i * nb_numa_nodes + j) * 3;
>>> > > matrix[count++] = cpu_to_be32(i);
>>> > > matrix[count++] = cpu_to_be32(j);
>>> > > matrix[count++] = cpu_to_be32(i == j ? 10 : 20);
> I noticed that /distance-map is an optional node by the latest version
> of the spec. In its absence default values will be used. Do we plan on
> putting anything other than the 10s and 20s here? If not, then we can
> leave it to Linux to determine what the defaults should be, and it'll
> use them by itself if we leave this node out.
> 
Agreed. I had thought about this before. We can omit the /distance-map
node, just as we don't provide a SLIT table for ACPI. So I'll drop
this patch.

Thanks,
-- 
Shannon




Re: [Qemu-devel] [PATCH v5 1/5] ARM: Virt: Add /distance-map node for NUMA

2016-04-23 Thread Andrew Jones
On Sat, Apr 23, 2016 at 09:17:25AM +0800, Shannon Zhao wrote:
> 
> 
> On 2016/4/22 20:25, Andrew Jones wrote:
> > On Thu, Apr 21, 2016 at 02:23:50PM +0800, Shannon Zhao wrote:
> >> > From: Shannon Zhao 
> >> > 
> >> > This /distance-map node is used to describe the accessing distance
> >> > between NUMA nodes.
> >> > 
> >> > Signed-off-by: Shannon Zhao 
> >> > ---
> >> >  hw/arm/virt.c | 30 ++
> >> >  1 file changed, 30 insertions(+)
> >> > 
> >> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> >> > index 56d35c7..814a1eb 100644
> >> > --- a/hw/arm/virt.c
> >> > +++ b/hw/arm/virt.c
> >> > @@ -40,6 +40,7 @@
> >> >  #include "sysemu/device_tree.h"
> >> >  #include "sysemu/sysemu.h"
> >> >  #include "sysemu/kvm.h"
> >> > +#include "sysemu/numa.h"
> >> >  #include "hw/boards.h"
> >> >  #include "hw/loader.h"
> >> >  #include "exec/address-spaces.h"
> >> > @@ -203,6 +204,9 @@ static VirtBoardInfo *find_machine_info(const char 
> >> > *cpu)
> >> >  
> >> >  static void create_fdt(VirtBoardInfo *vbi)
> >> >  {
> >> > +unsigned int i, j, number, count;
> > s/count/index/ ?
> > 
> >> > +uint64_t *matrix;
> >> > +
> >> >  void *fdt = create_device_tree(&vbi->fdt_size);
> >> >  
> >> >  if (!fdt) {
> >> > @@ -239,6 +243,32 @@ static void create_fdt(VirtBoardInfo *vbi)
> >> >  "clk24mhz");
> >> >  qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", 
> >> > vbi->clock_phandle);
> >> >  
> >> > +if (nb_numa_nodes <= 0) {
> >> > +return;
> >> > +}
> >> > +
> >> > +/* Add /distance-map node for NUMA */
> >> > +qemu_fdt_add_subnode(fdt, "/distance-map");
> >> > +qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> >> > +"numa-distance-map-v1");
> >> > +
> >> > +number = nb_numa_nodes * nb_numa_nodes * 6;
> >> > +matrix = g_malloc0(number * sizeof(uint64_t));
> >> > +for (i = 0; i < nb_numa_nodes; i++) {
> >> > +for (j = 0; j < nb_numa_nodes; j++) {
> >> > +count = (i * nb_numa_nodes + j) * 6;
> >> > +matrix[count++] = 1;
> >> > +matrix[count++] = i;
> >> > +matrix[count++] = 1;
> >> > +matrix[count++] = j;
> >> > +matrix[count++] = 1;
> >> > +matrix[count++] = (i == j) ? 10 : 20;
> >> > +}
> >> > +}
> >> > +qemu_fdt_setprop_sized_cells_from_array(fdt, "/distance-map",
> >> > +"distance-matrix", number / 
> >> > 2,
> >> > +matrix);
> > I had to read qemu_fdt_setprop_sized_cells_from_array to understand why
> > above we're using 6 instead of 3, and then placing all the 1's in every
> > other slot, and then dividing number by 2 here. Is using this function
> > worth the confusion?
> > 
> > I think the following would greatly improve reviewability, and shave off
> > a bit of boot time (by not having to alloc more mem and copy the matrix).
> > 
> > uint32_t *matrix;
> > 
> > number = nb_numa_nodes * nb_numa_nodes * 3;
> > matrix = g_malloc0(number * sizeof(uint32_t));
> > for (i = 0; i < nb_numa_nodes; i++) {
> > for (j = 0; j < nb_numa_nodes; j++) {
> > count = (i * nb_numa_nodes + j) * 3;
> > matrix[count++] = cpu_to_be32(i);
> > matrix[count++] = cpu_to_be32(j);
> > matrix[count++] = cpu_to_be32(i == j ? 10 : 20);

I noticed that /distance-map is an optional node by the latest version
of the spec. In its absence default values will be used. Do we plan on
putting anything other than the 10s and 20s here? If not, then we can
leave it to Linux to determine what the defaults should be, and it'll
use them by itself if we leave this node out.

> > }
> > }
> > qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
> >  matrix, number * sizeof(uint32_t));
> > 
> > 
> >> > +g_free(matrix);
> > Also, I think it would nicer if all this was put in its own function, and
> > then just add the following to create_fdt.
> > 
> >   if (nb_numa_nodes) {
> >   virt_fdt_create_distance_map(fdt);
> >   }
> > 
> Ok, will update this.

Thanks,
drew

> 
> Thanks,
> -- 
> Shannon
> 
> 



[Qemu-devel] [PULL 0/1] ppc-for-2.6 queue 20160423

2016-04-23 Thread David Gibson
The following changes since commit 53343338a6e7b83777b82803398572b40afc8c0f:

  Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging 
(2016-04-22 16:17:12 +0100)

are available in the git repository at:

  git://github.com/dgibson/qemu.git tags/ppc-for-2.6-20160423

for you to fetch changes up to da34fed707a3a3ffa229f4e724aea06da1b53fb0:

  hw/ppc/spapr: Fix crash when specifying bad parameters to 
spapr-pci-host-bridge (2016-04-23 16:52:20 +1000)


ppc patch queue for 2016-04-23

A single fix for a bug in parameter handling for the spapr PCI host
bridge.



Peter,

This is definitely a bug fix, but it's not a regression since 2.5.
Your judgement call as to whether to merge it this late in the 2.6
cycle.

Thomas Huth (1):
  hw/ppc/spapr: Fix crash when specifying bad parameters to 
spapr-pci-host-bridge

 hw/ppc/spapr.c | 9 -
 hw/ppc/spapr_pci.c | 3 +++
 2 files changed, 7 insertions(+), 5 deletions(-)



[Qemu-devel] [PULL 1/1] hw/ppc/spapr: Fix crash when specifying bad parameters to spapr-pci-host-bridge

2016-04-23 Thread David Gibson
From: Thomas Huth 

QEMU currently crashes when using bad parameters for the
spapr-pci-host-bridge device:

$ qemu-system-ppc64 -device 
spapr-pci-host-bridge,buid=0x123,liobn=0x321,mem_win_addr=0x1,io_win_addr=0x10
Segmentation fault

The problem is that spapr_tce_find_by_liobn() might return NULL, but
the code in spapr_populate_pci_dt() does not check for this condition
and then tries to dereference this NULL pointer.
Apart from that, the return value of spapr_populate_pci_dt() also
has to be checked for all PCI buses, not only for the last one, to
make sure we catch all errors.

Signed-off-by: Thomas Huth 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 9 -
 hw/ppc/spapr_pci.c | 3 +++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index feaab08..b69995e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -940,11 +940,10 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
 
 QLIST_FOREACH(phb, &spapr->phbs, list) {
 ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
-}
-
-if (ret < 0) {
-fprintf(stderr, "couldn't setup PCI devices in fdt\n");
-exit(1);
+if (ret < 0) {
+error_report("couldn't setup PCI devices in fdt");
+exit(1);
+}
 }
 
 /* RTAS */
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 8c20d34..573e635 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1816,6 +1816,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
  sizeof(interrupt_map)));
 
 tcet = spapr_tce_find_by_liobn(SPAPR_PCI_LIOBN(phb->index, 0));
+if (!tcet) {
+return -1;
+}
 spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
  tcet->liobn, tcet->bus_offset,
  tcet->nb_table << tcet->page_shift);
-- 
2.5.5




[Qemu-devel] [PATCH v4 0/1] qemu-img: check block status of backing file when converting.

2016-04-23 Thread Ren Kimura
I've just replaced the last patch with a new one that uses loop iteration
instead of recursion.
https://lists.gnu.org/archive/html/qemu-block/2016-04/msg00584.html

First, "convert_iteration_sectors" checks the status of the head of the
chain with bdrv_get_block_status.
If this status is neither BDRV_BLOCK_DATA nor BDRV_BLOCK_ZERO, it then checks
the backing files using a new function, get_backing_status.
This function iterates over the backing file chain and returns the status
BDRV_BLOCK_DATA or BDRV_BLOCK_ZERO, or an error (< 0) returned by
bdrv_get_block_status.
When none of the backing files has a valid status (i.e. a backing file that
must hold the data doesn't exist), it returns -1 (< 0) and the conversion
goes to the failure path.




[Qemu-devel] [PATCH v4 1/1] qemu-img: check block status of backing file when converting.

2016-04-23 Thread Ren Kimura
When converting images, check the block status of its backing file chain
to avoid needlessly reading zeros.

Signed-off-by: Ren Kimura 
---
 qemu-img.c | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 06264d9..b771227 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1451,6 +1451,22 @@ static void convert_select_part(ImgConvertState *s, 
int64_t sector_num)
 }
 }
 
+static int64_t get_backing_status(BlockDriverState *bs,
+  int64_t sector_num,
+  int nb_sectors, int *pnum)
+{
+while (bs->backing) {
+int64_t ret;
+BlockDriverState *file;
+bs = bs->backing->bs;
+ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum, &file);
+if (ret < 0 || ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
+return ret;
+}
+}
+return -1;
+}
+
 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
 {
 int64_t ret;
@@ -1477,10 +1493,21 @@ static int convert_iteration_sectors(ImgConvertState 
*s, int64_t sector_num)
 } else if (!s->target_has_backing) {
 /* Without a target backing file we must copy over the contents of
  * the backing file as well. */
-/* TODO Check block status of the backing file chain to avoid
+/* Check block status of the backing file chain to avoid
  * needlessly reading zeroes and limiting the iteration to the
  * buffer size */
-s->status = BLK_DATA;
+ret = get_backing_status(blk_bs(s->src[s->src_cur]),
+ sector_num - s->src_cur_offset,
+ n, &n);
+if (ret < 0) {
+return ret;
+}
+
+if (ret & BDRV_BLOCK_ZERO) {
+s->status = BLK_ZERO;
+} else {
+s->status = BLK_DATA;
+}
 } else {
 s->status = BLK_BACKING_FILE;
 }
-- 
2.5.0