Re: [Qemu-devel] Virtualbox svga card in KVM

2013-04-05 Thread Stefan Hajnoczi
On Thu, Mar 21, 2013 at 10:53:21AM -0400, Alon Levy wrote:
> >  I am planning on bringing in the virtualbox svga card into kvm
> >  as a new svga card type (vbox probably?) so that we can load
> >  the VirtualBox SVGA card drivers in the guest.

I'm curious if the vbox SVGA card has features that existing QEMU
graphics cards do not provide?

Stefan



[Qemu-devel] [PATCH v17] Add pvpanic device driver

2013-04-05 Thread Hu Tao
pvpanic device is used to notify host(qemu) when guest panic happens.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Hu Tao 
---

No change from v16. qemu patches at:

http://lists.nongnu.org/archive/html/qemu-devel/2013-04/msg01028.html

 src/acpi.c|  3 +++
 src/ssdt-misc.dsl | 46 ++
 2 files changed, 49 insertions(+)

diff --git a/src/acpi.c b/src/acpi.c
index bc4d8ea..fe504f0 100644
--- a/src/acpi.c
+++ b/src/acpi.c
@@ -534,6 +534,9 @@ build_ssdt(void)
 ssdt_ptr[acpi_pci64_valid[0]] = 0;
 }
 
+int pvpanic_port = romfile_loadint("etc/pvpanic-port", 0x0);
+*(u16 *)(ssdt_ptr + *ssdt_isa_pest) = pvpanic_port;
+
 ssdt_ptr += sizeof(ssdp_misc_aml);
 
 // build Scope(_SB_) header
diff --git a/src/ssdt-misc.dsl b/src/ssdt-misc.dsl
index 679422b..acc850e 100644
--- a/src/ssdt-misc.dsl
+++ b/src/ssdt-misc.dsl
@@ -55,4 +55,50 @@ DefinitionBlock ("ssdt-misc.aml", "SSDT", 0x01, "BXPC", 
"BXSSDTSUSP", 0x1)
 Zero   /* reserved */
 })
 }
+
+External(\_SB.PCI0, DeviceObj)
+External(\_SB.PCI0.ISA, DeviceObj)
+
+Scope(\_SB.PCI0.ISA) {
+Device(PEVT) {
+Name(_HID, "QEMU0001")
+/* PEST will be patched to be Zero if no such device */
+ACPI_EXTRACT_NAME_WORD_CONST ssdt_isa_pest
+Name(PEST, 0x)
+OperationRegion(PEOR, SystemIO, PEST, 0x01)
+Field(PEOR, ByteAcc, NoLock, Preserve) {
+PEPT,   8,
+}
+
+Method(_STA, 0, NotSerialized) {
+Store(PEST, Local0)
+If (LEqual(Local0, Zero)) {
+Return (0x00)
+} Else {
+Return (0x0F)
+}
+}
+
+Method(RDPT, 0, NotSerialized) {
+Store(PEPT, Local0)
+Return (Local0)
+}
+
+Method(WRPT, 1, NotSerialized) {
+Store(Arg0, PEPT)
+}
+
+Name(_CRS, ResourceTemplate() {
+IO(Decode16, 0x00, 0x00, 0x01, 0x01, IO)
+})
+
+CreateWordField(_CRS, IO._MIN, IOMN)
+CreateWordField(_CRS, IO._MAX, IOMX)
+
+Method(_INI, 0, NotSerialized) {
+Store(PEST, IOMN)
+Store(PEST, IOMX)
+}
+}
+}
 }
-- 
1.8.1.4




Re: [Qemu-devel] (no subject)

2013-04-05 Thread Stefan Hajnoczi
On Tue, Apr 02, 2013 at 12:02:52PM -0400, Elizabeth Brown wrote:
> I was wondering if anyone could help me by setting up a wiki account for me?

Done.

Stefan



[Qemu-devel] [PATCH v17] pvpanic: pvpanic device driver

2013-04-05 Thread Hu Tao
pvpanic device is a qemu simulated device through which guest panic
event is sent to host.

Signed-off-by: Hu Tao 
---

ref: http://lists.nongnu.org/archive/html/qemu-devel/2013-04/msg01028.html

 drivers/platform/x86/Kconfig   |   7 +++
 drivers/platform/x86/Makefile  |   2 +
 drivers/platform/x86/pvpanic.c | 115 +
 3 files changed, 124 insertions(+)
 create mode 100644 drivers/platform/x86/pvpanic.c

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 3338437..527ed04 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -781,4 +781,11 @@ config APPLE_GMUX
  graphics as well as the backlight. Currently only backlight
  control is supported by the driver.
 
+config PVPANIC
+   tristate "pvpanic device support"
+   depends on ACPI
+   ---help---
+ This driver provides support for pvpanic device, which is a qemu
+ simulated device through which guest panic event is sent to host.
+
 endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index ace2b38..ef0ec74 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -51,3 +51,5 @@ obj-$(CONFIG_INTEL_OAKTRAIL)  += intel_oaktrail.o
 obj-$(CONFIG_SAMSUNG_Q10)  += samsung-q10.o
 obj-$(CONFIG_APPLE_GMUX)   += apple-gmux.o
 obj-$(CONFIG_CHROMEOS_LAPTOP)  += chromeos_laptop.o
+
+obj-$(CONFIG_PVPANIC)   += pvpanic.o
diff --git a/drivers/platform/x86/pvpanic.c b/drivers/platform/x86/pvpanic.c
new file mode 100644
index 000..81c95ec
--- /dev/null
+++ b/drivers/platform/x86/pvpanic.c
@@ -0,0 +1,115 @@
+/*
+ *  pvpanic.c - pvpanic Device Support
+ *
+ *  Copyright (C) 2013 Fujitsu.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  
USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+MODULE_AUTHOR("Hu Tao ");
+MODULE_DESCRIPTION("pvpanic device driver");
+MODULE_LICENSE("GPL");
+
+static int pvpanic_add(struct acpi_device *device);
+static int pvpanic_remove(struct acpi_device *device);
+
+static const struct acpi_device_id pvpanic_device_ids[] = {
+   { "QEMU0001", 0},
+   { "", 0},
+};
+MODULE_DEVICE_TABLE(acpi, pvpanic_device_ids);
+
+#define PVPANIC_PANICKED   (1 << 0)
+
+static acpi_handle handle;
+
+static struct acpi_driver pvpanic_driver = {
+   .name = "pvpanic",
+   .class ="QEMU",
+   .ids =  pvpanic_device_ids,
+   .ops =  {
+   .add =  pvpanic_add,
+   .remove =   pvpanic_remove,
+   },
+   .owner =THIS_MODULE,
+};
+
+static void
+pvpanic_send_event(unsigned int event)
+{
+   union acpi_object arg;
+   struct acpi_object_list arg_list;
+
+   if (!handle)
+   return;
+
+   arg.type = ACPI_TYPE_INTEGER;
+   arg.integer.value = event;
+
+   arg_list.count = 1;
+   arg_list.pointer = &arg;
+
+   acpi_evaluate_object(handle, "WRPT", &arg_list, NULL);
+}
+
+static int
+pvpanic_panic_notify(struct notifier_block *nb, unsigned long code,
+void *unused)
+{
+   pvpanic_send_event(PVPANIC_PANICKED);
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block pvpanic_panic_nb = {
+   .notifier_call = pvpanic_panic_notify,
+};
+
+static int pvpanic_add(struct acpi_device *device)
+{
+   acpi_status status;
+   u64 ret;
+
+   status = acpi_evaluate_integer(device->handle, "_STA", NULL,
+  &ret);
+
+   if (ACPI_FAILURE(status) || (ret & 0x0B) != 0x0B)
+   return -ENODEV;
+
+   handle = device->handle;
+   atomic_notifier_chain_register(&panic_notifier_list,
+  &pvpanic_panic_nb);
+
+   return 0;
+}
+
+static int pvpanic_remove(struct acpi_device *device)
+{
+
+   atomic_notifier_chain_unregister(&panic_notifier_list,
+&pvpanic_panic_nb);
+   handle = NULL;
+   return 0;
+}
+
+module_acpi_driver(pvpanic_driver);
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH 20/24] xen: re-enable refresh interval reporting for xenfb

2013-04-05 Thread Gerd Hoffmann
>> -#else
>>  ; /* nothing */
>> -#endif
>>  } else {
>>  /* we don't get update notifications, thus use the
>>   * sledge hammer approach ... */
> 
> You might as well remove the if () nothing; case.

Yep, will do.

>> @@ -785,6 +755,20 @@ static void xenfb_update(void *opaque)
>>  xenfb->up_fullscreen = 0;
>>  }
>>  
>> +static void xenfb_update_interval(void *opaque, uint64_t interval)
>> +{
>> +struct XenFB *xenfb = opaque;
>> +
>> +if (xenfb->feature_update) {
>> +#ifdef XENFB_TYPE_REFRESH_PERIOD
>> +if (xenfb_queue_full(xenfb)) {
>> +return;
>> +}
>> +xenfb_send_refresh_period(xenfb, interval);
> 
> Shouldn't we be updating xenfb->refresh_period here? And shouldn't we
> call xenfb_send_refresh_period only if interval != 
> xenfb->refresh_period?

> On the other hand if refresh_period is not useful anymore, shouldn't
> we remove it from struct XenFB?

xenfb_update_interval is only called when interval changes, which I
think means we don't need refresh_period any more, correct?

cheers,
  Gerd




[Qemu-devel] [PATCH] qemu-char: eliminate busy waiting on can_read returning zero

2013-04-05 Thread Paolo Bonzini
The character backend refactoring introduced an undesirable busy wait.
The busy wait happens if can_read returns zero and there is data available
on the character device's file descriptor.  Then, the I/O watch will
fire continuously and, with TCG, the CPU thread will never run.

1) Char backend asks front end if it can write
2) Front end says no
3) poll() finds the char backend's descriptor is available
4) Goto (1)

What we really want is this (note that step 3 avoids the busy wait):

1) Char backend asks front end if it can write
2) Front end says no
3) poll() goes on without char backend's descriptor
4) Goto (1) until qemu_chr_accept_input() called

5) Char backend asks front end if it can write
6) Front end says yes
7) poll() finds the char backend's descriptor is available
8) Backend handler called

After this patch, the IOWatchPoll source and the watch source are
separated.  The IOWatchPoll is simply a hook that runs during the prepare
phase on each main loop iteration.  The hook adds/removes the actual
source depending on the return value from can_read.

A simple reproducer is

qemu-system-i386 -serial mon:stdio

... followed by banging on the terminal as much as you can. :)  Without
this patch, emulation will hang.

Signed-off-by: Paolo Bonzini 
---
This supersedes Peter's patch.

 qemu-char.c  | 64 
+---
 1 file changed, 21 insertions(+), 43 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index e5eb8dd..d4239b5 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -594,65 +594,52 @@ int recv_all(int fd, void *_buf, int len1, bool 
single_read)
 
 typedef struct IOWatchPoll
 {
+GSource parent;
+
 GSource *src;
-int max_size;
+bool active;
 
 IOCanReadHandler *fd_can_read;
 void *opaque;
-
-QTAILQ_ENTRY(IOWatchPoll) node;
 } IOWatchPoll;
 
-static QTAILQ_HEAD(, IOWatchPoll) io_watch_poll_list =
-QTAILQ_HEAD_INITIALIZER(io_watch_poll_list);
-
 static IOWatchPoll *io_watch_poll_from_source(GSource *source)
 {
-IOWatchPoll *i;
-
-QTAILQ_FOREACH(i, &io_watch_poll_list, node) {
-if (i->src == source) {
-return i;
-}
-}
-
-return NULL;
+return container_of(source, IOWatchPoll, parent);
 }
 
 static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
 {
 IOWatchPoll *iwp = io_watch_poll_from_source(source);
-
-iwp->max_size = iwp->fd_can_read(iwp->opaque);
-if (iwp->max_size == 0) {
+bool active = iwp->fd_can_read(iwp->opaque) > 0;
+if (iwp->active == active) {
 return FALSE;
 }
 
-return g_io_watch_funcs.prepare(source, timeout_);
+iwp->active = active;
+if (active) {
+g_source_attach(iwp->src, NULL);
+} else {
+g_source_remove(g_source_get_id(iwp->src));
+}
+return FALSE;
 }
 
 static gboolean io_watch_poll_check(GSource *source)
 {
-IOWatchPoll *iwp = io_watch_poll_from_source(source);
-
-if (iwp->max_size == 0) {
-return FALSE;
-}
-
-return g_io_watch_funcs.check(source);
+return FALSE;
 }
 
 static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
gpointer user_data)
 {
-return g_io_watch_funcs.dispatch(source, callback, user_data);
+abort();
 }
 
 static void io_watch_poll_finalize(GSource *source)
 {
 IOWatchPoll *iwp = io_watch_poll_from_source(source);
-QTAILQ_REMOVE(&io_watch_poll_list, iwp, node);
-g_io_watch_funcs.finalize(source);
+g_source_unref(iwp->src);
 }
 
 static GSourceFuncs io_watch_poll_funcs = {
@@ -669,24 +657,15 @@ static guint io_add_watch_poll(GIOChannel *channel,
gpointer user_data)
 {
 IOWatchPoll *iwp;
-GSource *src;
-guint tag;
-
-src = g_io_create_watch(channel, G_IO_IN | G_IO_ERR | G_IO_HUP);
-g_source_set_funcs(src, &io_watch_poll_funcs);
-g_source_set_callback(src, (GSourceFunc)fd_read, user_data, NULL);
-tag = g_source_attach(src, NULL);
-g_source_unref(src);
 
-iwp = g_malloc0(sizeof(*iwp));
-iwp->src = src;
-iwp->max_size = 0;
+iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, 
sizeof(IOWatchPoll));
+iwp->active = FALSE;
 iwp->fd_can_read = fd_can_read;
 iwp->opaque = user_data;
+iwp->src = g_io_create_watch(channel, G_IO_IN | G_IO_ERR | G_IO_HUP);
+g_source_set_callback(iwp->src, (GSourceFunc)fd_read, user_data, NULL);
 
-QTAILQ_INSERT_HEAD(&io_watch_poll_list, iwp, node);
-
-return tag;
+return g_source_attach(&iwp->parent, NULL);
 }
 
 #ifndef _WIN32



Re: [Qemu-devel] [PATCH][RFC v2 1/7] hw/irq: move struct IRQState to irq.h

2013-04-05 Thread Peter Maydell
On 5 April 2013 05:28, liguang  wrote:
> define struct IRQState in irq.c bring in
> a annoying result, if you want dereference of
> IRQState's member like opaque outside of
> irq.c, compiler will complain:
> "error: dereferencing pointer to incomplete type"

No, this is deliberate -- it's an opaque type which should
only be used inside irq.c. If you think you need to dereference
it you're probably doing something wrong.

thanks
-- PMM



Re: [Qemu-devel] [PATCH][RFC v2 2/7] hw/power: add main power chip implementation

2013-04-05 Thread Peter Maydell
On 5 April 2013 05:28, liguang  wrote:
> here, we will handle power state transition between
> on, off, suspend, wakeup, we treat reset as power
> on then off, and power out pin will be connected to
> power in pin of devices, if we want a transition,
> we will trigger a power signal(qemu_irq), then all
> connected devices will be reply for this.

What actual hardware is this supposed to be modelling?
I don't believe there's such a thing as a 'generic
power controller' that makes sense for all architectures.

-- PMM



Re: [Qemu-devel] [PATCH][RFC v2 1/7] hw/irq: move struct IRQState to irq.h

2013-04-05 Thread li guang
在 2013-04-05五的 09:34 +0100,Peter Maydell写道:
> On 5 April 2013 05:28, liguang  wrote:
> > define struct IRQState in irq.c bring in
> > a annoying result, if you want dereference of
> > IRQState's member like opaque outside of
> > irq.c, compiler will complain:
> > "error: dereferencing pointer to incomplete type"
> 
> No, this is deliberate -- it's an opaque type which should
> only be used inside irq.c. If you think you need to dereference
> it you're probably doing something wrong.
> 

 Yes, you're right, it's only a suggestion for
some hacking conditions.




[Qemu-devel] [PATCH] [RFC] Wire up disabled wait a panicked event on s390

2013-04-05 Thread Christian Borntraeger
On s390 the disabled wait state indicates a state of attention.
For example Linux uses that state after a panic. Lets
put the system into panicked state.

An alternative implementation would be to state
disabled-wait  instead of pause in the action field.
(e.g. z/OS, z/VM and other classic OSes use the address of the
disabled wait to indicate an error code).

Signed-off-by: Christian Borntraeger 
---
 target-s390x/kvm.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 644f484..0c111f0 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -34,6 +34,8 @@
 #include "sysemu/kvm.h"
 #include "cpu.h"
 #include "sysemu/device_tree.h"
+#include "qapi/qmp/qjson.h"
+#include "monitor/monitor.h"
 
 /* #define DEBUG_KVM */
 
@@ -705,9 +707,18 @@ static int handle_intercept(S390CPU *cpu)
 r = handle_instruction(cpu, run);
 break;
 case ICPT_WAITPSW:
-if (s390_del_running_cpu(cpu) == 0 &&
-is_special_wait_psw(cs)) {
-qemu_system_shutdown_request();
+/* disabled wait, since enabled wait is handled in kernel */
+if (s390_del_running_cpu(cpu) == 0) {
+if (is_special_wait_psw(cs)) {
+qemu_system_shutdown_request();
+} else {
+QObject *data;
+
+data = qobject_from_jsonf("{ 'action': %s }", "pause");
+monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
+qobject_decref(data);
+vm_stop(RUN_STATE_GUEST_PANICKED);
+}
 }
 r = EXCP_HALTED;
 break;




[Qemu-devel] [PATCH v2 0/5] Data Driven device registers & Zynq DEVCFG

2013-04-05 Thread Peter Crosthwaite
Hi All. This is a new scheme i've come up with handling device registers in a
data driven way. My motivation for this is to factor out a lot of the access
checking that seems to be replicated in every device. See P2 commit message for
further discussion.

P1 is a trivial addition to bitops.h
P2 is the main patch, adds the register definition functionality
P3 is an example new device (the Xilinx Zynq devcfg) that uses this scheme.
P4 adds devcfg to the Zynq machine model

This devcfg device was particularly finnicky with per-bit restrictions which
prompted all this. Im also looking for a higher-than-usual modelling fidelity
on the register space, with semantics defined for random reserved bits
in-between otherwise consistent fields.

Heres an example of the qemu_log output for the devcfg device. This is produced
by now generic sharable code:

/machine/unattached/device[44]:Addr 0x08:CFG: write of value 0508
/machine/unattached/device[44]:Addr 0x80:MCTRL: write of value 00800010
/machine/unattached/device[44]:Addr 0x10:INT_MASK: write of value 
/machine/unattached/device[44]:Addr :CTRL: write of value 0c00607f

And an example of a rogue guest banging on a bad bit:

/machine/unattached/device[44]:Addr 0x14:STATUS bits 0x01 may not be \
written to 1

Future work: Theres a lot of overlap here with what Peter did with the ARM
coprocessor definitions. We could go further and generalise ARM CP to use this
or some further evolution of it. That and converting existing models to this
scheme. Some device models will lose a lot of weight. Also integrate with
memory API.

Changed from v1:
Added ONES macro patch
Dropped bogus former patch 1 (PMM review)
Addressed Blue, Gerd and MST comments.
Simplified to be more Memory API compatible.
Added Memory API helpers.
Please see discussion already on list and commit msgs for more detail.


Peter A. G. Crosthwaite (2):
  xilinx_devcfg: Zynq devcfg device model
  xilinx_zynq: added devcfg to machine model

Peter Crosthwaite (3):
  bitops: Add ONES macro
  register: Add Register API
  register: Add Memory API glue

 Makefile.target |2 +-
 hw/arm/Makefile.objs|2 +-
 hw/arm/xilinx_zynq.c|8 +
 hw/xilinx_devcfg.c  |  489 +++
 include/exec/register.h |  147 ++
 include/qemu/bitops.h   |2 +
 register.c  |  207 
 7 files changed, 855 insertions(+), 2 deletions(-)
 create mode 100644 hw/xilinx_devcfg.c
 create mode 100644 include/exec/register.h
 create mode 100644 register.c




[Qemu-devel] [PATCH v2 5/5] xilinx_zynq: added devcfg to machine model

2013-04-05 Thread Peter Crosthwaite
From: Peter A. G. Crosthwaite 

Signed-off-by: Peter A. G. Crosthwaite 
---
Changed since v1:
Added manual parenting of devcfg node (evil but needed for early access
to canonical path by devcfgs realize fn).

 hw/arm/xilinx_zynq.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 6f36286..fb3a089 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -220,6 +220,14 @@ static void zynq_init(QEMUMachineInitArgs *args)
 sysbus_connect_irq(busdev, n + 1, pic[dma_irqs[n] - IRQ_OFFSET]);
 }
 
+dev = qdev_create(NULL, "xlnx.ps7-dev-cfg");
+object_property_add_child(qdev_get_machine(), "xilinx-devcfg", OBJECT(dev),
+  NULL);
+qdev_init_nofail(dev);
+busdev = SYS_BUS_DEVICE(dev);
+sysbus_connect_irq(busdev, 0, pic[40-IRQ_OFFSET]);
+sysbus_mmio_map(busdev, 0, 0xF8007000);
+
 zynq_binfo.ram_size = ram_size;
 zynq_binfo.kernel_filename = kernel_filename;
 zynq_binfo.kernel_cmdline = kernel_cmdline;
-- 
1.7.0.4




Re: [Qemu-devel] [PATCH v2 1/5] bitops: Add ONES macro

2013-04-05 Thread Peter Maydell
On 5 April 2013 09:43, Peter Crosthwaite  wrote:
> Little macro that just gives you N ones (justified to LSB).
>
> Signed-off-by: Peter Crosthwaite 
> ---
>
>  include/qemu/bitops.h |2 ++
>  1 files changed, 2 insertions(+), 0 deletions(-)
>
> diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
> index affcc96..da47fc8 100644
> --- a/include/qemu/bitops.h
> +++ b/include/qemu/bitops.h
> @@ -273,4 +273,6 @@ static inline uint64_t deposit64(uint64_t value, int 
> start, int length,
>  return (value & ~mask) | ((fieldval << start) & mask);
>  }
>
> +#define ONES(num) ((num) == 64 ? ~0ull : (1ull << (num)) - 1)

You can avoid the ?: here (assuming you're happy to say that
ONES(0) is a silly thing to ask for):

#define ONES(num) (~0ULL >> (64 - (num)))

Needs a documentation comment, anyway.

thanks
-- PMM



[Qemu-devel] [PATCH v2 3/5] register: Add Memory API glue

2013-04-05 Thread Peter Crosthwaite
Add memory io handlers that glue the register API to the memory API.
Just translation functions at this stage. Although it does allow for
devices to be created without all-in-one mmio r/w handlers.

Signed-off-by: Peter Crosthwaite 
---

 include/exec/register.h |   13 +
 register.c  |   43 +++
 2 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/include/exec/register.h b/include/exec/register.h
index 0b05439..f30c98e 100644
--- a/include/exec/register.h
+++ b/include/exec/register.h
@@ -13,6 +13,7 @@
 
 #include 
 #include 
+#include "exec/memory.h"
 
 typedef struct RegisterInfo RegisterInfo;
 typedef struct RegisterAccessInfo RegisterAccessInfo;
@@ -92,6 +93,8 @@ struct RegisterAccessInfo {
  * @prefix: String prefix for log and debug messages
  *
  * @opaque: Opaque data for the register
+ *
+ * @mem: optional Memory region for the register
  */
 
 struct RegisterInfo {
@@ -105,6 +108,8 @@ struct RegisterInfo {
 const char *prefix;
 
 void *opaque;
+
+MemoryRegion mem;
 };
 
 /**
@@ -131,4 +136,12 @@ uint64_t register_read(RegisterInfo *reg);
 
 void register_reset(RegisterInfo *reg);
 
+void register_write_memory_be(void *opaque, hwaddr addr, uint64_t value,
+  unsigned size);
+void register_write_memory_le(void *opaque, hwaddr addr, uint64_t value,
+  unsigned size);
+
+uint64_t register_read_memory_be(void *opaque, hwaddr addr, unsigned size);
+uint64_t register_read_memory_le(void *opaque, hwaddr addr, unsigned size);
+
 #endif
diff --git a/register.c b/register.c
index 439f2f2..7b7b6df 100644
--- a/register.c
+++ b/register.c
@@ -162,3 +162,46 @@ void register_reset(RegisterInfo *reg)
 
 register_write_val(reg, reg->access->reset);
 }
+
+static inline void register_write_memory(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size, bool 
be)
+{
+RegisterInfo *reg = opaque;
+uint64_t we = (size == 8) ? ~0ull : (1ull << size * 8) - 1;
+int shift = 8 * (be ? reg->data_size - size - addr : addr);
+
+assert(size + addr <= reg->data_size);
+register_write(reg, value << shift, we << shift);
+}
+
+void register_write_memory_be(void *opaque, hwaddr addr, uint64_t value,
+  unsigned size)
+{
+register_write_memory(opaque, addr, value, size, true);
+}
+
+
+void register_write_memory_le(void *opaque, hwaddr addr, uint64_t value,
+  unsigned size)
+{
+register_write_memory(opaque, addr, value, size, false);
+}
+
+static inline uint64_t register_read_memory(void *opaque, hwaddr addr,
+unsigned size, bool be)
+{
+RegisterInfo *reg = opaque;
+int shift = 8 * (be ? reg->data_size - size - addr : addr);
+
+return register_read(reg) >> shift;
+}
+
+uint64_t register_read_memory_be(void *opaque, hwaddr addr, unsigned size)
+{
+return register_read_memory(opaque, addr, size, true);
+}
+
+uint64_t register_read_memory_le(void *opaque, hwaddr addr, unsigned size)
+{
+return register_read_memory(opaque, addr, size, false);
+}
-- 
1.7.0.4




[Qemu-devel] [PATCH v2 1/5] bitops: Add ONES macro

2013-04-05 Thread Peter Crosthwaite
Little macro that just gives you N ones (justified to LSB).

Signed-off-by: Peter Crosthwaite 
---

 include/qemu/bitops.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index affcc96..da47fc8 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -273,4 +273,6 @@ static inline uint64_t deposit64(uint64_t value, int start, 
int length,
 return (value & ~mask) | ((fieldval << start) & mask);
 }
 
+#define ONES(num) ((num) == 64 ? ~0ull : (1ull << (num)) - 1)
+
 #endif
-- 
1.7.0.4




[Qemu-devel]  Qemu-devel mailing list submissions

2013-04-05 Thread Haitham Zuriq





Sent from Samsung tablet

Re: [Qemu-devel] [PATCH][RFC v2 2/7] hw/power: add main power chip implementation

2013-04-05 Thread li guang
在 2013-04-05五的 09:35 +0100,Peter Maydell写道:
> On 5 April 2013 05:28, liguang  wrote:
> > here, we will handle power state transition between
> > on, off, suspend, wakeup, we treat reset as power
> > on then off, and power out pin will be connected to
> > power in pin of devices, if we want a transition,
> > we will trigger a power signal(qemu_irq), then all
> > connected devices will be reply for this.
> 
> What actual hardware is this supposed to be modelling?
> I don't believe there's such a thing as a 'generic
> power controller' that makes sense for all architectures.
> 
 
unfortunately, I'm considering it as generic and abstract.
I think we have no need to model a specific type of power
controller, because what we are doing is to construct a 
power controlling process, not to realize a hardware platform's
device for special purpose.
Yes, you can say not all devices are happy to do with power
management, but I would say, I'm not going to force every
devices to do that.





[Qemu-devel] [PATCH v2 4/5] xilinx_devcfg: Zynq devcfg device model

2013-04-05 Thread Peter Crosthwaite
From: Peter A. G. Crosthwaite 

Minimal device model for devcfg module of Zynq. DMA capabilities and
interrupt generation supported.

Signed-off-by: Peter A. G. Crosthwaite 
---
Changed since v1:
Rebased against new version of Register API.
Use action callbacks for side effects rather than switch.
Documented reasons for ge0, ge1 (Verbatim from TRM)
Added ui1 definitions for unimplemented major features
Removed dead lock code

 hw/arm/Makefile.objs |2 +-
 hw/xilinx_devcfg.c   |  489 ++
 2 files changed, 490 insertions(+), 1 deletions(-)
 create mode 100644 hw/xilinx_devcfg.c

diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index f5f7d0e..29c2bd2 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -1,4 +1,4 @@
-obj-y += zynq_slcr.o
+obj-y += zynq_slcr.o xilinx_devcfg.o
 obj-y += xilinx_spips.o
 obj-y += arm_gic.o arm_gic_common.o
 obj-y += a9scu.o
diff --git a/hw/xilinx_devcfg.c b/hw/xilinx_devcfg.c
new file mode 100644
index 000..c4d7632
--- /dev/null
+++ b/hw/xilinx_devcfg.c
@@ -0,0 +1,489 @@
+/*
+ * QEMU model of the Xilinx Devcfg Interface
+ *
+ * Copyright (c) 2011 Peter A.G. Crosthwaite (peter.crosthwa...@petalogix.com)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "sysemu/sysemu.h"
+#include "sysemu/dma.h"
+#include "sysbus.h"
+#include "ptimer.h"
+#include "qemu/bitops.h"
+#include "exec/register.h"
+#include "qemu/bitops.h"
+
+#define TYPE_XILINX_DEVCFG "xlnx.ps7-dev-cfg"
+
+#define XILINX_DEVCFG(obj) \
+OBJECT_CHECK(XilinxDevcfg, (obj), TYPE_XILINX_DEVCFG)
+
+/* FIXME: get rid of hardcoded nastiness */
+
+#define FREQ_HZ 9
+
+#define BTT_MAX 0x400 /* bytes to transfer per delay inteval */
+#define CYCLES_BTT_MAX 1 /*approximate 10k cycles per delay interval */
+
+#ifndef XILINX_DEVCFG_ERR_DEBUG
+#define XILINX_DEVCFG_ERR_DEBUG 0
+#endif
+#define DB_PRINT(...) do { \
+if (XILINX_DEVCFG_ERR_DEBUG) { \
+fprintf(stderr,  ": %s: ", __func__); \
+fprintf(stderr, ## __VA_ARGS__); \
+} \
+} while (0);
+
+#define R_CTRL(0x00/4)
+#define FORCE_RST(1 << 31) /* Not supported, writes ignored */
+#define PCAP_PR  (1 << 27) /* Forced to 0 on bad unlock */
+#define PCAP_MODE(1 << 26)
+#define MULTIBOOT_EN (1 << 24)
+#define USER_MODE(1 << 15)
+#define PCFG_AES_FUSE(1 << 12) /* locked by AES_FUSE_LOCK */
+#define PCFG_AES_EN_SHIFT9 /* locked by AES_EN_LOCK */
+#define PCFG_AES_EN_LEN  3 /* locked by AES_EN_LOCK */
+#define PCFG_AES_EN_MASK (((1 << PCFG_AES_EN_LEN) - 1) \
+<< PCFG_AES_EN_SHIFT)
+#define SEU_EN   (1 << 8) /* locked by SEU_LOCK */
+#define SEC_EN   (1 << 7) /* locked by SEC_LOCK */
+#define SPNIDEN  (1 << 6) /* locked by DBG_LOCK */
+#define SPIDEN   (1 << 5) /* locked by DBG_LOCK */
+#define NIDEN(1 << 4) /* locked by DBG_LOCK */
+#define DBGEN(1 << 3) /* locked by DBG_LOCK */
+#define DAP_EN   (7 << 0) /* locked by DBG_LOCK */
+
+#define R_LOCK  (0x04/4)
+#define AES_FUSE_LOCK4
+#define AES_EN_LOCK  3
+#define SEU_LOCK 2
+#define SEC_LOCK 1
+#define DBG_LOCK 0
+
+/* mapping bits in R_LOCK to what they lock in R_CTRL */
+static const uint32_t lock_ctrl_map[] = {
+[AES_FUSE_LOCK] = PCFG_AES_FUSE,
+[AES_EN_LOCK] = PCFG_AES_EN_MASK,
+[SEU_LOCK] = SEU_LOCK,
+[SEC_LOCK] = SEC_LOCK,
+[DBG_LOCK] =  SPNIDEN | SPIDEN | NIDEN | DBGEN | DAP_EN,
+};
+
+#define R_CFG   (0x08/4)
+#define RFIFO_TH_SHIFT   10
+#define RFIFO_TH_LEN 2
+#define WFIFO_TH_SHIFT   8
+#define WFIFO_TH_LEN 2
+#define DISABLE_SRC_INC  (1 << 5)
+#defi

Re: [Qemu-devel] [PATCH] PPC: mac newworld: fix cpu NIP reset value

2013-04-05 Thread Fabien Chouteau
On 04/04/2013 06:47 PM, Alexander Graf wrote:
> On -M mac99, we can run 970 CPUs. However, these CPUs define the initial
> instruction pointer they start execution at as part of their bootup protocol,
> so effectively it's up to the board to decide where they start.
>
> This went unnoticed, because they used to boot at the same location our flash
> was mapped to, but due to the recent reset changes our 970 CPUs want to reset
> to 0x100 now, which is always a 0 instruction.
>

This is one of the regressions introduced by my patch, thanks for fixing
it.

Reviewed-by: Fabien Chouteau 

> Set the initial IP to something reasonable for -M mac99.
> 
> Signed-off-by: Alexander Graf 
> ---
>  hw/ppc/mac_newworld.c |2 ++
>  1 files changed, 2 insertions(+), 0 deletions(-)
> 
> diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
> index a08a6b2..ca7d98f 100644
> --- a/hw/ppc/mac_newworld.c
> +++ b/hw/ppc/mac_newworld.c
> @@ -126,6 +126,8 @@ static void ppc_core99_reset(void *opaque)
>  PowerPCCPU *cpu = opaque;
>  
>  cpu_reset(CPU(cpu));
> +/* 970 CPUs want to get their initial IP as part of their boot protocol 
> */
> +cpu->env.nip = PROM_ADDR + 0x100;
>  }
>  
>  /* PowerPC Mac99 hardware initialisation */
> 


-- 
Fabien Chouteau



Re: [Qemu-devel] [PATCH v2 1/5] bitops: Add ONES macro

2013-04-05 Thread Peter Crosthwaite
Hi Peter,

On Fri, Apr 5, 2013 at 6:53 PM, Peter Maydell  wrote:
> On 5 April 2013 09:43, Peter Crosthwaite  wrote:
>> Little macro that just gives you N ones (justified to LSB).
>>
>> Signed-off-by: Peter Crosthwaite 
>> ---
>>
>>  include/qemu/bitops.h |2 ++
>>  1 files changed, 2 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
>> index affcc96..da47fc8 100644
>> --- a/include/qemu/bitops.h
>> +++ b/include/qemu/bitops.h
>> @@ -273,4 +273,6 @@ static inline uint64_t deposit64(uint64_t value, int 
>> start, int length,
>>  return (value & ~mask) | ((fieldval << start) & mask);
>>  }
>>
>> +#define ONES(num) ((num) == 64 ? ~0ull : (1ull << (num)) - 1)
>
> You can avoid the ?: here (assuming you're happy to say that
> ONES(0) is a silly thing to ask for):
>

Not that silly. ONES(0) should just stay true to the contract and
return 0. Otherwise prospective clients may have to guard against 0
cases in loops which is ugly:

for (i = 0; i < N; i++) {

mask = ONES(i); /* rather than ones = i ? ones(i) : 0 */

}

But  I thought about this more, and perhaps just ditch the ? anyway,
and rely on the shift by 64 ending up as 0. Then the -1 should return
return 64 (all) ones anyway.

> #define ONES(num) (~0ULL >> (64 - (num)))
>
> Needs a documentation comment, anyway.
>

OK

Regards,
Peter

> thanks
> -- PMM
>



[Qemu-devel] [PATCH v2 2/5] register: Add Register API

2013-04-05 Thread Peter Crosthwaite
This API provides some encapsulation of registers and factors our some
common functionality to common code. Bits of device state (usually MMIO
registers), often have all sorts of access restrictions and semantics
associated with them. This API allow you to define what those
restrictions are on a bit-by-bit basis.

Helper functions are then used to access the register which observe the
semantics defined by the RegisterAccessInfo struct.

Some features:
Bits can be marked as read_only (ro field)
Bits can be marked as write-1-clear (w1c field)
Bits can be marked as write only (wo)
Bits can be marked as sticky (nw0 and nw1)
Reset values can be defined (reset)
Bits can throw guest errors when written certain values (ge0, ge1)
Bits can throw unimp errors when written certain values (ui0, ui1)
Bits can be marked clear on read (cor)
Pre and post action callbacks can be added to read and write ops
Verbose debugging info can be enabled/disabled

Useful for defining device register spaces in a data driven way. Cuts
down on a lot of the verbosity and repetition in the switch-case blocks
in the standard foo_mmio_read/write functions.

Signed-off-by: Peter Crosthwaite 
---
changed from v1:
Rebranded as the "Register API" - I think thats probably what it is.
Near total rewrite of implementation.
De-arrayified reset (this is client/Memory APIs job).
Moved out of bitops into its own file (Blue review)
Added debug, the register pointer, and prefix to a struct (Blue Review)
Made 64-bit to play friendlier with memory API (Blue review)
Made backend storage uint8_t (MST review)
Added read/write callbacks (Blue review)
Added ui0, ui1 (Blue review)
Moved re-purposed width (now byte width defining actual storage size)
Arrayified ge0, ge1 (ui0, ui1 too) and added .reason
Added wo field (not an April fools joke - this has genuine meaning here)
Added we mask to write accessor

 Makefile.target |2 +-
 include/exec/register.h |  134 ++
 register.c  |  164 +++
 3 files changed, 299 insertions(+), 1 deletions(-)
 create mode 100644 include/exec/register.h
 create mode 100644 register.c

diff --git a/Makefile.target b/Makefile.target
index 2bd6d14..9c35931 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -114,7 +114,7 @@ obj-y += hw/
 obj-$(CONFIG_FDT) += device_tree.o
 obj-$(CONFIG_KVM) += kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
-obj-y += memory.o savevm.o cputlb.o
+obj-y += memory.o register.o savevm.o cputlb.o
 obj-$(CONFIG_HAVE_GET_MEMORY_MAPPING) += memory_mapping.o
 obj-$(CONFIG_HAVE_CORE_DUMP) += dump.o
 obj-$(CONFIG_NO_GET_MEMORY_MAPPING) += memory_mapping-stub.o
diff --git a/include/exec/register.h b/include/exec/register.h
new file mode 100644
index 000..0b05439
--- /dev/null
+++ b/include/exec/register.h
@@ -0,0 +1,134 @@
+/*
+ * Register Definition API
+ *
+ * Copyright (c) 2013 Xilinx Inc.
+ * Copyright (c) 2013 Peter Crosthwaite 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef REGISTER_H
+#define REGISTER_H
+
+#include 
+#include 
+
+typedef struct RegisterInfo RegisterInfo;
+typedef struct RegisterAccessInfo RegisterAccessInfo;
+
+/**
+ * A register access error message
+ * @mask: Bits in the register the error applies to
+ * @reason: Reason why this access is an error
+ */
+
+typedef struct RegisterAccessError {
+uint64_t mask;
+const char *reason;
+} RegisterAccessError;
+
+/**
+ * Access description for a register that is part of guest accessible device
+ * state.
+ *
+ * @name: String name of the register
+ * @ro: whether or not the bit is read-only
+ * @wo: Bits that are write only (read as reset value)
+ * @w1c: bits with the common write 1 to clear semantic.
+ * @nw0: bits that can't be written with a 0 by the guest (sticky 1)
+ * @nw1: bits that can't be written with a 1 by the guest (sticky 0)
+ * @reset: reset value.
+ * @cor: Bits that are clear on read
+ *
+ * @ge1: Bits that when written 1 indicate a guest error
+ * @ge0: Bits that when written 0 indicate a guest error
+ * @ui1: Bits that when written 1 indicate use of an unimplemented feature
+ * @ui0: Bits that when written 0 indicate use of an unimplemented feature
+ *
+ * @pre_write: Pre write callback. Passed the value that's to be written,
+ * immediately before the actual write. The returned value is what is written,
+ * giving the handler a chance to modify the written value.
+ * @post_write: Post write callback. Passed the written value. Most write side
+ * effects should be implemented here.
+ *
+ * @pre_read: Pre read callback.
+ * @post_read: Post read callback. Passes the value that is about to be 
returned
+ * for a read. The return value from this function is what is ultimately read,
+ * allowing this function to modify the value before return to the client.
+ */
+
+struct RegisterAccessInfo {
+const char *name;
+uint64_

Re: [Qemu-devel] [PATCH 3/3] PPC PReP: can run without bios image

2013-04-05 Thread Fabien Chouteau
On 04/04/2013 07:22 PM, Andreas Färber wrote:
> Am 04.04.2013 18:26, schrieb Artyom Tarasenko:
>> On Thu, Apr 4, 2013 at 6:20 PM, Peter Maydell  
>> wrote:
>>> On 4 April 2013 17:17, Fabien Chouteau  wrote:
>>>
>>> But -kernel for QEMU specifically means Linux kernel; you might
>>> argue we should have picked a different option name but we're
>>> stuck with it now.
>>
>> No, it's not Linux-only. At least qemu-system-sparc can load NetBSD
>> kernel with this option.
>
> Look, you're not exactly making friends if you keep pointing out that
> something works somewhere and you try to deduce a rule out of that. ;)
>
> -bios loads something where the hardware expects it, not necessarily
> RAM. If using ELF, the entry point must be configured appropriately.
>
> -kernel loads something into RAM in a way a Linux kernel can run (and it
> does not limit itself to it, so other use cases may or may not work).
>

May I add the -pflash option to this list :)

> Loading something in a way that matches neither hardware nor Linux
> kernel is - for good or bad - simply not really supported at this time.
>
> PMM tried to get a discussion going about how to solve that latter case
> properly some months ago, possibly prompted by Xilinx, and there were
> not many responses, especially no concrete solution beyond vaguely
> pointing to devices/objects rather than fiddling with existing
> -kernel/-bios command line options.

I didn't see this discussion (there too many traffic on Qemu-devel for
me, I can't read everything).

> So I really feel this discussion is out of scope for this PReP patchset!

My goal with this patchset is to be able to load an ELF file and start
the board at its entry point. So we are exactly in the scope.

If the -kernel option is for Linux only, we have to rename it to -linux.
And we remove the ambiguous -kernel option. The question is: Do we want
a -bsd, -solaris, -vxworks, -rtems, etc...?

I think we can keep the -bios option (maybe with an -firmware alias,
this is a question of vocabulary of different architectures). But it
should be possible to disable the default bios, with '-bios -' or '-bios
none' or '-bios null' or '-bios disabled' or whatever.

And I make a proposition for new options:

-elf loads an ELF file in RAM, ROM or whatever memory area and start the
board at entry point.

-raw-bin file=,addr=0x loads something at 'addr'

Or we can mix the two with a -load option:

-load file=
-load file=
-load file=,addr=
-load file=,addr=,entry=

The option can be used as many times as needed on the command line.

-- 
Fabien Chouteau



Re: [Qemu-devel] [PATCH][RFC v2 2/7] hw/power: add main power chip implementation

2013-04-05 Thread Peter Maydell
On 5 April 2013 09:45, li guang  wrote:
> 在 2013-04-05五的 09:35 +0100,Peter Maydell写道:
>> What actual hardware is this supposed to be modelling?
>> I don't believe there's such a thing as a 'generic
>> power controller' that makes sense for all architectures.
>
> unfortunately, I'm considering it as generic and abstract.
> I think we have no need to model a specific type of power
> controller, because what we are doing is to construct a
> power controlling process, not to realize a hardware platform's
> device for special purpose.

QEMU is fundamentally modelling real hardware platforms,
not abstract devices. You have to model a real power
controller to at least some extent, because that's what
guest OSes expect to be interacting with, and what
device and board hardware models expect to be dealing with.

-- PMM



Re: [Qemu-devel] [PATCH v2 1/5] bitops: Add ONES macro

2013-04-05 Thread Peter Maydell
On 5 April 2013 10:11, Peter Crosthwaite  wrote:
> But  I thought about this more, and perhaps just ditch the ? anyway,
> and rely on the shift by 64 ending up as 0.

That's undefined behaviour, please don't.

-- PMM



Re: [Qemu-devel] [PATCH v2 2/5] register: Add Register API

2013-04-05 Thread Peter Maydell
On 5 April 2013 09:43, Peter Crosthwaite  wrote:
> This API provides some encapsulation of registers and factors our some
> common functionality to common code. Bits of device state (usually MMIO
> registers), often have all sorts of access restrictions and semantics
> associated with them. This API allow you to define what those
> restrictions are on a bit-by-bit basis.
> +void register_write(RegisterInfo *reg, uint64_t val, uint64_t we)
> +{
> +uint64_t old_val, new_val, test;
> +const RegisterAccessInfo *ac;
> +const RegisterAccessError *rae;
> +
> +assert(reg);
> +
> +ac = reg->access;
> +if (!ac || !ac->name) {
> +qemu_log_mask(LOG_GUEST_ERROR, "%s: write to undefined device state "
> +  "(written value: %#" PRIx64 ")\n", reg->prefix, val);
> +return;
> +}
> +
> +uint32_t no_w0_mask = ac->ro | ac->w1c | ac->nw0 | ~we;
> +uint32_t no_w1_mask = ac->ro | ac->w1c | ac->nw1 | ~we;
> +
> +if (reg->debug) {
> +fprintf(stderr, "%s:%s: write of value %#" PRIx64 "\n", reg->prefix,
> +ac->name, val);
> +}
> +
> +if (qemu_loglevel_mask(LOG_GUEST_ERROR)) {
> +for (rae = ac->ge1; rae && rae->mask; rae++) {
> +test = val & rae->mask;
> +if (test) {
> +register_write_log(reg, 1, test, LOG_GUEST_ERROR,
> +   "invalid", rae->reason);
> +}
> +}
> +for (rae = ac->ge0; rae && rae->mask; rae++) {
> +test = val & rae->mask;
> +if (test) {
> +register_write_log(reg, 0, test, LOG_GUEST_ERROR,
> +   "invalid", rae->reason);
> +}
> +}
> +}
> +
> +if (qemu_loglevel_mask(LOG_UNIMP)) {
> +for (rae = ac->ui1; rae && rae->mask; rae++) {
> +test = val & rae->mask;
> +if (test) {
> +register_write_log(reg, 1, test, LOG_GUEST_ERROR,
> +   "unimplmented", rae->reason);
> +}
> +}
> +for (rae = ac->ui0; rae && rae->mask; rae++) {
> +test = val & rae->mask;
> +if (test) {
> +register_write_log(reg, 0, test, LOG_GUEST_ERROR,
> +   "unimplemented", rae->reason);
> +}
> +}
> +}
> +
> +assert(reg->data);
> +old_val = register_read_val(reg);
> +
> +new_val = val & ~(no_w1_mask & val);
> +new_val |= no_w1_mask & old_val & val;
> +new_val |= no_w0_mask & old_val & ~val;
> +new_val &= ~(val & ac->w1c);
> +
> +if (ac->pre_write) {
> +new_val = ac->pre_write(reg, new_val);
> +}
> +register_write_val(reg, new_val);
> +if (ac->post_write) {
> +ac->post_write(reg, new_val);
> +}
> +}

Wow, this feels pretty heavyweight for "write a register"...

-- PMM



[Qemu-devel] [PATCH 3/7] spice-qemu-char: Remove #ifdef-ed code for old spice-server compat

2013-04-05 Thread Hans de Goede
We now require spice-server to be >= 0.12.0 so this is no longer needed.

Signed-off-by: Hans de Goede 
---
 spice-qemu-char.c | 27 +--
 1 file changed, 1 insertion(+), 26 deletions(-)

diff --git a/spice-qemu-char.c b/spice-qemu-char.c
index 535f955..c4f81cf 100644
--- a/spice-qemu-char.c
+++ b/spice-qemu-char.c
@@ -85,21 +85,6 @@ static void vmc_state(SpiceCharDeviceInstance *sin, int 
connected)
 {
 SpiceCharDriver *scd = container_of(sin, SpiceCharDriver, sin);
 
-#if SPICE_SERVER_VERSION < 0x000901
-/*
- * spice-server calls the state callback for the agent channel when the
- * spice client connects / disconnects. Given that not the client but
- * the server is doing the parsing of the messages this is wrong as the
- * server is still listening. Worse, this causes the parser in the server
- * to go out of sync, so we ignore state calls for subtype vdagent
- * spicevmc chardevs. For the full story see:
- * http://lists.freedesktop.org/archives/spice-devel/2011-July/004837.html
- */
-if (strcmp(sin->subtype, "vdagent") == 0) {
-return;
-}
-#endif
-
 if ((scd->chr->be_open && connected) ||
 (!scd->chr->be_open && !connected)) {
 return;
@@ -224,7 +209,6 @@ static CharDriverState *chr_open(const char *subtype)
 
 CharDriverState *qemu_chr_open_spice_vmc(const char *type)
 {
-CharDriverState *chr;
 const char **psubtype = spice_server_char_device_recognized_subtypes();
 
 if (type == NULL) {
@@ -243,16 +227,7 @@ CharDriverState *qemu_chr_open_spice_vmc(const char *type)
 return NULL;
 }
 
-chr = chr_open(type);
-
-#if SPICE_SERVER_VERSION < 0x000901
-/* See comment in vmc_state() */
-if (strcmp(type, "vdagent") == 0) {
-qemu_chr_generic_open(chr);
-}
-#endif
-
-return chr;
+return chr_open(type);
 }
 
 #if SPICE_SERVER_VERSION >= 0x000c02
-- 
1.8.1.4




[Qemu-devel] [PATCH 1/7] virtio-console: Also throttle when less was written then requested

2013-04-05 Thread Hans de Goede
This is necessary so that we get properly woken up to write the rest.

This patch also changes the len argument to the have_data callback, to
avoid doing an unsigned signed comparison.

Signed-off-by: Hans de Goede 
Acked-by: Amit Shah 
---
 hw/virtio-console.c | 8 +---
 hw/virtio-serial.h  | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/virtio-console.c b/hw/virtio-console.c
index 284180f..61f9ff5 100644
--- a/hw/virtio-console.c
+++ b/hw/virtio-console.c
@@ -34,7 +34,8 @@ static gboolean chr_write_unblocked(GIOChannel *chan, 
GIOCondition cond,
 }
 
 /* Callback function that's called when the guest sends us data */
-static ssize_t flush_buf(VirtIOSerialPort *port, const uint8_t *buf, size_t 
len)
+static ssize_t flush_buf(VirtIOSerialPort *port,
+ const uint8_t *buf, ssize_t len)
 {
 VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
 ssize_t ret;
@@ -47,7 +48,7 @@ static ssize_t flush_buf(VirtIOSerialPort *port, const 
uint8_t *buf, size_t len)
 ret = qemu_chr_fe_write(vcon->chr, buf, len);
 trace_virtio_console_flush_buf(port->id, len, ret);
 
-if (ret <= 0) {
+if (ret < len) {
 VirtIOSerialPortClass *k = VIRTIO_SERIAL_PORT_GET_CLASS(port);
 
 /*
@@ -56,7 +57,8 @@ static ssize_t flush_buf(VirtIOSerialPort *port, const 
uint8_t *buf, size_t len)
  * we had a finer-grained message, like -EPIPE, we could close
  * this connection.
  */
-ret = 0;
+if (ret < 0)
+ret = 0;
 if (!k->is_console) {
 virtio_serial_throttle_port(port, true);
 qemu_chr_fe_add_watch(vcon->chr, G_IO_OUT, chr_write_unblocked,
diff --git a/hw/virtio-serial.h b/hw/virtio-serial.h
index 516400f..4dc0c0a 100644
--- a/hw/virtio-serial.h
+++ b/hw/virtio-serial.h
@@ -104,7 +104,7 @@ typedef struct VirtIOSerialPortClass {
  * 'len'.  In this case, throttling will be enabled for this port.
  */
 ssize_t (*have_data)(VirtIOSerialPort *port, const uint8_t *buf,
- size_t len);
+ ssize_t len);
 } VirtIOSerialPortClass;
 
 /*
-- 
1.8.1.4




[Qemu-devel] [PATCH 0/7] usb-redir chardev flowcontrol patches v4

2013-04-05 Thread Hans de Goede
Hi Gerd,

Here is v4 of the series adding watch support to the spicevmc chardev
backend. This version resolved all outstanding discussions about this
patch-set, can you please include these in your next spice pull-req to
Anthony?

Note this also includes a few virtio-consoled bugfixes which were found
during the development of this series.

Changes in v2:
-Address Amit's review of "[PATCH 2/8] virtio-console: Remove any pending
watches on close"
-Drop 2 spice-qemu-char cleanup patches as Gerd has added them to his v5
-series
-Add a (forward ported) patch from Alon Levy to fix a long standing
 spice-qemu-char migration issue in a way which should be acceptable
-upstream

Changes in v3:
-Rebased on top of latest master (some trivial manual conflict resolution)
-Dropped Alon's patch for the spice-qemu-char migration issue, this is fixed
 differently in another series which is already par of the latest master
-Add a new patch:
  usb-serial: Remove double call to qemu_chr_add_handlers( NULL )
-Fixed a checkpatch issue in:
  usb-redir: Add flow control support

Changes in v4:
-Rebased on top of latest master (some trivial manual conflict resolution)
-Address Amit's review of "[PATCH 1/7] virtio-console: Also throttle when
 less was written then requested"
-Drop patches included in the usb.79 pull-req
-Add a new patch: "[PATCH 6/7] spice-qemu-char: vmc_write: Don't write more
 bytes then requested"

Thanks & Regards,

Hans



[Qemu-devel] [PATCH 2/7] virtio-console: Remove any pending watches on close

2013-04-05 Thread Hans de Goede
Signed-off-by: Hans de Goede 
---
 hw/virtio-console.c | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-console.c b/hw/virtio-console.c
index 61f9ff5..908ec17 100644
--- a/hw/virtio-console.c
+++ b/hw/virtio-console.c
@@ -18,6 +18,7 @@
 typedef struct VirtConsole {
 VirtIOSerialPort port;
 CharDriverState *chr;
+guint watch;
 } VirtConsole;
 
 /*
@@ -29,6 +30,7 @@ static gboolean chr_write_unblocked(GIOChannel *chan, 
GIOCondition cond,
 {
 VirtConsole *vcon = opaque;
 
+vcon->watch = 0;
 virtio_serial_throttle_port(&vcon->port, false);
 return FALSE;
 }
@@ -61,8 +63,10 @@ static ssize_t flush_buf(VirtIOSerialPort *port,
 ret = 0;
 if (!k->is_console) {
 virtio_serial_throttle_port(port, true);
-qemu_chr_fe_add_watch(vcon->chr, G_IO_OUT, chr_write_unblocked,
-  vcon);
+if (!vcon->watch) {
+vcon->watch = qemu_chr_fe_add_watch(vcon->chr, G_IO_OUT,
+chr_write_unblocked, vcon);
+}
 }
 }
 return ret;
@@ -106,6 +110,10 @@ static void chr_event(void *opaque, int event)
 virtio_serial_open(&vcon->port);
 break;
 case CHR_EVENT_CLOSED:
+if (vcon->watch) {
+g_source_remove(vcon->watch);
+vcon->watch = 0;
+}
 virtio_serial_close(&vcon->port);
 break;
 }
@@ -130,6 +138,17 @@ static int virtconsole_initfn(VirtIOSerialPort *port)
 return 0;
 }
 
+static int virtconsole_exitfn(VirtIOSerialPort *port)
+{
+VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
+
+if (vcon->watch) {
+g_source_remove(vcon->watch);
+}
+
+return 0;
+}
+
 static Property virtconsole_properties[] = {
 DEFINE_PROP_CHR("chardev", VirtConsole, chr),
 DEFINE_PROP_END_OF_LIST(),
@@ -142,6 +161,7 @@ static void virtconsole_class_init(ObjectClass *klass, void 
*data)
 
 k->is_console = true;
 k->init = virtconsole_initfn;
+k->exit = virtconsole_exitfn;
 k->have_data = flush_buf;
 k->set_guest_connected = set_guest_connected;
 dc->props = virtconsole_properties;
-- 
1.8.1.4




[Qemu-devel] [PATCH 7/7] usb-serial: Remove double call to qemu_chr_add_handlers( NULL )

2013-04-05 Thread Hans de Goede
usb-serial has a qdev chardev property, and hw/qdev-properties-system.c
already contains:

static void release_chr(Object *obj, const char *name, void *opaque)
{
DeviceState *dev = DEVICE(obj);
Property *prop = opaque;
CharDriverState **ptr = qdev_get_prop_ptr(dev, prop);
CharDriverState *chr = *ptr;

if (chr) {
qemu_chr_add_handlers(chr, NULL, NULL, NULL, NULL);
qemu_chr_fe_release(chr);
}
}

So doing the qemu_chr_add_handlers(s->cs, NULL, NULL, NULL, NULL); from
the usb handle_destroy function too will lead to it being done twice.

Signed-off-by: Hans de Goede 
---
 hw/usb/dev-serial.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c
index 7c314dc..21ddef6 100644
--- a/hw/usb/dev-serial.c
+++ b/hw/usb/dev-serial.c
@@ -410,13 +410,6 @@ static void usb_serial_handle_data(USBDevice *dev, 
USBPacket *p)
 }
 }
 
-static void usb_serial_handle_destroy(USBDevice *dev)
-{
-USBSerialState *s = (USBSerialState *)dev;
-
-qemu_chr_add_handlers(s->cs, NULL, NULL, NULL, NULL);
-}
-
 static int usb_serial_can_read(void *opaque)
 {
 USBSerialState *s = opaque;
@@ -595,7 +588,6 @@ static void usb_serial_class_initfn(ObjectClass *klass, 
void *data)
 uc->handle_reset   = usb_serial_handle_reset;
 uc->handle_control = usb_serial_handle_control;
 uc->handle_data= usb_serial_handle_data;
-uc->handle_destroy = usb_serial_handle_destroy;
 dc->vmsd = &vmstate_usb_serial;
 dc->props = serial_properties;
 }
@@ -623,7 +615,6 @@ static void usb_braille_class_initfn(ObjectClass *klass, 
void *data)
 uc->handle_reset   = usb_serial_handle_reset;
 uc->handle_control = usb_serial_handle_control;
 uc->handle_data= usb_serial_handle_data;
-uc->handle_destroy = usb_serial_handle_destroy;
 dc->vmsd = &vmstate_usb_serial;
 dc->props = braille_properties;
 }
-- 
1.8.1.4




[Qemu-devel] [PATCH 5/7] spice-qemu-char: Remove intermediate buffer

2013-04-05 Thread Hans de Goede
From: Alon Levy 

virtio-serial's buffer is valid when it calls us, and we don't
access it otherwise: vmc_read is only called in response to wakeup,
or else we set datalen=0 and throttle. Then vmc_read is called back,
we return 0 (not accessing the buffer) and set the timer to unthrottle.

Also make datalen int and not ssize_t (to fit spice_chr_write signature).

HdG: Update to apply to spice-qemu-char with new gio-channel based
flowcontrol support.

Signed-off-by: Hans de Goede 
---
 spice-qemu-char.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/spice-qemu-char.c b/spice-qemu-char.c
index 097a8c8..7e6bd2d 100644
--- a/spice-qemu-char.c
+++ b/spice-qemu-char.c
@@ -14,9 +14,8 @@ typedef struct SpiceCharDriver {
 char  *subtype;
 bool  active;
 bool  blocked;
-uint8_t   *buffer;
-uint8_t   *datapos;
-ssize_t   bufsize, datalen;
+const uint8_t *datapos;
+int   datalen;
 QLIST_ENTRY(SpiceCharDriver) next;
 } SpiceCharDriver;
 
@@ -186,12 +185,7 @@ static int spice_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 int read_bytes;
 
 assert(s->datalen == 0);
-if (s->bufsize < len) {
-s->bufsize = len;
-s->buffer = g_realloc(s->buffer, s->bufsize);
-}
-memcpy(s->buffer, buf, len);
-s->datapos = s->buffer;
+s->datapos = buf;
 s->datalen = len;
 spice_server_char_device_wakeup(&s->sin);
 read_bytes = len - s->datalen;
-- 
1.8.1.4




[Qemu-devel] [PATCH 4/7] spice-qemu-char: Add watch support

2013-04-05 Thread Hans de Goede
Signed-off-by: Hans de Goede 
---
 spice-qemu-char.c | 67 +++
 1 file changed, 63 insertions(+), 4 deletions(-)

diff --git a/spice-qemu-char.c b/spice-qemu-char.c
index c4f81cf..097a8c8 100644
--- a/spice-qemu-char.c
+++ b/spice-qemu-char.c
@@ -13,12 +13,18 @@ typedef struct SpiceCharDriver {
 SpiceCharDeviceInstance sin;
 char  *subtype;
 bool  active;
+bool  blocked;
 uint8_t   *buffer;
 uint8_t   *datapos;
 ssize_t   bufsize, datalen;
 QLIST_ENTRY(SpiceCharDriver) next;
 } SpiceCharDriver;
 
+typedef struct SpiceCharSource {
+GSource   source;
+SpiceCharDriver   *scd;
+} SpiceCharSource;
+
 static QLIST_HEAD(, SpiceCharDriver) spice_chars =
 QLIST_HEAD_INITIALIZER(spice_chars);
 
@@ -54,9 +60,10 @@ static int vmc_read(SpiceCharDeviceInstance *sin, uint8_t 
*buf, int len)
 scd->datapos += bytes;
 scd->datalen -= bytes;
 assert(scd->datalen >= 0);
-if (scd->datalen == 0) {
-scd->datapos = 0;
-}
+}
+if (scd->datalen == 0) {
+scd->datapos = 0;
+scd->blocked = false;
 }
 trace_spice_vmc_read(bytes, len);
 return bytes;
@@ -129,10 +136,54 @@ static void vmc_unregister_interface(SpiceCharDriver *scd)
 trace_spice_vmc_unregister_interface(scd);
 }
 
+static gboolean spice_char_source_prepare(GSource *source, gint *timeout)
+{
+SpiceCharSource *src = (SpiceCharSource *)source;
+
+*timeout = -1;
+
+return !src->scd->blocked;
+}
+
+static gboolean spice_char_source_check(GSource *source)
+{
+SpiceCharSource *src = (SpiceCharSource *)source;
+
+return !src->scd->blocked;
+}
+
+static gboolean spice_char_source_dispatch(GSource *source,
+GSourceFunc callback, gpointer user_data)
+{
+GIOFunc func = (GIOFunc)callback;
+
+return func(NULL, G_IO_OUT, user_data);
+}
+
+GSourceFuncs SpiceCharSourceFuncs = {
+.prepare  = spice_char_source_prepare,
+.check= spice_char_source_check,
+.dispatch = spice_char_source_dispatch,
+};
+
+static GSource *spice_chr_add_watch(CharDriverState *chr, GIOCondition cond)
+{
+SpiceCharDriver *scd = chr->opaque;
+SpiceCharSource *src;
+
+assert(cond == G_IO_OUT);
+
+src = (SpiceCharSource *)g_source_new(&SpiceCharSourceFuncs,
+  sizeof(SpiceCharSource));
+src->scd = scd;
+
+return (GSource *)src;
+}
 
 static int spice_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
 SpiceCharDriver *s = chr->opaque;
+int read_bytes;
 
 assert(s->datalen == 0);
 if (s->bufsize < len) {
@@ -143,7 +194,14 @@ static int spice_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 s->datapos = s->buffer;
 s->datalen = len;
 spice_server_char_device_wakeup(&s->sin);
-return len;
+read_bytes = len - s->datalen;
+if (read_bytes != len) {
+/* We'll get passed in the unconsumed data with the next call */
+s->datalen = 0;
+s->datapos = NULL;
+s->blocked = true;
+}
+return read_bytes;
 }
 
 static void spice_chr_close(struct CharDriverState *chr)
@@ -199,6 +257,7 @@ static CharDriverState *chr_open(const char *subtype)
 s->sin.subtype = g_strdup(subtype);
 chr->opaque = s;
 chr->chr_write = spice_chr_write;
+chr->chr_add_watch = spice_chr_add_watch;
 chr->chr_close = spice_chr_close;
 chr->chr_set_fe_open = spice_chr_set_fe_open;
 
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH 3/3] PPC PReP: can run without bios image

2013-04-05 Thread Peter Maydell
On 5 April 2013 10:19, Fabien Chouteau  wrote:
> If the -kernel option is for Linux only, we have to rename it to -linux.
> And we remove the ambiguous -kernel option.

This isn't possible, for backwards compatibility reasons. We
have to retain -kernel/-initrd/-append under those names.

> And I make a proposition for new options:
>
> -elf loads an ELF file in RAM, ROM or whatever memory area and start the
> board at entry point.
>
> -raw-bin file=,addr=0x loads something at 'addr'
>
> Or we can mix the two with a -load option:
>
> -load file=
> -load file=
> -load file=,addr=
> -load file=,addr=,entry=
>
> The option can be used as many times as needed on the command line.

Of these, I prefer the last (-load).

thanks
-- PMM



Re: [Qemu-devel] [PATCH 2/2] rbd: disable unsupported librbd functions at runtime

2013-04-05 Thread Kevin Wolf
Am 04.04.2013 um 18:50 hat Josh Durgin geschrieben:
> On 04/04/2013 03:10 AM, Kevin Wolf wrote:
> >After searching the net a bit and trying out some things myself, I'm
> >afraid that this approach doesn't work. It does seem to do the right
> >thing when build and runtime version are the same, but new build -> old
> >runtime segfaults (because the symbol doesn't become NULL) and old build
> >-> new runtime segfaults (because the symbol stays NULL). Unless I
> >missed some build option that is different in qemu than in my test
> >program.
> 
> It worked when downgrading the runtime version of librbd for me, so
> maybe something about qemu's build was different. This patch wouldn't
> allow newer functions in a runtime version to be used though, since
> they would be disabled if qemu were built against an older version.

Interesting that one way worked for you. At least it seems to be
unreliable, unfortunately.

> >So it looks as if you had to use dlsym() instead.
> 
> I tried this initially, using glib's portable wrappers, but found that
> it would require using the dlopen'd version only - not linking at
> build time against librbd at all. I thought this might be too big
> a change, but now it seems like the best way to go.

Aw, I didn't expect that... You're using g_module_open/symbol then? I
never used it, but from reading the docs maybe g_module_open with a NULL
filename might do the right thing when linked at build time?

On the other hand, not linking at build time at all and relying on
dlopen() only give us another nice feature: Distros can enable the rbd
block driver without introducing a hard dependency of qemu on librbd.
Maybe worth doing it for this reason alone.

> Using this approach, upgrading from a version of librbd that doesn't
> support e.g. rbd_aio_flush to one that does would not require
> recompiling qemu to be able to use the new function. In general, librbd
> would not be needed at compile time for qemu to be able to use it,
> which would make the rbd block driver much easier to install on
> distros where rbd isn't enabled at build time.
> 
> If you don't mind this approach, I'll post another version using
> only dlopen'd (via glib) librbd/librados.

Sure, if you don't mind implementing it I think it's a good idea. I
didn't mean to ask you for such a large change with my innocent
comments...

Kevin



[Qemu-devel] [PATCH v2 0/4] block: fix I/O throttling oscillations

2013-04-05 Thread Stefan Hajnoczi
Benoît Canet  reported that QEMU I/O throttling can
oscillate under continuous I/O.  The test case runs 50 threads performing
random writes and a -drive iops=150 limit is used.

Since QEMU I/O throttling is implemented using 100 millisecond time slices,
we'd expect 150 +/- 15 IOPS.  Anything outside that range indicates a problem
with the I/O throttling algorithm.

It turned out that even a single thread performing sequential I/O continuously
is throttled outside the 150 +/- 15 IOPS range.  The continous stream of I/O
slows down as time goes on but resets to 150 IOPS again when interrupted.  This
can be tested with:

  $ iostat -d 1 -x /dev/vdb &
  $ dd if=/dev/vdb of=/dev/null bs=4096 iflag=direct

This patches addresses these problems as follows:

1. Account for I/O requests when they are submitted instead of completed.  This
   ensures that we do not exceed the budget for this slice.  Exceeding the
   budget leads to fluctuations since we have to make up for this later.

2. Use constant 100 millisecond slice time.  Adjusting the slice time at
   run-time led to oscillations.  Since the reason for adjusting slice time is
   not clear, drop this behavior.

I have also included two code clean-up patches.

Tested-By: Benoit Canet 

v2:
 * Account slice_submitted after both bps and iops checks pass [kwolf]

Stefan Hajnoczi (4):
  block: fix I/O throttling accounting blind spot
  block: keep I/O throttling slice time constant
  block: drop duplicated slice extension code
  block: clean up I/O throttling wait_time code

 block.c   | 49 +--
 blockdev.c|  1 -
 include/block/block_int.h |  3 +--
 3 files changed, 23 insertions(+), 30 deletions(-)

-- 
1.8.1.4




[Qemu-devel] [PATCH v2 4/4] block: clean up I/O throttling wait_time code

2013-04-05 Thread Stefan Hajnoczi
The wait_time variable is in seconds.  Reflect this in a comment and use
NANOSECONDS_PER_SECOND instead of BLOCK_IO_SLICE_TIME * 10 (which
happens to have the right value).

Signed-off-by: Stefan Hajnoczi 
Tested-By: Benoit Canet 
---
 block.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index aa16fc4..602d8a4 100644
--- a/block.c
+++ b/block.c
@@ -3800,7 +3800,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, 
int nb_sectors,
 BLOCK_IO_SLICE_TIME;
 bs->slice_end += extension;
 if (wait) {
-*wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
+*wait = wait_time * NANOSECONDS_PER_SECOND;
 }
 
 return true;
@@ -3841,7 +3841,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, 
bool is_write,
 return false;
 }
 
-/* Calc approx time to dispatch */
+/* Calc approx time to dispatch, in seconds */
 wait_time = (ios_base + 1) / iops_limit;
 if (wait_time > elapsed_time) {
 wait_time = wait_time - elapsed_time;
@@ -3852,7 +3852,7 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, 
bool is_write,
 /* Exceeded current slice, extend it by another slice time */
 bs->slice_end += BLOCK_IO_SLICE_TIME;
 if (wait) {
-*wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
+*wait = wait_time * NANOSECONDS_PER_SECOND;
 }
 
 return true;
-- 
1.8.1.4




[Qemu-devel] [PATCH v2 1/4] block: fix I/O throttling accounting blind spot

2013-04-05 Thread Stefan Hajnoczi
I/O throttling relies on bdrv_acct_done() which is called when a request
completes.  This leaves a blind spot since we only charge for completed
requests, not submitted requests.

For example, if there is 1 operation remaining in this time slice the
guest could submit 3 operations and they will all be submitted
successfully since they don't actually get accounted for until they
complete.

Originally we probably thought this is okay since the requests will be
accounted when the time slice is extended.  In practice it causes
fluctuations since the guest can exceed its I/O limit and it will be
punished for this later on.

Account for I/O upon submission so that I/O limits are enforced
properly.

Signed-off-by: Stefan Hajnoczi 
Tested-By: Benoit Canet 
---
 block.c   | 21 ++---
 include/block/block_int.h |  2 +-
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 0ae2e93..25976b5 100644
--- a/block.c
+++ b/block.c
@@ -141,7 +141,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
 bs->slice_start = 0;
 bs->slice_end   = 0;
 bs->slice_time  = 0;
-memset(&bs->io_base, 0, sizeof(bs->io_base));
 }
 
 static void bdrv_block_timer(void *opaque)
@@ -1436,8 +1435,8 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 bs_dest->slice_time = bs_src->slice_time;
 bs_dest->slice_start= bs_src->slice_start;
 bs_dest->slice_end  = bs_src->slice_end;
+bs_dest->slice_submitted= bs_src->slice_submitted;
 bs_dest->io_limits  = bs_src->io_limits;
-bs_dest->io_base= bs_src->io_base;
 bs_dest->throttled_reqs = bs_src->throttled_reqs;
 bs_dest->block_timer= bs_src->block_timer;
 bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
@@ -3768,9 +3767,9 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, 
int nb_sectors,
 slice_time = bs->slice_end - bs->slice_start;
 slice_time /= (NANOSECONDS_PER_SECOND);
 bytes_limit = bps_limit * slice_time;
-bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
+bytes_base  = bs->slice_submitted.bytes[is_write];
 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
-bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
+bytes_base += bs->slice_submitted.bytes[!is_write];
 }
 
 /* bytes_base: the bytes of data which have been read/written; and
@@ -3828,9 +3827,9 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, 
bool is_write,
 slice_time = bs->slice_end - bs->slice_start;
 slice_time /= (NANOSECONDS_PER_SECOND);
 ios_limit  = iops_limit * slice_time;
-ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
+ios_base   = bs->slice_submitted.ios[is_write];
 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
-ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
+ios_base += bs->slice_submitted.ios[!is_write];
 }
 
 if (ios_base + 1 <= ios_limit) {
@@ -3875,11 +3874,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, 
int nb_sectors,
 bs->slice_start = now;
 bs->slice_end   = now + bs->slice_time;
 
-bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
-bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
-
-bs->io_base.ios[is_write]= bs->nr_ops[is_write];
-bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
+memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
 }
 
 elapsed_time  = now - bs->slice_start;
@@ -3907,6 +3902,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, 
int nb_sectors,
 *wait = 0;
 }
 
+bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
+   BDRV_SECTOR_SIZE;
+bs->slice_submitted.ios[is_write]++;
+
 return false;
 }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 0986a2d..83941d8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -256,7 +256,7 @@ struct BlockDriverState {
 int64_t slice_start;
 int64_t slice_end;
 BlockIOLimit io_limits;
-BlockIOBaseValue  io_base;
+BlockIOBaseValue slice_submitted;
 CoQueue  throttled_reqs;
 QEMUTimer*block_timer;
 bool io_limits_enabled;
-- 
1.8.1.4




[Qemu-devel] [PATCH v2 2/4] block: keep I/O throttling slice time constant

2013-04-05 Thread Stefan Hajnoczi
It is not necessary to adjust the slice time at runtime.  We already
extend the current slice in order to carry over accounting into the next
slice.  Changing the actual slice time value introduces oscillations.

The guest may experience large changes in throughput or IOPS from one
moment to the next when slice times are adjusted.

Reported-by: Benoît Canet 
Signed-off-by: Stefan Hajnoczi 
Tested-By: Benoit Canet 
---
 block.c   | 19 +--
 blockdev.c|  1 -
 include/block/block_int.h |  1 -
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 25976b5..00eca27 100644
--- a/block.c
+++ b/block.c
@@ -140,7 +140,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
 
 bs->slice_start = 0;
 bs->slice_end   = 0;
-bs->slice_time  = 0;
 }
 
 static void bdrv_block_timer(void *opaque)
@@ -1432,7 +1431,6 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
 /* i/o timing parameters */
-bs_dest->slice_time = bs_src->slice_time;
 bs_dest->slice_start= bs_src->slice_start;
 bs_dest->slice_end  = bs_src->slice_end;
 bs_dest->slice_submitted= bs_src->slice_submitted;
@@ -3749,6 +3747,7 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, 
int nb_sectors,
  bool is_write, double elapsed_time, uint64_t *wait)
 {
 uint64_t bps_limit = 0;
+uint64_t extension;
 double   bytes_limit, bytes_base, bytes_res;
 double   slice_time, wait_time;
 
@@ -3796,8 +3795,10 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, 
int nb_sectors,
  * info can be kept until the timer fire, so it is increased and tuned
  * based on the result of experiment.
  */
-bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
-bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
+extension = wait_time * NANOSECONDS_PER_SECOND;
+extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
+BLOCK_IO_SLICE_TIME;
+bs->slice_end += extension;
 if (wait) {
 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
 }
@@ -3848,8 +3849,8 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, 
bool is_write,
 wait_time = 0;
 }
 
-bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
-bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
+/* Exceeded current slice, extend it by another slice time */
+bs->slice_end += BLOCK_IO_SLICE_TIME;
 if (wait) {
 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
 }
@@ -3868,12 +3869,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, 
int nb_sectors,
 now = qemu_get_clock_ns(vm_clock);
 if ((bs->slice_start < now)
 && (bs->slice_end > now)) {
-bs->slice_end = now + bs->slice_time;
+bs->slice_end = now + BLOCK_IO_SLICE_TIME;
 } else {
-bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
 bs->slice_start = now;
-bs->slice_end   = now + bs->slice_time;
-
+bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
 memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
 }
 
diff --git a/blockdev.c b/blockdev.c
index 8cdc9ce..6dc999d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1069,7 +1069,6 @@ void qmp_block_set_io_throttle(const char *device, 
int64_t bps, int64_t bps_rd,
 }
 
 bs->io_limits = io_limits;
-bs->slice_time = BLOCK_IO_SLICE_TIME;
 
 if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
 bdrv_io_limits_enable(bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 83941d8..9aa98b5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -252,7 +252,6 @@ struct BlockDriverState {
 unsigned int copy_on_read_in_flight;
 
 /* the time for latest disk I/O */
-int64_t slice_time;
 int64_t slice_start;
 int64_t slice_end;
 BlockIOLimit io_limits;
-- 
1.8.1.4




[Qemu-devel] (target-arm) Patch for instruction-level tracing, any interest?

2013-04-05 Thread Japheth Lim
Hello all,

I have written a patch for the ARM TCG that prints the program counter and any 
load/store addresses for every instruction that's run. We use this feature to 
get detailed traces of our ARM programs.

We can tidy up and submit the patch; is there any interest in this feature?

Japheth Lim
Software Systems Research Group@NICTA



[Qemu-devel] [Bug 752476] Re: monitor command mouse_button 1 moves mouse

2013-04-05 Thread Ludwig Nussel
Ah, there was a discussion where a better solution was supposed to be found. 
Maybe a new try is needed with the argumentation that it doesn't make the 
current code worse at all:
http://lists.gnu.org/archive/html/qemu-devel/2011-04/msg00742.html

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/752476

Title:
  monitor command mouse_button 1 moves mouse

Status in QEMU:
  New

Bug description:
  via the qemu -monitor interface, it is possible to move and click the mouse 
using
  mouse_move 2 1
  mouse_button 1
  but the mouse_button command always moves the mouse to (0,0) making it rather 
unusable to (auto-)trigger any widgets in the VM from the outside.

  Would be nice to have this available for my qemu/kvm based os-autoinst
  testing framework.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/752476/+subscriptions



[Qemu-devel] [Bug 752476] Re: monitor command mouse_button 1 moves mouse

2013-04-05 Thread Ludwig Nussel
Bard, are you going to submit the patch for inclusion in qemu?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/752476

Title:
  monitor command mouse_button 1 moves mouse

Status in QEMU:
  New

Bug description:
  via the qemu -monitor interface, it is possible to move and click the mouse 
using
  mouse_move 2 1
  mouse_button 1
  but the mouse_button command always moves the mouse to (0,0) making it rather 
unusable to (auto-)trigger any widgets in the VM from the outside.

  Would be nice to have this available for my qemu/kvm based os-autoinst
  testing framework.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/752476/+subscriptions



[Qemu-devel] [PATCH 6/7] spice-qemu-char: vmc_write: Don't write more bytes then we're asked too

2013-04-05 Thread Hans de Goede
This one took me eons to debug, but I've finally found it now, oh well.

The usage of the MIN macro in this line:
last_out = MIN(len, qemu_chr_be_can_write(scd->chr));

Causes qemu_chr_be_can_write to be called *twice*, since the MIN macro
evaluates its arguments twice (bad MIN macro, bad!). And the result of
the call can change between the 2 calls since the guest may have consumed
some data from the virtio ringbuffer between the calls!

When this happens it is possible for qemu_chr_be_can_write to return less
then len in the call made for the comparision, and then to return more then
len in the actual call for the return-value of MIN, after which we will end
up writing len data + some extra garbage, not good.

This patch fixes this by only calling qemu_chr_be_can_write once.

Signed-off-by: Hans de Goede 
---
 spice-qemu-char.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spice-qemu-char.c b/spice-qemu-char.c
index 7e6bd2d..fb4af9a 100644
--- a/spice-qemu-char.c
+++ b/spice-qemu-char.c
@@ -35,7 +35,8 @@ static int vmc_write(SpiceCharDeviceInstance *sin, const 
uint8_t *buf, int len)
 uint8_t* p = (uint8_t*)buf;
 
 while (len > 0) {
-last_out = MIN(len, qemu_chr_be_can_write(scd->chr));
+int can_write = qemu_chr_be_can_write(scd->chr);
+last_out = MIN(len, can_write);
 if (last_out <= 0) {
 break;
 }
-- 
1.8.1.4




[Qemu-devel] [PATCH v2 3/4] block: drop duplicated slice extension code

2013-04-05 Thread Stefan Hajnoczi
The current slice is extended when an I/O request exceeds the limit.
There is no need to extend the slice every time we check a request.

Signed-off-by: Stefan Hajnoczi 
Tested-By: Benoit Canet 
---
 block.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 00eca27..aa16fc4 100644
--- a/block.c
+++ b/block.c
@@ -3867,10 +3867,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, 
int nb_sectors,
 int  bps_ret, iops_ret;
 
 now = qemu_get_clock_ns(vm_clock);
-if ((bs->slice_start < now)
-&& (bs->slice_end > now)) {
-bs->slice_end = now + BLOCK_IO_SLICE_TIME;
-} else {
+if (now > bs->slice_end) {
 bs->slice_start = now;
 bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
 memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH v2 2/5] register: Add Register API

2013-04-05 Thread Peter Crosthwaite
On Fri, Apr 5, 2013 at 7:26 PM, Peter Maydell  wrote:
> On 5 April 2013 09:43, Peter Crosthwaite  wrote:
>> This API provides some encapsulation of registers and factors our some
>> common functionality to common code. Bits of device state (usually MMIO
>> registers), often have all sorts of access restrictions and semantics
>> associated with them. This API allow you to define what those
>> restrictions are on a bit-by-bit basis.
>> +void register_write(RegisterInfo *reg, uint64_t val, uint64_t we)
>> +{
>> +uint64_t old_val, new_val, test;
>> +const RegisterAccessInfo *ac;
>> +const RegisterAccessError *rae;
>> +
>> +assert(reg);
>> +
>> +ac = reg->access;
>> +if (!ac || !ac->name) {
>> +qemu_log_mask(LOG_GUEST_ERROR, "%s: write to undefined device state 
>> "
>> +  "(written value: %#" PRIx64 ")\n", reg->prefix, val);
>> +return;
>> +}
>> +
>> +uint32_t no_w0_mask = ac->ro | ac->w1c | ac->nw0 | ~we;
>> +uint32_t no_w1_mask = ac->ro | ac->w1c | ac->nw1 | ~we;
>> +
>> +if (reg->debug) {
>> +fprintf(stderr, "%s:%s: write of value %#" PRIx64 "\n", reg->prefix,
>> +ac->name, val);
>> +}
>> +
>> +if (qemu_loglevel_mask(LOG_GUEST_ERROR)) {
>> +for (rae = ac->ge1; rae && rae->mask; rae++) {
>> +test = val & rae->mask;
>> +if (test) {
>> +register_write_log(reg, 1, test, LOG_GUEST_ERROR,
>> +   "invalid", rae->reason);
>> +}
>> +}
>> +for (rae = ac->ge0; rae && rae->mask; rae++) {
>> +test = val & rae->mask;
>> +if (test) {
>> +register_write_log(reg, 0, test, LOG_GUEST_ERROR,
>> +   "invalid", rae->reason);
>> +}
>> +}
>> +}
>> +
>> +if (qemu_loglevel_mask(LOG_UNIMP)) {
>> +for (rae = ac->ui1; rae && rae->mask; rae++) {
>> +test = val & rae->mask;
>> +if (test) {
>> +register_write_log(reg, 1, test, LOG_GUEST_ERROR,
>> +   "unimplmented", rae->reason);
>> +}
>> +}
>> +for (rae = ac->ui0; rae && rae->mask; rae++) {
>> +test = val & rae->mask;
>> +if (test) {
>> +register_write_log(reg, 0, test, LOG_GUEST_ERROR,
>> +   "unimplemented", rae->reason);
>> +}
>> +}
>> +}
>> +
>> +assert(reg->data);
>> +old_val = register_read_val(reg);
>> +
>> +new_val = val & ~(no_w1_mask & val);
>> +new_val |= no_w1_mask & old_val & val;
>> +new_val |= no_w0_mask & old_val & ~val;
>> +new_val &= ~(val & ac->w1c);
>> +
>> +if (ac->pre_write) {
>> +new_val = ac->pre_write(reg, new_val);
>> +}
>> +register_write_val(reg, new_val);
>> +if (ac->post_write) {
>> +ac->post_write(reg, new_val);
>> +}
>> +}
>
> Wow, this feels pretty heavyweight for "write a register"...
>

Its a pritty fast path through if all the optionals turned off. If I
was going to lighten it up, Id get rid of the reg_size and the byte
loop first although that would mandate uint64_t as the data type for
the backing store.

But the intended primary usage of the API is for device control and
status registers, where you are generally not interested in
performance, unless you are doing some form of PIO. In that case, you
can opt out on a per register basis and make yourself a fast path for
the critical registers (r/w fifo heads access regs etc). This is
designed for developer and debugging convenience, where you have a
large number or registers with a heteorgenous mix of accesses and side
effects but at the same time the registers are infrequent access. This
is true of most device registers.

Another solution is some sort of "lite" mode. A single check in the
definition that disables a lot of this and cuts to the chase (the
actual write).

> -- PMM
>



Re: [Qemu-devel] (target-arm) Patch for instruction-level tracing, any interest?

2013-04-05 Thread Peter Maydell
On 5 April 2013 07:59, Japheth Lim  wrote:
> I have written a patch for the ARM TCG that prints the program
> counter and any load/store addresses for every instruction that's
> run. We use this feature to get detailed traces of our ARM programs.
>
> We can tidy up and submit the patch; is there any interest in this
> feature?

I think I would really only be interested if it was a generic
feature that worked for all target-* CPU frontends (and which
wasn't too intrusive, and was enablable at runtime, not compile
time).

Wanting to do this sort of tracing is quite a common thing I think;
the difficulty is doing it as a long-term maintainable generic
feature.

-- PMM



[Qemu-devel] [PATCH] ide: refuse WIN_READ_NATIVE_MAX on empty device

2013-04-05 Thread Stefan Hajnoczi
What is the highest addressable sector on an empty CD-ROM?  Nothing is
addressable so produce an error.

This patch prevents a divide-by-zero in ide_set_sector() since
s->sectors and s->heads would be 0.  Not to mention that a sector=-1
argument would be nonsense.

Note that WIN_READ_NATIVE_MAX can be triggered using hdparm -N 1024
/dev/cdrom.  The LBA bit will be set to 1 though, so the only easy way
to go down the ide_set_sector() CHS code path which divides by zero is
to comment out the s->select & 0x40 case for testing.

Signed-off-by: Stefan Hajnoczi 
---
 hw/ide/core.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 3743dc3..77f1379 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1262,6 +1262,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 lba48 = 1;
 /* fall through */
 case WIN_READ_NATIVE_MAX:
+/* Refuse if no sectors are addressable (e.g. medium not inserted) */
+if (s->nb_sectors == 0) {
+goto abort_cmd;
+}
ide_cmd_lba48_transform(s, lba48);
 ide_set_sector(s, s->nb_sectors - 1);
 s->status = READY_STAT | SEEK_STAT;
-- 
1.8.1.4




[Qemu-devel] [PATCH v6 0/2] block: Add support for Secure Shell (ssh) block device

2013-04-05 Thread Richard W.M. Jones
The only change here is to use g_get_user_name from glib, rebasing
against latest git, and to re-test everything still works.

Rich.




[Qemu-devel] [PATCH v6 2/2] iotests: Add 'check -ssh' option to test Secure Shell block device.

2013-04-05 Thread Richard W.M. Jones
From: "Richard W.M. Jones" 

Signed-off-by: Richard W.M. Jones 
---
 tests/qemu-iotests/common| 5 +
 tests/qemu-iotests/common.rc | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index b3aad89..6826ea7 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -137,6 +137,7 @@ check options
 -rbdtest rbd
 -sheepdog   test sheepdog
 -nbdtest nbd
+-sshtest ssh
 -xdiff graphical mode diff
 -nocache   use O_DIRECT on backing file
 -misalign  misalign memory allocations
@@ -206,6 +207,10 @@ testlist options
IMGPROTO=nbd
xpand=false
;;
+-ssh)
+IMGPROTO=ssh
+xpand=false
+;;
-nocache)
QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache"
xpand=false
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index e522d61..a536bf7 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -52,6 +52,9 @@ if [ "$IMGPROTO" = "file" ]; then
 elif [ "$IMGPROTO" = "nbd" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 TEST_IMG="nbd:127.0.0.1:10810"
+elif [ "$IMGPROTO" = "ssh" ]; then
+TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
+TEST_IMG="ssh://127.0.0.1$TEST_IMG_FILE"
 else
 TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
 fi
-- 
1.8.1.4




[Qemu-devel] [PATCH v6 1/2] block: Add support for Secure Shell (ssh) block device.

2013-04-05 Thread Richard W.M. Jones
From: "Richard W.M. Jones" 

  qemu-system-x86_64 -drive file=ssh://hostname/some/image

QEMU will ssh into 'hostname' and open '/some/image' which is made
available as a standard block device.

You can specify a username (ssh://user@host/...) and/or a port number
(ssh://host:port/...).  You can also use an alternate syntax using
properties (file.user, file.host, file.port, file.path).

Current limitations:

- Authentication must be done without passwords or passphrases, using
  ssh-agent.  Other authentication methods are not supported.

- Uses a single connection, instead of concurrent AIO with multiple
  SSH connections.

This is implemented using libssh2 on the client side.  The server just
requires a regular ssh daemon with sftp-server support.  Most ssh
daemons on Unix/Linux systems will work out of the box.

Signed-off-by: Richard W.M. Jones 
Cc: Stefan Hajnoczi 
Cc: Kevin Wolf 
---
 block/Makefile.objs |   1 +
 block/ssh.c | 863 
 configure   |  48 +++
 qemu-doc.texi   |  40 +++
 qemu-options.hx |  12 +
 5 files changed, 964 insertions(+)
 create mode 100644 block/ssh.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index c067f38..6c4b5bc 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -13,6 +13,7 @@ block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_LIBSSH2) += ssh.o
 endif
 
 common-obj-y += stream.o
diff --git a/block/ssh.c b/block/ssh.c
new file mode 100644
index 000..7c01233
--- /dev/null
+++ b/block/ssh.c
@@ -0,0 +1,863 @@
+/*
+ * Secure Shell (ssh) backend for QEMU.
+ *
+ * Copyright (C) 2013 Red Hat Inc., Richard W.M. Jones 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "block/block_int.h"
+#include "qemu/sockets.h"
+#include "qemu/uri.h"
+#include "qapi/qmp/qint.h"
+
+/* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in
+ * this block driver code.
+ *
+ * TRACE_LIBSSH2= enables tracing in libssh2 itself.  Note
+ * that this requires that libssh2 was specially compiled with the
+ * `./configure --enable-debug' option, so most likely you will have
+ * to compile it yourself.  The meaning of  is described
+ * here: http://www.libssh2.org/libssh2_trace.html
+ */
+#define DEBUG_SSH 0
+#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
+
+#if DEBUG_SSH
+#define DPRINTF(fmt,...)\
+do {\
+fprintf(stderr, "ssh: %-15s " fmt "\n", __func__, ##__VA_ARGS__); \
+} while (0)
+#else
+#define DPRINTF(fmt,...) /* nothing */
+#endif
+
+typedef struct BDRVSSHState {
+/* Coroutine. */
+CoMutex lock;
+
+/* SSH connection. */
+int sock; /* socket */
+LIBSSH2_SESSION *session; /* ssh session */
+LIBSSH2_SFTP *sftp;   /* sftp session */
+LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
+
+/* See ssh_seek() function below. */
+int64_t offset;
+bool offset_op_read;
+} BDRVSSHState;
+
+static void ssh_state_init(BDRVSSHState *s)
+{
+memset(s, 0, sizeof *s);
+s->sock = -1;
+s->offset = -1;
+qemu_co_mutex_init(&s->lock);
+}
+
+static void ssh_state_free(BDRVSSHState *s)
+{
+if (s->sftp_handle) {
+libssh2_sftp_close(s->sftp_handle);
+}
+if (s->sftp) {
+libssh2_sftp_shutdown(s->sftp);
+}
+if (s->session) {
+libssh2_session_disconnect(s->session,
+   "from qemu ssh client: "
+   "user closed the connection");
+libssh2_session_free(s->session);
+}
+if (s->sock >= 0) {
+close(s->sock);
+}
+}
+
+/* Wrapp

Re: [Qemu-devel] [PATCH 20/24] xen: re-enable refresh interval reporting for xenfb

2013-04-05 Thread Stefano Stabellini
On Fri, 5 Apr 2013, Gerd Hoffmann wrote:
> >> -#else
> >>; /* nothing */
> >> -#endif
> >>  } else {
> >>/* we don't get update notifications, thus use the
> >> * sledge hammer approach ... */
> > 
> > You might as well remove the if () nothing; case.
> 
> Yep, will do.
> 
> >> @@ -785,6 +755,20 @@ static void xenfb_update(void *opaque)
> >>  xenfb->up_fullscreen = 0;
> >>  }
> >>  
> >> +static void xenfb_update_interval(void *opaque, uint64_t interval)
> >> +{
> >> +struct XenFB *xenfb = opaque;
> >> +
> >> +if (xenfb->feature_update) {
> >> +#ifdef XENFB_TYPE_REFRESH_PERIOD
> >> +if (xenfb_queue_full(xenfb)) {
> >> +return;
> >> +}
> >> +xenfb_send_refresh_period(xenfb, interval);
> > 
> > Shouldn't we be updating xenfb->refresh_period here? And shouldn't we
> > call xenfb_send_refresh_period only if interval != 
> > xenfb->refresh_period?
> 
> > On the other hand if refresh_period is not useful anymore, shouldn't
> > we remove it from struct XenFB?
> 
> xenfb_update_interval is only called when interval changes, which I
> think means we don't need refresh_period any more, correct?
 
that's right



Re: [Qemu-devel] [PATCH v2 3/5] register: Add Memory API glue

2013-04-05 Thread Paolo Bonzini
Il 05/04/2013 10:43, Peter Crosthwaite ha scritto:
> Add memory io handlers that glue the register API to the memory API.
> Just translation functions at this stage. Although it does allow for
> devices to be created without all-in-one mmio r/w handlers.
> 
> Signed-off-by: Peter Crosthwaite 
> ---
> 
>  include/exec/register.h |   13 +

Please put it in include/hw/.

Putting these five in exec/ was a mistake:

#include "exec/address-spaces.h"
#include "exec/hwaddr.h"
#include "exec/ioport.h"
#include "exec/iorange.h"
#include "exec/memory.h"

Most files in hw/ should not need include/exec/.

>  register.c  |   43 +++

If my reorganization goes in before, please make it hw/core/register.c.

The next step in the reorganization would be to move memory.c to hw/core
too.

Paolo



[Qemu-devel] [PATCHv2 1/2] Xen PV backend: Move call to bdrv_new from blk_init to blk_connect

2013-04-05 Thread Alex Bligh
This commit delays the point at which bdrv_new (and hence blk_open
on the underlying device) is called from blk_init to blk_connect.
This ensures that in an inbound live migrate, the block device is
not opened until it has been closed at the other end. This is in
preparation for supporting devices with open/close consistency
without using O_DIRECT. This commit does NOT itself change O_DIRECT
semantics.

Commit f3903bbac78a81fcbce1350cdce860764a62783a (in xen's qemu-upstream
repo but not in qemu's repo) should be reverted prior to applying
this commit.

Signed-off-by: Alex Bligh 
---
 hw/xen_disk.c |   71 -
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index 69e1d9d..dd5a711 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -701,7 +701,7 @@ static void blk_alloc(struct XenDevice *xendev)
 static int blk_init(struct XenDevice *xendev)
 {
 struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
-int index, qflags, info = 0;
+int info = 0;
 
 /* read xenstore entries */
 if (blkdev->params == NULL) {
@@ -744,10 +744,7 @@ static int blk_init(struct XenDevice *xendev)
 }
 
 /* read-only ? */
-qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
-if (strcmp(blkdev->mode, "w") == 0) {
-qflags |= BDRV_O_RDWR;
-} else {
+if (strcmp(blkdev->mode, "w")) {
 info  |= VDISK_READONLY;
 }
 
@@ -756,6 +753,41 @@ static int blk_init(struct XenDevice *xendev)
 info  |= VDISK_CDROM;
 }
 
+blkdev->file_blk  = BLOCK_SIZE;
+
+/* fill info
+ * blk_connect supplies sector-size and sectors
+ */
+xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
+xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
+xenstore_write_be_int(&blkdev->xendev, "info", info);
+return 0;
+
+out_error:
+g_free(blkdev->params);
+blkdev->params = NULL;
+g_free(blkdev->mode);
+blkdev->mode = NULL;
+g_free(blkdev->type);
+blkdev->type = NULL;
+g_free(blkdev->dev);
+blkdev->dev = NULL;
+g_free(blkdev->devtype);
+blkdev->devtype = NULL;
+return -1;
+}
+
+static int blk_connect(struct XenDevice *xendev)
+{
+struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+int pers, index, qflags;
+
+/* read-only ? */
+qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
+if (strcmp(blkdev->mode, "w") == 0) {
+qflags |= BDRV_O_RDWR;
+}
+
 /* init qemu block driver */
 index = (blkdev->xendev.dev - 202 * 256) / 16;
 blkdev->dinfo = drive_get(IF_XEN, 0, index);
@@ -771,7 +803,7 @@ static int blk_init(struct XenDevice *xendev)
 }
 }
 if (!blkdev->bs) {
-goto out_error;
+return -1;
 }
 } else {
 /* setup via qemu cmdline -> already setup for us */
@@ -793,33 +825,10 @@ static int blk_init(struct XenDevice *xendev)
   blkdev->type, blkdev->fileproto, blkdev->filename,
   blkdev->file_size, blkdev->file_size >> 20);
 
-/* fill info */
-xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
-xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
-xenstore_write_be_int(&blkdev->xendev, "info",info);
-xenstore_write_be_int(&blkdev->xendev, "sector-size", 
blkdev->file_blk);
+/* Fill in number of sector size and number of sectors */
+xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk);
 xenstore_write_be_int(&blkdev->xendev, "sectors",
   blkdev->file_size / blkdev->file_blk);
-return 0;
-
-out_error:
-g_free(blkdev->params);
-blkdev->params = NULL;
-g_free(blkdev->mode);
-blkdev->mode = NULL;
-g_free(blkdev->type);
-blkdev->type = NULL;
-g_free(blkdev->dev);
-blkdev->dev = NULL;
-g_free(blkdev->devtype);
-blkdev->devtype = NULL;
-return -1;
-}
-
-static int blk_connect(struct XenDevice *xendev)
-{
-struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
-int pers;
 
 if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) 
== -1) {
 return -1;
-- 
1.7.9.5




[Qemu-devel] [PATCHv2 2/2] Xen PV backend: Disable use of O_DIRECT by default as it results in crashes.

2013-04-05 Thread Alex Bligh
Due to what is almost certainly a kernel bug, writes with O_DIRECT may
continue to reference the page after the write has been marked as
completed, particularly in the case of TCP retransmit. In other
scenarios, this "merely" risks data corruption on the write, but with
Xen pages from domU are only transiently mapped into dom0's memory,
resulting in kernel panics when they are subsequently accessed.

This brings PV devices in line with emulated devices.  Removing
O_DIRECT is safe as barrier operations are now correctly passed
through.

See:
   http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
for more details.

Signed-off-by: Alex Bligh 
---
 hw/xen_disk.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index dd5a711..195dbb2 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -783,7 +783,7 @@ static int blk_connect(struct XenDevice *xendev)
 int pers, index, qflags;
 
 /* read-only ? */
-qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
+qflags = BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
 if (strcmp(blkdev->mode, "w") == 0) {
 qflags |= BDRV_O_RDWR;
 }
-- 
1.7.9.5




[Qemu-devel] [PATCHv2 2/2] Xen PV backend (for qemu-upstream-4.2-testing): Disable use of O_DIRECT by default as it results in crashes.

2013-04-05 Thread Alex Bligh
Due to what is almost certainly a kernel bug, writes with O_DIRECT may
continue to reference the page after the write has been marked as
completed, particularly in the case of TCP retransmit. In other
scenarios, this "merely" risks data corruption on the write, but with
Xen pages from domU are only transiently mapped into dom0's memory,
resulting in kernel panics when they are subsequently accessed.

This brings PV devices in line with emulated devices.  Removing
O_DIRECT is safe as barrier operations are now correctly passed
through.

See:
   http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
for more details.

Signed-off-by: Alex Bligh 
---
 hw/xen_disk.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index 3cecf8d..50fa25e 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -641,7 +641,7 @@ static int blk_connect(struct XenDevice *xendev)
 int index, qflags;
 
 /* read-only ? */
-qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
+qflags = BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
 if (strcmp(blkdev->mode, "w") == 0) {
 qflags |= BDRV_O_RDWR;
 }
-- 
1.7.9.5




[Qemu-devel] [PATCHv2 1/2] Xen PV backend (for qemu-upstream-4.2-testing): Move call to bdrv_new from blk_init to blk_connect

2013-04-05 Thread Alex Bligh
This commit delays the point at which bdrv_new (and hence blk_open
on the underlying device) is called from blk_init to blk_connect.
This ensures that in an inbound live migrate, the block device is
not opened until it has been closed at the other end. This is in
preparation for supporting devices with open/close consistency
without using O_DIRECT. This commit does NOT itself change O_DIRECT
semantics.

Signed-off-by: Alex Bligh 
---
 hw/xen_disk.c |   69 -
 1 file changed, 39 insertions(+), 30 deletions(-)

diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index a402ac8..3cecf8d 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -560,7 +560,7 @@ static void blk_alloc(struct XenDevice *xendev)
 static int blk_init(struct XenDevice *xendev)
 {
 struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
-int index, qflags, info = 0;
+int info = 0;
 
 /* read xenstore entries */
 if (blkdev->params == NULL) {
@@ -603,18 +603,49 @@ static int blk_init(struct XenDevice *xendev)
 }
 
 /* read-only ? */
-qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
-if (strcmp(blkdev->mode, "w") == 0) {
-qflags |= BDRV_O_RDWR;
-} else {
+if (strcmp(blkdev->mode, "w")) {
 info  |= VDISK_READONLY;
 }
-
+
 /* cdrom ? */
 if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) {
 info  |= VDISK_CDROM;
 }
+
+blkdev->file_blk  = BLOCK_SIZE;
+
+/* fill info
+ * blk_connect supplies sector-size and sectors
+ */
+xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
+xenstore_write_be_int(&blkdev->xendev, "info",info);
+return 0;
+
+out_error:
+g_free(blkdev->params);
+blkdev->params = NULL;
+g_free(blkdev->mode);
+blkdev->mode = NULL;
+g_free(blkdev->type);
+blkdev->type = NULL;
+g_free(blkdev->dev);
+blkdev->dev = NULL;
+g_free(blkdev->devtype);
+blkdev->devtype = NULL;
+return -1;
+}
+
+static int blk_connect(struct XenDevice *xendev)
+{
+struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+int index, qflags;
 
+/* read-only ? */
+qflags = BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NATIVE_AIO;
+if (strcmp(blkdev->mode, "w") == 0) {
+qflags |= BDRV_O_RDWR;
+}
+
 /* init qemu block driver */
 index = (blkdev->xendev.dev - 202 * 256) / 16;
 blkdev->dinfo = drive_get(IF_XEN, 0, index);
@@ -630,7 +661,7 @@ static int blk_init(struct XenDevice *xendev)
 }
 }
 if (!blkdev->bs) {
-goto out_error;
+return -1;
 }
 } else {
 /* setup via qemu cmdline -> already setup for us */
@@ -638,7 +669,6 @@ static int blk_init(struct XenDevice *xendev)
 blkdev->bs = blkdev->dinfo->bdrv;
 }
 bdrv_attach_dev_nofail(blkdev->bs, blkdev);
-blkdev->file_blk  = BLOCK_SIZE;
 blkdev->file_size = bdrv_getlength(blkdev->bs);
 if (blkdev->file_size < 0) {
 xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
@@ -652,31 +682,10 @@ static int blk_init(struct XenDevice *xendev)
   blkdev->type, blkdev->fileproto, blkdev->filename,
   blkdev->file_size, blkdev->file_size >> 20);
 
-/* fill info */
-xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1);
-xenstore_write_be_int(&blkdev->xendev, "info",info);
+/* Fill in number of sector size and number of sectors */
 xenstore_write_be_int(&blkdev->xendev, "sector-size", 
blkdev->file_blk);
 xenstore_write_be_int(&blkdev->xendev, "sectors",
   blkdev->file_size / blkdev->file_blk);
-return 0;
-
-out_error:
-g_free(blkdev->params);
-blkdev->params = NULL;
-g_free(blkdev->mode);
-blkdev->mode = NULL;
-g_free(blkdev->type);
-blkdev->type = NULL;
-g_free(blkdev->dev);
-blkdev->dev = NULL;
-g_free(blkdev->devtype);
-blkdev->devtype = NULL;
-return -1;
-}
-
-static int blk_connect(struct XenDevice *xendev)
-{
-struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
 
 if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) 
== -1) {
 return -1;
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH] [RFC] Xen PV backend: Move call to bdrv_new from blk_init to blk_connect

2013-04-05 Thread Alex Bligh

Stefano,

--On 2 April 2013 12:08:25 +0100 Stefano Stabellini 
 wrote:



I'll redo and move setting sector-size and sectors into blk_connect, and
then add a second commit which removes BDRV_O_NOCACHE. Oh, and test it
...


Good :)


I've just sent patches for qemu-upstream-unstable and
qemu-upstream-4.2-testing to do this. These are pretty lightly
tested but should be fine as it's just moving lines around really.

--
Alex Bligh



Re: [Qemu-devel] [PATCH] ide: refuse WIN_READ_NATIVE_MAX on empty device

2013-04-05 Thread Markus Armbruster
Stefan Hajnoczi  writes:

> What is the highest addressable sector on an empty CD-ROM?  Nothing is
> addressable so produce an error.
>
> This patch prevents a divide-by-zero in ide_set_sector() since
> s->sectors and s->heads would be 0.  Not to mention that a sector=-1
> argument would be nonsense.
>
> Note that WIN_READ_NATIVE_MAX can be triggered using hdparm -N 1024
> /dev/cdrom.  The LBA bit will be set to 1 though, so the only easy way
> to go down the ide_set_sector() CHS code path which divides by zero is
> to comment out the s->select & 0x40 case for testing.

Suggests you did that.

Have you tried the reproducer with a physical drive?  Does it fail the
command when empty, too?



[Qemu-devel] [PATCH 2/2] qcow2: Fix L1 write error handling in qcow2_update_snapshot_refcount

2013-04-05 Thread Kevin Wolf
It ignored the error code, and at least the 'goto fail' is obvious
nonsense as it creates an endless loop (if the next attempt doesn't
magically succeed) and leaves the in-memory L1 table in big-endian
instead of converting it back.

In error cases, there's no point in writing an updated L1 table, so
skip this part for them.

Signed-off-by: Kevin Wolf 
---
 block/qcow2-refcount.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 4799681..b32738f 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -851,14 +851,16 @@ fail:
 }
 
 /* Update L1 only if it isn't deleted anyway (addend = -1) */
-if (addend >= 0 && l1_modified) {
-for(i = 0; i < l1_size; i++)
+if (ret == 0 && addend >= 0 && l1_modified) {
+for (i = 0; i < l1_size; i++) {
 cpu_to_be64s(&l1_table[i]);
-if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
-l1_size2) < 0)
-goto fail;
-for(i = 0; i < l1_size; i++)
+}
+
+ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+
+for (i = 0; i < l1_size; i++) {
 be64_to_cpus(&l1_table[i]);
+}
 }
 if (l1_allocated)
 g_free(l1_table);
-- 
1.8.1.4




[Qemu-devel] [PATCH 1/2] qcow2: Return real error in qcow2_update_snapshot_refcount

2013-04-05 Thread Kevin Wolf
This fixes the error message triggered by the following script:

cat > /tmp/blkdebug.cfg <
---
 block/qcow2-refcount.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index c38e970..4799681 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -747,10 +747,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 if (l1_table_offset != s->l1_table_offset) {
 l1_table = g_malloc0(align_offset(l1_size2, 512));
 l1_allocated = 1;
-if (bdrv_pread(bs->file, l1_table_offset,
-   l1_table, l1_size2) != l1_size2)
-{
-ret = -EIO;
+
+ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+if (ret < 0) {
 goto fail;
 }
 
@@ -802,7 +801,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 }
 
 if (refcount < 0) {
-ret = -EIO;
+ret = refcount;
 goto fail;
 }
 }
@@ -833,7 +832,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
 }
 if (refcount < 0) {
-ret = -EIO;
+ret = refcount;
 goto fail;
 } else if (refcount == 1) {
 l2_offset |= QCOW_OFLAG_COPIED;
-- 
1.8.1.4




[Qemu-devel] [Bug 752476] Re: monitor command mouse_button 1 moves mouse

2013-04-05 Thread Andreas Färber
Apart from the patch missing a Signed-off-by, I have doubts whether
reusing the coordinates from the last monitor command is generally
correct. It should rather be the current coordinates, whether set by
monitor or via VNC/SDL, I guess.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/752476

Title:
  monitor command mouse_button 1 moves mouse

Status in QEMU:
  New

Bug description:
  via the qemu -monitor interface, it is possible to move and click the mouse 
using
  mouse_move 2 1
  mouse_button 1
  but the mouse_button command always moves the mouse to (0,0) making it rather 
unusable to (auto-)trigger any widgets in the VM from the outside.

  Would be nice to have this available for my qemu/kvm based os-autoinst
  testing framework.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/752476/+subscriptions



Re: [Qemu-devel] [PATCH v17 5/6] pvpanic: create pvpanic device by default

2013-04-05 Thread Paolo Bonzini
Il 05/04/2013 08:36, Hu Tao ha scritto:
> Also parse command line options for ioport and set
> it accordingly.

This does not do the correct thing for versioned machine types like "-M
pc-1.4".  Also, adding stuff to vl.c is really something you cannot do.
 Please don't be afraid to ask!

See the attached patch, untested.  Feel free to take it with my S-o-b line.

Patches 1-4 and 6 are okay.

Paolo

> Signed-off-by: Hu Tao 
> ---
>  hw/i386/pc_piix.c |  2 ++
>  hw/i386/pc_q35.c  |  1 +
>  hw/pc.h   |  4 
>  hw/pvpanic.c  | 42 ++
>  vl.c  |  2 ++
>  5 files changed, 51 insertions(+)
> 
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 0abc9f1..897254a 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -217,6 +217,8 @@ static void pc_init1(MemoryRegion *system_memory,
>  if (pci_enabled) {
>  pc_pci_device_init(pci_bus);
>  }
> +
> +pvpanic_init(isa_bus);
>  }
>  
>  static void pc_init_pci(QEMUMachineInitArgs *args)
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index 4f5f347..fd9ab01 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -204,6 +204,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
>  pc_vga_init(isa_bus, host_bus);
>  audio_init(isa_bus, host_bus);
>  pc_nic_init(isa_bus, host_bus);
> +pvpanic_init(isa_bus);
>  if (pci_enabled) {
>  pc_pci_device_init(host_bus);
>  }
> diff --git a/hw/pc.h b/hw/pc.h
> index 8e1dd4c..1311037 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -178,6 +178,10 @@ static inline bool isa_ne2000_init(ISABus *bus, int 
> base, int irq, NICInfo *nd)
>  /* pc_sysfw.c */
>  void pc_system_firmware_init(MemoryRegion *rom_memory);
>  
> +/* pvpanic.c */
> +int pvpanic_init(ISABus *bus);
> +int pvpanic_ioport(QemuOpts *opts, void *opaque);
> +
>  /* e820 types */
>  #define E820_RAM1
>  #define E820_RESERVED   2
> diff --git a/hw/pvpanic.c b/hw/pvpanic.c
> index b3d10e2..2e237a4 100644
> --- a/hw/pvpanic.c
> +++ b/hw/pvpanic.c
> @@ -19,6 +19,8 @@
>  #include 
>  
>  #include "hw/fw_cfg.h"
> +#include "hw/pc.h"
> +#include "qapi/string-input-visitor.h"
>  
>  /* The bit of supported pv event */
>  #define PVPANIC_F_PANICKED  0
> @@ -57,6 +59,36 @@ typedef struct PVPanicState {
>  uint16_t ioport;
>  } PVPanicState;
>  
> +static uint16_t ioport;
> +
> +static int set_ioport(const char *name, const char *value, void *opaque)
> +{
> +StringInputVisitor *mi;
> +
> +if (strcmp(name, "ioport") == 0) {
> +mi = string_input_visitor_new(value);
> +visit_type_uint16(string_input_get_visitor(mi), &ioport, "ioport",
> +  NULL);
> +string_input_visitor_cleanup(mi);
> +}
> +
> +return 0;
> +}
> +
> +int pvpanic_ioport(QemuOpts *opts, void *opaque)
> +{
> +const char *driver;
> +
> +driver = qemu_opt_get(opts, "driver");
> +if (!driver || strcmp(driver, "pvpanic")) {
> +return 0;
> +}
> +
> +qemu_opt_foreach(opts, set_ioport, NULL, 0);
> +
> +return 0;
> +}
> +
>  /* return supported events on read */
>  static uint64_t pvpanic_ioport_read(void *opaque, hwaddr addr, unsigned size)
>  {
> @@ -84,6 +116,10 @@ static int pvpanic_isa_initfn(ISADevice *dev)
>  static bool port_configured;
>  void *fw_cfg;
>  
> +if (ioport) {
> +s->ioport = ioport;
> +}
> +
>  memory_region_init_io(&s->io, &pvpanic_ops, s, "pvpanic", 1);
>  isa_register_ioport(dev, &s->io, s->ioport);
>  
> @@ -115,6 +151,12 @@ static void pvpanic_isa_class_init(ObjectClass *klass, 
> void *data)
>  dc->props = pvpanic_isa_properties;
>  }
>  
> +int pvpanic_init(ISABus *bus)
> +{
> +isa_create_simple(bus, TYPE_ISA_PVPANIC_DEVICE);
> +return 0;
> +}
> +
>  static TypeInfo pvpanic_isa_info = {
>  .name  = TYPE_ISA_PVPANIC_DEVICE,
>  .parent= TYPE_ISA_DEVICE,
> diff --git a/vl.c b/vl.c
> index c91fd4e..1a3ffa2 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -4302,6 +4302,8 @@ int main(int argc, char **argv, char **envp)
>  exit (i == 1 ? 1 : 0);
>  }
>  
> +qemu_opts_foreach(qemu_find_opts("device"), pvpanic_ioport, NULL, 0);
> +
>  if (machine->compat_props) {
>  qdev_prop_register_global_list(machine->compat_props);
>  }
> 




[Qemu-devel] [PATCH 0/2] qcow2: Internal snapshot error handling fixes

2013-04-05 Thread Kevin Wolf
Kevin Wolf (2):
  qcow2: Return real error in qcow2_update_snapshot_refcount
  qcow2: Fix L1 write error handling in qcow2_update_snapshot_refcount

 block/qcow2-refcount.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

-- 
1.8.1.4




Re: [Qemu-devel] [PATCH 00/11] build ACPI MADT for fw_cfg clients

2013-04-05 Thread Laszlo Ersek
On 04/05/13 01:22, Kevin O'Connor wrote:
> On Thu, Apr 04, 2013 at 09:52:31AM +0200, Laszlo Ersek wrote:
>> On 04/03/13 22:05, Anthony Liguori wrote:
>>> Laszlo Ersek  writes:
 Any chance patches 01 to 09 could be considered? Esp. 06 which removes
 an out-of-bounds access (an innocent-looking one, admittedly).

 I'm OK too if the series is dropped (patch 11 was the main motivation,
 but the interface that it extends was deemed unsuitable going forward on
 the seabios list). I'd just like to hear the maintainer with
 jurisdiction say the NAK. ("Too expensive even to review for too little
 gain" is a good reason.)
>>>
>>> The whole thing looks pretty nice to me.
>>
>> That's awesome, thank you very much!
>>
>>>  I'll merge the full series in
>>> a day or so unless anyone objects.
>>
>> For transparency's sake: Kevin, this is where you'd object to patch 11:
>> it adds an MADT to the existing fw_cfg blob, which, combined with an
>> older (=current) SeaBIOS, leads to a duplicated MADT; see also the blurb
>> in 00/11 which quotes that from
> 
> Right.  I don't think we should commit patch 11 as that would cause
> the current QEMU/SeaBIOS to incorrectly create two MADT tables.  We
> should instead create the new MADT in a separate fw_cfg entry.
> 
> The other patches in the series look sane to me.

Anthony committed 01-10/11, I'm going to rework & post 11/11 as a
separate patch. Many thanks.

Laszlo




Re: [Qemu-devel] [PATCH v17] pvpanic: pvpanic device driver

2013-04-05 Thread Paolo Bonzini
Il 05/04/2013 09:10, Hu Tao ha scritto:
> pvpanic device is a qemu simulated device through which guest panic
> event is sent to host.
> 
> Signed-off-by: Hu Tao 

Reviewed-by: Paolo Bonzini 

Matthew, it would be nice to include this in 3.10.

The implementation of the device in QEMU is final, the only thing that
needs a respin is how it is enabled by default.

Paolo

> ---
> 
> ref: http://lists.nongnu.org/archive/html/qemu-devel/2013-04/msg01028.html
> 
>  drivers/platform/x86/Kconfig   |   7 +++
>  drivers/platform/x86/Makefile  |   2 +
>  drivers/platform/x86/pvpanic.c | 115 
> +
>  3 files changed, 124 insertions(+)
>  create mode 100644 drivers/platform/x86/pvpanic.c
> 
> diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
> index 3338437..527ed04 100644
> --- a/drivers/platform/x86/Kconfig
> +++ b/drivers/platform/x86/Kconfig
> @@ -781,4 +781,11 @@ config APPLE_GMUX
> graphics as well as the backlight. Currently only backlight
> control is supported by the driver.
>  
> +config PVPANIC
> + tristate "pvpanic device support"
> + depends on ACPI
> + ---help---
> +   This driver provides support for pvpanic device, which is a qemu
> +   simulated device through which guest panic event is sent to host.
> +
>  endif # X86_PLATFORM_DEVICES
> diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
> index ace2b38..ef0ec74 100644
> --- a/drivers/platform/x86/Makefile
> +++ b/drivers/platform/x86/Makefile
> @@ -51,3 +51,5 @@ obj-$(CONFIG_INTEL_OAKTRAIL)+= intel_oaktrail.o
>  obj-$(CONFIG_SAMSUNG_Q10)+= samsung-q10.o
>  obj-$(CONFIG_APPLE_GMUX) += apple-gmux.o
>  obj-$(CONFIG_CHROMEOS_LAPTOP)+= chromeos_laptop.o
> +
> +obj-$(CONFIG_PVPANIC)   += pvpanic.o
> diff --git a/drivers/platform/x86/pvpanic.c b/drivers/platform/x86/pvpanic.c
> new file mode 100644
> index 000..81c95ec
> --- /dev/null
> +++ b/drivers/platform/x86/pvpanic.c
> @@ -0,0 +1,115 @@
> +/*
> + *  pvpanic.c - pvpanic Device Support
> + *
> + *  Copyright (C) 2013 Fujitsu.
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, write to the Free Software
> + *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  
> 02110-1301  USA
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +MODULE_AUTHOR("Hu Tao ");
> +MODULE_DESCRIPTION("pvpanic device driver");
> +MODULE_LICENSE("GPL");
> +
> +static int pvpanic_add(struct acpi_device *device);
> +static int pvpanic_remove(struct acpi_device *device);
> +
> +static const struct acpi_device_id pvpanic_device_ids[] = {
> + { "QEMU0001", 0},
> + { "", 0},
> +};
> +MODULE_DEVICE_TABLE(acpi, pvpanic_device_ids);
> +
> +#define PVPANIC_PANICKED (1 << 0)
> +
> +static acpi_handle handle;
> +
> +static struct acpi_driver pvpanic_driver = {
> + .name = "pvpanic",
> + .class ="QEMU",
> + .ids =  pvpanic_device_ids,
> + .ops =  {
> + .add =  pvpanic_add,
> + .remove =   pvpanic_remove,
> + },
> + .owner =THIS_MODULE,
> +};
> +
> +static void
> +pvpanic_send_event(unsigned int event)
> +{
> + union acpi_object arg;
> + struct acpi_object_list arg_list;
> +
> + if (!handle)
> + return;
> +
> + arg.type = ACPI_TYPE_INTEGER;
> + arg.integer.value = event;
> +
> + arg_list.count = 1;
> + arg_list.pointer = &arg;
> +
> + acpi_evaluate_object(handle, "WRPT", &arg_list, NULL);
> +}
> +
> +static int
> +pvpanic_panic_notify(struct notifier_block *nb, unsigned long code,
> +  void *unused)
> +{
> + pvpanic_send_event(PVPANIC_PANICKED);
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block pvpanic_panic_nb = {
> + .notifier_call = pvpanic_panic_notify,
> +};
> +
> +static int pvpanic_add(struct acpi_device *device)
> +{
> + acpi_status status;
> + u64 ret;
> +
> + status = acpi_evaluate_integer(device->handle, "_STA", NULL,
> +&ret);
> +
> + if (ACPI_FAILURE(status) || (ret & 0x0B) != 0x0B)
> + return -ENODEV;
> +
> + handle = device->handle;
> + atomic_notifier_chain_

Re: [Qemu-devel] [PATCH][RFC v2 3/7] vl: create power chip device

2013-04-05 Thread Paolo Bonzini
Il 05/04/2013 06:28, liguang ha scritto:
> Signed-off-by: liguang 
> ---
>  vl.c |4 
>  1 files changed, 4 insertions(+), 0 deletions(-)
> 
> diff --git a/vl.c b/vl.c
> index aeed7f4..a14549e 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -171,6 +171,8 @@ int main(int argc, char **argv)
>  #include "ui/qemu-spice.h"
>  #include "qapi/string-input-visitor.h"
>  
> +#include "hw/power.h"
> +
>  //#define DEBUG_NET
>  //#define DEBUG_SLIRP
>  
> @@ -4295,6 +4297,8 @@ int main(int argc, char **argv, char **envp)
>  
>  qdev_machine_init();
>  
> +qdev_init_nofail(qdev_create(NULL, TYPE_POWER_CHIP));
> +

You cannot just add a random device to the machine.

Perhaps what you want to do is define a QOM interface that some device
in the machine will implement.

But right now this all seems very nebulous.  Honestly, I read the
patches and I have no idea _why_ you are doing this.

Paolo

>  QEMUMachineInitArgs args = { .ram_size = ram_size,
>   .boot_device = (boot_devices[0] == '\0') ?
>  machine->boot_order :
> 




[Qemu-devel] [Bug 752476] Re: monitor command mouse_button 1 moves mouse

2013-04-05 Thread Paolo Bonzini
I think the right fix would be to remember the last position in
ui/input.c, and use a new function kbd_mouse_button_event.  mouse_move,
followed by moving the mouse in the UI, followed by mouse_button 5
minutes later should not remember the position of the first mouse_move.

Also, mouse_move should call dpy_mouse_set.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/752476

Title:
  monitor command mouse_button 1 moves mouse

Status in QEMU:
  New

Bug description:
  via the qemu -monitor interface, it is possible to move and click the mouse 
using
  mouse_move 2 1
  mouse_button 1
  but the mouse_button command always moves the mouse to (0,0) making it rather 
unusable to (auto-)trigger any widgets in the VM from the outside.

  Would be nice to have this available for my qemu/kvm based os-autoinst
  testing framework.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/752476/+subscriptions



Re: [Qemu-devel] [PATCH] ide: refuse WIN_READ_NATIVE_MAX on empty device

2013-04-05 Thread Stefan Hajnoczi
On Fri, Apr 05, 2013 at 12:56:07PM +0200, Markus Armbruster wrote:
> Stefan Hajnoczi  writes:
> 
> > What is the highest addressable sector on an empty CD-ROM?  Nothing is
> > addressable so produce an error.
> >
> > This patch prevents a divide-by-zero in ide_set_sector() since
> > s->sectors and s->heads would be 0.  Not to mention that a sector=-1
> > argument would be nonsense.
> >
> > Note that WIN_READ_NATIVE_MAX can be triggered using hdparm -N 1024
> > /dev/cdrom.  The LBA bit will be set to 1 though, so the only easy way
> > to go down the ide_set_sector() CHS code path which divides by zero is
> > to comment out the s->select & 0x40 case for testing.
> 
> Suggests you did that.
> 
> Have you tried the reproducer with a physical drive?  Does it fail the
> command when empty, too?

Believe it or not, I don't have access to an ATAPI CD-ROM drive.  Would
you be able to try out hdparm -N 1024 /dev/cdrom?

Note that READ NATIVE MAX is optional, real drives may not implement it
since it seems geared towards the Host Protected Area feature which
makes no sense on CD-ROMs.  (The idea is a reserved area on the disk
where system data can be stored and the OS will not touch it.)

Stefan



Re: [Qemu-devel] [PATCH v2 0/4] block: fix I/O throttling oscillations

2013-04-05 Thread Kevin Wolf
Am 05.04.2013 um 11:32 hat Stefan Hajnoczi geschrieben:
> Benoît Canet  reported that QEMU I/O throttling can
> oscillate under continuous I/O.  The test case runs 50 threads performing
> random writes and a -drive iops=150 limit is used.
> 
> Since QEMU I/O throttling is implemented using 100 millisecond time slices,
> we'd expect 150 +/- 15 IOPS.  Anything outside that range indicates a problem
> with the I/O throttling algorithm.
> 
> It turned out that even a single thread performing sequential I/O continuously
> is throttled outside the 150 +/- 15 IOPS range.  The continous stream of I/O
> slows down as time goes on but resets to 150 IOPS again when interrupted.  
> This
> can be tested with:
> 
>   $ iostat -d 1 -x /dev/vdb &
>   $ dd if=/dev/vdb of=/dev/null bs=4096 iflag=direct
> 
> This patches addresses these problems as follows:
> 
> 1. Account for I/O requests when they are submitted instead of completed.  
> This
>ensures that we do not exceed the budget for this slice.  Exceeding the
>budget leads to fluctuations since we have to make up for this later.
> 
> 2. Use constant 100 millisecond slice time.  Adjusting the slice time at
>run-time led to oscillations.  Since the reason for adjusting slice time is
>not clear, drop this behavior.
> 
> I have also included two code clean-up patches.
> 
> Tested-By: Benoit Canet 
> 
> v2:
>  * Account slice_submitted after both bps and iops checks pass [kwolf]

Thanks, applied all to the block branch.

Kevin



Re: [Qemu-devel] [PATCH v4 5/7] aes: move aes.h from include/block to include/qemu

2013-04-05 Thread Stefan Hajnoczi
On Sun, Mar 31, 2013 at 01:02:24PM +0200, Aurelien Jarno wrote:
> Move aes.h from include/block to include/qemu to show it can be reused
> by other subsystems.
> 
> Cc: Kevin Wolf 
> Cc: Stefan Hajnoczi 
> Reviewed-by: Edgar E. Iglesias 
> Reviewed-by: Richard Henderson 
> Signed-off-by: Aurelien Jarno 
> ---
>  block/qcow.c|2 +-
>  block/qcow2.c   |2 +-
>  block/qcow2.h   |2 +-
>  include/block/aes.h |   26 --
>  include/qemu/aes.h  |   26 ++
>  util/aes.c  |2 +-
>  6 files changed, 30 insertions(+), 30 deletions(-)
>  delete mode 100644 include/block/aes.h
>  create mode 100644 include/qemu/aes.h

Reviewed-by: Stefan Hajnoczi 



Re: [Qemu-devel] [PATCH] qemu-char: eliminate busy waiting on can_read returning zero

2013-04-05 Thread Anthony Liguori
Paolo Bonzini  writes:

> The character backend refactoring introduced an undesirable busy wait.
> The busy wait happens if can_read returns zero and there is data available
> on the character device's file descriptor.  Then, the I/O watch will
> fire continuously and, with TCG, the CPU thread will never run.
>
> 1) Char backend asks front end if it can write
> 2) Front end says no
> 3) poll() finds the char backend's descriptor is available
> 4) Goto (1)
>
> What we really want is this (note that step 3 avoids the busy wait):
>
> 1) Char backend asks front end if it can write
> 2) Front end says no
> 3) poll() goes on without char backend's descriptor
> 4) Goto (1) until qemu_chr_accept_input() called
>
> 5) Char backend asks front end if it can write
> 6) Front end says yes
> 7) poll() finds the char backend's descriptor is available
> 8) Backend handler called
>
> After this patch, the IOWatchPoll source and the watch source are
> separated.  The IOWatchPoll is simply a hook that runs during the prepare
> phase on each main loop iteration.  The hook adds/removes the actual
> source depending on the return value from can_read.
>
> A simple reproducer is
>
> qemu-system-i386 -serial mon:stdio
>
> ... followed by banging on the terminal as much as you can. :)  Without
> this patch, emulation will hang.
>
> Signed-off-by: Paolo Bonzini 
> ---
> This supersedes Peter's patch.

I was thinking of this too last night.  I think this is okay in its own
right but I still think Peter's patch is necessary.

A busy I/O thread should not starve the VCPU thread.

>
>  qemu-char.c  | 64 
> +---
>  1 file changed, 21 insertions(+), 43 deletions(-)
>
> diff --git a/qemu-char.c b/qemu-char.c
> index e5eb8dd..d4239b5 100644
> --- a/qemu-char.c
> +++ b/qemu-char.c
> @@ -594,65 +594,52 @@ int recv_all(int fd, void *_buf, int len1, bool 
> single_read)
>  
>  typedef struct IOWatchPoll
>  {
> +GSource parent;
> +
>  GSource *src;
> -int max_size;
> +bool active;
>  
>  IOCanReadHandler *fd_can_read;
>  void *opaque;
> -
> -QTAILQ_ENTRY(IOWatchPoll) node;
>  } IOWatchPoll;
>  
> -static QTAILQ_HEAD(, IOWatchPoll) io_watch_poll_list =
> -QTAILQ_HEAD_INITIALIZER(io_watch_poll_list);
> -
>  static IOWatchPoll *io_watch_poll_from_source(GSource *source)
>  {
> -IOWatchPoll *i;
> -
> -QTAILQ_FOREACH(i, &io_watch_poll_list, node) {
> -if (i->src == source) {
> -return i;
> -}
> -}
> -
> -return NULL;
> +return container_of(source, IOWatchPoll, parent);
>  }
>  
>  static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
>  {
>  IOWatchPoll *iwp = io_watch_poll_from_source(source);
> -
> -iwp->max_size = iwp->fd_can_read(iwp->opaque);
> -if (iwp->max_size == 0) {
> +bool active = iwp->fd_can_read(iwp->opaque) > 0;
> +if (iwp->active == active) {
>  return FALSE;
>  }
>  
> -return g_io_watch_funcs.prepare(source, timeout_);
> +iwp->active = active;
> +if (active) {
> +g_source_attach(iwp->src, NULL);
> +} else {
> +g_source_remove(g_source_get_id(iwp->src));
> +}
> +return FALSE;
>  }
>  
>  static gboolean io_watch_poll_check(GSource *source)
>  {
> -IOWatchPoll *iwp = io_watch_poll_from_source(source);
> -
> -if (iwp->max_size == 0) {
> -return FALSE;
> -}
> -
> -return g_io_watch_funcs.check(source);
> +return FALSE;
>  }
>  
>  static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
> gpointer user_data)
>  {
> -return g_io_watch_funcs.dispatch(source, callback, user_data);
> +abort();

Why is this abort() okay?

Regards,

Anthony Liguori

>  }
>  
>  static void io_watch_poll_finalize(GSource *source)
>  {
>  IOWatchPoll *iwp = io_watch_poll_from_source(source);
> -QTAILQ_REMOVE(&io_watch_poll_list, iwp, node);
> -g_io_watch_funcs.finalize(source);
> +g_source_unref(iwp->src);
>  }
>  
>  static GSourceFuncs io_watch_poll_funcs = {
> @@ -669,24 +657,15 @@ static guint io_add_watch_poll(GIOChannel *channel,
> gpointer user_data)
>  {
>  IOWatchPoll *iwp;
> -GSource *src;
> -guint tag;
> -
> -src = g_io_create_watch(channel, G_IO_IN | G_IO_ERR | G_IO_HUP);
> -g_source_set_funcs(src, &io_watch_poll_funcs);
> -g_source_set_callback(src, (GSourceFunc)fd_read, user_data, NULL);
> -tag = g_source_attach(src, NULL);
> -g_source_unref(src);
>  
> -iwp = g_malloc0(sizeof(*iwp));
> -iwp->src = src;
> -iwp->max_size = 0;
> +iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, 
> sizeof(IOWatchPoll));
> +iwp->active = FALSE;
>  iwp->fd_can_read = fd_can_read;
>  iwp->opaque = user_data;
> +iwp->src = g_io_create_watch(channel, G_IO_IN | G_IO_ERR | G_IO

Re: [Qemu-devel] [Qemu-trivial] [PATCH] linux-user: Don't omit comma for strace of rt_sigaction()

2013-04-05 Thread Stefan Hajnoczi
On Thu, Mar 28, 2013 at 02:33:24PM +, Peter Maydell wrote:
> Pass the 'last' parameter of print_signal() through to
> print_raw_param(); this fixes a problem where we weren't printing
> the comma separator for strace of rt_sigaction() when the signal
> was an unnamed (ie realtime) one:
>   6856 rt_sigaction(230xf6fff870,0xf6fff8fc) = 0
> 
> Signed-off-by: Peter Maydell 
> ---
>  linux-user/strace.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Thanks, applied to the trivial patches tree:
https://github.com/stefanha/qemu/commits/trivial-patches

Stefan



Re: [Qemu-devel] [PATCH] Allow clock_gettime() monotonic clock to be utilized on more OS's

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [PATCH] tpm: Fix several compiler warnings (redefined data types)

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [PATCH] configure: remove unset variables

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [PATCH 00/11] build ACPI MADT for fw_cfg clients

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [PATCH] help: add docs for missing 'queues' option of tap

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [PATCH 0/3] chardev-frontends: Explicitly check, inc and dec avail_connections v2

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [v2 PATCH] acpi: initialize s4_val used in s4 shutdown

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [PATCH] target-i386: Check for host features before filter_features_for_kvm()

2013-04-05 Thread Anthony Liguori
Applied.  Thanks.

Regards,

Anthony Liguori




Re: [Qemu-devel] [PATCH] target-s390: Fix SRNMT

2013-04-05 Thread Stefan Hajnoczi
On Sat, Mar 30, 2013 at 10:03:25AM -0700, Richard Henderson wrote:
> Fallthough into abort = oops.
> 
> Cc: qemu-triv...@nongnu.org
> Cc: Alexander Graf 
> Signed-off-by: Richard Henderson 
> ---
>  target-s390x/translate.c | 1 +
>  1 file changed, 1 insertion(+)

Thanks, applied to the trivial patches tree:
https://github.com/stefanha/qemu/commits/trivial-patches

Stefan



Re: [Qemu-devel] [PATCH] qemu-char: eliminate busy waiting on can_read returning zero

2013-04-05 Thread Anthony Liguori
Paolo Bonzini  writes:

> The character backend refactoring introduced an undesirable busy wait.
> The busy wait happens if can_read returns zero and there is data available
> on the character device's file descriptor.  Then, the I/O watch will
> fire continuously and, with TCG, the CPU thread will never run.
>
> 1) Char backend asks front end if it can write
> 2) Front end says no
> 3) poll() finds the char backend's descriptor is available
> 4) Goto (1)
>
> What we really want is this (note that step 3 avoids the busy wait):
>
> 1) Char backend asks front end if it can write
> 2) Front end says no
> 3) poll() goes on without char backend's descriptor
> 4) Goto (1) until qemu_chr_accept_input() called
>
> 5) Char backend asks front end if it can write
> 6) Front end says yes
> 7) poll() finds the char backend's descriptor is available
> 8) Backend handler called
>
> After this patch, the IOWatchPoll source and the watch source are
> separated.  The IOWatchPoll is simply a hook that runs during the prepare
> phase on each main loop iteration.  The hook adds/removes the actual
> source depending on the return value from can_read.
>
> A simple reproducer is
>
> qemu-system-i386 -serial mon:stdio
>
> ... followed by banging on the terminal as much as you can. :)  Without
> this patch, emulation will hang.
>
> Signed-off-by: Paolo Bonzini 

I guess this works with migration because we assume that after migration
the main loop will do a complete run?  Is this a safe assumption or does
there need to be a qemu_notify_event() somewhere after migration to make
sure this doesn't cause a hang?

Regards,

Anthony Liguori

> ---
> This supersedes Peter's patch.
>
>  qemu-char.c  | 64 
> +---
>  1 file changed, 21 insertions(+), 43 deletions(-)
>
> diff --git a/qemu-char.c b/qemu-char.c
> index e5eb8dd..d4239b5 100644
> --- a/qemu-char.c
> +++ b/qemu-char.c
> @@ -594,65 +594,52 @@ int recv_all(int fd, void *_buf, int len1, bool 
> single_read)
>  
>  typedef struct IOWatchPoll
>  {
> +GSource parent;
> +
>  GSource *src;
> -int max_size;
> +bool active;
>  
>  IOCanReadHandler *fd_can_read;
>  void *opaque;
> -
> -QTAILQ_ENTRY(IOWatchPoll) node;
>  } IOWatchPoll;
>  
> -static QTAILQ_HEAD(, IOWatchPoll) io_watch_poll_list =
> -QTAILQ_HEAD_INITIALIZER(io_watch_poll_list);
> -
>  static IOWatchPoll *io_watch_poll_from_source(GSource *source)
>  {
> -IOWatchPoll *i;
> -
> -QTAILQ_FOREACH(i, &io_watch_poll_list, node) {
> -if (i->src == source) {
> -return i;
> -}
> -}
> -
> -return NULL;
> +return container_of(source, IOWatchPoll, parent);
>  }
>  
>  static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
>  {
>  IOWatchPoll *iwp = io_watch_poll_from_source(source);
> -
> -iwp->max_size = iwp->fd_can_read(iwp->opaque);
> -if (iwp->max_size == 0) {
> +bool active = iwp->fd_can_read(iwp->opaque) > 0;
> +if (iwp->active == active) {
>  return FALSE;
>  }
>  
> -return g_io_watch_funcs.prepare(source, timeout_);
> +iwp->active = active;
> +if (active) {
> +g_source_attach(iwp->src, NULL);
> +} else {
> +g_source_remove(g_source_get_id(iwp->src));
> +}
> +return FALSE;
>  }
>  
>  static gboolean io_watch_poll_check(GSource *source)
>  {
> -IOWatchPoll *iwp = io_watch_poll_from_source(source);
> -
> -if (iwp->max_size == 0) {
> -return FALSE;
> -}
> -
> -return g_io_watch_funcs.check(source);
> +return FALSE;
>  }
>  
>  static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
> gpointer user_data)
>  {
> -return g_io_watch_funcs.dispatch(source, callback, user_data);
> +abort();
>  }
>  
>  static void io_watch_poll_finalize(GSource *source)
>  {
>  IOWatchPoll *iwp = io_watch_poll_from_source(source);
> -QTAILQ_REMOVE(&io_watch_poll_list, iwp, node);
> -g_io_watch_funcs.finalize(source);
> +g_source_unref(iwp->src);
>  }
>  
>  static GSourceFuncs io_watch_poll_funcs = {
> @@ -669,24 +657,15 @@ static guint io_add_watch_poll(GIOChannel *channel,
> gpointer user_data)
>  {
>  IOWatchPoll *iwp;
> -GSource *src;
> -guint tag;
> -
> -src = g_io_create_watch(channel, G_IO_IN | G_IO_ERR | G_IO_HUP);
> -g_source_set_funcs(src, &io_watch_poll_funcs);
> -g_source_set_callback(src, (GSourceFunc)fd_read, user_data, NULL);
> -tag = g_source_attach(src, NULL);
> -g_source_unref(src);
>  
> -iwp = g_malloc0(sizeof(*iwp));
> -iwp->src = src;
> -iwp->max_size = 0;
> +iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, 
> sizeof(IOWatchPoll));
> +iwp->active = FALSE;
>  iwp->fd_can_read = fd_can_read;
>  iwp->opaque = user_data;
> +iwp->src = g_io_cr

Re: [Qemu-devel] [PATCH] ide: refuse WIN_READ_NATIVE_MAX on empty device

2013-04-05 Thread Markus Armbruster
Stefan Hajnoczi  writes:

> On Fri, Apr 05, 2013 at 12:56:07PM +0200, Markus Armbruster wrote:
>> Stefan Hajnoczi  writes:
>> 
>> > What is the highest addressable sector on an empty CD-ROM?  Nothing is
>> > addressable so produce an error.
>> >
>> > This patch prevents a divide-by-zero in ide_set_sector() since
>> > s->sectors and s->heads would be 0.  Not to mention that a sector=-1
>> > argument would be nonsense.
>> >
>> > Note that WIN_READ_NATIVE_MAX can be triggered using hdparm -N 1024
>> > /dev/cdrom.  The LBA bit will be set to 1 though, so the only easy way
>> > to go down the ide_set_sector() CHS code path which divides by zero is
>> > to comment out the s->select & 0x40 case for testing.
>> 
>> Suggests you did that.
>> 
>> Have you tried the reproducer with a physical drive?  Does it fail the
>> command when empty, too?
>
> Believe it or not, I don't have access to an ATAPI CD-ROM drive.  Would
> you be able to try out hdparm -N 1024 /dev/cdrom?
>
> Note that READ NATIVE MAX is optional, real drives may not implement it
> since it seems geared towards the Host Protected Area feature which
> makes no sense on CD-ROMs.  (The idea is a reserved area on the disk
> where system data can be stored and the OS will not touch it.)
>
> Stefan

# hdparm -N /dev/cdrom

/dev/cdrom:
 READ_NATIVE_MAX_ADDRESS failed: Input/output error
# hdparm -N 1024 /dev/cdrom

/dev/cdrom:
 setting max visible sectors to 1024 (temporary)
 READ_NATIVE_MAX_ADDRESS failed: Input/output error
 READ_NATIVE_MAX_ADDRESS failed: Input/output error

Same with and without media.

If the command makes no sense for CD-ROMs, and generally isn't
implemented by them, we should consider not implementing either, by
clearing its IDE_CD bit in ide_cmd_table.



Re: [Qemu-devel] [PATCH v2] hw/i386/pc: reject to boot a wrong header magic kernel

2013-04-05 Thread Stefan Hajnoczi
On Mon, Apr 01, 2013 at 09:21:57AM +0800, liguang wrote:
> if head magic is missing or wrong unexpectedly, we'd
> better to reject booting.
> e.g.
> I make a mistake to boot a vmlinuz for MIPS(which
> I think it's for x86) like this:
> qemu-system-x86_64 -kernel vmlinuz -initrd demord
> then qemu report:
> "qemu: linux kernel too old to load a ram disk"
> that's misleading.
> 
> Signed-off-by: liguang 
> ---
>  hw/i386/pc.c |3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index b1e06fa..bfbb5fe 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -683,6 +683,9 @@ static void load_linux(void *fw_cfg,
>  if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
> kernel_cmdline, kernel_size, header)) {
>  return;
> +} else {
> +fprintf(stderr, "not a valid multiboot image!\n");
> +exit(1);
>  }
>  protocol = 0;

The point of protocol = 0 is to support old Linux kernels.  If we exit
when probing load_multiboot() fails then QEMU will no longer load old
Linux kernels!

I suggest just:
fprintf(stderr, "warning: not a valid multiboot or modern kernel image\n");

Stefan



Re: [Qemu-devel] [PATCH] qemu-char: eliminate busy waiting on can_read returning zero

2013-04-05 Thread Paolo Bonzini
Il 05/04/2013 14:54, Anthony Liguori ha scritto:
> I guess this works with migration because we assume that after migration
> the main loop will do a complete run?

Yes, migration will terminate in an fd handler, and the next round of
the main loop will re-evaluate chr_read.

> Is this a safe assumption or does
> there need to be a qemu_notify_event() somewhere after migration to make
> sure this doesn't cause a hang?

There could be a qemu_chr_accept_input() for all character devices after
migration.  I think that would be a separate patch.

Regarding the need or not for Peter's patch: the patch might be needed
this kind of busy-wait fix was required often.  As far as I recall, this
is the first we ever had, and it came after an almost-complete rewrite.
 It seems rare enough, that it's much better to fix the root causes when
they appear---not the symptoms.

Paolo



Re: [Qemu-devel] [Qemu-trivial] [PATCH v1 0/2] Remove un-needed use of ssi_create_slave_no_init

2013-04-05 Thread Stefan Hajnoczi
On Thu, Apr 04, 2013 at 11:03:14AM +1000, Peter Crosthwaite wrote:
> Trivial code cleanup of the PetaLogix and Zynq machine models.
> 
> 
> Peter Crosthwaite (2):
>   petalogix_ml605_mmu: Cleanup ssi_create_slave()
>   xilinx_zynq: Cleanup ssi_create_slave
> 
>  hw/arm/xilinx_zynq.c|3 +--
>  hw/microblaze/petalogix_ml605_mmu.c |3 +--
>  2 files changed, 2 insertions(+), 4 deletions(-)
> 
> 

Thanks, applied to the trivial patches tree:
https://github.com/stefanha/qemu/commits/trivial-patches

Stefan



Re: [Qemu-devel] [Qemu-trivial] [PATCH] target-i386: fix order of checks in cpu_get_phys_page_debug

2013-04-05 Thread Stefan Hajnoczi
On Thu, Apr 04, 2013 at 07:13:03PM -0400, Brendan Dolan-Gavitt wrote:
> In target-i386 cpu_get_phys_page_debug, the CR4_PAE bit is checked
> before CR0_PG. This means that if paging is disabled but the PAE bit has
> been set in CR4, cpu_get_phys_page_debug will return the wrong result
> (it will try to translate the address as virtual rather than using it as
> a physical address). This patch fixes that by moving the CR0_PG check to
> the beginning of the function.
> 
> This shows up when booting the Linux kernel on amd64 with "-d in_asm".
> The kernel turns on the PAE bit in CR4 before turning on paging, and so
> QEMU's disassembler will fail because it will try to walk the page
> tables to fetch code even though paging is disabled. The symptom is
> incorrect disassembly and some "Disassembler disagrees with translator
> over instruction decoding" messages.
> 
> This was also reported as bug #1163065.
> 
> Signed-off-by: Brendan Dolan-Gavitt 
> ---
>  target-i386/helper.c |  121 
> ++
>  1 file changed, 64 insertions(+), 57 deletions(-)

Sorry, not trivial :).

Stefan



[Qemu-devel] [PATCH v7 2/2] iotests: Add 'check -ssh' option to test Secure Shell block device.

2013-04-05 Thread Richard W.M. Jones
From: "Richard W.M. Jones" 

Note in order to run these tests on ssh, you must be running a local
ssh daemon, and that daemon must accept loopback connections, and
ssh-agent has to be set up to allow logins on the local daemon.  In
other words, the following command should just work without demanding
any passphrase:

 ssh localhost

Signed-off-by: Richard W.M. Jones 
---
 tests/qemu-iotests/common| 5 +
 tests/qemu-iotests/common.rc | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index b3aad89..6826ea7 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -137,6 +137,7 @@ check options
 -rbdtest rbd
 -sheepdog   test sheepdog
 -nbdtest nbd
+-sshtest ssh
 -xdiff graphical mode diff
 -nocache   use O_DIRECT on backing file
 -misalign  misalign memory allocations
@@ -206,6 +207,10 @@ testlist options
IMGPROTO=nbd
xpand=false
;;
+-ssh)
+IMGPROTO=ssh
+xpand=false
+;;
-nocache)
QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache"
xpand=false
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index e522d61..a536bf7 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -52,6 +52,9 @@ if [ "$IMGPROTO" = "file" ]; then
 elif [ "$IMGPROTO" = "nbd" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 TEST_IMG="nbd:127.0.0.1:10810"
+elif [ "$IMGPROTO" = "ssh" ]; then
+TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
+TEST_IMG="ssh://127.0.0.1$TEST_IMG_FILE"
 else
 TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
 fi
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH v6 0/2] block: Add support for Secure Shell (ssh) block device

2013-04-05 Thread Richard W.M. Jones
On Fri, Apr 05, 2013 at 10:55:57AM +0100, Richard W.M. Jones wrote:
> The only change here is to use g_get_user_name from glib, rebasing
> against latest git, and to re-test everything still works.

Ignore v6.  I just posted v7 on this list.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
virt-p2v converts physical machines to virtual machines.  Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v



[Qemu-devel] [PATCH v7 0/2] block: Add support for Secure Shell (ssh) block device.

2013-04-05 Thread Richard W.M. Jones
Since v6:

- This fixes a rather serious race condition in the previous versions,
  which only manifested itself when using 'snapshot=on'.

- Store the attributes (like filesize) in the state.

- Macros for LOCK/UNLOCK to make debugging simpler.

- Added an ssh driver to libguestfs and tested with that:
  https://www.redhat.com/archives/libguestfs/2013-April/msg00022.html

Rich.




[Qemu-devel] [PATCH v7 1/2] block: Add support for Secure Shell (ssh) block device.

2013-04-05 Thread Richard W.M. Jones
From: "Richard W.M. Jones" 

  qemu-system-x86_64 -drive file=ssh://hostname/some/image

QEMU will ssh into 'hostname' and open '/some/image' which is made
available as a standard block device.

You can specify a username (ssh://user@host/...) and/or a port number
(ssh://host:port/...).  You can also use an alternate syntax using
properties (file.user, file.host, file.port, file.path).

Current limitations:

- Authentication must be done without passwords or passphrases, using
  ssh-agent.  Other authentication methods are not supported.

- Uses a single connection, instead of concurrent AIO with multiple
  SSH connections.

This is implemented using libssh2 on the client side.  The server just
requires a regular ssh daemon with sftp-server support.  Most ssh
daemons on Unix/Linux systems will work out of the box.

Signed-off-by: Richard W.M. Jones 
Cc: Stefan Hajnoczi 
Cc: Kevin Wolf 
---
 block/Makefile.objs |   1 +
 block/ssh.c | 880 
 configure   |  48 +++
 qemu-doc.texi   |  40 +++
 qemu-options.hx |  12 +
 5 files changed, 981 insertions(+)
 create mode 100644 block/ssh.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index c067f38..6c4b5bc 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -13,6 +13,7 @@ block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_LIBSSH2) += ssh.o
 endif
 
 common-obj-y += stream.o
diff --git a/block/ssh.c b/block/ssh.c
new file mode 100644
index 000..9ed2451
--- /dev/null
+++ b/block/ssh.c
@@ -0,0 +1,880 @@
+/*
+ * Secure Shell (ssh) backend for QEMU.
+ *
+ * Copyright (C) 2013 Red Hat Inc., Richard W.M. Jones 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "block/block_int.h"
+#include "qemu/sockets.h"
+#include "qemu/uri.h"
+#include "qapi/qmp/qint.h"
+
+/* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in
+ * this block driver code.
+ *
+ * TRACE_LIBSSH2= enables tracing in libssh2 itself.  Note
+ * that this requires that libssh2 was specially compiled with the
+ * `./configure --enable-debug' option, so most likely you will have
+ * to compile it yourself.  The meaning of  is described
+ * here: http://www.libssh2.org/libssh2_trace.html
+ */
+#define DEBUG_SSH 0
+#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
+
+#if DEBUG_SSH
+#define DPRINTF(fmt,...)\
+do {\
+fprintf(stderr, "ssh: %-15s " fmt "\n", __func__, ##__VA_ARGS__); \
+} while (0)
+#else
+#define DPRINTF(fmt,...) /* nothing */
+#endif
+
+typedef struct BDRVSSHState {
+/* Coroutine. */
+CoMutex lock;
+
+/* SSH connection. */
+int sock; /* socket */
+LIBSSH2_SESSION *session; /* ssh session */
+LIBSSH2_SFTP *sftp;   /* sftp session */
+LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
+
+/* See ssh_seek() function below. */
+int64_t offset;
+bool offset_op_read;
+
+/* File attributes at open.  We try to keep the .filesize field
+ * updated if it changes (eg by writing at the end of the file).
+ */
+LIBSSH2_SFTP_ATTRIBUTES attrs;
+} BDRVSSHState;
+
+#define LOCK(s) do {\
+DPRINTF("acquiring the lock");  \
+qemu_co_mutex_lock(&s->lock);   \
+DPRINTF("acquired the lock");   \
+} while (0)
+
+#define UNLOCK(s) do {\
+qemu_co_mutex_unlock(&s->lock);   \
+DPRINTF("released the lock"); \
+} while (0)
+
+static void ssh_state_init(BDRVSSHState *s)
+{
+memset(s, 0, sizeof *s);
+s->

[Qemu-devel] [PULL 0/4] QMP queue

2013-04-05 Thread Luiz Capitulino
The changes (since d05ef160453e98546a4197496dc8a3cb2defac53) are available
in the following repository:

git://repo.or.cz/qemu/qmp-unstable.git queue/qmp

Luiz Capitulino (4):
  qstring: add qstring_get_length()
  Monitor: Make output buffer dynamic
  hmp: human-monitor-command: stop using the Memory chardev driver
  chardev: drop the Memory chardev driver

 include/qapi/qmp/qstring.h |  1 +
 monitor.c  | 60 +--
 qemu-char.c| 64 --
 qobject/qstring.c  |  8 ++
 4 files changed, 44 insertions(+), 89 deletions(-)

-- 
1.8.1.4



[Qemu-devel] [PULL 1/4] qstring: add qstring_get_length()

2013-04-05 Thread Luiz Capitulino
Long overdue.

Signed-off-by: Luiz Capitulino 
Reviewed-by: Eric Blake 
Acked-by: Gerd Hoffmann 
---
 include/qapi/qmp/qstring.h | 1 +
 qobject/qstring.c  | 8 
 2 files changed, 9 insertions(+)

diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h
index 0e690f4..1bc3666 100644
--- a/include/qapi/qmp/qstring.h
+++ b/include/qapi/qmp/qstring.h
@@ -26,6 +26,7 @@ typedef struct QString {
 QString *qstring_new(void);
 QString *qstring_from_str(const char *str);
 QString *qstring_from_substr(const char *str, int start, int end);
+size_t qstring_get_length(const QString *qstring);
 const char *qstring_get_str(const QString *qstring);
 void qstring_append_int(QString *qstring, int64_t value);
 void qstring_append(QString *qstring, const char *str);
diff --git a/qobject/qstring.c b/qobject/qstring.c
index 5f7376c..607b7a1 100644
--- a/qobject/qstring.c
+++ b/qobject/qstring.c
@@ -32,6 +32,14 @@ QString *qstring_new(void)
 }
 
 /**
+ * qstring_get_length(): Get the length of a QString
+ */
+size_t qstring_get_length(const QString *qstring)
+{
+return qstring->length;
+}
+
+/**
  * qstring_from_substr(): Create a new QString from a C string substring
  *
  * Return string reference
-- 
1.8.1.4




[Qemu-devel] [PULL 2/4] Monitor: Make output buffer dynamic

2013-04-05 Thread Luiz Capitulino
Commit f628926bb423fa8a7e0b114511400ea9df38b76a changed monitor_flush()
to retry on qemu_chr_fe_write() errors. However, the Monitor's output
buffer can keep growing while the retry is not issued and this can
cause the buffer to overflow.

To reproduce this issue, just start qemu and type on the Monitor:

(qemu) ?

This will cause an assertion to trig.

To fix this problem this commit makes the Monitor buffer dynamic,
which means that it can grow as much as needed.

Signed-off-by: Luiz Capitulino 
Reviewed-by: Eric Blake 
Acked-by: Gerd Hoffmann 
---
 monitor.c | 45 -
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/monitor.c b/monitor.c
index 4ec1db9..8712c53 100644
--- a/monitor.c
+++ b/monitor.c
@@ -188,8 +188,7 @@ struct Monitor {
 int reset_seen;
 int flags;
 int suspend_cnt;
-uint8_t outbuf[1024];
-int outbuf_index;
+QString *outbuf;
 ReadLineState *rs;
 MonitorControl *mc;
 CPUArchState *mon_cpu;
@@ -271,45 +270,52 @@ static gboolean monitor_unblocked(GIOChannel *chan, 
GIOCondition cond,
 void monitor_flush(Monitor *mon)
 {
 int rc;
+size_t len;
+const char *buf;
+
+buf = qstring_get_str(mon->outbuf);
+len = qstring_get_length(mon->outbuf);
 
-if (mon && mon->outbuf_index != 0 && !mon->mux_out) {
-rc = qemu_chr_fe_write(mon->chr, mon->outbuf, mon->outbuf_index);
-if (rc == mon->outbuf_index) {
+if (mon && len && !mon->mux_out) {
+rc = qemu_chr_fe_write(mon->chr, (const uint8_t *) buf, len);
+if (rc == len) {
 /* all flushed */
-mon->outbuf_index = 0;
+QDECREF(mon->outbuf);
+mon->outbuf = qstring_new();
 return;
 }
 if (rc > 0) {
 /* partinal write */
-memmove(mon->outbuf, mon->outbuf + rc, mon->outbuf_index - rc);
-mon->outbuf_index -= rc;
+QString *tmp = qstring_from_str(buf + rc);
+QDECREF(mon->outbuf);
+mon->outbuf = tmp;
 }
 qemu_chr_fe_add_watch(mon->chr, G_IO_OUT, monitor_unblocked, mon);
 }
 }
 
-/* flush at every end of line or if the buffer is full */
+/* flush at every end of line */
 static void monitor_puts(Monitor *mon, const char *str)
 {
 char c;
 
 for(;;) {
-assert(mon->outbuf_index < sizeof(mon->outbuf) - 1);
 c = *str++;
 if (c == '\0')
 break;
-if (c == '\n')
-mon->outbuf[mon->outbuf_index++] = '\r';
-mon->outbuf[mon->outbuf_index++] = c;
-if (mon->outbuf_index >= (sizeof(mon->outbuf) - 1)
-|| c == '\n')
+if (c == '\n') {
+qstring_append_chr(mon->outbuf, '\r');
+}
+qstring_append_chr(mon->outbuf, c);
+if (c == '\n') {
 monitor_flush(mon);
+}
 }
 }
 
 void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
 {
-char buf[4096];
+char *buf;
 
 if (!mon)
 return;
@@ -318,8 +324,9 @@ void monitor_vprintf(Monitor *mon, const char *fmt, va_list 
ap)
 return;
 }
 
-vsnprintf(buf, sizeof(buf), fmt, ap);
+buf = g_strdup_vprintf(fmt, ap);
 monitor_puts(mon, buf);
+g_free(buf);
 }
 
 void monitor_printf(Monitor *mon, const char *fmt, ...)
@@ -671,6 +678,8 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 CharDriverState mchar;
 
 memset(&hmp, 0, sizeof(hmp));
+hmp.outbuf = qstring_new();
+
 qemu_chr_init_mem(&mchar);
 hmp.chr = &mchar;
 
@@ -699,6 +708,7 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 }
 
 out:
+QDECREF(hmp.outbuf);
 qemu_chr_close_mem(hmp.chr);
 return output;
 }
@@ -4749,6 +4759,7 @@ void monitor_init(CharDriverState *chr, int flags)
 }
 
 mon = g_malloc0(sizeof(*mon));
+mon->outbuf = qstring_new();
 
 mon->chr = chr;
 mon->flags = flags;
-- 
1.8.1.4




[Qemu-devel] [PULL 4/4] chardev: drop the Memory chardev driver

2013-04-05 Thread Luiz Capitulino
It's not used anymore since the last commit.

Signed-off-by: Luiz Capitulino 
Reviewed-by: Eric Blake 
Acked-by: Gerd Hoffmann 
---
 qemu-char.c | 64 -
 1 file changed, 64 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index e5eb8dd..0f7f32d 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2796,70 +2796,6 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts 
*opts)
 return NULL;
 }
 
-/***/
-/* Memory chardev */
-typedef struct {
-size_t outbuf_size;
-size_t outbuf_capacity;
-uint8_t *outbuf;
-} MemoryDriver;
-
-static int mem_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-MemoryDriver *d = chr->opaque;
-
-/* TODO: the QString implementation has the same code, we should
- * introduce a generic way to do this in cutils.c */
-if (d->outbuf_capacity < d->outbuf_size + len) {
-/* grow outbuf */
-d->outbuf_capacity += len;
-d->outbuf_capacity *= 2;
-d->outbuf = g_realloc(d->outbuf, d->outbuf_capacity);
-}
-
-memcpy(d->outbuf + d->outbuf_size, buf, len);
-d->outbuf_size += len;
-
-return len;
-}
-
-void qemu_chr_init_mem(CharDriverState *chr)
-{
-MemoryDriver *d;
-
-d = g_malloc(sizeof(*d));
-d->outbuf_size = 0;
-d->outbuf_capacity = 4096;
-d->outbuf = g_malloc0(d->outbuf_capacity);
-
-memset(chr, 0, sizeof(*chr));
-chr->opaque = d;
-chr->chr_write = mem_chr_write;
-}
-
-QString *qemu_chr_mem_to_qs(CharDriverState *chr)
-{
-MemoryDriver *d = chr->opaque;
-return qstring_from_substr((char *) d->outbuf, 0, d->outbuf_size - 1);
-}
-
-/* NOTE: this driver can not be closed with qemu_chr_delete()! */
-void qemu_chr_close_mem(CharDriverState *chr)
-{
-MemoryDriver *d = chr->opaque;
-
-g_free(d->outbuf);
-g_free(chr->opaque);
-chr->opaque = NULL;
-chr->chr_write = NULL;
-}
-
-size_t qemu_chr_mem_osize(const CharDriverState *chr)
-{
-const MemoryDriver *d = chr->opaque;
-return d->outbuf_size;
-}
-
 /*/
 /* Ring buffer chardev */
 
-- 
1.8.1.4




[Qemu-devel] [PULL 3/4] hmp: human-monitor-command: stop using the Memory chardev driver

2013-04-05 Thread Luiz Capitulino
The Memory chardev driver was added because, as the Monitor's output
buffer was static, we needed a way to accumulate the output of an
HMP commmand when ran by human-monitor-command.

However, the Monitor's output buffer is now dynamic, so it's possible
for the human-monitor-command to use it instead of the Memory chardev
driver.

This commit does that change, but there are two important
observations about it:

 1. We need a way to signal to the Monitor that it shouldn't call
chardev functions when flushing its output. This is done
by adding a new flag to the Monitor object called skip_flush
(which is set to true by qmp_human_monitor_command())

 2. The current code has buffered semantics: QMP clients will
only see a command's output if it flushes its output with
a new-line character. This commit changes this to unbuffered,
which means that QMP clients will see a command's output
whenever the command prints anything.

I don't think this will matter in practice though, as I believe
all HMP commands print the new-line character anyway.

Signed-off-by: Luiz Capitulino 
Reviewed-by: Eric Blake 
Acked-by: Gerd Hoffmann 
---
 monitor.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/monitor.c b/monitor.c
index 8712c53..b4bda77 100644
--- a/monitor.c
+++ b/monitor.c
@@ -188,6 +188,7 @@ struct Monitor {
 int reset_seen;
 int flags;
 int suspend_cnt;
+bool skip_flush;
 QString *outbuf;
 ReadLineState *rs;
 MonitorControl *mc;
@@ -273,6 +274,10 @@ void monitor_flush(Monitor *mon)
 size_t len;
 const char *buf;
 
+if (mon->skip_flush) {
+return;
+}
+
 buf = qstring_get_str(mon->outbuf);
 len = qstring_get_length(mon->outbuf);
 
@@ -675,13 +680,10 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 {
 char *output = NULL;
 Monitor *old_mon, hmp;
-CharDriverState mchar;
 
 memset(&hmp, 0, sizeof(hmp));
 hmp.outbuf = qstring_new();
-
-qemu_chr_init_mem(&mchar);
-hmp.chr = &mchar;
+hmp.skip_flush = true;
 
 old_mon = cur_mon;
 cur_mon = &hmp;
@@ -699,17 +701,14 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 handle_user_command(&hmp, command_line);
 cur_mon = old_mon;
 
-if (qemu_chr_mem_osize(hmp.chr) > 0) {
-QString *str = qemu_chr_mem_to_qs(hmp.chr);
-output = g_strdup(qstring_get_str(str));
-QDECREF(str);
+if (qstring_get_length(hmp.outbuf) > 0) {
+output = g_strdup(qstring_get_str(hmp.outbuf));
 } else {
 output = g_strdup("");
 }
 
 out:
 QDECREF(hmp.outbuf);
-qemu_chr_close_mem(hmp.chr);
 return output;
 }
 
-- 
1.8.1.4




Re: [Qemu-devel] [PATCH] [RFC] Wire up disabled wait a panicked event on s390

2013-04-05 Thread Luiz Capitulino
On Fri, 05 Apr 2013 10:42:38 +0200
Christian Borntraeger  wrote:

> On s390 the disabled wait state indicates a state of attention.
> For example Linux uses that state after a panic. Lets
> put the system into panicked state.
> 
> An alternative implementation would be to state
> disabled-wait  instead of pause in the action field.
> (e.g. z/OS, z/VM and other classic OSes use the address of the
> disabled wait to indicate an error code).
> 
> Signed-off-by: Christian Borntraeger 
> ---
>  target-s390x/kvm.c | 17 ++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
> index 644f484..0c111f0 100644
> --- a/target-s390x/kvm.c
> +++ b/target-s390x/kvm.c
> @@ -34,6 +34,8 @@
>  #include "sysemu/kvm.h"
>  #include "cpu.h"
>  #include "sysemu/device_tree.h"
> +#include "qapi/qmp/qjson.h"
> +#include "monitor/monitor.h"
>  
>  /* #define DEBUG_KVM */
>  
> @@ -705,9 +707,18 @@ static int handle_intercept(S390CPU *cpu)
>  r = handle_instruction(cpu, run);
>  break;
>  case ICPT_WAITPSW:
> -if (s390_del_running_cpu(cpu) == 0 &&
> -is_special_wait_psw(cs)) {
> -qemu_system_shutdown_request();
> +/* disabled wait, since enabled wait is handled in kernel */
> +if (s390_del_running_cpu(cpu) == 0) {
> +if (is_special_wait_psw(cs)) {
> +qemu_system_shutdown_request();
> +} else {
> +QObject *data;
> +
> +data = qobject_from_jsonf("{ 'action': %s }", "pause");
> +monitor_protocol_event(QEVENT_GUEST_PANICKED, data);
> +qobject_decref(data);
> +vm_stop(RUN_STATE_GUEST_PANICKED);

This is on top of the pvpanic device, right?

Seems good to me, although I don't remember the exact semantics of the
new event. Also, I think you could move this code to a function and
share it with the pvpanic device.

> +}
>  }
>  r = EXCP_HALTED;
>  break;
> 




[Qemu-devel] [PATCH 5/8] block: drop duplicated slice extension code

2013-04-05 Thread Kevin Wolf
From: Stefan Hajnoczi 

The current slice is extended when an I/O request exceeds the limit.
There is no need to extend the slice every time we check a request.

Signed-off-by: Stefan Hajnoczi 
Tested-By: Benoit Canet 
Signed-off-by: Kevin Wolf 
---
 block.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 00eca27..aa16fc4 100644
--- a/block.c
+++ b/block.c
@@ -3867,10 +3867,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, 
int nb_sectors,
 int  bps_ret, iops_ret;
 
 now = qemu_get_clock_ns(vm_clock);
-if ((bs->slice_start < now)
-&& (bs->slice_end > now)) {
-bs->slice_end = now + BLOCK_IO_SLICE_TIME;
-} else {
+if (now > bs->slice_end) {
 bs->slice_start = now;
 bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
 memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
-- 
1.8.1.4




[Qemu-devel] [PATCH 2/8] usb-storage: Forward serial number to scsi-disk

2013-04-05 Thread Kevin Wolf
usb-storage takes care to fetch the USB serial number from -drive
options, but it neglected to pass its own 'serial' property to the
scsi-disk it creates. With this patch, the 'serial' qdev property and
the 'serial' option in -drive behave the same and correctly apply the
serial number on both USB and SCSI level.

Signed-off-by: Kevin Wolf 
Acked-by: Gerd Hoffmann 
---
 hw/pci/pci-hotplug.c | 2 +-
 hw/scsi-bus.c| 8 ++--
 hw/scsi.h| 3 ++-
 hw/usb/dev-storage.c | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/pci/pci-hotplug.c b/hw/pci/pci-hotplug.c
index f38df30..180ee07 100644
--- a/hw/pci/pci-hotplug.c
+++ b/hw/pci/pci-hotplug.c
@@ -99,7 +99,7 @@ static int scsi_hot_add(Monitor *mon, DeviceState *adapter,
 dinfo->unit = qemu_opt_get_number(dinfo->opts, "unit", -1);
 dinfo->bus = scsibus->busnr;
 scsidev = scsi_bus_legacy_add_drive(scsibus, dinfo->bdrv, dinfo->unit,
-false, -1);
+false, -1, NULL);
 if (!scsidev) {
 return -1;
 }
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 08787c2..ac2093a 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -207,7 +207,8 @@ static int scsi_qdev_exit(DeviceState *qdev)
 
 /* handle legacy '-drive if=scsi,...' cmd line args */
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv,
-  int unit, bool removable, int bootindex)
+  int unit, bool removable, int bootindex,
+  const char *serial)
 {
 const char *driver;
 DeviceState *dev;
@@ -221,6 +222,9 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, 
BlockDriverState *bdrv,
 if (object_property_find(OBJECT(dev), "removable", NULL)) {
 qdev_prop_set_bit(dev, "removable", removable);
 }
+if (serial) {
+qdev_prop_set_string(dev, "serial", serial);
+}
 if (qdev_prop_set_drive(dev, "drive", bdrv) < 0) {
 qdev_free(dev);
 return NULL;
@@ -243,7 +247,7 @@ int scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
 continue;
 }
 qemu_opts_loc_restore(dinfo->opts);
-if (!scsi_bus_legacy_add_drive(bus, dinfo->bdrv, unit, false, -1)) {
+if (!scsi_bus_legacy_add_drive(bus, dinfo->bdrv, unit, false, -1, 
NULL)) {
 res = -1;
 break;
 }
diff --git a/hw/scsi.h b/hw/scsi.h
index 33e2e0b..02a1497 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -160,7 +160,8 @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d)
 }
 
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockDriverState *bdrv,
-  int unit, bool removable, int bootindex);
+  int unit, bool removable, int bootindex,
+  const char *serial);
 int scsi_bus_legacy_handle_cmdline(SCSIBus *bus);
 
 /*
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index d3f01aa..21651b3 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -625,7 +625,7 @@ static int usb_msd_initfn_storage(USBDevice *dev)
 usb_desc_init(dev);
 scsi_bus_new(&s->bus, &s->dev.qdev, &usb_msd_scsi_info_storage);
 scsi_dev = scsi_bus_legacy_add_drive(&s->bus, bs, 0, !!s->removable,
-s->conf.bootindex);
+s->conf.bootindex, s->serial);
 if (!scsi_dev) {
 return -1;
 }
-- 
1.8.1.4




[Qemu-devel] [PATCH 3/8] block: fix I/O throttling accounting blind spot

2013-04-05 Thread Kevin Wolf
From: Stefan Hajnoczi 

I/O throttling relies on bdrv_acct_done() which is called when a request
completes.  This leaves a blind spot since we only charge for completed
requests, not submitted requests.

For example, if there is 1 operation remaining in this time slice the
guest could submit 3 operations and they will all be submitted
successfully since they don't actually get accounted for until they
complete.

Originally we probably thought this is okay since the requests will be
accounted when the time slice is extended.  In practice it causes
fluctuations since the guest can exceed its I/O limit and it will be
punished for this later on.

Account for I/O upon submission so that I/O limits are enforced
properly.

Signed-off-by: Stefan Hajnoczi 
Tested-By: Benoit Canet 
Signed-off-by: Kevin Wolf 
---
 block.c   | 21 ++---
 include/block/block_int.h |  2 +-
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 0ae2e93..25976b5 100644
--- a/block.c
+++ b/block.c
@@ -141,7 +141,6 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
 bs->slice_start = 0;
 bs->slice_end   = 0;
 bs->slice_time  = 0;
-memset(&bs->io_base, 0, sizeof(bs->io_base));
 }
 
 static void bdrv_block_timer(void *opaque)
@@ -1436,8 +1435,8 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 bs_dest->slice_time = bs_src->slice_time;
 bs_dest->slice_start= bs_src->slice_start;
 bs_dest->slice_end  = bs_src->slice_end;
+bs_dest->slice_submitted= bs_src->slice_submitted;
 bs_dest->io_limits  = bs_src->io_limits;
-bs_dest->io_base= bs_src->io_base;
 bs_dest->throttled_reqs = bs_src->throttled_reqs;
 bs_dest->block_timer= bs_src->block_timer;
 bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
@@ -3768,9 +3767,9 @@ static bool bdrv_exceed_bps_limits(BlockDriverState *bs, 
int nb_sectors,
 slice_time = bs->slice_end - bs->slice_start;
 slice_time /= (NANOSECONDS_PER_SECOND);
 bytes_limit = bps_limit * slice_time;
-bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
+bytes_base  = bs->slice_submitted.bytes[is_write];
 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
-bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
+bytes_base += bs->slice_submitted.bytes[!is_write];
 }
 
 /* bytes_base: the bytes of data which have been read/written; and
@@ -3828,9 +3827,9 @@ static bool bdrv_exceed_iops_limits(BlockDriverState *bs, 
bool is_write,
 slice_time = bs->slice_end - bs->slice_start;
 slice_time /= (NANOSECONDS_PER_SECOND);
 ios_limit  = iops_limit * slice_time;
-ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
+ios_base   = bs->slice_submitted.ios[is_write];
 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
-ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
+ios_base += bs->slice_submitted.ios[!is_write];
 }
 
 if (ios_base + 1 <= ios_limit) {
@@ -3875,11 +3874,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, 
int nb_sectors,
 bs->slice_start = now;
 bs->slice_end   = now + bs->slice_time;
 
-bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
-bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
-
-bs->io_base.ios[is_write]= bs->nr_ops[is_write];
-bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
+memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
 }
 
 elapsed_time  = now - bs->slice_start;
@@ -3907,6 +3902,10 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, 
int nb_sectors,
 *wait = 0;
 }
 
+bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
+   BDRV_SECTOR_SIZE;
+bs->slice_submitted.ios[is_write]++;
+
 return false;
 }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 0986a2d..83941d8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -256,7 +256,7 @@ struct BlockDriverState {
 int64_t slice_start;
 int64_t slice_end;
 BlockIOLimit io_limits;
-BlockIOBaseValue  io_base;
+BlockIOBaseValue slice_submitted;
 CoQueue  throttled_reqs;
 QEMUTimer*block_timer;
 bool io_limits_enabled;
-- 
1.8.1.4




[Qemu-devel] [PATCH 8/8] qcow2: Fix L1 write error handling in qcow2_update_snapshot_refcount

2013-04-05 Thread Kevin Wolf
It ignored the error code, and at least the 'goto fail' is obvious
nonsense as it creates an endless loop (if the next attempt doesn't
magically succeed) and leaves the in-memory L1 table in big-endian
instead of converting it back.

In error cases, there's no point in writing an updated L1 table, so
skip this part for them.

Signed-off-by: Kevin Wolf 
---
 block/qcow2-refcount.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 4799681..b32738f 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -851,14 +851,16 @@ fail:
 }
 
 /* Update L1 only if it isn't deleted anyway (addend = -1) */
-if (addend >= 0 && l1_modified) {
-for(i = 0; i < l1_size; i++)
+if (ret == 0 && addend >= 0 && l1_modified) {
+for (i = 0; i < l1_size; i++) {
 cpu_to_be64s(&l1_table[i]);
-if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
-l1_size2) < 0)
-goto fail;
-for(i = 0; i < l1_size; i++)
+}
+
+ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+
+for (i = 0; i < l1_size; i++) {
 be64_to_cpus(&l1_table[i]);
+}
 }
 if (l1_allocated)
 g_free(l1_table);
-- 
1.8.1.4




  1   2   3   >