[Qemu-devel] [PATCH V2 2/2] hw/virtio: group virtio flags into an enum

2016-02-10 Thread Marcel Apfelbaum
Minimizes the possibility to assign
the same bit to different features.

Signed-off-by: Marcel Apfelbaum 
---
 hw/virtio/virtio-pci.h | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 6686b10..e4548c2 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -58,30 +58,33 @@ typedef struct VirtioBusClass VirtioPCIBusClass;
 #define VIRTIO_PCI_BUS_CLASS(klass) \
 OBJECT_CLASS_CHECK(VirtioPCIBusClass, klass, TYPE_VIRTIO_PCI_BUS)
 
+enum {
+VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT,
+VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT,
+VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT,
+VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT,
+VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT,
+VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
+VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
+};
+
 /* Need to activate work-arounds for buggy guests at vmstate load. */
-#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT  0
 #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \
 (1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT)
 
 /* Performance improves when virtqueue kick processing is decoupled from the
  * vcpu thread using ioeventfd for some devices. */
-#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
 #define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << 
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
 
 /* virtio version flags */
-#define VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT 2
-#define VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT 3
-#define VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT 6
 #define VIRTIO_PCI_FLAG_DISABLE_LEGACY (1 << 
VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT)
 #define VIRTIO_PCI_FLAG_DISABLE_MODERN (1 << 
VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT)
 #define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)
 
 /* migrate extra state */
-#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT 4
 #define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT)
 
 /* have pio notification for modern device ? */
-#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT 5
 #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \
 (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT)
 
-- 
2.4.3




Re: [Qemu-devel] [PULL v3 00/33] Misc patches for 2016-02-08

2016-02-10 Thread Peter Maydell
On 10 February 2016 at 12:48, Paolo Bonzini  wrote:
>
>
> On 09/02/2016 17:13, Paolo Bonzini wrote:
>> The following changes since commit ac1be2ae6b2995b99430c48329eb971b0281acf1:
>>
>>   Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2016-02-09' 
>> into staging (2016-02-09 11:42:43 +)
>>
>> are available in the git repository at:
>>
>>   git://github.com/bonzini/qemu.git tags/for-upstream
>>
>> for you to fetch changes up to 150dcd1aed6f9ebcf370dbb9b666e7d7c6d908e2:
>>
>>   qemu-char, io: fix ordering of arguments for UDP socket creation 
>> (2016-02-09 17:09:15 +0100)
>>
>> 
>> * switch to C11 atomics (Alex)
>> * Coverity fixes for IPMI (Corey), i386 (Paolo), qemu-char (Paolo)
>> * at long last, fail on wrong .pc files if -m32 is in use (Daniel)
>> * qemu-char regression fix (Daniel)
>> * SAS1068 device (Paolo)
>> * memory region docs improvements (Peter)
>> * target-i386 cleanups (Richard)
>> * qemu-nbd docs improvements (Sitsofe)
>> * thread-safe memory hotplug (Stefan)
>>
>> 

> Self-NACK, this breaks (at least) FreeDOS.

Oops, I read this email five seconds after pushing the merge to master.
Can you send out reverts for the appropriate patches and I'll apply
them direct to master?

thanks
-- PMM



Re: [Qemu-devel] [PATCH] target-i386/kvm.c: Fix the order of FPU registers in xsave

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 12:02, Asia Slowinska wrote:
> Stick to the expected order of the FPU registers in xsave (as specified
> in the
> Intel manual.) Otherwise, instructions loading the FPU state don't set
> it up
> correctly.
> 
> To set up FPU, software needs to provide a buffer of 80 bytes
> storing 8 FPU registers. They are organized in a stack. FPU assumes that the
> first field of the buffer is ST0, then ST1, and so on. QEMU maintains a
> circular buffer. When preparing these 80 bytes for KVM, QEMU just uses
> memcpy
> instead of copying the elements in a proper order.
> 
> Signed-off-by: Asia Slowinska >
> ---
>  target-i386/kvm.c | 12 
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 94024bc..c77fe73 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -1325,8 +1325,10 @@ static int kvm_put_xsave(X86CPU *cpu)
>  xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
>  memcpy(>region[XSAVE_CWD_RIP], >fpip, sizeof(env->fpip));
>  memcpy(>region[XSAVE_CWD_RDP], >fpdp, sizeof(env->fpdp));
> -memcpy(>region[XSAVE_ST_SPACE], env->fpregs,
> -sizeof env->fpregs);
> +for (i = 0; i < 8; i++) {
> +memcpy(_region[HXSAVE_ST_SPACE + i * 4],
> +>fpregs[(env->fpstt + i) & 7], 16);
> +}
>  xsave->region[XSAVE_MXCSR] = env->mxcsr;
>  *(uint64_t *)>region[XSAVE_XSTATE_BV] = env->xstate_bv;
>  memcpy(>region[XSAVE_BNDREGS], env->bnd_regs,
> @@ -1745,8 +1747,10 @@ static int kvm_get_xsave(X86CPU *cpu)
>  memcpy(>fpip, >region[XSAVE_CWD_RIP], sizeof(env->fpip));
>  memcpy(>fpdp, >region[XSAVE_CWD_RDP], sizeof(env->fpdp));
>  env->mxcsr = xsave->region[XSAVE_MXCSR];
> -memcpy(env->fpregs, >region[XSAVE_ST_SPACE],
> -sizeof env->fpregs);
> +for (i = 0; i < 8; i++) {
> +memcpy(>fpregs[(env->fpstt + i) & 7],
> +_region[HXSAVE_ST_SPACE + i * 4], 16);

Hi, the patch is missing a definition of HXSAVE_ST_SPACE.

Thanks,

Paolo

> +}
>  env->xstate_bv = *(uint64_t *)>region[XSAVE_XSTATE_BV];
>  memcpy(env->bnd_regs, >region[XSAVE_BNDREGS],
>  sizeof env->bnd_regs);
> -- 
> 1.9.1
> 
> 



Re: [Qemu-devel] [PATCH 2/6] qmp: add query-block-dirty-bitmap-ranges

2016-02-10 Thread Denis V. Lunev

On 02/10/2016 06:26 PM, John Snow wrote:


On 02/10/2016 08:57 AM, Denis V. Lunev wrote:

On 02/10/2016 01:08 PM, Stefan Hajnoczi wrote:

On Sat, Jan 30, 2016 at 01:56:30PM +0300, Vladimir Sementsov-Ogievskiy
wrote:

Add qmp command to query dirty bitmap contents. This is needed for
external backup.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
   block/dirty-bitmap.c | 55
+++
   blockdev.c   | 62

   include/block/dirty-bitmap.h |  7 +
   qapi/block-core.json | 54
++
   qmp-commands.hx  | 33 +++
   5 files changed, 211 insertions(+)

This API produces large replies and/or requires many calls to fetch all
bitmap data.  The worst case is a 101010... bitmap.

I consider the dirty bitmap to be data (vs control) and not something
that should be sent over a control channel like the QMP monitor.

How about writing the dirty bitmap to a file?  The new bitmap file
format that Fam is working on could be used.  That way the dirty bitmap
can be saved asynchronously without hogging the QMP monitor.

Reasonable point.

May be it would be better to setup "special" NBD server inside
QEMU which will allow to directly "read" bitmap data.

Any opinion?

Den

Or perhaps something like migration, where the client receiving the data
opens a socket of some sort, and QEMU connects to that socket to send
the data.

no. The point is that QEMU should be queried for data.
May be even via several sockets to provide it in
parallel.

Den



[Qemu-devel] [PATCH V2 0/2] hw/virtio: fix double use of a virtio flag

2016-02-10 Thread Marcel Apfelbaum
To the obvious question of "how did that happen?"
I can say we had an unlucky break.
Both Jason and me worked on a new different virtio feature in the same
time, and they were both merged in the same pull request.
We both saw BIT 3 as the last used 
https://lists.gnu.org/archive/html/qemu-devel/2015-11/msg03041.html

Commits 1811e64c and a6df8adf use the same virtio feature bit 4
for different features.

Fix it by using different bits.
While at it, group all the virtio flags into an enum to avoid that
in the feature.


v1 -> v2:
  - Addressed Laurent Vivier's comment:
 - forgot to remove a flag
  - Added teset-by to the first patch

Marcel Apfelbaum (2):
  hw/virtio: fix double use of a virtio flag
  hw/virtio: group virtio flags into an enum

 hw/virtio/virtio-pci.h | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

-- 
2.4.3




[Qemu-devel] [PATCH V2 1/2] hw/virtio: fix double use of a virtio flag

2016-02-10 Thread Marcel Apfelbaum
Commits 1811e64c and a6df8adf use the same virtio feature bit 4
for different features.

Fix it by using different bits.

Reported-by: Laurent Vivier 
Tested-by: Laurent Vivier 
Signed-off-by: Marcel Apfelbaum 
---
 hw/virtio/virtio-pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index e096e98..6686b10 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -71,7 +71,7 @@ typedef struct VirtioBusClass VirtioPCIBusClass;
 /* virtio version flags */
 #define VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT 2
 #define VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT 3
-#define VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT 4
+#define VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT 6
 #define VIRTIO_PCI_FLAG_DISABLE_LEGACY (1 << 
VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT)
 #define VIRTIO_PCI_FLAG_DISABLE_MODERN (1 << 
VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT)
 #define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)
-- 
2.4.3




Re: [Qemu-devel] [PATCH v2 1/5] target-arm: Add the pmceid0 and pmceid1 registers

2016-02-10 Thread Aaron Lindsay
On Feb 09 15:11, Alistair Francis wrote:
> On Tue, Feb 9, 2016 at 9:19 AM, Peter Maydell  
> wrote:
> > On 6 February 2016 at 00:55, Alistair Francis
> >  wrote:
> >> Signed-off-by: Aaron Lindsay 
> >> Signed-off-by: Alistair Francis 
> >> Tested-by: Nathan Rossi 
> >> ---
> >>
> >>  target-arm/cpu-qom.h | 2 ++
> >>  target-arm/cpu.c | 2 ++
> >>  target-arm/cpu64.c   | 2 ++
> >>  target-arm/helper.c  | 8 
> >>  4 files changed, 14 insertions(+)
> >>
> >> diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
> >> index 07c0a71..1cc4502 100644
> >> --- a/target-arm/cpu-qom.h
> >> +++ b/target-arm/cpu-qom.h
> >> @@ -148,6 +148,8 @@ typedef struct ARMCPU {
> >>  uint32_t id_pfr0;
> >>  uint32_t id_pfr1;
> >>  uint32_t id_dfr0;
> >> +uint32_t pmceid0;
> >> +uint32_t pmceid1;
> >>  uint32_t id_afr0;
> >>  uint32_t id_mmfr0;
> >>  uint32_t id_mmfr1;
> >> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> >> index 7ddbf3d..937f845 100644
> >> --- a/target-arm/cpu.c
> >> +++ b/target-arm/cpu.c
> >> @@ -1156,6 +1156,8 @@ static void cortex_a15_initfn(Object *obj)
> >>  cpu->id_pfr0 = 0x1131;
> >>  cpu->id_pfr1 = 0x00011011;
> >>  cpu->id_dfr0 = 0x02010555;
> >> +cpu->pmceid0 = 0x0481; /* PMUv3 events 0x0, 0x8, and 0x11 */
> >
> > These are:
> >  SW_INCR   # insn architecturally executed, cc pass, software increment
> >  INST_RETIRED # insn architecturally executed
> >  CPU_CYCLES # cycle
> >
> > However we don't actually implement any of these, so should
> > we be advertising them?
> 
> So this part I took directly from Chris's RFC. I'm happy to take it
> out if you would like.

I think removing the PMCEID0 change makes sense since these patches
don't implement the advertised counters. We have other patches which do
implement them, but they need some more work, so we can make this change
if/when they're actually implemented.

-Aaron

> 
> >
> >> +cpu->pmceid1 = 0x;
> >>  cpu->id_afr0 = 0x;
> >>  cpu->id_mmfr0 = 0x10201105;
> >>  cpu->id_mmfr1 = 0x2000;
> >> diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
> >> index c847513..8c4b6fd 100644
> >> --- a/target-arm/cpu64.c
> >> +++ b/target-arm/cpu64.c
> >> @@ -134,6 +134,8 @@ static void aarch64_a57_initfn(Object *obj)
> >>  cpu->id_isar5 = 0x00011121;
> >>  cpu->id_aa64pfr0 = 0x;
> >>  cpu->id_aa64dfr0 = 0x10305106;
> >> +cpu->pmceid0 = 0x0481; /* PMUv3 events 0x0, 0x8, and 0x11 */
> >> +cpu->pmceid1 = 0x;
> >>  cpu->id_aa64isar0 = 0x00011120;
> >>  cpu->id_aa64mmfr0 = 0x1124;
> >>  cpu->dbgdidr = 0x3516d000;
> >> diff --git a/target-arm/helper.c b/target-arm/helper.c
> >> index 5ea507f..66aa406 100644
> >> --- a/target-arm/helper.c
> >> +++ b/target-arm/helper.c
> >> @@ -4192,6 +4192,14 @@ void register_cp_regs_for_features(ARMCPU *cpu)
> >>.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
> >>.access = PL1_R, .type = ARM_CP_CONST,
> >>.resetvalue = cpu->id_aa64dfr1 },
> >> +{ .name = "PMCEID0_EL0", .state = ARM_CP_STATE_AA64,
> >> +  .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 6,
> >> +  .access = PL1_R, .type = ARM_CP_CONST,
> >> +  .resetvalue = cpu->pmceid0},
> >
> > These have 32-bit versions from v8 and up (sadly not with the
> > right opc values to use STATE_BOTH, so second stanza needed).
> 
> Ok, I have added PMCEID0 and PMCEID1.
> 
> >
> > These are configurably RO from EL0, controlled by PMUSERENR_EL0.EN,
> > so you want
> >.access = PL0_R, .accessfn = pmreg_access
> >
> > Space before final "}", please.
> >
> > Can we move these down so they're not placed right in the
> > middle of the ID_AA64* registers ?
> 
> Fixed the rest.
> 
> Thanks,
> 
> Alistair
> 
> >
> >> +{ .name = "PMCEID1_EL0", .state = ARM_CP_STATE_AA64,
> >> +  .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 7,
> >> +  .access = PL1_R, .type = ARM_CP_CONST,
> >> +  .resetvalue = cpu->pmceid1},
> >
> > Ditto.
> >
> >>  { .name = "ID_AA64AFR0_EL1", .state = ARM_CP_STATE_AA64,
> >>.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4,
> >>.access = PL1_R, .type = ARM_CP_CONST,
> >> --
> >> 2.5.0
> >
> > thanks
> > -- PMM
> >



[Qemu-devel] [PATCH] Rename cpu_get_icount_{locked,biased}

2016-02-10 Thread Christopher Covington
The function does not provide locking but rather adds a bias value.

Signed-off-by: Christopher Covington 
---
 cpus.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpus.c b/cpus.c
index 898426c..50403c4 100644
--- a/cpus.c
+++ b/cpus.c
@@ -164,7 +164,7 @@ int64_t cpu_get_icount_raw(void)
 }
 
 /* Return the virtual CPU time, based on the instruction counter.  */
-static int64_t cpu_get_icount_locked(void)
+static int64_t cpu_get_icount_biased(void)
 {
 int64_t icount = cpu_get_icount_raw();
 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
@@ -177,7 +177,7 @@ int64_t cpu_get_icount(void)
 
 do {
 start = seqlock_read_begin(_state.vm_clock_seqlock);
-icount = cpu_get_icount_locked();
+icount = cpu_get_icount_biased();
 } while (seqlock_read_retry(_state.vm_clock_seqlock, start));
 
 return icount;
@@ -293,7 +293,7 @@ static void icount_adjust(void)
 
 seqlock_write_lock(_state.vm_clock_seqlock);
 cur_time = cpu_get_clock_locked();
-cur_icount = cpu_get_icount_locked();
+cur_icount = cpu_get_icount_biased();
 
 delta = cur_icount - cur_time;
 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  
*/
@@ -356,7 +356,7 @@ static void icount_warp_rt(void)
  * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
  * far ahead of real time.
  */
-int64_t cur_icount = cpu_get_icount_locked();
+int64_t cur_icount = cpu_get_icount_biased();
 int64_t delta = clock - cur_icount;
 warp_delta = MIN(warp_delta, delta);
 }
-- 
Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project




Re: [Qemu-devel] [PATCH] Rename cpu_get_icount_{locked,biased}

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 15:06, Christopher Covington wrote:
> The function does not provide locking but rather adds a bias value.

"Locked" means that you need to take a look outside its call; see how
cpu_get_icount() uses it.

Basically the idea is that a "_locked" in the name warns you to pay
attention. :)

Paolo

> Signed-off-by: Christopher Covington 
> ---
>  cpus.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/cpus.c b/cpus.c
> index 898426c..50403c4 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -164,7 +164,7 @@ int64_t cpu_get_icount_raw(void)
>  }
>  
>  /* Return the virtual CPU time, based on the instruction counter.  */
> -static int64_t cpu_get_icount_locked(void)
> +static int64_t cpu_get_icount_biased(void)
>  {
>  int64_t icount = cpu_get_icount_raw();
>  return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
> @@ -177,7 +177,7 @@ int64_t cpu_get_icount(void)
>  
>  do {
>  start = seqlock_read_begin(_state.vm_clock_seqlock);
> -icount = cpu_get_icount_locked();
> +icount = cpu_get_icount_biased();
>  } while (seqlock_read_retry(_state.vm_clock_seqlock, start));
>  
>  return icount;
> @@ -293,7 +293,7 @@ static void icount_adjust(void)
>  
>  seqlock_write_lock(_state.vm_clock_seqlock);
>  cur_time = cpu_get_clock_locked();
> -cur_icount = cpu_get_icount_locked();
> +cur_icount = cpu_get_icount_biased();
>  
>  delta = cur_icount - cur_time;
>  /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. 
>  */
> @@ -356,7 +356,7 @@ static void icount_warp_rt(void)
>   * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
>   * far ahead of real time.
>   */
> -int64_t cur_icount = cpu_get_icount_locked();
> +int64_t cur_icount = cpu_get_icount_biased();
>  int64_t delta = clock - cur_icount;
>  warp_delta = MIN(warp_delta, delta);
>  }
> 



Re: [Qemu-devel] [Qemu-ppc] [PATCH] spapr: skip configuration section during migration of older machines

2016-02-10 Thread Greg Kurz
Cc'ing Dave and Juan since this is migration stuff.

On Mon, 08 Feb 2016 16:59:47 +0100
Greg Kurz  wrote:

> Since QEMU 2.4, we have a configuration section in the migration stream.
> This must be skipped for older machines, like it is already done for x86.
> 
> Fixes: 61964c23e5ddd5a33f15699e45ce126f879e3e33
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Greg Kurz 
> ---
>  hw/ppc/spapr.c |1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 5bd8fd3ef842..bca7cb8a5d27 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2446,6 +2446,7 @@ static void 
> spapr_machine_2_3_instance_options(MachineState *machine)
>  spapr_machine_2_4_instance_options(machine);
>  savevm_skip_section_footers();
>  global_state_set_optional();
> +savevm_skip_configuration();
>  }
> 
>  static void spapr_machine_2_3_class_options(MachineClass *mc)
> 
> 




Re: [Qemu-devel] lock-free monitor?

2016-02-10 Thread Markus Armbruster
"Dr. David Alan Gilbert"  writes:

> * Markus Armbruster (arm...@redhat.com) wrote:
>> "Dr. David Alan Gilbert"  writes:
>> 
>> > Hi,
>> >   I wondered what it would take to be able to do a lock-free monitor;
>> > i.e. one that could respond to (some) commands while the qemu big lock is 
>> > held.
>> 
>> Requires a careful audit of the monitor code.
>> 
>> For instance, cur_mon needs to be made thread local for running multiple
>> monitors concurrently.
>
> Hmm that's fun;  and cur_mon is used all over the place when 'fd' passing
> is used - although that probably should be cleaned up somehow.

When cur_mon got created, we had no threads.  Even now, monitors only
ever run under the BQL, so cur_mon's still fine.

Having a current monitor is simpler than passing one around, especially
when you go through layers that don't know about it.  Such cases exist,
and they made us abandon commit 376253e's hope to eliminate cur_mon.
Unfortunately, I've since forgotten the details, so I can't point you to
an example.

>> > My reason for asking is that there are cases in migration and colo
>> > where a network block device has failed and is blocking, but it fails
>> > at just the wrong time while the lock is held, and now, you don't have
>> 
>> Is it okay to block while holding the BQL?
>
> I'd hope the simple answer was no; unfortunately the more complex answer
> is that we keep finding places that do.

Would you call these bugs?

Even if you do, we may want to recover from them.

>  The cases I'm aware of are
> mostly bdrv_drain_all calls in migration/colo, where one of the block
> devices (e.g. an NBD network device) fails.  Generally these are called
> at the end of a migration cycle when we're just ensuring that everything
> really is drained and are normally called with the lock held to ensure
> that nothing else is generating new traffic as we're clearing everything else.

Using the BQL for that drives a cork into the I/O pipeline with a Really
Big Hammer.  Can we do better?

The answer may be "yes, but don't hold your breath."  Then we may need
means to better cope with the bugs.

>> > any way to unblock it since you can't do anything on the monitors.
>> > If I could issue a command then I could have it end up doing a shutdown(2)
>> > on the network connection and free things up.
>> >
>> > Maybe this would also help real-time people?
>> >
>> > I was thinking then, we could:
>> >a) Have a monitor that wasn't tied to the main loop at all - each 
>> > instance
>> > would be it's own thread and have it's own poll() (probably using the new
>> > IO channels?)
>> >b) for most commands it would take the big lock before execute the 
>> > command
>> > and release it afterwards - so there would be no risk in those commands.
>> 
>> Saves you the auditing their code, which would probably be impractical.
>> 
>> >c) Some commands you could mark as 'lock free' - they would be required
>> > not to take any locks or wait for *anything* and for those the monitor 
>> > would
>> > not take the lock.
>> 
>> They also must not access shared state without suitable synchronization.
>
> Right; I'd expect most of the cases to either be reading simple status 
> information,
> or having to find whatever they need to do using rcu list walks.
>
>> >d) Perhaps have some way of restricting a particular monitor session to 
>> > only
>> > allowing non-blocking commands.
>> 
>> Why?  To ensure you always have an emergency monitor available?
>
> Yes, mostly to stop screwups of putting a potentially blocking command down 
> your
> emergency route.

Understand.

>> >e) Then I'd have to have a way of finding the broken device in a 
>> > lockfree
>> > way (RTU list walk?) and issuing the shutdown(2).
>> >
>> > Bonus points to anyone who can statically check commands in (c).
>> 
>> Tall order.
>
> Yes :-)   A (compile time selected) dynamic check might be possible
> where the normal global lock functions and rcu block functions check if 
> they're
> in a monitor thread and assert.
>
>> > Does this make sense to everyone else, or does anyone have any better
>> > suggestions?
>> 
>> Sounds like a big, hairy task to me.
>> 
>> Any alternatives?
>
> I hope so; although is the idea of making a lock-free monitor a generally
> good idea we should do anyway?

There's no shortage of good ideas, but our bandwidth to implement,
review, document and test them has limits.

The general plan is to reduce the scope of the BQL gradually.
Liberating the monitor from the BQL is one step on the road to complete
BQL elimination.  The question is whether this step needs to be taken
*now*.



Re: [Qemu-devel] [PATCH 2/6] qmp: add query-block-dirty-bitmap-ranges

2016-02-10 Thread Denis V. Lunev

On 02/10/2016 06:37 PM, John Snow wrote:


On 02/10/2016 10:36 AM, Denis V. Lunev wrote:

On 02/10/2016 06:26 PM, John Snow wrote:

On 02/10/2016 08:57 AM, Denis V. Lunev wrote:

On 02/10/2016 01:08 PM, Stefan Hajnoczi wrote:

On Sat, Jan 30, 2016 at 01:56:30PM +0300, Vladimir Sementsov-Ogievskiy
wrote:

Add qmp command to query dirty bitmap contents. This is needed for
external backup.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
block/dirty-bitmap.c | 55
+++
blockdev.c   | 62

include/block/dirty-bitmap.h |  7 +
qapi/block-core.json | 54
++
qmp-commands.hx  | 33 +++
5 files changed, 211 insertions(+)

This API produces large replies and/or requires many calls to fetch all
bitmap data.  The worst case is a 101010... bitmap.

I consider the dirty bitmap to be data (vs control) and not something
that should be sent over a control channel like the QMP monitor.

How about writing the dirty bitmap to a file?  The new bitmap file
format that Fam is working on could be used.  That way the dirty bitmap
can be saved asynchronously without hogging the QMP monitor.

Reasonable point.

May be it would be better to setup "special" NBD server inside
QEMU which will allow to directly "read" bitmap data.

Any opinion?

Den

Or perhaps something like migration, where the client receiving the data
opens a socket of some sort, and QEMU connects to that socket to send
the data.

no. The point is that QEMU should be queried for data.
May be even via several sockets to provide it in
parallel.

Den

I don't follow.

You'd use a QMP command to tell QEMU where to connect to send the data.
You're still "querying" QEMU, it's just not acting as the server for the
data channel.

ok, I have understood you wrong. This looks working
at the first glance.



Re: [Qemu-devel] KVH call for agenda for 2016-02-16

2016-02-10 Thread Christian Borntraeger
On 02/10/2016 04:42 PM, Juan Quintela wrote:
> 
> 
> Hi
> 
> Please, send any topic that you are interested in covering.
> 
> At the end of Monday I will send an email with the agenda or the
> cancellation of the call, so hurry up.
> 
> After discussions on the QEMU Summit, we are going to have always open a
> KVM call where you can add topics.
> 
>  Call details:
> 
> By popular demand, a google calendar public entry with it
> 
>   
> https://www.google.com/calendar/embed?src=dG9iMXRqcXAzN3Y4ZXZwNzRoMHE4a3BqcXNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ
> 
> (Let me know if you have any problems with the calendar entry.  I just
> gave up about getting right at the same time CEST, CET, EDT and DST).
> 
> If you need phone number details,  contact me privately
> 
> Thanks, Juan.


Since we seem to be stuck, I propose CPU hotplug as agenda item.




Re: [Qemu-devel] [PATCH V2 2/2] hw/virtio: group virtio flags into an enum

2016-02-10 Thread Laurent Vivier


On 10/02/2016 14:31, Marcel Apfelbaum wrote:
> Minimizes the possibility to assign
> the same bit to different features.
> 
> Signed-off-by: Marcel Apfelbaum 
Reviewed-by: Laurent Vivier 



Re: [Qemu-devel] [PULL v3 00/33] Misc patches for 2016-02-08

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 14:49, Peter Maydell wrote:
>> > Self-NACK, this breaks (at least) FreeDOS.
> Oops, I read this email five seconds after pushing the merge to master.
> Can you send out reverts for the appropriate patches and I'll apply
> them direct to master?

I can send the fix.

Paolo



Re: [Qemu-devel] [PATCH 2/6] qmp: add query-block-dirty-bitmap-ranges

2016-02-10 Thread John Snow


On 02/10/2016 08:57 AM, Denis V. Lunev wrote:
> On 02/10/2016 01:08 PM, Stefan Hajnoczi wrote:
>> On Sat, Jan 30, 2016 at 01:56:30PM +0300, Vladimir Sementsov-Ogievskiy
>> wrote:
>>> Add qmp command to query dirty bitmap contents. This is needed for
>>> external backup.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>>> ---
>>>   block/dirty-bitmap.c | 55
>>> +++
>>>   blockdev.c   | 62
>>> 
>>>   include/block/dirty-bitmap.h |  7 +
>>>   qapi/block-core.json | 54
>>> ++
>>>   qmp-commands.hx  | 33 +++
>>>   5 files changed, 211 insertions(+)
>> This API produces large replies and/or requires many calls to fetch all
>> bitmap data.  The worst case is a 101010... bitmap.
>>
>> I consider the dirty bitmap to be data (vs control) and not something
>> that should be sent over a control channel like the QMP monitor.
>>
>> How about writing the dirty bitmap to a file?  The new bitmap file
>> format that Fam is working on could be used.  That way the dirty bitmap
>> can be saved asynchronously without hogging the QMP monitor.
> Reasonable point.
> 
> May be it would be better to setup "special" NBD server inside
> QEMU which will allow to directly "read" bitmap data.
> 
> Any opinion?
> 
> Den

Or perhaps something like migration, where the client receiving the data
opens a socket of some sort, and QEMU connects to that socket to send
the data.



Re: [Qemu-devel] [PATCH] cirrus_vga: fix off-by-one in blit_region_is_unsafe

2016-02-10 Thread Laszlo Ersek
On 02/10/16 16:29, Paolo Bonzini wrote:
> 
> 
> On 10/02/2016 15:55, Laszlo Ersek wrote:
 Hmm, not sure why.  We're comparing against the inclusive-exclusive
 range [0,s->vga.vram_size).  The right way to check if something is
 within the range is >= min && < max; the right way to check if something
 is outside the range is < min || >= max.
>> Absolutely: if the thing you are verifying against the interval is
>> itself an inclusive thing, that is, a pixel or byte *drawn*. However,
>> exactly as you stated in the commit message, for the maximum check, what
>> we are comparing is the first offset *not* processed.
> 
> Right, what my patch does is setting min and max both to pixels that are
> drawn.

Do you understand my concern with that? It's okay if you dismiss my
concern (or even better if you prove it is unfounded). But I hope you at
least understand it.

When you set "max" to the last pixel that is drawn, you are calculating
a new quantity in C that was not calculated before. By subtracting 1,
you could theoretically turn "max" into a negative number.

Have you checked and excluded this possibility, or proved why it doesn't
matter?

When I reviewed the underlying CVE fix (downstream), I spent hours on
tracking down all possibilities, with Gerd's help. With your patch, that
argument goes out the window, *for me*. I don't mind -- in particular
because it *could* be easy to prove your patch is safe --, but I can't
tell if it's going to be an easy proof without actually trying. And I'm
not going to try, now.

Changing the relop would be mathematically equivalent, and keep my
earlier argument intact.

Anyway, you don't need my personal R-b for this.

Thanks
Laszlo




Re: [Qemu-devel] [PATCH 3/3] replay: introduce block devices record/replay

2016-02-10 Thread Pavel Dovgalyuk
> From: Kevin Wolf [mailto:kw...@redhat.com]
> Am 10.02.2016 um 13:51 hat Pavel Dovgalyuk geschrieben:
> > > From: Kevin Wolf [mailto:kw...@redhat.com]
> > > Am 10.02.2016 um 13:05 hat Pavel Dovgalyuk geschrieben:
> > > > > Am 09.02.2016 um 12:52 hat Pavel Dovgalyuk geschrieben:
> > > > > > > From: Kevin Wolf [mailto:kw...@redhat.com]
> > > > > > > But even this doesn't feel completely right, because block 
> > > > > > > drivers are
> > > > > > > already layered and there is no need to hardcode something 
> > > > > > > optional (and
> > > > > > > rarely used) in the hot code path that could just be another 
> > > > > > > layer.
> > > > > > >
> > > > > > > I assume that you know beforehand if you want to replay 
> > > > > > > something, so
> > > > > > > requiring you to configure your block devices with a replay 
> > > > > > > driver on
> > > > > > > top of the stack seems reasonable enough.
> > > > > >
> > > > > > I cannot use block drivers for this. When driver functions are 
> > > > > > used, QEMU
> > > > > > is already used coroutines (and probably started bottom halves).
> > > > > > Coroutines make execution non-deterministic.
> > > > > > That's why we have to intercept blk_aio_ functions, that are called
> > > > > > deterministically.
> > > > >
> > > > > What does "deterministic" mean in this context, i.e. what are your 
> > > > > exact
> > > > > requirements?
> > > >
> > > > "Deterministic" means that the replayed execution should run exactly
> > > > the same guest instructions in the same sequence, as in recording 
> > > > session.
> > >
> > > Okay. I think with this we can do better than what you have now.
> > >
> > > > > I don't think that coroutines introduce anything non-deterministic per
> > > > > se. Depending on what you mean by it, the block layer code paths in
> > > > > block.c may contain problematic code.
> > > >
> > > > They are non-deterministic if we need instruction-level accuracy.
> > > > Thread switching (and therefore callbacks and BH execution) is 
> > > > non-deterministic.
> > >
> > > Thread switching depends on an external event (the kernel scheduler
> > > deciding to switch), so agreed, if a thread switch ever influences what
> > > the guest sees, that would be a problem.
> > >
> > > Generally, however, callbacks and BHs don't involve a thread switch at
> > > all (BHs can be invoked from a different thread in theory, but we have
> > > very few of those cases and they shouldn't be visible for the guest).
> > > The same is true for coroutines, which are semantically equivalent to
> > > callbacks.
> > >
> > > > In two different executions these callbacks may happen at different 
> > > > moments of
> > > > time (counting in number of executed instructions).
> > > > All operations with virtual devices (including memory, interrupt 
> > > > controller,
> > > > and disk drive controller) should happen at deterministic moments of 
> > > > time
> > > > to be replayable.
> > >
> > > Right, so let's talk about what this external non-deterministic event
> > > really is.
> > >
> > > I think the only thing whose timing is unknown in the block layer is the
> > > completion of I/O requests. This non-determinism comes from the time the
> > > I/O syscalls made by the lowest layer (usually raw-posix) take.
> >
> > Right.
> >
> > > This means that we can add logic to remove the non-determinism at the
> > > point of our choice between raw-posix and the guest device emulation. A
> > > block driver on top is as good as anything else.
> > >
> > > While recording, this block driver would just pass the request to next
> > > lower layer (starting a request is deterministic, so it doesn't need to
> > > be logged) and once the request completes it logs it. While replaying,
> > > the completion of requests is delayed until we read it in the log; if we
> > > read it in the log and the request hasn't completed yet, we do a busy
> > > wait for it (while(!completed) aio_poll();).
> >
> > I tried serializing all bottom halves and worker thread callbacks in
> > previous version of the patches. That code was much more complicated
> > and error-prone than the current version. We had to classify all bottom
> > halves to recorded and non-recorded (because sometimes they are used
> > for qemu's purposes, not the guest ones).
> >
> > However, I don't understand yet which layer do you offer as the candidate
> > for record/replay? What functions should be changed?
> > I would like to investigate this way, but I don't got it yet.
> 
> At the core, I wouldn't change any existing function, but introduce a
> new block driver. You could copy raw_bsd.c for a start and then tweak
> it. Leave out functions that you don't want to support, and add the
> necessary magic to .bdrv_co_readv/writev.
> 
> Something like this (can probably be generalised for more than just
> reads as the part after the bdrv_co_reads() call should be the same for
> reads, writes and any other request types):
> 
> int blkreplay_co_readv()
> {
> 

Re: [Qemu-devel] [PATCH] target-arm: Fix MDCCSR_EL0 instruction encoding

2016-02-10 Thread Andreas Färber
Am 09.02.2016 um 21:57 schrieb Dirk Müller:
> See C5.1.5 of the ARMv8 Reference Manual
> 
> Signed-off-by: Dirk Mueller 

Reviewed-by: Andreas Färber 

Thanks,
Andreas

-- 
SUSE Linux GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Felix Imendörffer, Jane Smithard, Graham Norton; HRB 21284 (AG Nürnberg)



Re: [Qemu-devel] [PATCH 2/6] qmp: add query-block-dirty-bitmap-ranges

2016-02-10 Thread John Snow


On 02/10/2016 10:36 AM, Denis V. Lunev wrote:
> On 02/10/2016 06:26 PM, John Snow wrote:
>>
>> On 02/10/2016 08:57 AM, Denis V. Lunev wrote:
>>> On 02/10/2016 01:08 PM, Stefan Hajnoczi wrote:
 On Sat, Jan 30, 2016 at 01:56:30PM +0300, Vladimir Sementsov-Ogievskiy
 wrote:
> Add qmp command to query dirty bitmap contents. This is needed for
> external backup.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>block/dirty-bitmap.c | 55
> +++
>blockdev.c   | 62
> 
>include/block/dirty-bitmap.h |  7 +
>qapi/block-core.json | 54
> ++
>qmp-commands.hx  | 33 +++
>5 files changed, 211 insertions(+)
 This API produces large replies and/or requires many calls to fetch all
 bitmap data.  The worst case is a 101010... bitmap.

 I consider the dirty bitmap to be data (vs control) and not something
 that should be sent over a control channel like the QMP monitor.

 How about writing the dirty bitmap to a file?  The new bitmap file
 format that Fam is working on could be used.  That way the dirty bitmap
 can be saved asynchronously without hogging the QMP monitor.
>>> Reasonable point.
>>>
>>> May be it would be better to setup "special" NBD server inside
>>> QEMU which will allow to directly "read" bitmap data.
>>>
>>> Any opinion?
>>>
>>> Den
>> Or perhaps something like migration, where the client receiving the data
>> opens a socket of some sort, and QEMU connects to that socket to send
>> the data.
> no. The point is that QEMU should be queried for data.
> May be even via several sockets to provide it in
> parallel.
> 
> Den

I don't follow.

You'd use a QMP command to tell QEMU where to connect to send the data.
You're still "querying" QEMU, it's just not acting as the server for the
data channel.



[Qemu-devel] [PATCH v2 2/4] rng: move request queue from RngEgd to RngBackend

2016-02-10 Thread Ladi Prosek
The 'requests' field now lives in the RngBackend parent class.
There are no functional changes in this commit.

Signed-off-by: Ladi Prosek 
---
 backends/rng-egd.c   | 28 +---
 include/sysemu/rng.h | 11 +++
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index 0b2976a..b061362 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -25,19 +25,8 @@ typedef struct RngEgd
 
 CharDriverState *chr;
 char *chr_name;
-
-GSList *requests;
 } RngEgd;
 
-typedef struct RngRequest
-{
-EntropyReceiveFunc *receive_entropy;
-uint8_t *data;
-void *opaque;
-size_t offset;
-size_t size;
-} RngRequest;
-
 static void rng_egd_request_entropy(RngBackend *b, size_t size,
 EntropyReceiveFunc *receive_entropy,
 void *opaque)
@@ -66,7 +55,7 @@ static void rng_egd_request_entropy(RngBackend *b, size_t 
size,
 size -= len;
 }
 
-s->requests = g_slist_append(s->requests, req);
+s->parent.requests = g_slist_append(s->parent.requests, req);
 }
 
 static void rng_egd_free_request(RngRequest *req)
@@ -81,7 +70,7 @@ static int rng_egd_chr_can_read(void *opaque)
 GSList *i;
 int size = 0;
 
-for (i = s->requests; i; i = i->next) {
+for (i = s->parent.requests; i; i = i->next) {
 RngRequest *req = i->data;
 size += req->size - req->offset;
 }
@@ -94,8 +83,8 @@ static void rng_egd_chr_read(void *opaque, const uint8_t 
*buf, int size)
 RngEgd *s = RNG_EGD(opaque);
 size_t buf_offset = 0;
 
-while (size > 0 && s->requests) {
-RngRequest *req = s->requests->data;
+while (size > 0 && s->parent.requests) {
+RngRequest *req = s->parent.requests->data;
 int len = MIN(size, req->size - req->offset);
 
 memcpy(req->data + req->offset, buf + buf_offset, len);
@@ -104,7 +93,8 @@ static void rng_egd_chr_read(void *opaque, const uint8_t 
*buf, int size)
 size -= len;
 
 if (req->offset == req->size) {
-s->requests = g_slist_remove_link(s->requests, s->requests);
+s->parent.requests = g_slist_remove_link(s->parent.requests,
+ s->parent.requests);
 
 req->receive_entropy(req->opaque, req->data, req->size);
 
@@ -117,12 +107,12 @@ static void rng_egd_free_requests(RngEgd *s)
 {
 GSList *i;
 
-for (i = s->requests; i; i = i->next) {
+for (i = s->parent.requests; i; i = i->next) {
 rng_egd_free_request(i->data);
 }
 
-g_slist_free(s->requests);
-s->requests = NULL;
+g_slist_free(s->parent.requests);
+s->parent.requests = NULL;
 }
 
 static void rng_egd_opened(RngBackend *b, Error **errp)
diff --git a/include/sysemu/rng.h b/include/sysemu/rng.h
index c7da17d..084164c 100644
--- a/include/sysemu/rng.h
+++ b/include/sysemu/rng.h
@@ -25,6 +25,7 @@
 #define RNG_BACKEND_CLASS(klass) \
 OBJECT_CLASS_CHECK(RngBackendClass, (klass), TYPE_RNG_BACKEND)
 
+typedef struct RngRequest RngRequest;
 typedef struct RngBackendClass RngBackendClass;
 typedef struct RngBackend RngBackend;
 
@@ -32,6 +33,15 @@ typedef void (EntropyReceiveFunc)(void *opaque,
   const void *data,
   size_t size);
 
+struct RngRequest
+{
+EntropyReceiveFunc *receive_entropy;
+uint8_t *data;
+void *opaque;
+size_t offset;
+size_t size;
+};
+
 struct RngBackendClass
 {
 ObjectClass parent_class;
@@ -48,6 +58,7 @@ struct RngBackend
 
 /*< protected >*/
 bool opened;
+GSList *requests;
 };
 
 /**
-- 
2.5.0




[Qemu-devel] [PATCH v2 1/4] rng: remove the unused request cancellation code

2016-02-10 Thread Ladi Prosek
rng_backend_cancel_requests has no callers and none of the code
deleted in this commit ever runs.

Signed-off-by: Ladi Prosek 
---
 backends/rng-egd.c   | 12 
 backends/rng.c   |  9 -
 include/sysemu/rng.h | 11 ---
 3 files changed, 32 deletions(-)

diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index 2de5cd5..0b2976a 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -125,17 +125,6 @@ static void rng_egd_free_requests(RngEgd *s)
 s->requests = NULL;
 }
 
-static void rng_egd_cancel_requests(RngBackend *b)
-{
-RngEgd *s = RNG_EGD(b);
-
-/* We simply delete the list of pending requests.  If there is data in the 
- * queue waiting to be read, this is okay, because there will always be
- * more data than we requested originally
- */
-rng_egd_free_requests(s);
-}
-
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
 RngEgd *s = RNG_EGD(b);
@@ -213,7 +202,6 @@ static void rng_egd_class_init(ObjectClass *klass, void 
*data)
 RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);
 
 rbc->request_entropy = rng_egd_request_entropy;
-rbc->cancel_requests = rng_egd_cancel_requests;
 rbc->opened = rng_egd_opened;
 }
 
diff --git a/backends/rng.c b/backends/rng.c
index b7820ef..2f2f3ee 100644
--- a/backends/rng.c
+++ b/backends/rng.c
@@ -26,15 +26,6 @@ void rng_backend_request_entropy(RngBackend *s, size_t size,
 }
 }
 
-void rng_backend_cancel_requests(RngBackend *s)
-{
-RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
-
-if (k->cancel_requests) {
-k->cancel_requests(s);
-}
-}
-
 static bool rng_backend_prop_get_opened(Object *obj, Error **errp)
 {
 RngBackend *s = RNG_BACKEND(obj);
diff --git a/include/sysemu/rng.h b/include/sysemu/rng.h
index 0a27c9b..c7da17d 100644
--- a/include/sysemu/rng.h
+++ b/include/sysemu/rng.h
@@ -38,7 +38,6 @@ struct RngBackendClass
 
 void (*request_entropy)(RngBackend *s, size_t size,
 EntropyReceiveFunc *receive_entropy, void *opaque);
-void (*cancel_requests)(RngBackend *s);
 
 void (*opened)(RngBackend *s, Error **errp);
 };
@@ -69,14 +68,4 @@ struct RngBackend
 void rng_backend_request_entropy(RngBackend *s, size_t size,
  EntropyReceiveFunc *receive_entropy,
  void *opaque);
-
-/**
- * rng_backend_cancel_requests:
- * @s: the backend to cancel all pending requests in
- *
- * Cancels all pending requests submitted by @rng_backend_request_entropy.  
This
- * should be used by a device during reset or in preparation for live migration
- * to stop tracking any request.
- */
-void rng_backend_cancel_requests(RngBackend *s);
 #endif
-- 
2.5.0




[Qemu-devel] [PATCH v2 0/4] rng-random: implement request queue

2016-02-10 Thread Ladi Prosek
As suggested by Paolo, I have moved the RngRequest implementation
up to the RngBackend parent class and made both child classes use
it. Apart from the refactoring, the only functional change
compared to v1 is the use of heap instead of stack allocation for
the read buffer in rng-random.

The parent class takes care of creating new requests and adding
them to the queue, as well as removing them from the queue and
deleting them. Child classes have access to the raw GSList * to 
do whatever else they need to do (walk the queue, peek the head
of the queue, ..)

[PATCH v2 1/4] rng: remove the unused request cancellation code
[PATCH v2 2/4] rng: move request queue from RngEgd to RngBackend
[PATCH v2 3/4] rng: move request queue cleanup from RngEgd to
[PATCH v2 4/4] rng: add request queue support to rng-random




Re: [Qemu-devel] [PATCH 3/3] replay: introduce block devices record/replay

2016-02-10 Thread Kevin Wolf
Am 10.02.2016 um 13:51 hat Pavel Dovgalyuk geschrieben:
> > From: Kevin Wolf [mailto:kw...@redhat.com]
> > Am 10.02.2016 um 13:05 hat Pavel Dovgalyuk geschrieben:
> > > > Am 09.02.2016 um 12:52 hat Pavel Dovgalyuk geschrieben:
> > > > > > From: Kevin Wolf [mailto:kw...@redhat.com]
> > > > > > But even this doesn't feel completely right, because block drivers 
> > > > > > are
> > > > > > already layered and there is no need to hardcode something optional 
> > > > > > (and
> > > > > > rarely used) in the hot code path that could just be another layer.
> > > > > >
> > > > > > I assume that you know beforehand if you want to replay something, 
> > > > > > so
> > > > > > requiring you to configure your block devices with a replay driver 
> > > > > > on
> > > > > > top of the stack seems reasonable enough.
> > > > >
> > > > > I cannot use block drivers for this. When driver functions are used, 
> > > > > QEMU
> > > > > is already used coroutines (and probably started bottom halves).
> > > > > Coroutines make execution non-deterministic.
> > > > > That's why we have to intercept blk_aio_ functions, that are called
> > > > > deterministically.
> > > >
> > > > What does "deterministic" mean in this context, i.e. what are your exact
> > > > requirements?
> > >
> > > "Deterministic" means that the replayed execution should run exactly
> > > the same guest instructions in the same sequence, as in recording session.
> > 
> > Okay. I think with this we can do better than what you have now.
> > 
> > > > I don't think that coroutines introduce anything non-deterministic per
> > > > se. Depending on what you mean by it, the block layer code paths in
> > > > block.c may contain problematic code.
> > >
> > > They are non-deterministic if we need instruction-level accuracy.
> > > Thread switching (and therefore callbacks and BH execution) is 
> > > non-deterministic.
> > 
> > Thread switching depends on an external event (the kernel scheduler
> > deciding to switch), so agreed, if a thread switch ever influences what
> > the guest sees, that would be a problem.
> > 
> > Generally, however, callbacks and BHs don't involve a thread switch at
> > all (BHs can be invoked from a different thread in theory, but we have
> > very few of those cases and they shouldn't be visible for the guest).
> > The same is true for coroutines, which are semantically equivalent to
> > callbacks.
> > 
> > > In two different executions these callbacks may happen at different 
> > > moments of
> > > time (counting in number of executed instructions).
> > > All operations with virtual devices (including memory, interrupt 
> > > controller,
> > > and disk drive controller) should happen at deterministic moments of time
> > > to be replayable.
> > 
> > Right, so let's talk about what this external non-deterministic event
> > really is.
> > 
> > I think the only thing whose timing is unknown in the block layer is the
> > completion of I/O requests. This non-determinism comes from the time the
> > I/O syscalls made by the lowest layer (usually raw-posix) take.
> 
> Right.
> 
> > This means that we can add logic to remove the non-determinism at the
> > point of our choice between raw-posix and the guest device emulation. A
> > block driver on top is as good as anything else.
> > 
> > While recording, this block driver would just pass the request to next
> > lower layer (starting a request is deterministic, so it doesn't need to
> > be logged) and once the request completes it logs it. While replaying,
> > the completion of requests is delayed until we read it in the log; if we
> > read it in the log and the request hasn't completed yet, we do a busy
> > wait for it (while(!completed) aio_poll();).
> 
> I tried serializing all bottom halves and worker thread callbacks in
> previous version of the patches. That code was much more complicated 
> and error-prone than the current version. We had to classify all bottom
> halves to recorded and non-recorded (because sometimes they are used
> for qemu's purposes, not the guest ones).
> 
> However, I don't understand yet which layer do you offer as the candidate
> for record/replay? What functions should be changed?
> I would like to investigate this way, but I don't got it yet.

At the core, I wouldn't change any existing function, but introduce a
new block driver. You could copy raw_bsd.c for a start and then tweak
it. Leave out functions that you don't want to support, and add the
necessary magic to .bdrv_co_readv/writev.

Something like this (can probably be generalised for more than just
reads as the part after the bdrv_co_reads() call should be the same for
reads, writes and any other request types):

int blkreplay_co_readv()
{
BlockReplayState *s = bs->opaque;
int reqid = s->reqid++;

bdrv_co_readv(bs->file, ...);

if (mode == record) {
log(reqid, time);
} else {
assert(mode == replay);
bool *done = req_replayed_list_get(reqid)
if (done) {
*done 

Re: [Qemu-devel] [PATCH 3/3] replay: introduce block devices record/replay

2016-02-10 Thread Kevin Wolf
Am 10.02.2016 um 14:33 hat Pavel Dovgalyuk geschrieben:
> > From: Kevin Wolf [mailto:kw...@redhat.com]
> > Am 10.02.2016 um 13:51 hat Pavel Dovgalyuk geschrieben:
> > > > From: Kevin Wolf [mailto:kw...@redhat.com]
> > > > Am 10.02.2016 um 13:05 hat Pavel Dovgalyuk geschrieben:
> > > > > > Am 09.02.2016 um 12:52 hat Pavel Dovgalyuk geschrieben:
> > > > > > > > From: Kevin Wolf [mailto:kw...@redhat.com]
> > > > > > > > But even this doesn't feel completely right, because block 
> > > > > > > > drivers are
> > > > > > > > already layered and there is no need to hardcode something 
> > > > > > > > optional (and
> > > > > > > > rarely used) in the hot code path that could just be another 
> > > > > > > > layer.
> > > > > > > >
> > > > > > > > I assume that you know beforehand if you want to replay 
> > > > > > > > something, so
> > > > > > > > requiring you to configure your block devices with a replay 
> > > > > > > > driver on
> > > > > > > > top of the stack seems reasonable enough.
> > > > > > >
> > > > > > > I cannot use block drivers for this. When driver functions are 
> > > > > > > used, QEMU
> > > > > > > is already used coroutines (and probably started bottom halves).
> > > > > > > Coroutines make execution non-deterministic.
> > > > > > > That's why we have to intercept blk_aio_ functions, that are 
> > > > > > > called
> > > > > > > deterministically.
> > > > > >
> > > > > > What does "deterministic" mean in this context, i.e. what are your 
> > > > > > exact
> > > > > > requirements?
> > > > >
> > > > > "Deterministic" means that the replayed execution should run exactly
> > > > > the same guest instructions in the same sequence, as in recording 
> > > > > session.
> > > >
> > > > Okay. I think with this we can do better than what you have now.
> > > >
> > > > > > I don't think that coroutines introduce anything non-deterministic 
> > > > > > per
> > > > > > se. Depending on what you mean by it, the block layer code paths in
> > > > > > block.c may contain problematic code.
> > > > >
> > > > > They are non-deterministic if we need instruction-level accuracy.
> > > > > Thread switching (and therefore callbacks and BH execution) is 
> > > > > non-deterministic.
> > > >
> > > > Thread switching depends on an external event (the kernel scheduler
> > > > deciding to switch), so agreed, if a thread switch ever influences what
> > > > the guest sees, that would be a problem.
> > > >
> > > > Generally, however, callbacks and BHs don't involve a thread switch at
> > > > all (BHs can be invoked from a different thread in theory, but we have
> > > > very few of those cases and they shouldn't be visible for the guest).
> > > > The same is true for coroutines, which are semantically equivalent to
> > > > callbacks.
> > > >
> > > > > In two different executions these callbacks may happen at different 
> > > > > moments of
> > > > > time (counting in number of executed instructions).
> > > > > All operations with virtual devices (including memory, interrupt 
> > > > > controller,
> > > > > and disk drive controller) should happen at deterministic moments of 
> > > > > time
> > > > > to be replayable.
> > > >
> > > > Right, so let's talk about what this external non-deterministic event
> > > > really is.
> > > >
> > > > I think the only thing whose timing is unknown in the block layer is the
> > > > completion of I/O requests. This non-determinism comes from the time the
> > > > I/O syscalls made by the lowest layer (usually raw-posix) take.
> > >
> > > Right.
> > >
> > > > This means that we can add logic to remove the non-determinism at the
> > > > point of our choice between raw-posix and the guest device emulation. A
> > > > block driver on top is as good as anything else.
> > > >
> > > > While recording, this block driver would just pass the request to next
> > > > lower layer (starting a request is deterministic, so it doesn't need to
> > > > be logged) and once the request completes it logs it. While replaying,
> > > > the completion of requests is delayed until we read it in the log; if we
> > > > read it in the log and the request hasn't completed yet, we do a busy
> > > > wait for it (while(!completed) aio_poll();).
> > >
> > > I tried serializing all bottom halves and worker thread callbacks in
> > > previous version of the patches. That code was much more complicated
> > > and error-prone than the current version. We had to classify all bottom
> > > halves to recorded and non-recorded (because sometimes they are used
> > > for qemu's purposes, not the guest ones).
> > >
> > > However, I don't understand yet which layer do you offer as the candidate
> > > for record/replay? What functions should be changed?
> > > I would like to investigate this way, but I don't got it yet.
> > 
> > At the core, I wouldn't change any existing function, but introduce a
> > new block driver. You could copy raw_bsd.c for a start and then tweak
> > it. Leave out functions that you don't want to support, and add the
> > 

Re: [Qemu-devel] [PATCH 0/8] ipmi: a couple of enhancements to the BMC simulator (round 2)

2016-02-10 Thread Cédric Le Goater
Hello Corey,

On 02/09/2016 07:25 PM, Corey Minyard wrote:
> On 02/09/2016 06:13 AM, Cédric Le Goater wrote:
>> The first patches are cleanups and prepare ground for an extension of
>> the BMC simulator providing a SDR loader using a file. A simple FRU
>> support comes next.
>>
>> The last patches introduce APIs to populate a guest device tree with
>> the available sensors and to generate events, which is needed by
>> platforms in some occasions.
>>
> 
> I have reviewed all of these, and they look good.  I only have one
> comment: The naming of the properties probably needs to be
> fixed.
> 
> "sdr" should be "sdrfile" to be consistent with everything else.

yes. I agree. I am glad you took a look. 

> Technically, a "FRU" is a piece of hardware that can be replaced
> in the field, "FRU data" is the data describing that FRU, and a "FRU
> area" is the memory used to store FRU data.  I know this is mixed
> up a lot (and I have done so) but some people are picky about this.
> 
> With the renaming of sdr (fru is your option):

I will rename the "sdr" property to "sdrfile".

As for FRU, you would rather have the code use FruData than Fru ? 
Something like: 

typedef struct IPMIFruData {
char *filename;
unsigned int nentries;
uint16_t size;
uint8_t *area;
} IPMIFruData;

The code using the IPMIFruData structure would look like :

uint8_t *fru_area;

...
fru_area = >frudata.area[fruid * ibs->frudata.size];
...

Changing all the names is not a problem. Let's get them right.

And, so, the properties would become :

DEFINE_PROP_UINT16("frudatasize", IPMIBmcSim, frudata.size, 1024),
DEFINE_PROP_STRING("frudatafile", IPMIBmcSim, frudata.filename),
DEFINE_PROP_STRING("sdrfile", IPMIBmcSim, sdr_filename),

> Acked-by: Corey Minyard 
> 
> for all patches.
> 
> Oh, and I assume you need to add documentation for the
> properties to qemu-options.hx.

Yes. Forgot that.

Thanks,

C. 

> -corey
> 
>> Based on e4a096b1cd43 and also available here  :
>>
>>https://github.com/legoater/qemu/commits/ipmi
>>
>> Thanks,
>>
>> C.
>>
>> Cédric Le Goater (8):
>>ipmi: add a realize function to the device class
>>ipmi: use a function to initialize the SDR table
>>ipmi: remove the need of an ending record in the SDR table
>>ipmi: add some local variables in ipmi_sdr_init
>>ipmi: use a file to load SDRs
>>ipmi: provide support for FRUs
>>ipmi: introduce an ipmi_bmc_sdr_find() API
>>ipmi: introduce an ipmi_bmc_gen_event() API
>>
>>   hw/ipmi/ipmi_bmc_sim.c | 256 
>> +++--
>>   include/hw/ipmi/ipmi.h |   4 +
>>   2 files changed, 233 insertions(+), 27 deletions(-)
>>
> 




[Qemu-devel] [PATCH] memory: fix usage of find_next_bit and find_next_zero_bit

2016-02-10 Thread Paolo Bonzini
The last two arguments to these functions are the last and first bit to
check relative to the base.  The code was using incorrectly the first
bit and the number of bits.  Fix this in cpu_physical_memory_get_dirty
and cpu_physical_memory_all_dirty.  This requires a few changes in the
iteration; change the code in cpu_physical_memory_set_dirty_range to
match.

Fixes: 5b82b70
Cc: Stefan Hajnoczi 
Signed-off-by: Paolo Bonzini 
---
 include/exec/ram_addr.h | 55 -
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index b1413a1..5d33def 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -121,6 +121,7 @@ static inline bool cpu_physical_memory_get_dirty(ram_addr_t 
start,
 {
 DirtyMemoryBlocks *blocks;
 unsigned long end, page;
+unsigned long idx, offset, base;
 bool dirty = false;
 
 assert(client < DIRTY_MEMORY_NUM);
@@ -132,17 +133,22 @@ static inline bool 
cpu_physical_memory_get_dirty(ram_addr_t start,
 
 blocks = atomic_rcu_read(_list.dirty_memory[client]);
 
+idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+base = page - offset;
 while (page < end) {
-unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
-
-if (find_next_bit(blocks->blocks[idx], offset, num) < num) {
+unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+unsigned long num = next - base;
+unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
+if (found < num) {
 dirty = true;
 break;
 }
 
-page += num;
+page = next;
+idx++;
+offset = 0;
+base += DIRTY_MEMORY_BLOCK_SIZE;
 }
 
 rcu_read_unlock();
@@ -156,6 +162,7 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t 
start,
 {
 DirtyMemoryBlocks *blocks;
 unsigned long end, page;
+unsigned long idx, offset, base;
 bool dirty = true;
 
 assert(client < DIRTY_MEMORY_NUM);
@@ -167,17 +174,22 @@ static inline bool 
cpu_physical_memory_all_dirty(ram_addr_t start,
 
 blocks = atomic_rcu_read(_list.dirty_memory[client]);
 
+idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+base = page - offset;
 while (page < end) {
-unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
-
-if (find_next_zero_bit(blocks->blocks[idx], offset, num) < num) {
+unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+unsigned long num = next - base;
+unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, 
offset);
+if (found < num) {
 dirty = false;
 break;
 }
 
-page += num;
+page = next;
+idx++;
+offset = 0;
+base += DIRTY_MEMORY_BLOCK_SIZE;
 }
 
 rcu_read_unlock();
@@ -248,6 +260,7 @@ static inline void 
cpu_physical_memory_set_dirty_range(ram_addr_t start,
 {
 DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
 unsigned long end, page;
+unsigned long idx, offset, base;
 int i;
 
 if (!mask && !xen_enabled()) {
@@ -263,25 +276,29 @@ static inline void 
cpu_physical_memory_set_dirty_range(ram_addr_t start,
 blocks[i] = atomic_rcu_read(_list.dirty_memory[i]);
 }
 
+idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+base = page - offset;
 while (page < end) {
-unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
 
 if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
 bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
-  offset, num);
+  offset, next - page);
 }
 if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
 bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
-  offset, num);
+  offset, next - page);
 }
 if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
 bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
-  offset, num);
+  offset, next - page);
 }
 
-page += num;
+page = next;
+idx++;
+offset = 0;
+base += 

Re: [Qemu-devel] [PATCH] target-arm: Implement DBGDTRRX_EL0/DBGDTRTX_EL0 MSR

2016-02-10 Thread Andreas Färber
Hi Andreas,

Am 09.02.2016 um 21:59 schrieb Dirk Müller:
> This is used by the ARM JTAG DCC console in the Linux kernel,
> but can be ignored in order to continue booting.
> 
> Co-Authored-By: Andreas Schwab 

If this was co-authored by you, we need a proper Signed-off-by please.

> Signed-off-by: Dirk Mueller 
> ---
>  target-arm/helper.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 954e6e8..abce416 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -3704,6 +3704,9 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
>  { .name = "DBGVCR",
>.cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
>.access = PL1_RW, .type = ARM_CP_NOP },
> +{ .name = "DBGDTRxX_EL0", .state = ARM_CP_STATE_BOTH,
> +  .cp = 14, .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0,
> +  .access = PL0_RW, .type = ARM_CP_NOP },
>  REGINFO_SENTINEL
>  };
>  

Otherwise this can have my Reviewed-by. The small x was suggested by me
since it's actually RX/TX depending on R/W.

Regards,
Andreas

-- 
SUSE Linux GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Felix Imendörffer, Jane Smithard, Graham Norton; HRB 21284 (AG Nürnberg)



Re: [Qemu-devel] [PATCH] memory: fix usage of find_next_bit and find_next_zero_bit

2016-02-10 Thread Stefan Hajnoczi
On Wed, Feb 10, 2016 at 03:11:45PM +0100, Paolo Bonzini wrote:
> The last two arguments to these functions are the last and first bit to
> check relative to the base.  The code was using incorrectly the first
> bit and the number of bits.  Fix this in cpu_physical_memory_get_dirty
> and cpu_physical_memory_all_dirty.  This requires a few changes in the
> iteration; change the code in cpu_physical_memory_set_dirty_range to
> match.
> 
> Fixes: 5b82b70
> Cc: Stefan Hajnoczi 
> Signed-off-by: Paolo Bonzini 
> ---
>  include/exec/ram_addr.h | 55 
> -
>  1 file changed, 36 insertions(+), 19 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v5 1/9] tcg: pass down TranslationBlock to tcg_code_gen

2016-02-10 Thread Alex Bennée

Richard Henderson  writes:

> On 02/05/2016 01:56 AM, Alex Bennée wrote:
>> diff --git a/translate-all.c b/translate-all.c
>> index ab61fac..dce00d5 100644
>> --- a/translate-all.c
>> +++ b/translate-all.c
>> @@ -1055,7 +1055,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>>   TranslationBlock *tb;
>>   tb_page_addr_t phys_pc, phys_page2;
>>   target_ulong virt_page2;
>> -tcg_insn_unit *gen_code_buf;
>>   int gen_code_size, search_size;
>>   #ifdef CONFIG_PROFILER
>>   int64_t ti;
>> @@ -1078,8 +1077,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>>   tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
>>   }
>>
>> -gen_code_buf = tcg_ctx.code_gen_ptr;
>> -tb->tc_ptr = gen_code_buf;
>> +tb->tc_ptr = tcg_ctx.code_gen_ptr;
>
> Why get rid of the gen_code_buf variable?  You're forcing the compiler to keep
> reloading the value from memory.
>
> Certainly that's not relevant to passing down TB to tcg_gen_code, and is a
> separate change that ought to be separately defended.

Fixed. Thanks.

>
>
> r~
>
>>   tb->cs_base = cs_base;
>>   tb->flags = flags;
>>   tb->cflags = cflags;
>> @@ -1119,11 +1117,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>>  the tcg optimization currently hidden inside tcg_gen_code.  All
>>  that should be required is to flush the TBs, allocate a new TB,
>>  re-initialize it per above, and re-do the actual code generation.  
>> */
>> -gen_code_size = tcg_gen_code(_ctx, gen_code_buf);
>> +gen_code_size = tcg_gen_code(_ctx, tb);
>>   if (unlikely(gen_code_size < 0)) {
>>   goto buffer_overflow;
>>   }
>> -search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
>> +search_size = encode_search(tb, (void *)tb->tc_ptr + gen_code_size);
>>   if (unlikely(search_size < 0)) {
>>   goto buffer_overflow;
>>   }
>> @@ -1145,7 +1143,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>>   #endif
>>
>>   tcg_ctx.code_gen_ptr = (void *)
>> -ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
>> +ROUND_UP((uintptr_t)tb->tc_ptr + gen_code_size + search_size,
>>CODE_GEN_ALIGN);
>>
>>   /* check next page if needed */
>>


--
Alex Bennée



Re: [Qemu-devel] lock-free monitor?

2016-02-10 Thread Dr. David Alan Gilbert
* Markus Armbruster (arm...@redhat.com) wrote:
> "Dr. David Alan Gilbert"  writes:
> 
> > * Markus Armbruster (arm...@redhat.com) wrote:
> >> "Dr. David Alan Gilbert"  writes:
> >> 
> >> > Hi,
> >> >   I wondered what it would take to be able to do a lock-free monitor;
> >> > i.e. one that could respond to (some) commands while the qemu big lock 
> >> > is held.
> >> 
> >> Requires a careful audit of the monitor code.
> >> 
> >> For instance, cur_mon needs to be made thread local for running multiple
> >> monitors concurrently.
> >
> > Hmm that's fun;  and cur_mon is used all over the place when 'fd' passing
> > is used - although that probably should be cleaned up somehow.
> 
> When cur_mon got created, we had no threads.  Even now, monitors only
> ever run under the BQL, so cur_mon's still fine.
> 
> Having a current monitor is simpler than passing one around, especially
> when you go through layers that don't know about it.  Such cases exist,
> and they made us abandon commit 376253e's hope to eliminate cur_mon.
> Unfortunately, I've since forgotten the details, so I can't point you to
> an example.

Yes,  I think maybe if we can try and remove the use of cur_mon one
use at a time outside of the monitor it might move it in the right direction.

> >> > My reason for asking is that there are cases in migration and colo
> >> > where a network block device has failed and is blocking, but it fails
> >> > at just the wrong time while the lock is held, and now, you don't have
> >> 
> >> Is it okay to block while holding the BQL?
> >
> > I'd hope the simple answer was no; unfortunately the more complex answer
> > is that we keep finding places that do.
> 
> Would you call these bugs?
> 
> Even if you do, we may want to recover from them.

They're a bug in the end result that needs fixing; so for example a 
crashing NBD server shouldn't lock you out of the monitor during a migrate,
and I don't think we current;y have other solutions.

> >  The cases I'm aware of are
> > mostly bdrv_drain_all calls in migration/colo, where one of the block
> > devices (e.g. an NBD network device) fails.  Generally these are called
> > at the end of a migration cycle when we're just ensuring that everything
> > really is drained and are normally called with the lock held to ensure
> > that nothing else is generating new traffic as we're clearing everything 
> > else.
> 
> Using the BQL for that drives a cork into the I/O pipeline with a Really
> Big Hammer.  Can we do better?
> 
> The answer may be "yes, but don't hold your breath."  Then we may need
> means to better cope with the bugs.

Yeh there are some places that the migraiton code holds the BQL where
I don't really understand all the things it's guarding against.

> >> > any way to unblock it since you can't do anything on the monitors.
> >> > If I could issue a command then I could have it end up doing a 
> >> > shutdown(2)
> >> > on the network connection and free things up.
> >> >
> >> > Maybe this would also help real-time people?
> >> >
> >> > I was thinking then, we could:
> >> >a) Have a monitor that wasn't tied to the main loop at all - each 
> >> > instance
> >> > would be it's own thread and have it's own poll() (probably using the new
> >> > IO channels?)
> >> >b) for most commands it would take the big lock before execute the 
> >> > command
> >> > and release it afterwards - so there would be no risk in those commands.
> >> 
> >> Saves you the auditing their code, which would probably be impractical.
> >> 
> >> >c) Some commands you could mark as 'lock free' - they would be 
> >> > required
> >> > not to take any locks or wait for *anything* and for those the monitor 
> >> > would
> >> > not take the lock.
> >> 
> >> They also must not access shared state without suitable synchronization.
> >
> > Right; I'd expect most of the cases to either be reading simple status 
> > information,
> > or having to find whatever they need to do using rcu list walks.
> >
> >> >d) Perhaps have some way of restricting a particular monitor session 
> >> > to only
> >> > allowing non-blocking commands.
> >> 
> >> Why?  To ensure you always have an emergency monitor available?
> >
> > Yes, mostly to stop screwups of putting a potentially blocking command down 
> > your
> > emergency route.
> 
> Understand.
> 
> >> >e) Then I'd have to have a way of finding the broken device in a 
> >> > lockfree
> >> > way (RTU list walk?) and issuing the shutdown(2).
> >> >
> >> > Bonus points to anyone who can statically check commands in (c).
> >> 
> >> Tall order.
> >
> > Yes :-)   A (compile time selected) dynamic check might be possible
> > where the normal global lock functions and rcu block functions check if 
> > they're
> > in a monitor thread and assert.
> >
> >> > Does this make sense to everyone else, or does anyone have any better
> >> > suggestions?
> >> 
> >> Sounds like a big, 

Re: [Qemu-devel] [PATCH] cirrus_vga: fix off-by-one in blit_region_is_unsafe

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 15:55, Laszlo Ersek wrote:
>> > Hmm, not sure why.  We're comparing against the inclusive-exclusive
>> > range [0,s->vga.vram_size).  The right way to check if something is
>> > within the range is >= min && < max; the right way to check if something
>> > is outside the range is < min || >= max.
> Absolutely: if the thing you are verifying against the interval is
> itself an inclusive thing, that is, a pixel or byte *drawn*. However,
> exactly as you stated in the commit message, for the maximum check, what
> we are comparing is the first offset *not* processed.

Right, what my patch does is setting min and max both to pixels that are
drawn.

Paolo



Re: [Qemu-devel] [PATCH v3 00/10] Allow hotplug of s390 CPUs

2016-02-10 Thread David Hildenbrand
> Changes from v2->v3:
> 
> * Call cpu_remove_sync rather than cpu_remove().
> * Pull latest version of patches from pseries set (v6).  Trivial change to 
>   "Reclaim VCPU objects" to fix checkpatch error.
> * Add object_unparent during s390_cpu_release to accomodate changes in 
>   Patch 4 "Reclaim VCPU objects."
> * Remove a cleanup patch in favor of 2 patches from pseries set.
> 
> **
> 
> The following patchset enables hotplug of s390 CPUs.
> 
> The standard interface is used -- to configure a guest with 2 CPUs online at 
> boot and 4 maximum:
> 
> qemu -smp 2,maxcpus=4
> 
> To subsequently hotplug a CPU:
> 
> Issue 'device_add s390-cpu,id=' from monitor.

(questions for the bigger audience)

For x86, cpu models are realized by making x86_64-cpu an abstract class and
creating loads of new classes, e.g. host-x86_64-cpu or haswell-x86_64-cpu.

How does 'device_add ' play together with the x86 cpu model
approach? And with cpu models specified via "-cpu" in general?

Or does that in return mean, that "making models own classes" is outdated? Or
will some internal conversion happen that I am missing?

What is the plan for cpu models and cpu hotplug? How are cpu models to be
defined in the future?

David




Re: [Qemu-devel] [PATCH v6 3/4] tcg: Add type for vCPU pointers

2016-02-10 Thread Lluís Vilanova
Lluís Vilanova writes:

> Richard Henderson writes:
>> On 02/10/2016 08:14 AM, Lluís Vilanova wrote:
>>> Adds the 'TCGv_env' type for pointers to 'CPUArchState' objects. The
>>> tracing infrastructure later needs to differentiate between regular
>>> pointers and pointers to vCPUs.
>>> 
>>> Also changes all targets to use the new 'TCGv_cpu' type instead of the
>>> generic 'TCGv_ptr'. As of now, the change is merely cosmetic ('TCGv_env'
>>> translates into 'TCGv_ptr'), but that could change in the future to
>>> enforce the difference.

>> I suppose.

>> We won't be able distinguish TCGv_env from TCGv_ptr until env can be
>> auto-converted to ptr.  Which I can't imagine happening without switching to
>> C++.

> It's difficult to differenciate between TCGv_ptr and TCGv_env in 
> "tcg/tcg-op.h"
> unless an explicit operation is added to perform casts or to get a TCGv_ptr 
> from
> a TCGv_env+offset (e.g., add a tcg_gen_env_ld8u_i32 built on top of
> tcg_gen_ld8u_i32). That is, unless QEMU switches to C++.

> But types could be easily enforced in helper declarations, which can 
> internally
> cast to the pointer type.
[...]


BTW, type overload can also be achieved in C using GCC's
__builtin_types_compatible_p and __builtin_choose_expr intrinsics:

  http://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html


Cheers,
  Lluis



Re: [Qemu-devel] [Qemu-stable] [PATCH] spapr: skip configuration section during migration of older machines

2016-02-10 Thread Greg Kurz
On Mon, 08 Feb 2016 16:59:47 +0100
Greg Kurz  wrote:

> Since QEMU 2.4, we have a configuration section in the migration stream.
> This must be skipped for older machines, like it is already done for x86.
> 
> Fixes: 61964c23e5ddd5a33f15699e45ce126f879e3e33
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Greg Kurz 
> ---

Heh this patch doesn't apply on 2.4.1 actually because the context
changed a bit... does it make sense to backport this patch to
stable-2.4 or should the issue be fixed downstream ?

Cc'ed stable release maintainer Michael Roth for inputs.

>  hw/ppc/spapr.c |1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 5bd8fd3ef842..bca7cb8a5d27 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2446,6 +2446,7 @@ static void 
> spapr_machine_2_3_instance_options(MachineState *machine)
>  spapr_machine_2_4_instance_options(machine);
>  savevm_skip_section_footers();
>  global_state_set_optional();
> +savevm_skip_configuration();
>  }
> 
>  static void spapr_machine_2_3_class_options(MachineClass *mc)
> 
> 




[Qemu-devel] KVH call for agenda for 2016-02-16

2016-02-10 Thread Juan Quintela


Hi

Please, send any topic that you are interested in covering.

At the end of Monday I will send an email with the agenda or the
cancellation of the call, so hurry up.

After discussions on the QEMU Summit, we are going to have always open a
KVM call where you can add topics.

 Call details:

By popular demand, a google calendar public entry with it

  
https://www.google.com/calendar/embed?src=dG9iMXRqcXAzN3Y4ZXZwNzRoMHE4a3BqcXNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ

(Let me know if you have any problems with the calendar entry.  I just
gave up about getting right at the same time CEST, CET, EDT and DST).

If you need phone number details,  contact me privately

Thanks, Juan.



Re: [Qemu-devel] [PATCH 2/2] hw/virtio: group virtio flags into an enum

2016-02-10 Thread Marcel Apfelbaum

On 02/10/2016 03:07 PM, Laurent Vivier wrote:



On 10/02/2016 13:22, Marcel Apfelbaum wrote:

Minimizes the possibility to assign
the same bit to different features.

Signed-off-by: Marcel Apfelbaum 
---
  hw/virtio/virtio-pci.h | 16 ++--
  1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 6686b10..817bcde 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -58,6 +58,16 @@ typedef struct VirtioBusClass VirtioPCIBusClass;
  #define VIRTIO_PCI_BUS_CLASS(klass) \
  OBJECT_CLASS_CHECK(VirtioPCIBusClass, klass, TYPE_VIRTIO_PCI_BUS)

+enum {
+VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT,
+VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT,
+VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT,
+VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT,
+VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT,
+VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
+VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
+};


You can keep the "#define", doing something like this after the enum:

#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT

So, if someone includes this ".h" out of qemu and wants to use some
"#ifdef" to see if the bit is defined, it always works.
(see for an example, /usr/include/linux/rtnetlink.h).


Thanks for the idea, but I don't think anyone would want to use those out
of qemu :)
I am also afraid that developers will forget to add it to the enum
and will still use the defines only.




+
  /* Need to activate work-arounds for buggy guests at vmstate load. */
  #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT  0


You should remove this one too.


How did I manage to miss it? Thanks!
I'll send a v2
Marcel




  #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \
@@ -65,23 +75,17 @@ typedef struct VirtioBusClass VirtioPCIBusClass;

  /* Performance improves when virtqueue kick processing is decoupled from the
   * vcpu thread using ioeventfd for some devices. */
-#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1
  #define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << 
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)

  /* virtio version flags */
-#define VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT 2
-#define VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT 3
-#define VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT 6
  #define VIRTIO_PCI_FLAG_DISABLE_LEGACY (1 << 
VIRTIO_PCI_FLAG_DISABLE_LEGACY_BIT)
  #define VIRTIO_PCI_FLAG_DISABLE_MODERN (1 << 
VIRTIO_PCI_FLAG_DISABLE_MODERN_BIT)
  #define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)

  /* migrate extra state */
-#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT 4
  #define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT)

  /* have pio notification for modern device ? */
-#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT 5
  #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \
  (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT)










Re: [Qemu-devel] [PATCH 2/6] qmp: add query-block-dirty-bitmap-ranges

2016-02-10 Thread Denis V. Lunev

On 02/10/2016 01:08 PM, Stefan Hajnoczi wrote:

On Sat, Jan 30, 2016 at 01:56:30PM +0300, Vladimir Sementsov-Ogievskiy wrote:

Add qmp command to query dirty bitmap contents. This is needed for
external backup.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/dirty-bitmap.c | 55 +++
  blockdev.c   | 62 
  include/block/dirty-bitmap.h |  7 +
  qapi/block-core.json | 54 ++
  qmp-commands.hx  | 33 +++
  5 files changed, 211 insertions(+)

This API produces large replies and/or requires many calls to fetch all
bitmap data.  The worst case is a 101010... bitmap.

I consider the dirty bitmap to be data (vs control) and not something
that should be sent over a control channel like the QMP monitor.

How about writing the dirty bitmap to a file?  The new bitmap file
format that Fam is working on could be used.  That way the dirty bitmap
can be saved asynchronously without hogging the QMP monitor.

Reasonable point.

May be it would be better to setup "special" NBD server inside
QEMU which will allow to directly "read" bitmap data.

Any opinion?

Den



Re: [Qemu-devel] [PATCH] target-i386/kvm.c: Fix the order of FPU registers in xsave

2016-02-10 Thread Asia Slowinska
Many thanks for the reply. I'm sorry for the typo in the previous patch.
Below comes a new one.

Best regards,
asia

Stick to the right order of the FPU registers in xsave (as specified in the
Intel manual.)  Otherwise, instructions loading the FPU state don't set it
up
correctly.

To set up FPU, software needs to provide a buffer of 80 bytes storing 8 FPU
registers. They are organized in a stack. FPU assumes that the first field
of
the buffer is ST0, then ST1, and so on. QEMU maintains a circular buffer.
When
preparing these 80 bytes for KVM, QEMU just uses memcpy instead of copying
the
elements in a proper order.

Signed-off-by: Asia Slowinska 
---
 target-i386/kvm.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 94024bc..a9e6c75 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1325,8 +1325,10 @@ static int kvm_put_xsave(X86CPU *cpu)
 xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
 memcpy(>region[XSAVE_CWD_RIP], >fpip, sizeof(env->fpip));
 memcpy(>region[XSAVE_CWD_RDP], >fpdp, sizeof(env->fpdp));
-memcpy(>region[XSAVE_ST_SPACE], env->fpregs,
-sizeof env->fpregs);
+for (i = 0; i < 8; i++) {
+memcpy(_region[XSAVE_ST_SPACE + i * 4],
+>fpregs[(env->fpstt + i) & 7], 16);
+}
 xsave->region[XSAVE_MXCSR] = env->mxcsr;
 *(uint64_t *)>region[XSAVE_XSTATE_BV] = env->xstate_bv;
 memcpy(>region[XSAVE_BNDREGS], env->bnd_regs,
@@ -1745,8 +1747,10 @@ static int kvm_get_xsave(X86CPU *cpu)
 memcpy(>fpip, >region[XSAVE_CWD_RIP], sizeof(env->fpip));
 memcpy(>fpdp, >region[XSAVE_CWD_RDP], sizeof(env->fpdp));
 env->mxcsr = xsave->region[XSAVE_MXCSR];
-memcpy(env->fpregs, >region[XSAVE_ST_SPACE],
-sizeof env->fpregs);
+for (i = 0; i < 8; i++) {
+memcpy(>fpregs[(env->fpstt + i) & 7],
+_region[XSAVE_ST_SPACE + i * 4], 16);
+}
 env->xstate_bv = *(uint64_t *)>region[XSAVE_XSTATE_BV];
 memcpy(env->bnd_regs, >region[XSAVE_BNDREGS],
 sizeof env->bnd_regs);
-- 
1.9.1



On Wed, Feb 10, 2016 at 3:17 PM, Paolo Bonzini  wrote:

>
>
> On 10/02/2016 12:02, Asia Slowinska wrote:
> > Stick to the expected order of the FPU registers in xsave (as specified
> > in the
> > Intel manual.) Otherwise, instructions loading the FPU state don't set
> > it up
> > correctly.
> >
> > To set up FPU, software needs to provide a buffer of 80 bytes
> > storing 8 FPU registers. They are organized in a stack. FPU assumes that
> the
> > first field of the buffer is ST0, then ST1, and so on. QEMU maintains a
> > circular buffer. When preparing these 80 bytes for KVM, QEMU just uses
> > memcpy
> > instead of copying the elements in a proper order.
> >
> > Signed-off-by: Asia Slowinska >
> > ---
> >  target-i386/kvm.c | 12 
> >  1 file changed, 8 insertions(+), 4 deletions(-)
> >
> > diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> > index 94024bc..c77fe73 100644
> > --- a/target-i386/kvm.c
> > +++ b/target-i386/kvm.c
> > @@ -1325,8 +1325,10 @@ static int kvm_put_xsave(X86CPU *cpu)
> >  xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
> >  memcpy(>region[XSAVE_CWD_RIP], >fpip,
> sizeof(env->fpip));
> >  memcpy(>region[XSAVE_CWD_RDP], >fpdp,
> sizeof(env->fpdp));
> > -memcpy(>region[XSAVE_ST_SPACE], env->fpregs,
> > -sizeof env->fpregs);
> > +for (i = 0; i < 8; i++) {
> > +memcpy(_region[HXSAVE_ST_SPACE + i * 4],
> > +>fpregs[(env->fpstt + i) & 7], 16);
> > +}
> >  xsave->region[XSAVE_MXCSR] = env->mxcsr;
> >  *(uint64_t *)>region[XSAVE_XSTATE_BV] = env->xstate_bv;
> >  memcpy(>region[XSAVE_BNDREGS], env->bnd_regs,
> > @@ -1745,8 +1747,10 @@ static int kvm_get_xsave(X86CPU *cpu)
> >  memcpy(>fpip, >region[XSAVE_CWD_RIP],
> sizeof(env->fpip));
> >  memcpy(>fpdp, >region[XSAVE_CWD_RDP],
> sizeof(env->fpdp));
> >  env->mxcsr = xsave->region[XSAVE_MXCSR];
> > -memcpy(env->fpregs, >region[XSAVE_ST_SPACE],
> > -sizeof env->fpregs);
> > +for (i = 0; i < 8; i++) {
> > +memcpy(>fpregs[(env->fpstt + i) & 7],
> > +_region[HXSAVE_ST_SPACE + i * 4], 16);
>
> Hi, the patch is missing a definition of HXSAVE_ST_SPACE.
>
> Thanks,
>
> Paolo
>
> > +}
> >  env->xstate_bv = *(uint64_t *)>region[XSAVE_XSTATE_BV];
> >  memcpy(env->bnd_regs, >region[XSAVE_BNDREGS],
> >  sizeof env->bnd_regs);
> > --
> > 1.9.1
> >
> >
>


Re: [Qemu-devel] [PATCH] cirrus_vga: fix off-by-one in blit_region_is_unsafe

2016-02-10 Thread Laszlo Ersek
On 02/10/16 13:32, Paolo Bonzini wrote:
> 
> 
> On 09/02/2016 20:08, Laszlo Ersek wrote:
>> On 02/09/16 11:59, Paolo Bonzini wrote:
>>> The "max" value is being compared with >=, but addr + width points to
>>> the first byte that will _not_ be copied.  Subtract one like it is
>>> already done above for the height.
>>>
>>> Cc: Gerd Hoffmann 
>>> Signed-off-by: Paolo Bonzini 
>>> ---
>>>  hw/display/cirrus_vga.c | 4 ++--
>>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
>>> index b6ce1c8..e7939d2 100644
>>> --- a/hw/display/cirrus_vga.c
>>> +++ b/hw/display/cirrus_vga.c
>>> @@ -275,14 +275,14 @@ static bool blit_region_is_unsafe(struct 
>>> CirrusVGAState *s,
>>>  int64_t min = addr
>>>  + ((int64_t)s->cirrus_blt_height-1) * pitch;
>>>  int32_t max = addr
>>> -+ s->cirrus_blt_width;
>>> ++ s->cirrus_blt_width-1;
>>>  if (min < 0 || max >= s->vga.vram_size) {
>>>  return true;
>>>  }
>>>  } else {
>>>  int64_t max = addr
>>>  + ((int64_t)s->cirrus_blt_height-1) * pitch
>>> -+ s->cirrus_blt_width;
>>> ++ s->cirrus_blt_width-1;
>>>  if (max >= s->vga.vram_size) {
>>>  return true;
>>>  }
>>>
>>
>> (a) I reported this issue in an internal discussion @RH, more than a
>> year ago. Please refer to Message-Id: <54b7a2d7.5010...@redhat.com>,
>> points (2) and (5).
>>
>> (b) I think the commit message should be clearer about the fact that
>> this is not a security problem -- the off-by-one errs on the side of
>> safety (i.e., the behavior is too strict, not too lax).
>>
>> (c) The patch is mathematically correct, but I'd feel safer if, rather
>> than decrementing max, it would replace the
>>
>>   max >= s->vga.vram_size
>>
>> comparisons with
>>
>>   max > s->vga.vram_size
> 
> Hmm, not sure why.  We're comparing against the inclusive-exclusive
> range [0,s->vga.vram_size).  The right way to check if something is
> within the range is >= min && < max; the right way to check if something
> is outside the range is < min || >= max.

Absolutely: if the thing you are verifying against the interval is
itself an inclusive thing, that is, a pixel or byte *drawn*. However,
exactly as you stated in the commit message, for the maximum check, what
we are comparing is the first offset *not* processed.

As I said, my suggestion doesn't change the meaning of your patch at
all, it only reformulates it.

Consider, pre-patch we have the following condition for rejection (max
check only):

  T := addr + s->cirrus_blt_width

  reject if (T >= s->vga.vram_size)[0]

This is overprotective, because (T == s->vga.vram_size) should be
accepted. (Because, as you wrote, both T and s->vga.vram_size are
exclusive.) Therefore the right condition is:

  reject if (T > s->vga.vram_size) [1]

This is equivalent to:

  reject if (T >= s->vga.vram_size + 1)[2]

Which is further equivalent to

  reject if (T - 1 >= s->vga.vram_size)[3]

Your patch encodes [3], by setting the "max" variable to (T-1), and
keeping the >= relop.

My suggestion is to preserve "max" set to T, and encode [1]: replace the
>= relop with >.

Mathematically they are equivalent, but in C, I feel [1] is safer
(without putting in the work to see that [3] is safe as well.)

Thanks
Laszlo



Re: [Qemu-devel] [PATCH 0/8] ipmi: a couple of enhancements to the BMC simulator (round 2)

2016-02-10 Thread Cédric Le Goater
On 02/10/2016 05:06 PM, Corey Minyard wrote:
> On 02/10/2016 08:05 AM, Cédric Le Goater wrote:
>> Hello Corey,
>>
>> On 02/09/2016 07:25 PM, Corey Minyard wrote:
>>> On 02/09/2016 06:13 AM, Cédric Le Goater wrote:
 The first patches are cleanups and prepare ground for an extension of
 the BMC simulator providing a SDR loader using a file. A simple FRU
 support comes next.

 The last patches introduce APIs to populate a guest device tree with
 the available sensors and to generate events, which is needed by
 platforms in some occasions.

>>> I have reviewed all of these, and they look good.  I only have one
>>> comment: The naming of the properties probably needs to be
>>> fixed.
>>>
>>> "sdr" should be "sdrfile" to be consistent with everything else.
>> yes. I agree. I am glad you took a look.
>>
>>> Technically, a "FRU" is a piece of hardware that can be replaced
>>> in the field, "FRU data" is the data describing that FRU, and a "FRU
>>> area" is the memory used to store FRU data.  I know this is mixed
>>> up a lot (and I have done so) but some people are picky about this.
>>>
>>> With the renaming of sdr (fru is your option):
>> I will rename the "sdr" property to "sdrfile".
>>
>> As for FRU, you would rather have the code use FruData than Fru ?
>> Something like:
>>
>> typedef struct IPMIFruData {
>> char *filename;
>> unsigned int nentries;
>> uint16_t size;
>> uint8_t *area;
>> } IPMIFruData;
>>
>> The code using the IPMIFruData structure would look like :
>>
>>  uint8_t *fru_area;
>>
>>  ...
>>  fru_area = >frudata.area[fruid * ibs->frudata.size];
>>  ...
>>
>> Changing all the names is not a problem. Let's get them right.
>>
>> And, so, the properties would become :
>>
>>  DEFINE_PROP_UINT16("frudatasize", IPMIBmcSim, frudata.size, 1024),
> 
> I would name this "fruareasize" to be clear that it is not the size of the 
> "frudatafile".
> 
> Other than that, I'm happy with those names.
>
> -corey


ok. I will only change the names of the properties and add Documentation
for them in v2

Thanks,

C.  

 
>>  DEFINE_PROP_STRING("frudatafile", IPMIBmcSim, frudata.filename),
>>  DEFINE_PROP_STRING("sdrfile", IPMIBmcSim, sdr_filename),
>>
>>> Acked-by: Corey Minyard 
>>>
>>> for all patches.
>>>
>>> Oh, and I assume you need to add documentation for the
>>> properties to qemu-options.hx.
>> Yes. Forgot that.
>>
>> Thanks,
>>
>> C.
>>
>>> -corey
>>>
 Based on e4a096b1cd43 and also available here  :

 https://github.com/legoater/qemu/commits/ipmi

 Thanks,

 C.

 Cédric Le Goater (8):
 ipmi: add a realize function to the device class
 ipmi: use a function to initialize the SDR table
 ipmi: remove the need of an ending record in the SDR table
 ipmi: add some local variables in ipmi_sdr_init
 ipmi: use a file to load SDRs
 ipmi: provide support for FRUs
 ipmi: introduce an ipmi_bmc_sdr_find() API
 ipmi: introduce an ipmi_bmc_gen_event() API

hw/ipmi/ipmi_bmc_sim.c | 256 
 +++--
include/hw/ipmi/ipmi.h |   4 +
2 files changed, 233 insertions(+), 27 deletions(-)

> 




Re: [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT

2016-02-10 Thread John Snow


On 02/09/2016 01:48 PM, Michael S. Tsirkin wrote:
> On Tue, Feb 09, 2016 at 07:36:12PM +0100, Laszlo Ersek wrote:
>> On 02/09/16 17:22, John Snow wrote:
>>>
>>>
>>> On 02/09/2016 10:52 AM, Roman Kagan wrote:
 On Mon, Feb 08, 2016 at 03:20:47PM -0500, John Snow wrote:
> On 02/08/2016 08:14 AM, Roman Kagan wrote:
>> On Fri, Feb 05, 2016 at 07:25:07PM +0100, Igor Mammedov wrote:
 +aml_append(fdi,
 +aml_int(cylinders - 1));  /* Maximum Cylinder Number */
>>> this puts uint64_t(-1) in AML i.e. cylinders == 0 and overflow happens 
>>> here
>>>
>>> CCing Jon
>>
>> I guess this is the effect of John's fdc rework.  I used to think zero
>> geometry was impossible at the time this patch was developed.
>>
>> I wonder if it hasn't been fixed already by
>>
>>   commit fd9bdbd3459e5b9d51534f0747049bc5b6145e07
>>   Author: John Snow 
>>   Date:   Wed Feb 3 11:28:55 2016 -0500
>>
>>   fdc: fix detection under Linux
>
> Yes, hopefully solved on my end. The geometry values for an empty disk
> are not well defined (they certainly don't have any *meaning*) so if you
> are populating tables based on an empty drive, I just hope you also have
> the mechanisms needed to update said tables when the media changes.

 I don't.  At the time the patch was developed there basically were no
 mechanisms to update the geometry at all (and this was what you patchset
 addressed, in particular, wasn't it?) so I didn't care.

>>>
>>> That's not true.
>>>
>>> You could swap different 1.44MB-class diskettes for other geometries,
>>> check this out:
>>>
>>> static const FDFormat fd_formats[] = {
>>> /* First entry is default format */
>>> /* 1.44 MB 3"1/2 floppy disks */
>>> { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
>>> { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
>>> { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
>>> { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
>>> { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
>>> { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
>>> { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
>>> { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
>>> ...
>>>
>>> You absolutely could get different sector and track counts before my
>>> patchset.
>>>
>>>
 Now if it actually has to be fully dynamic it's gonna be more
 involved...

> What do the guests use these values for? Are they fixed at boot?

 Only Windows guests use it so it's hard to tell.  I can only claim that
 if I stick bogus values into that ACPI object the guest fails to read
 the floppy.
>>
>> We discussed this with John a bit on IRC.
>>
>> In my opinion, the real mess in this case is in the ACPI spec itself. If
>> you re-read the _FDI control method's description, the Package that it
>> returns contains *dynamic* geometry data, about the *disk* (not *drive*):
>>
>> - Maximum Cylinder Number // Integer (WORD)
>> - Maximum Sector Number   // Integer (WORD)
>> - Maximum Head Number // Integer (WORD)
>>
>> What this seems to require is: the firmware developer should write ACPI
>> code that
>> - dynamically accesses the floppy drive / controller (using MMIO or IO
>>   port registers),
>> - retrieves the geometry of the disk actually inserted,
>> - and returns the data nicely packaged.
>>
>> In effect, an ACPI-level driver for the disk.
>>
>> Now, John explained to me (and please keep in mind that this is my
>> personal account of his explanation, not a factual rendering thereof),
>> that there used to be no *standard* way to interrogate the current
>> disk's geometry, other than trial and error with seeking.
>>

At the very least, the Intel 82078 does not appear to be capable of
replying to any query with a CHS triplet. It *can* report back the total
number of sectors and the size of each sector, but that still requires
geometry guesswork outside of the drive.

>> Apparently in UEFI Windows, Microsoft didn't want to implement this
>> trial and error seeking, so -- given there was also no portable
>> *hardware spec* to retrieve the same data, directly from the disk drive
>> or controller -- they punted the entire question to ACPI. That is, to
>> the firmware implementor.
>>
>> This is entirely bogus. For one, it ties the platform firmware (the UEFI
>> binary in the flash chip on your motherboard) to your specific floppy
>> drive / controller. Say good-bye to any independently bought & installed
>> floppy drives.
>>
>> In theory, a floppy controller that comes with a separate PCI card could
>> offer an option ROM with a UEFI driver in it, and that UEFI driver could
>> install a separate SSDT with the hardware-matching _FDI in it. Still an
>> unreasonable requirement, given that the *only* way Windows can be
>> installed unattended (with external device drivers) is to provide those
>> drivers on a floppy. 

Re: [Qemu-devel] [PATCH v3 00/10] Allow hotplug of s390 CPUs

2016-02-10 Thread Andreas Färber
Am 10.02.2016 um 16:28 schrieb David Hildenbrand:
> For x86, cpu models are realized by making x86_64-cpu an abstract class and
> creating loads of new classes, e.g. host-x86_64-cpu or haswell-x86_64-cpu.
> 
> How does 'device_add ' play together with the x86 cpu model
> approach? And with cpu models specified via "-cpu" in general?

device_add needs to use an instantiatable type, like the ones you sketch
above.

> Or does that in return mean, that "making models own classes" is outdated? Or
> will some internal conversion happen that I am missing?
> 
> What is the plan for cpu models and cpu hotplug? How are cpu models to be
> defined in the future?

Someone at IBM was working on defining models for s390x - not sure what
the status is?

Regards,
Andreas

-- 
SUSE Linux GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Felix Imendörffer, Jane Smithard, Graham Norton; HRB 21284 (AG Nürnberg)



Re: [Qemu-devel] [PULL 02/32] memory: RCU ram_list.dirty_memory[] for safe RAM hotplug

2016-02-10 Thread Leon Alrae
Hi,

I just noticed significant performance hit with this change. Booting
small system (I tried on system mips only) was usually taking around 20
seconds, now reaches 3 minutes with this change.

Leon

On 09/02/16 12:13, Paolo Bonzini wrote:
> From: Stefan Hajnoczi 
> 
> Although accesses to ram_list.dirty_memory[] use atomics so multiple
> threads can safely dirty the bitmap, the data structure is not fully
> thread-safe yet.
> 
> This patch handles the RAM hotplug case where ram_list.dirty_memory[] is
> grown.  ram_list.dirty_memory[] is change from a regular bitmap to an
> RCU array of pointers to fixed-size bitmap blocks.  Threads can continue
> accessing bitmap blocks while the array is being extended.  See the
> comments in the code for an in-depth explanation of struct
> DirtyMemoryBlocks.
> 
> I have tested that live migration with virtio-blk dataplane works.
> 
> Signed-off-by: Stefan Hajnoczi 
> Message-Id: <1453728801-5398-2-git-send-email-stefa...@redhat.com>
> Signed-off-by: Paolo Bonzini 
> ---
>  exec.c  |  75 +++
>  include/exec/ram_addr.h | 189 
> ++--
>  migration/ram.c |   4 -
>  3 files changed, 225 insertions(+), 43 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index ab37360..7d67c11 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -980,8 +980,9 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t 
> start,
>ram_addr_t length,
>unsigned client)
>  {
> +DirtyMemoryBlocks *blocks;
>  unsigned long end, page;
> -bool dirty;
> +bool dirty = false;
>  
>  if (length == 0) {
>  return false;
> @@ -989,8 +990,22 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t 
> start,
>  
>  end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
>  page = start >> TARGET_PAGE_BITS;
> -dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
> - page, end - page);
> +
> +rcu_read_lock();
> +
> +blocks = atomic_rcu_read(_list.dirty_memory[client]);
> +
> +while (page < end) {
> +unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
> +unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
> +unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - 
> offset);
> +
> +dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
> +  offset, num);
> +page += num;
> +}
> +
> +rcu_read_unlock();
>  
>  if (dirty && tcg_enabled()) {
>  tlb_reset_dirty_range_all(start, length);
> @@ -1504,6 +1519,47 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t 
> newsize, Error **errp)
>  return 0;
>  }
>  
> +/* Called with ram_list.mutex held */
> +static void dirty_memory_extend(ram_addr_t old_ram_size,
> +ram_addr_t new_ram_size)
> +{
> +ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
> + DIRTY_MEMORY_BLOCK_SIZE);
> +ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
> + DIRTY_MEMORY_BLOCK_SIZE);
> +int i;
> +
> +/* Only need to extend if block count increased */
> +if (new_num_blocks <= old_num_blocks) {
> +return;
> +}
> +
> +for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
> +DirtyMemoryBlocks *old_blocks;
> +DirtyMemoryBlocks *new_blocks;
> +int j;
> +
> +old_blocks = atomic_rcu_read(_list.dirty_memory[i]);
> +new_blocks = g_malloc(sizeof(*new_blocks) +
> +  sizeof(new_blocks->blocks[0]) * 
> new_num_blocks);
> +
> +if (old_num_blocks) {
> +memcpy(new_blocks->blocks, old_blocks->blocks,
> +   old_num_blocks * sizeof(old_blocks->blocks[0]));
> +}
> +
> +for (j = old_num_blocks; j < new_num_blocks; j++) {
> +new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> +}
> +
> +atomic_rcu_set(_list.dirty_memory[i], new_blocks);
> +
> +if (old_blocks) {
> +g_free_rcu(old_blocks, rcu);
> +}
> +}
> +}
> +
>  static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
>  {
>  RAMBlock *block;
> @@ -1543,6 +1599,7 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, 
> Error **errp)
>(new_block->offset + new_block->max_length) >> 
> TARGET_PAGE_BITS);
>  if (new_ram_size > old_ram_size) {
>  migration_bitmap_extend(old_ram_size, new_ram_size);
> +dirty_memory_extend(old_ram_size, new_ram_size);
>  }
>  /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
>   * QLIST (which has an RCU-friendly variant) does not have insertion at
> @@ -1568,18 

Re: [Qemu-devel] [PATCH 07/15] tcg-mips: Adjust qemu_ld/st for mips64

2016-02-10 Thread Richard Henderson

On 02/11/2016 03:34 AM, James Hogan wrote:

Hi Richard,

On Tue, Feb 09, 2016 at 09:39:55PM +1100, Richard Henderson wrote:

@@ -1212,11 +1237,24 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg 
base, TCGReg addrl,
 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
  int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);

-tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl,
-   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0,
-(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
+if (use_mips32r2_instructions) {
+if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
+tcg_out_opc_bf(s, OPC_EXT, TCG_REG_A0, addrl,
+   TARGET_PAGE_BITS + CPU_TLB_ENTRY_BITS - 1,
+   CPU_TLB_ENTRY_BITS);
+} else {
+tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU,
+ TCG_REG_A0, addrl,
+ TARGET_PAGE_BITS + CPU_TLB_ENTRY_BITS - 1,
+ CPU_TLB_ENTRY_BITS);
+}


The ext/dext here will end up with bits below bit CPU_TLB_ENTRY_BITS
set, which will result in load of addend from slightly offset address,
so things go badly wrong. You still need to either ANDI off the low bits
or trim them off with the ext/dext and shift it left again.

So I don't think there's any benefit to the use of these instructions
unless CPU_TLB_SIZE + CPU_TLB_ENTRY_BITS exceeds the 16-bits available
in the ANDI immediate field for the non r2 case.


Hmm.  I thought I'd deleted this code back out.  I must have messed up copying 
trees between machines and overwritten this.



r~



Re: [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT

2016-02-10 Thread Michael S. Tsirkin
On Wed, Feb 10, 2016 at 11:14:30AM -0500, John Snow wrote:
> 
> 
> On 02/09/2016 01:48 PM, Michael S. Tsirkin wrote:
> > On Tue, Feb 09, 2016 at 07:36:12PM +0100, Laszlo Ersek wrote:
> >> On 02/09/16 17:22, John Snow wrote:
> >>>
> >>>
> >>> On 02/09/2016 10:52 AM, Roman Kagan wrote:
>  On Mon, Feb 08, 2016 at 03:20:47PM -0500, John Snow wrote:
> > On 02/08/2016 08:14 AM, Roman Kagan wrote:
> >> On Fri, Feb 05, 2016 at 07:25:07PM +0100, Igor Mammedov wrote:
>  +aml_append(fdi,
>  +aml_int(cylinders - 1));  /* Maximum Cylinder Number */
> >>> this puts uint64_t(-1) in AML i.e. cylinders == 0 and overflow 
> >>> happens here
> >>>
> >>> CCing Jon
> >>
> >> I guess this is the effect of John's fdc rework.  I used to think zero
> >> geometry was impossible at the time this patch was developed.
> >>
> >> I wonder if it hasn't been fixed already by
> >>
> >>   commit fd9bdbd3459e5b9d51534f0747049bc5b6145e07
> >>   Author: John Snow 
> >>   Date:   Wed Feb 3 11:28:55 2016 -0500
> >>
> >>   fdc: fix detection under Linux
> >
> > Yes, hopefully solved on my end. The geometry values for an empty disk
> > are not well defined (they certainly don't have any *meaning*) so if you
> > are populating tables based on an empty drive, I just hope you also have
> > the mechanisms needed to update said tables when the media changes.
> 
>  I don't.  At the time the patch was developed there basically were no
>  mechanisms to update the geometry at all (and this was what you patchset
>  addressed, in particular, wasn't it?) so I didn't care.
> 
> >>>
> >>> That's not true.
> >>>
> >>> You could swap different 1.44MB-class diskettes for other geometries,
> >>> check this out:
> >>>
> >>> static const FDFormat fd_formats[] = {
> >>> /* First entry is default format */
> >>> /* 1.44 MB 3"1/2 floppy disks */
> >>> { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
> >>> { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
> >>> { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
> >>> { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
> >>> { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
> >>> { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
> >>> { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
> >>> { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
> >>> ...
> >>>
> >>> You absolutely could get different sector and track counts before my
> >>> patchset.
> >>>
> >>>
>  Now if it actually has to be fully dynamic it's gonna be more
>  involved...
> 
> > What do the guests use these values for? Are they fixed at boot?
> 
>  Only Windows guests use it so it's hard to tell.  I can only claim that
>  if I stick bogus values into that ACPI object the guest fails to read
>  the floppy.
> >>
> >> We discussed this with John a bit on IRC.
> >>
> >> In my opinion, the real mess in this case is in the ACPI spec itself. If
> >> you re-read the _FDI control method's description, the Package that it
> >> returns contains *dynamic* geometry data, about the *disk* (not *drive*):
> >>
> >> - Maximum Cylinder Number // Integer (WORD)
> >> - Maximum Sector Number   // Integer (WORD)
> >> - Maximum Head Number // Integer (WORD)
> >>
> >> What this seems to require is: the firmware developer should write ACPI
> >> code that
> >> - dynamically accesses the floppy drive / controller (using MMIO or IO
> >>   port registers),
> >> - retrieves the geometry of the disk actually inserted,
> >> - and returns the data nicely packaged.
> >>
> >> In effect, an ACPI-level driver for the disk.
> >>
> >> Now, John explained to me (and please keep in mind that this is my
> >> personal account of his explanation, not a factual rendering thereof),
> >> that there used to be no *standard* way to interrogate the current
> >> disk's geometry, other than trial and error with seeking.
> >>
> 
> At the very least, the Intel 82078 does not appear to be capable of
> replying to any query with a CHS triplet. It *can* report back the total
> number of sectors and the size of each sector, but that still requires
> geometry guesswork outside of the drive.
> 
> >> Apparently in UEFI Windows, Microsoft didn't want to implement this
> >> trial and error seeking, so -- given there was also no portable
> >> *hardware spec* to retrieve the same data, directly from the disk drive
> >> or controller -- they punted the entire question to ACPI. That is, to
> >> the firmware implementor.
> >>
> >> This is entirely bogus. For one, it ties the platform firmware (the UEFI
> >> binary in the flash chip on your motherboard) to your specific floppy
> >> drive / controller. Say good-bye to any independently bought & installed
> >> floppy drives.
> >>
> >> In theory, a floppy controller that comes with a separate PCI card could
> >> offer an option ROM with a UEFI 

Re: [Qemu-devel] [PATCH] qemu-ga: Fixed minor version switch issue

2016-02-10 Thread Michael Roth
Quoting Leonid Bloch (2016-01-11 03:12:41)
> With automatically generated GUID, on minor version changes, an error
> occurred, stating that there is a problem with the installer.
> Now, a notification is shown, warning the user that another version of
> this product is already installed, and that configuration or removal of
> the existing version is possible through Add/Remove Programs on the
> Control Panel (expected behavior).
> 
> Signed-off-by: Leonid Bloch 

Reviewed-by: Michael Roth 

> ---
>  qga/installer/qemu-ga.wxs | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/qga/installer/qemu-ga.wxs b/qga/installer/qemu-ga.wxs
> index 9473875..7f92891 100644
> --- a/qga/installer/qemu-ga.wxs
> +++ b/qga/installer/qemu-ga.wxs
> @@ -41,7 +41,7 @@
> 
>  Name="QEMU guest agent"
> -Id="*"
> +Id="{DF9974AD-E41A-4304-81AD-69AA8F299766}"
>  UpgradeCode="{EB6B8302-C06E-4BEC-ADAC-932C68A3A98D}"
>  Manufacturer="$(env.QEMU_GA_MANUFACTURER)"
>  Version="$(env.QEMU_GA_VERSION)"
> -- 
> 2.4.3
> 




Re: [Qemu-devel] [PATCH 3/3] qemu-iotests: 140: use virtio-scsi instead of IDE

2016-02-10 Thread Max Reitz
On 09.02.2016 14:15, Sascha Silbe wrote:
> IDE is only implemented by very few architectures (mostly PC). Use
> virtio-scsi instead so the test works on all architectures that
> support virtio. In particular, this fixes qemu-iotests on s390x.
> 
> Fixes: 16dee418 ("iotests: Add test for eject under NBD server")
> Signed-off-by: Sascha Silbe 
> ---
>  tests/qemu-iotests/140 | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140
> index f78c317..0c448e6 100755
> --- a/tests/qemu-iotests/140
> +++ b/tests/qemu-iotests/140
> @@ -49,8 +49,8 @@ _make_test_img 64k
>  $QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
>  
>  keep_stderr=y \
> -_launch_qemu -drive 
> if=ide,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
> -2> >(_filter_nbd)
> +_launch_qemu -drive 
> if=none,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
> +-device virtio-scsi -device scsi-cd,drive=drv 2> >(_filter_nbd)

Why not just omit the device (and the media=cdrom along with it, keeping
if=none)? This will change the reference output because there is no
longer any tray to be moved, but this will still test what it's supposed to.

(This may sound hypocritical coming from me, because I wrote this test
so I could have just done so in the first place; I guess I just didn't
realize that 'eject' works on device-less drives, too.)

Max

>  _send_qemu_cmd $QEMU_HANDLE \
>  "{ 'execute': 'qmp_capabilities' }" \
> 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH] cirrus_vga: fix off-by-one in blit_region_is_unsafe

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 16:54, Laszlo Ersek wrote:
> On 02/10/16 16:29, Paolo Bonzini wrote:
>>
>>
>> On 10/02/2016 15:55, Laszlo Ersek wrote:
> Hmm, not sure why.  We're comparing against the inclusive-exclusive
> range [0,s->vga.vram_size).  The right way to check if something is
> within the range is >= min && < max; the right way to check if something
> is outside the range is < min || >= max.
>>> Absolutely: if the thing you are verifying against the interval is
>>> itself an inclusive thing, that is, a pixel or byte *drawn*. However,
>>> exactly as you stated in the commit message, for the maximum check, what
>>> we are comparing is the first offset *not* processed.
>>
>> Right, what my patch does is setting min and max both to pixels that are
>> drawn.
> 
> Do you understand my concern with that? It's okay if you dismiss my
> concern (or even better if you prove it is unfounded). But I hope you at
> least understand it.

No, I didn't.  Now I do, and you're right.

> When you set "max" to the last pixel that is drawn, you are calculating
> a new quantity in C that was not calculated before. By subtracting 1,
> you could theoretically turn "max" into a negative number.

Gotcha.

> Have you checked and excluded this possibility, or proved why it doesn't
> matter?

It doesn't matter because width == 0 is handled properly later on; it
does not do anything, including not loading and not storing anything. So
the check is pointless anyway in that case.

But as I said: you're right and I will proceed to send v2.  Your reason
for preferring a > check makes sense.

An alternative possibility is to make max uint64_t, and ensure that all
the addends are properly sign extended.  I mention it just for the sake
of completeness. :)

> When I reviewed the underlying CVE fix (downstream), I spent hours on
> tracking down all possibilities, with Gerd's help. With your patch, that
> argument goes out the window, *for me*. I don't mind -- in particular
> because it *could* be easy to prove your patch is safe --, but I can't
> tell if it's going to be an easy proof without actually trying. And I'm
> not going to try, now.
> 
> Changing the relop would be mathematically equivalent, and keep my
> earlier argument intact.
> 
> Anyway, you don't need my personal R-b for this.

I was interested in your reasoning, I just couldn't get it.

Paolo



[Qemu-devel] [PATCH v2 4/4] rng: add request queue support to rng-random

2016-02-10 Thread Ladi Prosek
Requests are now created in the RngBackend parent class and the
code path is shared by both rng-egd and rng-random.

This commit fixes the rng-random implementation which currently
processes only one request at a time and simply discards all
but the most recent one. In the guest this manifests as delayed
completion of reads from virtio-rng, i.e. a read is completed
only after another read is issued.

Signed-off-by: Ladi Prosek 
---
 backends/rng-egd.c| 16 ++--
 backends/rng-random.c | 43 +++
 backends/rng.c| 13 -
 include/sysemu/rng.h  |  3 +--
 4 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index 8f2bd16..30332ed 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -27,20 +27,10 @@ typedef struct RngEgd
 char *chr_name;
 } RngEgd;
 
-static void rng_egd_request_entropy(RngBackend *b, size_t size,
-EntropyReceiveFunc *receive_entropy,
-void *opaque)
+static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)
 {
 RngEgd *s = RNG_EGD(b);
-RngRequest *req;
-
-req = g_malloc(sizeof(*req));
-
-req->offset = 0;
-req->size = size;
-req->receive_entropy = receive_entropy;
-req->opaque = opaque;
-req->data = g_malloc(req->size);
+size_t size = req->size;
 
 while (size > 0) {
 uint8_t header[2];
@@ -54,8 +44,6 @@ static void rng_egd_request_entropy(RngBackend *b, size_t 
size,
 
 size -= len;
 }
-
-s->parent.requests = g_slist_append(s->parent.requests, req);
 }
 
 static int rng_egd_chr_can_read(void *opaque)
diff --git a/backends/rng-random.c b/backends/rng-random.c
index 8cdad6a..a6cb385 100644
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -22,10 +22,6 @@ struct RndRandom
 
 int fd;
 char *filename;
-
-EntropyReceiveFunc *receive_func;
-void *opaque;
-size_t size;
 };
 
 /**
@@ -38,36 +34,35 @@ struct RndRandom
 static void entropy_available(void *opaque)
 {
 RndRandom *s = RNG_RANDOM(opaque);
-uint8_t buffer[s->size];
-ssize_t len;
 
-len = read(s->fd, buffer, s->size);
-if (len < 0 && errno == EAGAIN) {
-return;
+while (s->parent.requests != NULL) {
+RngRequest *req = s->parent.requests->data;
+ssize_t len;
+
+len = read(s->fd, req->data, req->size);
+if (len < 0 && errno == EAGAIN) {
+return;
+}
+g_assert(len != -1);
+
+req->receive_entropy(req->opaque, req->data, len);
+
+rng_backend_finalize_request(>parent, req);
 }
-g_assert(len != -1);
-
-s->receive_func(s->opaque, buffer, len);
-s->receive_func = NULL;
 
+/* We've drained all requests, the fd handler can be reset. */
 qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
 }
 
-static void rng_random_request_entropy(RngBackend *b, size_t size,
-EntropyReceiveFunc *receive_entropy,
-void *opaque)
+static void rng_random_request_entropy(RngBackend *b, RngRequest *req)
 {
 RndRandom *s = RNG_RANDOM(b);
 
-if (s->receive_func) {
-s->receive_func(s->opaque, NULL, 0);
+if (s->parent.requests == NULL) {
+/* If there are no pending requests yet, we need to
+ * install our fd handler. */
+qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
 }
-
-s->receive_func = receive_entropy;
-s->opaque = opaque;
-s->size = size;
-
-qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
 }
 
 static void rng_random_opened(RngBackend *b, Error **errp)
diff --git a/backends/rng.c b/backends/rng.c
index 014cb9d..277a41b 100644
--- a/backends/rng.c
+++ b/backends/rng.c
@@ -20,9 +20,20 @@ void rng_backend_request_entropy(RngBackend *s, size_t size,
  void *opaque)
 {
 RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+RngRequest *req;
 
 if (k->request_entropy) {
-k->request_entropy(s, size, receive_entropy, opaque);
+req = g_malloc(sizeof(*req));
+
+req->offset = 0;
+req->size = size;
+req->receive_entropy = receive_entropy;
+req->opaque = opaque;
+req->data = g_malloc(req->size);
+
+k->request_entropy(s, req);
+
+s->requests = g_slist_append(s->requests, req);
 }
 }
 
diff --git a/include/sysemu/rng.h b/include/sysemu/rng.h
index c2c9035..a7ed580 100644
--- a/include/sysemu/rng.h
+++ b/include/sysemu/rng.h
@@ -46,8 +46,7 @@ struct RngBackendClass
 {
 ObjectClass parent_class;
 
-void (*request_entropy)(RngBackend *s, size_t size,
-EntropyReceiveFunc *receive_entropy, void *opaque);
+void (*request_entropy)(RngBackend *s, RngRequest *req);
 
 void (*opened)(RngBackend *s, Error **errp);
 };
-- 
2.5.0




[Qemu-devel] [PATCH v2 3/4] rng: move request queue cleanup from RngEgd to RngBackend

2016-02-10 Thread Ladi Prosek
RngBackend is now in charge of cleaning up the linked list on
instance finalization. It also exposes a function to finalize
individual RngRequest instances, called by its child classes.

Signed-off-by: Ladi Prosek 
---
 backends/rng-egd.c   | 25 +
 backends/rng.c   | 32 
 include/sysemu/rng.h | 12 
 3 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index b061362..8f2bd16 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -58,12 +58,6 @@ static void rng_egd_request_entropy(RngBackend *b, size_t 
size,
 s->parent.requests = g_slist_append(s->parent.requests, req);
 }
 
-static void rng_egd_free_request(RngRequest *req)
-{
-g_free(req->data);
-g_free(req);
-}
-
 static int rng_egd_chr_can_read(void *opaque)
 {
 RngEgd *s = RNG_EGD(opaque);
@@ -93,28 +87,13 @@ static void rng_egd_chr_read(void *opaque, const uint8_t 
*buf, int size)
 size -= len;
 
 if (req->offset == req->size) {
-s->parent.requests = g_slist_remove_link(s->parent.requests,
- s->parent.requests);
-
 req->receive_entropy(req->opaque, req->data, req->size);
 
-rng_egd_free_request(req);
+rng_backend_finalize_request(>parent, req);
 }
 }
 }
 
-static void rng_egd_free_requests(RngEgd *s)
-{
-GSList *i;
-
-for (i = s->parent.requests; i; i = i->next) {
-rng_egd_free_request(i->data);
-}
-
-g_slist_free(s->parent.requests);
-s->parent.requests = NULL;
-}
-
 static void rng_egd_opened(RngBackend *b, Error **errp)
 {
 RngEgd *s = RNG_EGD(b);
@@ -183,8 +162,6 @@ static void rng_egd_finalize(Object *obj)
 }
 
 g_free(s->chr_name);
-
-rng_egd_free_requests(s);
 }
 
 static void rng_egd_class_init(ObjectClass *klass, void *data)
diff --git a/backends/rng.c b/backends/rng.c
index 2f2f3ee..014cb9d 100644
--- a/backends/rng.c
+++ b/backends/rng.c
@@ -64,6 +64,30 @@ static void rng_backend_prop_set_opened(Object *obj, bool 
value, Error **errp)
 s->opened = true;
 }
 
+static void rng_backend_free_request(RngRequest *req)
+{
+g_free(req->data);
+g_free(req);
+}
+
+static void rng_backend_free_requests(RngBackend *s)
+{
+GSList *i;
+
+for (i = s->requests; i; i = i->next) {
+rng_backend_free_request(i->data);
+}
+
+g_slist_free(s->requests);
+s->requests = NULL;
+}
+
+void rng_backend_finalize_request(RngBackend *s, RngRequest *req)
+{
+s->requests = g_slist_remove(s->requests, req);
+rng_backend_free_request(req);
+}
+
 static void rng_backend_init(Object *obj)
 {
 object_property_add_bool(obj, "opened",
@@ -72,6 +96,13 @@ static void rng_backend_init(Object *obj)
  NULL);
 }
 
+static void rng_backend_finalize(Object *obj)
+{
+RngBackend *s = RNG_BACKEND(obj);
+
+rng_backend_free_requests(s);
+}
+
 static void rng_backend_class_init(ObjectClass *oc, void *data)
 {
 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
@@ -84,6 +115,7 @@ static const TypeInfo rng_backend_info = {
 .parent = TYPE_OBJECT,
 .instance_size = sizeof(RngBackend),
 .instance_init = rng_backend_init,
+.instance_finalize = rng_backend_finalize,
 .class_size = sizeof(RngBackendClass),
 .class_init = rng_backend_class_init,
 .abstract = true,
diff --git a/include/sysemu/rng.h b/include/sysemu/rng.h
index 084164c..c2c9035 100644
--- a/include/sysemu/rng.h
+++ b/include/sysemu/rng.h
@@ -61,6 +61,7 @@ struct RngBackend
 GSList *requests;
 };
 
+
 /**
  * rng_backend_request_entropy:
  * @s: the backend to request entropy from
@@ -79,4 +80,15 @@ struct RngBackend
 void rng_backend_request_entropy(RngBackend *s, size_t size,
  EntropyReceiveFunc *receive_entropy,
  void *opaque);
+
+/**
+ * rng_backend_free_request:
+ * @s: the backend that created the request
+ * @req: the request to finalize
+ *
+ * Used by child rng backend classes to finalize requests once they've been
+ * processed. The request is removed from the list of active requests and
+ * deleted.
+ */
+void rng_backend_finalize_request(RngBackend *s, RngRequest *req);
 #endif
-- 
2.5.0




[Qemu-devel] [PATCH v2] cirrus_vga: fix off-by-one in blit_region_is_unsafe

2016-02-10 Thread Paolo Bonzini
The "max" value is being compared with >=, but addr + width points to
the first byte that will _not_ be copied.  Laszlo suggested using a
"greater than" comparison, instead of subtracting one like it is
already done above for the height, so that max remains always positive.

The mistake is "safe"---it will reject some blits, but will never cause
out-of-bounds writes.

Cc: Gerd Hoffmann 
Signed-off-by: Paolo Bonzini 
---
 hw/display/cirrus_vga.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index b6ce1c8..57b91a7 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -276,14 +276,14 @@ static bool blit_region_is_unsafe(struct CirrusVGAState 
*s,
 + ((int64_t)s->cirrus_blt_height-1) * pitch;
 int32_t max = addr
 + s->cirrus_blt_width;
-if (min < 0 || max >= s->vga.vram_size) {
+if (min < 0 || max > s->vga.vram_size) {
 return true;
 }
 } else {
 int64_t max = addr
 + ((int64_t)s->cirrus_blt_height-1) * pitch
 + s->cirrus_blt_width;
-if (max >= s->vga.vram_size) {
+if (max > s->vga.vram_size) {
 return true;
 }
 }
-- 
2.5.0




Re: [Qemu-devel] [PATCH v2] cirrus_vga: fix off-by-one in blit_region_is_unsafe

2016-02-10 Thread Laszlo Ersek
On 02/10/16 17:17, Paolo Bonzini wrote:
> The "max" value is being compared with >=, but addr + width points to
> the first byte that will _not_ be copied.  Laszlo suggested using a
> "greater than" comparison, instead of subtracting one like it is
> already done above for the height, so that max remains always positive.
> 
> The mistake is "safe"---it will reject some blits, but will never cause
> out-of-bounds writes.
> 
> Cc: Gerd Hoffmann 
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/display/cirrus_vga.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
> index b6ce1c8..57b91a7 100644
> --- a/hw/display/cirrus_vga.c
> +++ b/hw/display/cirrus_vga.c
> @@ -276,14 +276,14 @@ static bool blit_region_is_unsafe(struct CirrusVGAState 
> *s,
>  + ((int64_t)s->cirrus_blt_height-1) * pitch;
>  int32_t max = addr
>  + s->cirrus_blt_width;
> -if (min < 0 || max >= s->vga.vram_size) {
> +if (min < 0 || max > s->vga.vram_size) {
>  return true;
>  }
>  } else {
>  int64_t max = addr
>  + ((int64_t)s->cirrus_blt_height-1) * pitch
>  + s->cirrus_blt_width;
> -if (max >= s->vga.vram_size) {
> +if (max > s->vga.vram_size) {
>  return true;
>  }
>  }
> 

Thank you for your patience. :)

Reviewed-by: Laszlo Ersek 



Re: [Qemu-devel] [PULL 02/32] memory: RCU ram_list.dirty_memory[] for safe RAM hotplug

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 17:56, Leon Alrae wrote:
> Hi,
> 
> I just noticed significant performance hit with this change. Booting
> small system (I tried on system mips only) was usually taking around 20
> seconds, now reaches 3 minutes with this change.

You're lucky that it booted at all. :)  Unfortunately I noticed a slow
boot yesterday (6 minutes for Fedora 21 on TCG), but I blamed that I was
running on top of TCG _and_ NFS.  In retrospect that was not too smart.

Today I tried FreeDOS and it hangs completely, but my self-nak and
Peter's push unfortunately crossed.

I've posted a fix and Stefan has already reviewed it.  If you can test
it, that would be great.  The above Fedora 21 image takes about 50
seconds to boot with the patch, so that's in line with the x9 slowdown
you reported.

http://article.gmane.org/gmane.comp.emulators.qemu/393105/raw

Paolo

> Leon
> 
> On 09/02/16 12:13, Paolo Bonzini wrote:
>> From: Stefan Hajnoczi 
>>
>> Although accesses to ram_list.dirty_memory[] use atomics so multiple
>> threads can safely dirty the bitmap, the data structure is not fully
>> thread-safe yet.
>>
>> This patch handles the RAM hotplug case where ram_list.dirty_memory[] is
>> grown.  ram_list.dirty_memory[] is change from a regular bitmap to an
>> RCU array of pointers to fixed-size bitmap blocks.  Threads can continue
>> accessing bitmap blocks while the array is being extended.  See the
>> comments in the code for an in-depth explanation of struct
>> DirtyMemoryBlocks.
>>
>> I have tested that live migration with virtio-blk dataplane works.
>>
>> Signed-off-by: Stefan Hajnoczi 
>> Message-Id: <1453728801-5398-2-git-send-email-stefa...@redhat.com>
>> Signed-off-by: Paolo Bonzini 
>> ---
>>  exec.c  |  75 +++
>>  include/exec/ram_addr.h | 189 
>> ++--
>>  migration/ram.c |   4 -
>>  3 files changed, 225 insertions(+), 43 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index ab37360..7d67c11 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -980,8 +980,9 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t 
>> start,
>>ram_addr_t length,
>>unsigned client)
>>  {
>> +DirtyMemoryBlocks *blocks;
>>  unsigned long end, page;
>> -bool dirty;
>> +bool dirty = false;
>>  
>>  if (length == 0) {
>>  return false;
>> @@ -989,8 +990,22 @@ bool 
>> cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
>>  
>>  end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
>>  page = start >> TARGET_PAGE_BITS;
>> -dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
>> - page, end - page);
>> +
>> +rcu_read_lock();
>> +
>> +blocks = atomic_rcu_read(_list.dirty_memory[client]);
>> +
>> +while (page < end) {
>> +unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
>> +unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
>> +unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - 
>> offset);
>> +
>> +dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
>> +  offset, num);
>> +page += num;
>> +}
>> +
>> +rcu_read_unlock();
>>  
>>  if (dirty && tcg_enabled()) {
>>  tlb_reset_dirty_range_all(start, length);
>> @@ -1504,6 +1519,47 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t 
>> newsize, Error **errp)
>>  return 0;
>>  }
>>  
>> +/* Called with ram_list.mutex held */
>> +static void dirty_memory_extend(ram_addr_t old_ram_size,
>> +ram_addr_t new_ram_size)
>> +{
>> +ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
>> + DIRTY_MEMORY_BLOCK_SIZE);
>> +ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
>> + DIRTY_MEMORY_BLOCK_SIZE);
>> +int i;
>> +
>> +/* Only need to extend if block count increased */
>> +if (new_num_blocks <= old_num_blocks) {
>> +return;
>> +}
>> +
>> +for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
>> +DirtyMemoryBlocks *old_blocks;
>> +DirtyMemoryBlocks *new_blocks;
>> +int j;
>> +
>> +old_blocks = atomic_rcu_read(_list.dirty_memory[i]);
>> +new_blocks = g_malloc(sizeof(*new_blocks) +
>> +  sizeof(new_blocks->blocks[0]) * 
>> new_num_blocks);
>> +
>> +if (old_num_blocks) {
>> +memcpy(new_blocks->blocks, old_blocks->blocks,
>> +   old_num_blocks * sizeof(old_blocks->blocks[0]));
>> +}
>> +
>> +for (j = old_num_blocks; j < new_num_blocks; j++) {
>> +new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>> +}
>> +
>> +

Re: [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT

2016-02-10 Thread John Snow


On 02/10/2016 12:10 PM, Roman Kagan wrote:
> On Tue, Feb 09, 2016 at 07:36:12PM +0100, Laszlo Ersek wrote:
>> In my opinion, the real mess in this case is in the ACPI spec itself. If
>> you re-read the _FDI control method's description, the Package that it
>> returns contains *dynamic* geometry data, about the *disk* (not *drive*):
>>
>> - Maximum Cylinder Number // Integer (WORD)
>> - Maximum Sector Number   // Integer (WORD)
>> - Maximum Head Number // Integer (WORD)
>>
>> What this seems to require is: the firmware developer should write ACPI
>> code that
>> - dynamically accesses the floppy drive / controller (using MMIO or IO
>>   port registers),
>> - retrieves the geometry of the disk actually inserted,
>> - and returns the data nicely packaged.
>>
>> In effect, an ACPI-level driver for the disk.
>>
>> Now, John explained to me (and please keep in mind that this is my
>> personal account of his explanation, not a factual rendering thereof),
>> that there used to be no *standard* way to interrogate the current
>> disk's geometry, other than trial and error with seeking.
>>
>> Apparently in UEFI Windows, Microsoft didn't want to implement this
>> trial and error seeking, so -- given there was also no portable
>> *hardware spec* to retrieve the same data, directly from the disk drive
>> or controller -- they punted the entire question to ACPI. That is, to
>> the firmware implementor.
> 
> Well, as I wrote in another mail, SeaBIOS, which is supposed to provide
> the same information to int 0x13/0x08, populates it with static data
> based only on the drive type as encoded in CMOS at initialization time,
> and everyone seem to have been fine with that so far.  I'll need to
> re-test it with real Windows guests, though.
> 

OK, but what we appear to be doing currently is polling the current
geometry values for a drive instead of some pre-chosen ones based on the
drive type.

What values does SeaBIOS use? (Can you point me to the table it uses?)

We could just duplicate those, probably.

>> IMHO, do the *absolute minimum* to adapt this AML generation patch to
>> John's FDC rework, and ignore all dynamic aspects (like media change).
> 
> Hopefully that'll suffice.
> 
> Roman.
> 



Re: [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT

2016-02-10 Thread Roman Kagan
On Wed, Feb 10, 2016 at 12:16:32PM -0500, John Snow wrote:
> On 02/10/2016 12:10 PM, Roman Kagan wrote:
> > Well, as I wrote in another mail, SeaBIOS, which is supposed to provide
> > the same information to int 0x13/0x08, populates it with static data
> > based only on the drive type as encoded in CMOS at initialization time,
> > and everyone seem to have been fine with that so far.  I'll need to
> > re-test it with real Windows guests, though.
> > 
> 
> OK, but what we appear to be doing currently is polling the current
> geometry values for a drive instead of some pre-chosen ones based on the
> drive type.
> 
> What values does SeaBIOS use? (Can you point me to the table it uses?)

src/hw/floppy.c:

struct floppyinfo_s FloppyInfo[] VARFSEG = {
// Unknown
{ {0, 0, 0}, 0x00, 0x00},
// 1 - 360KB, 5.25" - 2 heads, 40 tracks, 9 sectors
{ {2, 40, 9}, FLOPPY_SIZE_525, FLOPPY_RATE_300K},
// 2 - 1.2MB, 5.25" - 2 heads, 80 tracks, 15 sectors
{ {2, 80, 15}, FLOPPY_SIZE_525, FLOPPY_RATE_500K},
// 3 - 720KB, 3.5"  - 2 heads, 80 tracks, 9 sectors
{ {2, 80, 9}, FLOPPY_SIZE_350, FLOPPY_RATE_250K},
// 4 - 1.44MB, 3.5" - 2 heads, 80 tracks, 18 sectors
{ {2, 80, 18}, FLOPPY_SIZE_350, FLOPPY_RATE_500K},
// 5 - 2.88MB, 3.5" - 2 heads, 80 tracks, 36 sectors
{ {2, 80, 36}, FLOPPY_SIZE_350, FLOPPY_RATE_1M},
// 6 - 160k, 5.25"  - 1 heads, 40 tracks, 8 sectors
{ {1, 40, 8}, FLOPPY_SIZE_525, FLOPPY_RATE_250K},
// 7 - 180k, 5.25"  - 1 heads, 40 tracks, 9 sectors
{ {1, 40, 9}, FLOPPY_SIZE_525, FLOPPY_RATE_300K},
// 8 - 320k, 5.25"  - 2 heads, 40 tracks, 8 sectors
{ {2, 40, 8}, FLOPPY_SIZE_525, FLOPPY_RATE_250K},
};

The array is indexed by the floppy drive type from CMOS

Roman.



Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: add _filter_qmp_events() for filtering out QMP events

2016-02-10 Thread Max Reitz
On 09.02.2016 14:23, Sascha Silbe wrote:
> The order of some QMP events may depend on the architecture being
> tested. Add support for filtering out QMP events so we can use a
> single reference output for all architecture when the test doesn't
> care about the events.
> 
> Signed-off-by: Sascha Silbe 
> ---
>  tests/qemu-iotests/common.filter | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/tests/qemu-iotests/common.filter 
> b/tests/qemu-iotests/common.filter
> index 84b7434..b908aa2 100644
> --- a/tests/qemu-iotests/common.filter
> +++ b/tests/qemu-iotests/common.filter
> @@ -178,6 +178,12 @@ _filter_qmp()
>  -e 'QMP_VERSION'
>  }
>  
> +# remove QMP events from output
> +_filter_qmp_events()
> +{
> +sed -e '/^{\(.*, \)"event": ".*}$/ d'
> +}

There is a pretty good reason test 067 uses -qmp-pretty (as you yourself
say, the lines get pretty long otherwise, and if we have any change
within, the whole line needs to be changed). Using the following ugly
piece of code here instead, we would still be able to use it:

tr '\n' '\t' \
  | sed -e
's/{\s*"timestamp":\s*{[^}]*},\s*"event":[^,}]*\(,\s*"data":\s*{[^}]*}\)\?\s*}\s*//g'
\
  | tr '\t' '\n'

(I'm too lazy for multi-line sed, obviously; and this will break if the
data contains any JSON objects in turn.)

The correct way would be to actually parse the JSON (using perl, python
or whatever) and remove all the top-level objects containing an "event"
key, obviously... But that's probably too much.

I'm not strictly against just dropping -qmp-pretty in 067, but there is
a good reason it's there.

Max

> +
>  # replace driver-specific options in the "Formatting..." line
>  _filter_img_create()
>  {
> 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT

2016-02-10 Thread Roman Kagan
On Tue, Feb 09, 2016 at 11:22:01AM -0500, John Snow wrote:
> > I don't.  At the time the patch was developed there basically were no
> > mechanisms to update the geometry at all (and this was what you patchset
> > addressed, in particular, wasn't it?) so I didn't care.
> 
> That's not true.
> 
> You could swap different 1.44MB-class diskettes for other geometries,
> check this out:
> 
> static const FDFormat fd_formats[] = {
> /* First entry is default format */
> /* 1.44 MB 3"1/2 floppy disks */
> { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
> { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
> { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
> { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
> { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
> { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
> { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
> { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
> ...
> 
> You absolutely could get different sector and track counts before my
> patchset.

Indeed (sorry the patch was developed a couple of months ago so I had to
look at the code to refresh my memory).

However, I tried to implement the part of ACPI spec that read

> 9.9.2 _FDI (Floppy Disk Information)
> 
> This object returns information about a floppy disk drive. This
> information is the same as that returned by the INT 13 Function 08H on
> IA-PCs.

so I went ahead and looked into what SeaBIOS did for int 0x13/0x08.  And
what it did was read the CMOS at 0x10 and obtain the drive type, and
then return a hardcoded set of parameters (including geometry)
associated to that drive type.  So this was what I basically did here,
too.  (As a matter of fact the first patch I submitted was just pure ASL
which mimicked exactly the SeaBIOS behavior: read the CMOS and return
the corresponding Package with parameters.)

And IIRC the drive type couldn't change at runtime so I thought I wasn't
doing worse than it was.

As for what to do now, I'll try to check how tolerant the guests are of
changing the floppy geometry under them without updating _FDI, and then
decide.

Roman.



Re: [Qemu-devel] [PATCH] memory: fix usage of find_next_bit and find_next_zero_bit

2016-02-10 Thread Thomas Huth
On 10.02.2016 15:11, Paolo Bonzini wrote:
> The last two arguments to these functions are the last and first bit to
> check relative to the base.  The code was using incorrectly the first
> bit and the number of bits.  Fix this in cpu_physical_memory_get_dirty
> and cpu_physical_memory_all_dirty.  This requires a few changes in the
> iteration; change the code in cpu_physical_memory_set_dirty_range to
> match.
> 
> Fixes: 5b82b70
> Cc: Stefan Hajnoczi 
> Signed-off-by: Paolo Bonzini 

That commit 5b82b70 also broke the pseries machine on qemu-ppc64:

- 8< --
$ ppc64-softmmu/qemu-system-ppc64 -net none -nographic -vga none 


SLOF **
QEMU Starting
 Build Date = Nov  5 2015 15:23:31
 FW Version = git-b4c93802a5b2c72f
 Press "s" to enter Open Firmware.



SLOF **
QEMU Starting
 Build Date = Nov  5 2015 15:23:31
 FW Version = git-b4c93802a5b2c72f
ERROR: Flatten device tree not available!
 Press "s" to enter Open Firmware.

  !!! roomfs lookup(bootinfo) = 1
Cannot find romfs file xvect
  !!! roomfs lookup(bootinfo) = 1
ERROR: Not enough memory for Open Firmware
qemu: fatal: Trying to execute code outside RAM or ROM at 0xffbf
- 8< --

With your patch here applied, SLOF boots fine again, so:

Tested-by: Thomas Huth 




Re: [Qemu-devel] [PATCH v2 5/5] kvm/x86: Hyper-V VMBus hypercall userspace exit

2016-02-10 Thread Paolo Bonzini


On 21/01/2016 15:01, Andrey Smetanin wrote:
> The patch implements KVM_EXIT_HYPERV userspace exit
> functionality for Hyper-V VMBus hypercalls:
> HV_X64_HCALL_POST_MESSAGE, HV_X64_HCALL_SIGNAL_EVENT.
> 
> Changes v2:
> * use KVM_EXIT_HYPERV for hypercalls
> 
> Signed-off-by: Andrey Smetanin 
> Reviewed-by: Roman Kagan 
> CC: Gleb Natapov 
> CC: Paolo Bonzini 
> CC: Joerg Roedel 
> CC: "K. Y. Srinivasan" 
> CC: Haiyang Zhang 
> CC: Roman Kagan 
> CC: Denis V. Lunev 
> CC: qemu-devel@nongnu.org
> ---
>  Documentation/virtual/kvm/api.txt |  6 ++
>  arch/x86/kvm/hyperv.c | 29 ++---
>  arch/x86/kvm/hyperv.h |  1 +
>  arch/x86/kvm/x86.c|  5 +
>  include/uapi/linux/kvm.h  |  6 ++
>  5 files changed, 40 insertions(+), 7 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 053f613..1bf1a07 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3339,6 +3339,7 @@ EOI was received.
>  
>   struct kvm_hyperv_exit {
>  #define KVM_EXIT_HYPERV_SYNIC  1
> +#define KVM_EXIT_HYPERV_HCALL  2
>   __u32 type;
>   union {
>   struct {
> @@ -3347,6 +3348,11 @@ EOI was received.
>   __u64 evt_page;
>   __u64 msg_page;
>   } synic;
> + struct {
> + __u64 input;
> + __u64 result;
> + __u64 params[2];
> + } hcall;
>   } u;
>   };
>   /* KVM_EXIT_HYPERV */
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index e1daa8b..26ae973 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1093,6 +1093,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>   case HV_X64_HCALL_NOTIFY_LONG_SPIN_WAIT:
>   kvm_vcpu_on_spin(vcpu);
>   break;
> + case HV_X64_HCALL_POST_MESSAGE:
> + case HV_X64_HCALL_SIGNAL_EVENT:
> + vcpu->run->exit_reason = KVM_EXIT_HYPERV;
> + vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
> + vcpu->run->hyperv.u.hcall.input = param;
> + vcpu->run->hyperv.u.hcall.params[0] = ingpa;
> + vcpu->run->hyperv.u.hcall.params[1] = outgpa;
> + return 0;
>   default:
>   res = HV_STATUS_INVALID_HYPERCALL_CODE;
>   break;
> @@ -1100,12 +1108,19 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  
>  set_result:
>   ret = res | (((u64)rep_done & 0xfff) << 32);
> - if (longmode) {
> - kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
> - } else {
> - kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
> - kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0x);
> - }
> -
> + kvm_hv_hypercall_set_result(vcpu, ret);
>   return 1;
>  }
> +
> +void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
> +{
> + bool longmode;
> +
> + longmode = is_64_bit_mode(vcpu);
> + if (longmode)
> + kvm_register_write(vcpu, VCPU_REGS_RAX, result);
> + else {
> + kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
> + kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0x);
> + }
> +}
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..64a4a3b 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -52,6 +52,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
> u64 *pdata);
>  
>  bool kvm_hv_hypercall_enabled(struct kvm *kvm);
>  int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
> +void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result);
>  
>  void kvm_hv_irq_routing_update(struct kvm *kvm);
>  int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f53f5b1..e5c842b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6891,6 +6891,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> struct kvm_run *kvm_run)
>   } else
>   WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
>  
> + if (unlikely(kvm_run->exit_reason == KVM_EXIT_HYPERV) &&
> + kvm_run->hyperv.type == KVM_EXIT_HYPERV_HCALL)
> + kvm_hv_hypercall_set_result(vcpu,
> + kvm_run->hyperv.u.hcall.result);

Can you use vcpu->arch.complete_userspace_io here instead?

Otherwise looks great, thanks.

Paolo

> +
>   r = 

Re: [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT

2016-02-10 Thread Roman Kagan
On Tue, Feb 09, 2016 at 07:36:12PM +0100, Laszlo Ersek wrote:
> In my opinion, the real mess in this case is in the ACPI spec itself. If
> you re-read the _FDI control method's description, the Package that it
> returns contains *dynamic* geometry data, about the *disk* (not *drive*):
> 
> - Maximum Cylinder Number // Integer (WORD)
> - Maximum Sector Number   // Integer (WORD)
> - Maximum Head Number // Integer (WORD)
> 
> What this seems to require is: the firmware developer should write ACPI
> code that
> - dynamically accesses the floppy drive / controller (using MMIO or IO
>   port registers),
> - retrieves the geometry of the disk actually inserted,
> - and returns the data nicely packaged.
> 
> In effect, an ACPI-level driver for the disk.
> 
> Now, John explained to me (and please keep in mind that this is my
> personal account of his explanation, not a factual rendering thereof),
> that there used to be no *standard* way to interrogate the current
> disk's geometry, other than trial and error with seeking.
> 
> Apparently in UEFI Windows, Microsoft didn't want to implement this
> trial and error seeking, so -- given there was also no portable
> *hardware spec* to retrieve the same data, directly from the disk drive
> or controller -- they punted the entire question to ACPI. That is, to
> the firmware implementor.

Well, as I wrote in another mail, SeaBIOS, which is supposed to provide
the same information to int 0x13/0x08, populates it with static data
based only on the drive type as encoded in CMOS at initialization time,
and everyone seem to have been fine with that so far.  I'll need to
re-test it with real Windows guests, though.

> IMHO, do the *absolute minimum* to adapt this AML generation patch to
> John's FDC rework, and ignore all dynamic aspects (like media change).

Hopefully that'll suffice.

Roman.



Re: [Qemu-devel] [PATCH] memory: fix usage of find_next_bit and find_next_zero_bit

2016-02-10 Thread Leon Alrae
On 10/02/16 14:11, Paolo Bonzini wrote:
> The last two arguments to these functions are the last and first bit to
> check relative to the base.  The code was using incorrectly the first
> bit and the number of bits.  Fix this in cpu_physical_memory_get_dirty
> and cpu_physical_memory_all_dirty.  This requires a few changes in the
> iteration; change the code in cpu_physical_memory_set_dirty_range to
> match.
> 
> Fixes: 5b82b70
> Cc: Stefan Hajnoczi 
> Signed-off-by: Paolo Bonzini 
> ---
>  include/exec/ram_addr.h | 55 
> -
>  1 file changed, 36 insertions(+), 19 deletions(-)

It fixes the performance problem I was seeing:

Tested-by: Leon Alrae 

Thanks,
Leon




Re: [Qemu-devel] [PATCH v6 0/5] add ACPI node for fw_cfg on pc and arm

2016-02-10 Thread Gabriel L. Somlo
Ping.

Now that the kernel side seems to have been accepted (Thanks again
Laszlo and Matt for all the help and advice!!!), is there anything
left to clean up before this series could be applied to QEMU ?

gmane.org quick links:
1/5: http://article.gmane.org/gmane.comp.emulators.qemu/389896/raw
2/5: http://article.gmane.org/gmane.comp.emulators.qemu/389894/raw
3/5: http://article.gmane.org/gmane.comp.emulators.qemu/389895/raw
4/5: http://article.gmane.org/gmane.comp.emulators.qemu/389900/raw
5/5: http://article.gmane.org/gmane.comp.emulators.qemu/389898/raw

Thanks much,
--Gabriel


On Wed, Jan 27, 2016 at 10:00:52PM -0500, Gabriel L. Somlo wrote:
> New since v5:
> 
>   - rebased on top of latest QEMU git master
> 
> Thanks,
>   --Gabriel
> 
> >New since v4:
> >
> > - rebased on top of Marc's DMA series
> > - drop machine compat dependency for insertion into x86/ssdt
> >   (patch 3/5), following agreement between Igor and Eduardo
> > - [mm]io register range now covers DMA register as well, if
> >   available.
> > - s/bios/firmware in doc file updates
> >
> >>New since v3:
> >>
> >>- rebased to work on top of 87e896ab (introducing pc-*-25 classes),
> >>  inserting fw_cfg acpi node only for machines >= 2.5.
> >>
> >>- reintroduce _STA with value 0x0B (bit 2 for u/i visibility turned
> >>  off to avoid Windows complaining -- thanks Igor for catching that!)
> >>
> >>If there's any other feedback besides questions regarding the
> >>appropriateness of "QEMU0002" as the value of _HID, please don't hesitate!
> >>
> >>>New since v2:
> >>>
> >>>   - pc/i386 node in ssdt only on machine types *newer* than 2.4
> >>> (as suggested by Eduardo)
> >>>
> >>>I appreciate any further comments and reviews. Hopefully we can make
> >>>this palatable for upstream, modulo the lingering concerns about whether
> >>>"QEMU0002" is ok to use as the value of _HID, which I'll hopefully get
> >>>sorted out with the kernel crew...
> >>>
> New since v1:
> 
>   - expose control register size (suggested by Marc Marí)
> 
>   - leaving out _UID and _STA fields (thanks Shannon & Igor)
> 
>   - using "QEMU0002" as the value of _HID (thanks Michael)
> 
>   - added documentation blurb to docs/specs/fw_cfg.txt
> (mainly to record usage of the "QEMU0002" string with fw_cfg).
> 
> > This series adds a fw_cfg device node to the SSDT (on pc), or to the
> > DSDT (on arm).
> >
> > - Patch 1/3 moves (and renames) the BIOS_CFG_IOPORT (0x510)
> >   define from pc.c to pc.h, so that it could be used from
> >   acpi-build.c in patch 2/3.
> > 
> > - Patch 2/3 adds a fw_cfg node to the pc SSDT.
> > 
> > - Patch 3/3 adds a fw_cfg node to the arm DSDT.
> >
> > I made up some names - "FWCF" for the node name, and "FWCF0001"
> > for _HID; no idea whether that's appropriate, or how else I should
> > figure out what to use instead...
> >
> > Also, using scope "\\_SB", based on where fw_cfg shows up in the
> > output of "info qtree". Again, if that's wrong, please point me in
> > the right direction.
> >
> > Re. 3/3 (also mentioned after the commit blurb in the patch itself),
> > I noticed none of the other DSDT entries contain a _STA field, wondering
> > why it would (not) make sense to include that, same as on the PC.
> 
> Gabriel L. Somlo (5):
>   fw_cfg: expose control register size in fw_cfg.h
>   pc: fw_cfg: move ioport base constant to pc.h
>   acpi: pc: add fw_cfg device node to ssdt
>   acpi: arm: add fw_cfg device node to dsdt
>   fw_cfg: document ACPI device node information
> 
>  docs/specs/fw_cfg.txt |  9 +
>  hw/arm/virt-acpi-build.c  | 15 +++
>  hw/i386/acpi-build.c  | 29 +
>  hw/i386/pc.c  |  5 ++---
>  hw/nvram/fw_cfg.c |  4 +++-
>  include/hw/i386/pc.h  |  2 ++
>  include/hw/nvram/fw_cfg.h |  3 +++
>  7 files changed, 63 insertions(+), 4 deletions(-)
> 
> -- 
> 2.4.3
> 



Re: [Qemu-devel] [PATCH 07/15] tcg-mips: Adjust qemu_ld/st for mips64

2016-02-10 Thread James Hogan
Hi Richard,

On Tue, Feb 09, 2016 at 09:39:55PM +1100, Richard Henderson wrote:
> @@ -1212,11 +1237,24 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg 
> base, TCGReg addrl,
> : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
>  int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
>  
> -tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl,
> -   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
> -tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0,
> -(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
> -tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
> +if (use_mips32r2_instructions) {
> +if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
> +tcg_out_opc_bf(s, OPC_EXT, TCG_REG_A0, addrl,
> +   TARGET_PAGE_BITS + CPU_TLB_ENTRY_BITS - 1,
> +   CPU_TLB_ENTRY_BITS);
> +} else {
> +tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU,
> + TCG_REG_A0, addrl,
> + TARGET_PAGE_BITS + CPU_TLB_ENTRY_BITS - 1,
> + CPU_TLB_ENTRY_BITS);
> +}

The ext/dext here will end up with bits below bit CPU_TLB_ENTRY_BITS
set, which will result in load of addend from slightly offset address,
so things go badly wrong. You still need to either ANDI off the low bits
or trim them off with the ext/dext and shift it left again.

So I don't think there's any benefit to the use of these instructions
unless CPU_TLB_SIZE + CPU_TLB_ENTRY_BITS exceeds the 16-bits available
in the ANDI immediate field for the non r2 case.

Cheers
James

> +} else {
> +tcg_out_opc_sa(s, ALIAS_TSRL, TCG_REG_A0, addrl,
> +   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
> +tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0,
> +(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
> +}
> +tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
>  
>  /* Compensate for very large offsets.  */
>  if (add_off >= 0x8000) {


signature.asc
Description: Digital signature


Re: [Qemu-devel] [PATCH v2 4/4] rng: add request queue support to rng-random

2016-02-10 Thread Ladi Prosek
> 
> 
> On 10/02/2016 16:53, Ladi Prosek wrote:
> > +req->size = size;
> > +req->receive_entropy = receive_entropy;
> > +req->opaque = opaque;
> > +req->data = g_malloc(req->size);
> > +
> > +k->request_entropy(s, req);
> > +
> > +s->requests = g_slist_append(s->requests, req);
> >  }
> 
> g_slist_append has to traverse the entire list to find the place to add
> the node.  You probably are better off using QSIMPLEQ (which is an
> intrusive list unlike GSList).

This is what rng-egd does today and I would argue that since the expected
length of the list is very small - it's going to be longer than 1 only
very rarely - a simple lightweight data structure is a better choice than
trying to be O(1) in the worst case.

I'll be happy to switch to QSIMPLEQ if you want though. Your call.

Thanks!
Ladi
 
> Thanks,
> 
> Paolo
> 
> 



Re: [Qemu-devel] [PATCH v2 4/4] rng: add request queue support to rng-random

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 17:40, Ladi Prosek wrote:
>>
>>
>> On 10/02/2016 16:53, Ladi Prosek wrote:
>>> +req->size = size;
>>> +req->receive_entropy = receive_entropy;
>>> +req->opaque = opaque;
>>> +req->data = g_malloc(req->size);
>>> +
>>> +k->request_entropy(s, req);
>>> +
>>> +s->requests = g_slist_append(s->requests, req);
>>>  }
>>
>> g_slist_append has to traverse the entire list to find the place to add
>> the node.  You probably are better off using QSIMPLEQ (which is an
>> intrusive list unlike GSList).
> 
> This is what rng-egd does today and I would argue that since the expected
> length of the list is very small - it's going to be longer than 1 only
> very rarely - a simple lightweight data structure is a better choice than
> trying to be O(1) in the worst case.
> 
> I'll be happy to switch to QSIMPLEQ if you want though. Your call.

Ok, it can be done on top I guess.  I'll let others review the patches
more closely!

Paolo



Re: [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT

2016-02-10 Thread Roman Kagan
On Wed, Feb 10, 2016 at 11:14:30AM -0500, John Snow wrote:
> On 02/09/2016 01:48 PM, Michael S. Tsirkin wrote:
> > On Tue, Feb 09, 2016 at 07:36:12PM +0100, Laszlo Ersek wrote:
> >> Implementing this in QEMU would require:
> >> - inventing virt-only registers for the FDC that provide the current
> >> disk geometry,
> 
> We do secretly have these registers, but of course there's no spec to
> interpreting what they mean. For instance, what do they read when the
> drive is empty? I am not sure that information is codified in the ACPI spec.

There are various possibilities, like returning False for the
corresponding drive in _FDE (Floppy Disk Enumerate) object, or returning
0 aka unknown as drive type in _FDI.  Anyway I'd hate needing to expose
all of that in an ACPI-accessible form, this is going to become too fat.

> Could be wrong, you seemed to indicate that the ACPI spec said that the
> info matches what you get from a certain legacy bios routine, but I
> don't know the specifics of that routine, either.

Indeed, it's supposed to do the same as
https://en.wikipedia.org/wiki/INT_13H#INT_13h_AH.3D08h:_Read_Drive_Parameters

and, as I wrote in another mail, the SeaBIOS implementation here is
rather simplistic.

> Roman, does the 0xFF "empty disk geometry" hack appear to work for
> Windows 10?
> 
> Maybe it's fine enough as-is, as per Laszlo's good synopsis here.

I'll test and let you know.

Roman.



Re: [Qemu-devel] [PATCH v5 6/9] qemu-log: new option -dfilter to limit output

2016-02-10 Thread Alex Bennée

Richard Henderson  writes:

> On 02/05/2016 01:56 AM, Alex Bennée wrote:
>> +gchar *range_op = g_strstr_len(r, -1, "-");
>
> This is strchr.
>
>> +range_op = g_strstr_len(r, -1, ".");
>
> Or at least if you're going to make use of strstr, search for "..".
>
>> +g_strdelimit(range_val, ".", ' ');
>
> Cause this is just weird.  It accepts "1+.2" just the same as "1..2".
>
>> +err = qemu_strtoul(r, NULL, 0, );
>
> This should be strtoull.  You probably broke 32-bit compilation here.

OK I think this version is a lot cleaner:

  void qemu_set_dfilter_ranges(const char *filter_spec)
  {
  gchar **ranges = g_strsplit(filter_spec, ",", 0);
  if (ranges) {
  gchar **next = ranges;
  gchar *r = *next++;
  debug_regions = g_array_sized_new(FALSE, FALSE,
sizeof(Range), 
g_strv_length(ranges));
  while (r) {
  gchar *range_op = g_strstr_len(r, -1, "-");
  gchar *r2 = range_op ? range_op + 1 : NULL;
  if (!range_op) {
  range_op = g_strstr_len(r, -1, "+");
  r2 = range_op ? range_op + 1 : NULL;
  }
  if (!range_op) {
  range_op = g_strstr_len(r, -1, "..");
  r2 = range_op ? range_op + 2 : NULL;
  }
  if (range_op) {
  struct Range range;
  int err;
  const char *e = NULL;

  err = qemu_strtoull(r, , 0, );

  g_assert(e == range_op);

  switch (*range_op) {
  case '+':
  {
  unsigned long len;
  err |= qemu_strtoull(r2, NULL, 0, );
  range.end = range.begin + len;
  break;
  }
  case '-':
  {
  unsigned long len;
  err |= qemu_strtoull(r2, NULL, 0, );
  range.end = range.begin;
  range.begin = range.end - len;
  break;
  }
  case '.':
  err |= qemu_strtoull(r2, NULL, 0, );
  break;
  default:
  g_assert_not_reached();
  }
  if (err) {
  g_error("Failed to parse range in: %s, %d", r, err);
  } else {
  g_array_append_val(debug_regions, range);
  }
  } else {
  g_error("Bad range specifier in: %s", r);
  }
  r = *next++;
  }
  g_strfreev(ranges);
  }
  }


>
>> +case '+':
>> +{
>> +unsigned long len;
>> +err |= qemu_strtoul(range_val, NULL, 0, );
>> +range.end = range.begin + len;
>> +break;
>> +}
>> +case '-':
>> +{
>> +unsigned long len;
>> +err |= qemu_strtoul(range_val, NULL, 0, );
>> +range.end = range.begin;
>> +range.begin = range.end - len;
>> +break;
>> +}
>
> Both of these have off-by-one bugs, since end is inclusive.

Sorry I don't quite follow, do you mean the position of range_val (now
r2) or the final value of range.end?

>
>> +case '.':
>> +err |= qemu_strtoul(range_val, NULL, 0, );
>> +break;
>
> I'd think multiple dot detection belongs here, and you need not smash them to 
> '
> ' but merely notice that there are two of them and then strtoull
> range_val+1.

I think this is covered with the new code now.

>
>
> r~


--
Alex Bennée



Re: [Qemu-devel] [PATCH 0/8] ipmi: a couple of enhancements to the BMC simulator (round 2)

2016-02-10 Thread Corey Minyard

On 02/10/2016 08:05 AM, Cédric Le Goater wrote:

Hello Corey,

On 02/09/2016 07:25 PM, Corey Minyard wrote:

On 02/09/2016 06:13 AM, Cédric Le Goater wrote:

The first patches are cleanups and prepare ground for an extension of
the BMC simulator providing a SDR loader using a file. A simple FRU
support comes next.

The last patches introduce APIs to populate a guest device tree with
the available sensors and to generate events, which is needed by
platforms in some occasions.


I have reviewed all of these, and they look good.  I only have one
comment: The naming of the properties probably needs to be
fixed.

"sdr" should be "sdrfile" to be consistent with everything else.

yes. I agree. I am glad you took a look.


Technically, a "FRU" is a piece of hardware that can be replaced
in the field, "FRU data" is the data describing that FRU, and a "FRU
area" is the memory used to store FRU data.  I know this is mixed
up a lot (and I have done so) but some people are picky about this.

With the renaming of sdr (fru is your option):

I will rename the "sdr" property to "sdrfile".

As for FRU, you would rather have the code use FruData than Fru ?
Something like:

typedef struct IPMIFruData {
char *filename;
unsigned int nentries;
uint16_t size;
uint8_t *area;
} IPMIFruData;

The code using the IPMIFruData structure would look like :

 uint8_t *fru_area;

 ...
 fru_area = >frudata.area[fruid * ibs->frudata.size];
 ...

Changing all the names is not a problem. Let's get them right.

And, so, the properties would become :

 DEFINE_PROP_UINT16("frudatasize", IPMIBmcSim, frudata.size, 1024),


I would name this "fruareasize" to be clear that it is not the size of 
the "frudatafile".


Other than that, I'm happy with those names.

-corey


 DEFINE_PROP_STRING("frudatafile", IPMIBmcSim, frudata.filename),
 DEFINE_PROP_STRING("sdrfile", IPMIBmcSim, sdr_filename),


Acked-by: Corey Minyard 

for all patches.

Oh, and I assume you need to add documentation for the
properties to qemu-options.hx.

Yes. Forgot that.

Thanks,

C.


-corey


Based on e4a096b1cd43 and also available here  :

https://github.com/legoater/qemu/commits/ipmi

Thanks,

C.

Cédric Le Goater (8):
ipmi: add a realize function to the device class
ipmi: use a function to initialize the SDR table
ipmi: remove the need of an ending record in the SDR table
ipmi: add some local variables in ipmi_sdr_init
ipmi: use a file to load SDRs
ipmi: provide support for FRUs
ipmi: introduce an ipmi_bmc_sdr_find() API
ipmi: introduce an ipmi_bmc_gen_event() API

   hw/ipmi/ipmi_bmc_sim.c | 256 
+++--
   include/hw/ipmi/ipmi.h |   4 +
   2 files changed, 233 insertions(+), 27 deletions(-)






Re: [Qemu-devel] [PATCH v2 4/4] rng: add request queue support to rng-random

2016-02-10 Thread Paolo Bonzini


On 10/02/2016 16:53, Ladi Prosek wrote:
> +req->size = size;
> +req->receive_entropy = receive_entropy;
> +req->opaque = opaque;
> +req->data = g_malloc(req->size);
> +
> +k->request_entropy(s, req);
> +
> +s->requests = g_slist_append(s->requests, req);
>  }

g_slist_append has to traverse the entire list to find the place to add
the node.  You probably are better off using QSIMPLEQ (which is an
intrusive list unlike GSList).

Thanks,

Paolo



[Qemu-devel] [PATCH 0/4] hw/ppc/spapr: Add "Processor Register Hypervisor Resource Access" H-calls

2016-02-10 Thread Thomas Huth
While we were recently debugging a problem with the H_SET_DABR
call [1], I noticed that some hypercalls from the chapter 14.5.4.3
("Processor Register Hypervisor Resource Access") from the LoPAPR
spec [2] are still missing in QEMU.
So here's are some patches that implement these hypercalls. Linux
apparently does not depend on these hypercalls yet (otherwise somebody
would have noticed this earlier), but the hypercalls are rather simple,
so I think the implementations are quite straight-forward and easy to
read.

[1] 
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=760a7364f27d974d

[2] https://members.openpowerfoundation.org/document/dl/469

Thomas Huth (4):
  hw/ppc/spapr: Add h_set_sprg0 hypercall
  hw/ppc/spapr: Implement h_set_dabr
  hw/ppc/spapr: Implement the h_set_xdabr hypercall
  hw/ppc/spapr: Implement the h_page_init hypercall

 hw/ppc/spapr_hcall.c | 105 +++
 1 file changed, 98 insertions(+), 7 deletions(-)

-- 
1.8.3.1




Re: [Qemu-devel] [PATCH v5 6/9] qemu-log: new option -dfilter to limit output

2016-02-10 Thread Alex Bennée

Richard Henderson  writes:

> On 02/11/2016 04:40 AM, Alex Bennée wrote:
>> OK I think this version is a lot cleaner:
>>
>>void qemu_set_dfilter_ranges(const char *filter_spec)
>>{
>>gchar **ranges = g_strsplit(filter_spec, ",", 0);
>>if (ranges) {
>>gchar **next = ranges;
>>gchar *r = *next++;
>>debug_regions = g_array_sized_new(FALSE, FALSE,
>>  sizeof(Range), 
>> g_strv_length(ranges));
>>while (r) {
>>gchar *range_op = g_strstr_len(r, -1, "-");
>>gchar *r2 = range_op ? range_op + 1 : NULL;
>>if (!range_op) {
>>range_op = g_strstr_len(r, -1, "+");
>>r2 = range_op ? range_op + 1 : NULL;
>>}
>>if (!range_op) {
>>range_op = g_strstr_len(r, -1, "..");
>>r2 = range_op ? range_op + 2 : NULL;
>>}
>
> I guess I'll quit quibbling about silly glib functions.  But really, with the
> -1 argument, you gain nothing except obfuscation over using the
> standard C library.

No you are quite right to quibble. It's a hard habit to break because
I've gotten used to glib's arguably more predictable behaviour when
string munging.

>
>>if (range_op) {
>>struct Range range;
>>int err;
>>const char *e = NULL;
>>
>>err = qemu_strtoull(r, , 0, );
>>
>>g_assert(e == range_op);
>>
>>switch (*range_op) {
>>case '+':
>>{
>>unsigned long len;
>>err |= qemu_strtoull(r2, NULL, 0, );
>
> You can't or errno's together and then...
>
>>g_error("Failed to parse range in: %s, %d", r, err);
>
> ... expect to get anything meaningful out of them.

True, I'll drop the %d, I was just trying to avoid having multiple error
handling legs.

>
 +case '+':
 +{
 +unsigned long len;
 +err |= qemu_strtoul(range_val, NULL, 0, );
 +range.end = range.begin + len;
 +break;
 +}
 +case '-':
 +{
 +unsigned long len;
 +err |= qemu_strtoul(range_val, NULL, 0, );
 +range.end = range.begin;
 +range.begin = range.end - len;
 +break;
 +}
>>>
>>> Both of these have off-by-one bugs, since end is inclusive.
>>
>> Sorry I don't quite follow, do you mean the position of range_val (now
>> r2) or the final value of range.end?
>
> Final value of range.end.  In that
>
> 0x1000..0x1000
> and
> 0x1000+1
>
> should both produce a range that covers a single byte at 0x1000.

Ahh OK. I suppose if I'm being good about this I should add some tests
to defend the ranges. I wonder how easy command line parsing unit tests
are in qtest?

>
>
> r~


--
Alex Bennée



[Qemu-devel] [PATCH v6 14/16] nbd: enable use of TLS with NBD block driver

2016-02-10 Thread Daniel P. Berrange
This modifies the NBD driver so that it is possible to request
use of TLS. This is done by providing the 'tls-creds' parameter
with the ID of a previously created QCryptoTLSCreds object.

For example

  $QEMU -object tls-creds-x509,id=tls0,endpoint=client,\
dir=/home/berrange/security/qemutls \
-drive driver=nbd,host=localhost,port=9000,tls-creds=tls0

The client will drop the connection if the NBD server does not
provide TLS.

Signed-off-by: Daniel P. Berrange 
---
 block/nbd-client.c | 10 ---
 block/nbd-client.h |  2 ++
 block/nbd.c| 78 +++---
 3 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 1c79e4b..6a9b4c7 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -394,8 +394,12 @@ void nbd_client_close(BlockDriverState *bs)
 nbd_teardown_connection(bs);
 }
 
-int nbd_client_init(BlockDriverState *bs, QIOChannelSocket *sioc,
-const char *export, Error **errp)
+int nbd_client_init(BlockDriverState *bs,
+QIOChannelSocket *sioc,
+const char *export,
+QCryptoTLSCreds *tlscreds,
+const char *hostname,
+Error **errp)
 {
 NbdClientSession *client = nbd_get_client_session(bs);
 int ret;
@@ -406,7 +410,7 @@ int nbd_client_init(BlockDriverState *bs, QIOChannelSocket 
*sioc,
 
 ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
 >nbdflags,
-NULL, NULL,
+tlscreds, hostname,
 >ioc,
 >size, errp);
 if (ret < 0) {
diff --git a/block/nbd-client.h b/block/nbd-client.h
index e8b3283..53f116d 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -39,6 +39,8 @@ NbdClientSession *nbd_get_client_session(BlockDriverState 
*bs);
 int nbd_client_init(BlockDriverState *bs,
 QIOChannelSocket *sock,
 const char *export_name,
+QCryptoTLSCreds *tlscreds,
+const char *hostname,
 Error **errp);
 void nbd_client_close(BlockDriverState *bs);
 
diff --git a/block/nbd.c b/block/nbd.c
index d7116e2..db57b49 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -258,36 +258,92 @@ static QIOChannelSocket 
*nbd_establish_connection(SocketAddress *saddr,
 return sioc;
 }
 
+
+static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
+{
+Object *obj;
+QCryptoTLSCreds *creds;
+
+obj = object_resolve_path_component(
+object_get_objects_root(), id);
+if (!obj) {
+error_setg(errp, "No TLS credentials with id '%s'",
+   id);
+return NULL;
+}
+creds = (QCryptoTLSCreds *)
+object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
+if (!creds) {
+error_setg(errp, "Object with id '%s' is not TLS credentials",
+   id);
+return NULL;
+}
+
+if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+error_setg(errp,
+   "Expecting TLS credentials with a client endpoint");
+return NULL;
+}
+object_ref(obj);
+return creds;
+}
+
+
 static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
 BDRVNBDState *s = bs->opaque;
 char *export = NULL;
-int result;
-QIOChannelSocket *sioc;
+QIOChannelSocket *sioc = NULL;
 SocketAddress *saddr;
+const char *tlscredsid;
+QCryptoTLSCreds *tlscreds = NULL;
+const char *hostname = NULL;
+int ret = -EINVAL;
 
 /* Pop the config into our state object. Exit if invalid. */
 saddr = nbd_config(s, options, , errp);
 if (!saddr) {
-return -EINVAL;
+goto error;
+}
+
+tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
+if (tlscredsid) {
+qdict_del(options, "tls-creds");
+tlscreds = nbd_get_tls_creds(tlscredsid, errp);
+if (!tlscreds) {
+goto error;
+}
+
+if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
+error_setg(errp, "TLS only supported over IP sockets");
+goto error;
+}
+hostname = saddr->u.inet->host;
 }
 
 /* establish TCP connection, return error if it fails
  * TODO: Configurable retry-until-timeout behaviour.
  */
 sioc = nbd_establish_connection(saddr, errp);
-qapi_free_SocketAddress(saddr);
 if (!sioc) {
-g_free(export);
-return -ECONNREFUSED;
+ret = -ECONNREFUSED;
+goto error;
 }
 
 /* NBD handshake */
-result = nbd_client_init(bs, sioc, export, errp);
-object_unref(OBJECT(sioc));
+ret = nbd_client_init(bs, sioc, export,
+  tlscreds, hostname, errp);
+ error:
+

Re: [Qemu-devel] [PATCH 3/3] qemu-iotests: 140: use virtio-scsi instead of IDE

2016-02-10 Thread Sascha Silbe
Dear Max,

Max Reitz  writes:

[tests/qemu-iotests/140]
>> -_launch_qemu -drive 
>> if=ide,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
>> -2> >(_filter_nbd)
>> +_launch_qemu -drive 
>> if=none,media=cdrom,id=drv,file="$TEST_IMG",format=$IMGFMT \
>> +-device virtio-scsi -device scsi-cd,drive=drv 2> >(_filter_nbd)
>
> Why not just omit the device (and the media=cdrom along with it, keeping
> if=none)? This will change the reference output because there is no
> longer any tray to be moved, but this will still test what it's supposed to.
>
> (This may sound hypocritical coming from me, because I wrote this test
> so I could have just done so in the first place; I guess I just didn't
> realize that 'eject' works on device-less drives, too.)

Is this supposed to work? I.e. can we rely on it? If so, that would
certainly be the easier route for this particular test. Test coverage
should be unaffected as 139 already tests ejection (using virtio, unlike
118 which is PC-only).

The aliases patch has a value of its own, but that's a separate
matter.

Sascha
-- 
Softwareentwicklung Sascha Silbe, Niederhofenstraße 5/1, 71229 Leonberg
https://se-silbe.de/
USt-IdNr. DE281696641




[Qemu-devel] commit 5b82b70 boot problems

2016-02-10 Thread Gabriel L. Somlo
Hi Stefan,

After pulling from upstream this morning, my guest VMs were no longer
able to boot. For instance, running something like

bin/qemu-system-x86_64 -machine q35,accel=kvm -m 2048 -monitor stdio \
  -device ide-drive,bus=ide.2,drive=CD \
  -drive id=CD,if=none,snapshot=on,file=Fedora-Live-Workstation-x86_64-22-3.iso

Would just hang with a black screen, or sometimes partially display
the GRUB boot menu, and never make it any further than that.

After a bisect, I narrowed it down to 5b82b70 ("memory: RCU
ram_list.dirty_memory[] for safe RAM hotplug"); reverting that
commit got me back to where my VMs were working fine, but the
actual content of the changes isn't immediately obvious.

Apologies for the noise in case you're already aware of the problem.
Otherwise, I'd be happy to test and send you debug output.

Please advise.

Thanks much,
--Gabriel



[Qemu-devel] [PULL 00/11] Ide patches

2016-02-10 Thread John Snow
The following changes since commit c9f19dff101e2c2cf3fa3967eceec2833e845e40:

  Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging 
(2016-02-09 19:34:46 +)

are available in the git repository at:

  https://github.com/jnsnow/qemu.git tags/ide-pull-request

for you to fetch changes up to d590474922d37372c56075adb229c86d3aeae967:

  ahci: prohibit "restarting" the FIS or CLB engines (2016-02-10 13:29:40 -0500)



Untested with Clang.



John Snow (11):
  ide: Prohibit RESET on IDE drives
  ide: code motion
  ide: move buffered DMA cancel to core
  ide: replace blk_drain_all by blk_drain
  ide: Add silent DRQ cancellation
  ide: fix device_reset to not ignore pending AIO
  fdc: always compile-check debug prints
  ahci: Do not unmap NULL addresses
  ahci: handle LIST_ON and FIS_ON in map helpers
  ahci: explicitly reject bad engine states on post_load
  ahci: prohibit "restarting" the FIS or CLB engines

 hw/block/fdc.c|  15 ++--
 hw/ide/ahci.c |  96 ++--
 hw/ide/core.c | 217 --
 hw/ide/internal.h |   1 +
 hw/ide/pci.c  |  36 +
 5 files changed, 213 insertions(+), 152 deletions(-)

-- 
2.4.3




[Qemu-devel] [PULL 09/11] ahci: handle LIST_ON and FIS_ON in map helpers

2016-02-10 Thread John Snow
Instead of relying on ahci_cond_start_engines to maintain the
engine status indicators itself, have the lower-layer CLB and FIS mapper
helpers do it themselves.

This makes the cond_start routine slightly nicer to read, and makes sure
that the status indicators will always be correct.

Signed-off-by: John Snow 
Message-id: 1454103689-13042-3-git-send-email-js...@redhat.com
---
 hw/ide/ahci.c | 28 ++--
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 3a95dad..ff338fe 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -210,9 +210,7 @@ static int ahci_cond_start_engines(AHCIDevice *ad, bool 
allow_stop)
 AHCIPortRegs *pr = >port_regs;
 
 if (pr->cmd & PORT_CMD_START) {
-if (ahci_map_clb_address(ad)) {
-pr->cmd |= PORT_CMD_LIST_ON;
-} else {
+if (!ahci_map_clb_address(ad)) {
 error_report("AHCI: Failed to start DMA engine: "
  "bad command list buffer address");
 return -1;
@@ -220,7 +218,6 @@ static int ahci_cond_start_engines(AHCIDevice *ad, bool 
allow_stop)
 } else if (pr->cmd & PORT_CMD_LIST_ON) {
 if (allow_stop) {
 ahci_unmap_clb_address(ad);
-pr->cmd = pr->cmd & ~(PORT_CMD_LIST_ON);
 } else {
 error_report("AHCI: DMA engine should be off, "
  "but appears to still be running");
@@ -229,9 +226,7 @@ static int ahci_cond_start_engines(AHCIDevice *ad, bool 
allow_stop)
 }
 
 if (pr->cmd & PORT_CMD_FIS_RX) {
-if (ahci_map_fis_address(ad)) {
-pr->cmd |= PORT_CMD_FIS_ON;
-} else {
+if (!ahci_map_fis_address(ad)) {
 error_report("AHCI: Failed to start FIS receive engine: "
  "bad FIS receive buffer address");
 return -1;
@@ -239,7 +234,6 @@ static int ahci_cond_start_engines(AHCIDevice *ad, bool 
allow_stop)
 } else if (pr->cmd & PORT_CMD_FIS_ON) {
 if (allow_stop) {
 ahci_unmap_fis_address(ad);
-pr->cmd = pr->cmd & ~(PORT_CMD_FIS_ON);
 } else {
 error_report("AHCI: FIS receive engine should be off, "
  "but appears to still be running");
@@ -657,7 +651,13 @@ static bool ahci_map_fis_address(AHCIDevice *ad)
 AHCIPortRegs *pr = >port_regs;
 map_page(ad->hba->as, >res_fis,
  ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
-return ad->res_fis != NULL;
+if (ad->res_fis != NULL) {
+pr->cmd |= PORT_CMD_FIS_ON;
+return true;
+}
+
+pr->cmd &= ~PORT_CMD_FIS_ON;
+return false;
 }
 
 static void ahci_unmap_fis_address(AHCIDevice *ad)
@@ -666,6 +666,7 @@ static void ahci_unmap_fis_address(AHCIDevice *ad)
 DPRINTF(ad->port_no, "Attempt to unmap NULL FIS address\n");
 return;
 }
+ad->port_regs.cmd &= ~PORT_CMD_FIS_ON;
 dma_memory_unmap(ad->hba->as, ad->res_fis, 256,
  DMA_DIRECTION_FROM_DEVICE, 256);
 ad->res_fis = NULL;
@@ -677,7 +678,13 @@ static bool ahci_map_clb_address(AHCIDevice *ad)
 ad->cur_cmd = NULL;
 map_page(ad->hba->as, >lst,
  ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
-return ad->lst != NULL;
+if (ad->lst != NULL) {
+pr->cmd |= PORT_CMD_LIST_ON;
+return true;
+}
+
+pr->cmd &= ~PORT_CMD_LIST_ON;
+return false;
 }
 
 static void ahci_unmap_clb_address(AHCIDevice *ad)
@@ -686,6 +693,7 @@ static void ahci_unmap_clb_address(AHCIDevice *ad)
 DPRINTF(ad->port_no, "Attempt to unmap NULL CLB address\n");
 return;
 }
+ad->port_regs.cmd &= ~PORT_CMD_LIST_ON;
 dma_memory_unmap(ad->hba->as, ad->lst, 1024,
  DMA_DIRECTION_FROM_DEVICE, 1024);
 ad->lst = NULL;
-- 
2.4.3




[Qemu-devel] [PATCH v6 01/16] qom: add helpers for UserCreatable object types

2016-02-10 Thread Daniel P. Berrange
The QMP monitor code has two helper methods object_add
and qmp_object_del that are called from several places
in the code (QMP, HMP and main emulator startup).

The HMP and main emulator startup code also share
further logic that extracts the qom-type & id
values from a qdict.

We soon need to use this logic from qemu-img, qemu-io
and qemu-nbd too, but don't want those to depend on
the monitor, nor do we want to duplicate the code.

To avoid this, move some code out of qmp.c and hmp.c
adding new methods to qom/object_interfaces.c

 - user_creatable_add - takes a QDict holding a full
   object definition & instantiates it
 - user_creatable_add_type - takes an ID, type name,
   and QDict holding object properties & instantiates
   it
 - user_creatable_add_opts - takes a QemuOpts holding
   a full object definition & instantiates it
 - user_creatable_add_opts_foreach - variant on
   user_creatable_add_opts which can be directly used
   in conjunction with qemu_opts_foreach.
 - user_creatable_del - takes an ID and deletes the
   corresponding object

The existing code is updated to use these new methods.

Signed-off-by: Daniel P. Berrange 
---
 hmp.c   |  52 +++-
 include/monitor/monitor.h   |   3 -
 include/qom/object_interfaces.h |  92 +
 qmp.c   |  76 ++
 qom/object_interfaces.c | 174 
 vl.c|  68 ++--
 6 files changed, 290 insertions(+), 175 deletions(-)

diff --git a/hmp.c b/hmp.c
index c6419da..41fb9ca 100644
--- a/hmp.c
+++ b/hmp.c
@@ -30,6 +30,7 @@
 #include "qapi/string-output-visitor.h"
 #include "qapi/util.h"
 #include "qapi-visit.h"
+#include "qom/object_interfaces.h"
 #include "ui/console.h"
 #include "block/qapi.h"
 #include "qemu-io.h"
@@ -1655,58 +1656,27 @@ void hmp_netdev_del(Monitor *mon, const QDict *qdict)
 void hmp_object_add(Monitor *mon, const QDict *qdict)
 {
 Error *err = NULL;
-Error *err_end = NULL;
 QemuOpts *opts;
-char *type = NULL;
-char *id = NULL;
 OptsVisitor *ov;
-QDict *pdict;
-Visitor *v;
+Object *obj = NULL;
 
 opts = qemu_opts_from_qdict(qemu_find_opts("object"), qdict, );
 if (err) {
-goto out;
+hmp_handle_error(mon, );
+return;
 }
 
 ov = opts_visitor_new(opts);
-pdict = qdict_clone_shallow(qdict);
-v = opts_get_visitor(ov);
-
-visit_start_struct(v, NULL, NULL, 0, );
-if (err) {
-goto out_clean;
-}
-
-qdict_del(pdict, "qom-type");
-visit_type_str(v, "qom-type", , );
-if (err) {
-goto out_end;
-}
+obj = user_creatable_add(qdict, opts_get_visitor(ov), );
+opts_visitor_cleanup(ov);
+qemu_opts_del(opts);
 
-qdict_del(pdict, "id");
-visit_type_str(v, "id", , );
 if (err) {
-goto out_end;
+hmp_handle_error(mon, );
 }
-
-object_add(type, id, pdict, v, );
-
-out_end:
-visit_end_struct(v, _end);
-if (!err && err_end) {
-qmp_object_del(id, NULL);
+if (obj) {
+object_unref(obj);
 }
-error_propagate(, err_end);
-out_clean:
-opts_visitor_cleanup(ov);
-
-QDECREF(pdict);
-qemu_opts_del(opts);
-g_free(id);
-g_free(type);
-
-out:
-hmp_handle_error(mon, );
 }
 
 void hmp_getfd(Monitor *mon, const QDict *qdict)
@@ -1934,7 +1904,7 @@ void hmp_object_del(Monitor *mon, const QDict *qdict)
 const char *id = qdict_get_str(qdict, "id");
 Error *err = NULL;
 
-qmp_object_del(id, );
+user_creatable_del(id, );
 hmp_handle_error(mon, );
 }
 
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 91b95ae..aa0f373 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -43,9 +43,6 @@ void monitor_read_command(Monitor *mon, int show_prompt);
 int monitor_read_password(Monitor *mon, ReadLineFunc *readline_func,
   void *opaque);
 
-void object_add(const char *type, const char *id, const QDict *qdict,
-Visitor *v, Error **errp);
-
 AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
 bool has_opaque, const char *opaque,
 Error **errp);
diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
index 283ae0d..d579746 100644
--- a/include/qom/object_interfaces.h
+++ b/include/qom/object_interfaces.h
@@ -2,6 +2,8 @@
 #define OBJECT_INTERFACES_H
 
 #include "qom/object.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/visitor.h"
 
 #define TYPE_USER_CREATABLE "user-creatable"
 
@@ -72,4 +74,94 @@ void user_creatable_complete(Object *obj, Error **errp);
  * from implements USER_CREATABLE interface.
  */
 bool user_creatable_can_be_deleted(UserCreatable *uc, Error **errp);
+
+/**
+ * user_creatable_add:
+ * @qdict: the object definition
+ * @v: the visitor
+ * @errp: if an 

[Qemu-devel] [PATCH v6 04/16] nbd: convert qemu-nbd server to use I/O channels for connection setup

2016-02-10 Thread Daniel P. Berrange
This converts the qemu-nbd server to use the QIOChannelSocket
class for initial listener socket setup and accepting of client
connections. Actual I/O is still being performed against the
socket file descriptor using the POSIX socket APIs.

In this initial conversion though, all I/O is still actually done
using the raw POSIX sockets APIs.

Signed-off-by: Daniel P. Berrange 
---
 qemu-nbd.c | 91 --
 1 file changed, 53 insertions(+), 38 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 130c306..bc309e0 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -22,19 +22,17 @@
 #include "block/block_int.h"
 #include "block/nbd.h"
 #include "qemu/main-loop.h"
-#include "qemu/sockets.h"
 #include "qemu/error-report.h"
 #include "qemu/config-file.h"
 #include "block/snapshot.h"
 #include "qapi/util.h"
 #include "qapi/qmp/qstring.h"
 #include "qom/object_interfaces.h"
+#include "io/channel-socket.h"
 
 #include 
-#include 
-#include 
-#include 
-#include 
+#include 
+#include 
 #include 
 #include 
 
@@ -53,7 +51,8 @@ static int persistent = 0;
 static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state;
 static int shared = 1;
 static int nb_fds;
-static int server_fd;
+static QIOChannelSocket *server_ioc;
+static int server_watch = -1;
 
 static void usage(const char *name)
 {
@@ -236,19 +235,21 @@ static void *nbd_client_thread(void *arg)
 char *device = arg;
 off_t size;
 uint32_t nbdflags;
-int fd, sock;
+QIOChannelSocket *sioc;
+int fd;
 int ret;
 pthread_t show_parts_thread;
 Error *local_error = NULL;
 
-
-sock = socket_connect(saddr, _error, NULL, NULL);
-if (sock < 0) {
+sioc = qio_channel_socket_new();
+if (qio_channel_socket_connect_sync(sioc,
+saddr,
+_error) < 0) {
 error_report_err(local_error);
 goto out;
 }
 
-ret = nbd_receive_negotiate(sock, NULL, ,
+ret = nbd_receive_negotiate(sioc->fd, NULL, ,
 , _error);
 if (ret < 0) {
 if (local_error) {
@@ -264,7 +265,7 @@ static void *nbd_client_thread(void *arg)
 goto out_socket;
 }
 
-ret = nbd_init(fd, sock, nbdflags, size);
+ret = nbd_init(fd, sioc->fd, nbdflags, size);
 if (ret < 0) {
 goto out_fd;
 }
@@ -285,13 +286,14 @@ static void *nbd_client_thread(void *arg)
 goto out_fd;
 }
 close(fd);
+object_unref(OBJECT(sioc));
 kill(getpid(), SIGTERM);
 return (void *) EXIT_SUCCESS;
 
 out_fd:
 close(fd);
 out_socket:
-closesocket(sock);
+object_unref(OBJECT(sioc));
 out:
 kill(getpid(), SIGTERM);
 return (void *) EXIT_FAILURE;
@@ -308,7 +310,7 @@ static void nbd_export_closed(NBDExport *exp)
 state = TERMINATED;
 }
 
-static void nbd_update_server_fd_handler(int fd);
+static void nbd_update_server_watch(void);
 
 static void nbd_client_closed(NBDClient *client)
 {
@@ -316,37 +318,51 @@ static void nbd_client_closed(NBDClient *client)
 if (nb_fds == 0 && !persistent && state == RUNNING) {
 state = TERMINATE;
 }
-nbd_update_server_fd_handler(server_fd);
+nbd_update_server_watch();
 nbd_client_put(client);
 }
 
-static void nbd_accept(void *opaque)
+static gboolean nbd_accept(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
 {
-struct sockaddr_in addr;
-socklen_t addr_len = sizeof(addr);
+QIOChannelSocket *cioc;
+int fd;
 
-int fd = accept(server_fd, (struct sockaddr *), _len);
-if (fd < 0) {
-perror("accept");
-return;
+cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
+ NULL);
+if (!cioc) {
+return TRUE;
 }
 
 if (state >= TERMINATE) {
-close(fd);
-return;
+object_unref(OBJECT(cioc));
+return TRUE;
 }
 
 nb_fds++;
-nbd_update_server_fd_handler(server_fd);
-nbd_client_new(exp, fd, nbd_client_closed);
+nbd_update_server_watch();
+fd = dup(cioc->fd);
+if (fd >= 0) {
+nbd_client_new(exp, fd, nbd_client_closed);
+}
+object_unref(OBJECT(cioc));
+
+return TRUE;
 }
 
-static void nbd_update_server_fd_handler(int fd)
+static void nbd_update_server_watch(void)
 {
 if (nbd_can_accept()) {
-qemu_set_fd_handler(fd, nbd_accept, NULL, (void *)(uintptr_t)fd);
+if (server_watch == -1) {
+server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
+ G_IO_IN,
+ nbd_accept,
+ NULL, NULL);
+}
 } else {
-qemu_set_fd_handler(fd, NULL, NULL, NULL);
+if (server_watch != -1) {
+g_source_remove(server_watch);
+server_watch = -1;
+}
 }
 }
 
@@ -433,7 +449,6 @@ int 

[Qemu-devel] [PATCH v6 10/16] nbd: allow setting of an export name for qemu-nbd server

2016-02-10 Thread Daniel P. Berrange
The qemu-nbd server currently always uses the old style protocol
since it never sets any export name. This is a problem because
future TLS support will require use of the new style protocol
negotiation.

This adds "--exportname NAME" / "-x NAME" arguments to qemu-nbd
which allow the user to set an explicit export name. When an
export name is set the server will always use the new style
NBD protocol.

Signed-off-by: Daniel P. Berrange 
---
 qemu-nbd.c| 14 --
 qemu-nbd.texi |  3 +++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 5e54290..9710a26 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -44,6 +44,7 @@
 #define QEMU_NBD_OPT_OBJECT5
 
 static NBDExport *exp;
+static bool newproto;
 static int verbose;
 static char *srcpath;
 static SocketAddress *saddr;
@@ -339,7 +340,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition 
cond, gpointer opaque)
 
 nb_fds++;
 nbd_update_server_watch();
-nbd_client_new(exp, cioc, nbd_client_closed);
+nbd_client_new(newproto ? NULL : exp, cioc, nbd_client_closed);
 object_unref(OBJECT(cioc));
 
 return TRUE;
@@ -413,7 +414,7 @@ int main(int argc, char **argv)
 off_t fd_size;
 QemuOpts *sn_opts = NULL;
 const char *sn_id_or_name = NULL;
-const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:";
+const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:";
 struct option lopt[] = {
 { "help", 0, NULL, 'h' },
 { "version", 0, NULL, 'V' },
@@ -437,6 +438,7 @@ int main(int argc, char **argv)
 { "persistent", 0, NULL, 't' },
 { "verbose", 0, NULL, 'v' },
 { "object", 1, NULL, QEMU_NBD_OPT_OBJECT },
+{ "export-name", 1, NULL, 'x' },
 { NULL, 0, NULL, 0 }
 };
 int ch;
@@ -453,6 +455,7 @@ int main(int argc, char **argv)
 Error *local_err = NULL;
 BlockdevDetectZeroesOptions detect_zeroes = 
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
 QDict *options = NULL;
+const char *export_name = NULL;
 
 /* The client thread uses SIGTERM to interrupt the server.  A signal
  * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
@@ -604,6 +607,9 @@ int main(int argc, char **argv)
 case 't':
 persistent = 1;
 break;
+case 'x':
+export_name = optarg;
+break;
 case 'v':
 verbose = 1;
 break;
@@ -783,6 +789,10 @@ int main(int argc, char **argv)
 error_report_err(local_err);
 exit(EXIT_FAILURE);
 }
+if (export_name) {
+nbd_export_set_name(exp, export_name);
+newproto = true;
+}
 
 server_ioc = qio_channel_socket_new();
 if (qio_channel_socket_listen_sync(server_ioc, saddr, _err) < 0) {
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index a56ebc3..2516963 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -73,6 +73,9 @@ Disconnect the device @var{dev}
 Allow up to @var{num} clients to share the device (default @samp{1})
 @item -t, --persistent
 Don't exit on the last connection
+@item -x NAME, --export-name=NAME
+Set the NDB volume export name. This switches the server to use
+the new style NBD protocol negotiation
 @item -v, --verbose
 Display extra debugging information
 @item -h, --help
-- 
2.5.0




[Qemu-devel] [PATCH v6 09/16] nbd: make client request fixed new style if advertized

2016-02-10 Thread Daniel P. Berrange
If the server advertizes support for the fixed new style
negotiation, the client should in turn enable new style.
This will allow the client to negotiate further NBD
options besides the export name.

Signed-off-by: Daniel P. Berrange 
---
 nbd/client.c | 27 +--
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 5064788..88f2ada 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -76,7 +76,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, 
uint32_t *flags,
 {
 char buf[256];
 uint64_t magic, s;
-uint16_t tmp;
 int rc;
 
 TRACE("Receiving negotiation.");
@@ -117,19 +116,26 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint32_t *flags,
 TRACE("Magic is 0x%" PRIx64, magic);
 
 if (magic == NBD_OPTS_MAGIC) {
-uint32_t reserved = 0;
+uint32_t clientflags = 0;
 uint32_t opt;
 uint32_t namesize;
+uint16_t globalflags;
+uint16_t exportflags;
 
-if (read_sync(ioc, , sizeof(tmp)) != sizeof(tmp)) {
+if (read_sync(ioc, , sizeof(globalflags)) !=
+sizeof(globalflags)) {
 error_setg(errp, "Failed to read server flags");
 goto fail;
 }
-*flags = be16_to_cpu(tmp) << 16;
-/* reserved for future use */
-if (write_sync(ioc, , sizeof(reserved)) !=
-sizeof(reserved)) {
-error_setg(errp, "Failed to read reserved field");
+*flags = be16_to_cpu(globalflags) << 16;
+if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
+TRACE("Server supports fixed new style");
+clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
+}
+/* client requested flags */
+if (write_sync(ioc, , sizeof(clientflags)) !=
+sizeof(clientflags)) {
+error_setg(errp, "Failed to send clientflags field");
 goto fail;
 }
 /* write the export name */
@@ -165,11 +171,12 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char 
*name, uint32_t *flags,
 *size = be64_to_cpu(s);
 TRACE("Size is %" PRIu64, *size);
 
-if (read_sync(ioc, , sizeof(tmp)) != sizeof(tmp)) {
+if (read_sync(ioc, , sizeof(exportflags)) !=
+sizeof(exportflags)) {
 error_setg(errp, "Failed to read export flags");
 goto fail;
 }
-*flags |= be16_to_cpu(tmp);
+*flags |= be16_to_cpu(exportflags);
 } else if (magic == NBD_CLIENT_MAGIC) {
 if (name) {
 error_setg(errp, "Server does not support export names");
-- 
2.5.0




Re: [Qemu-devel] [PATCH 1/2] qemu-iotests: add _filter_qmp_events() for filtering out QMP events

2016-02-10 Thread Sascha Silbe
Dear Max,

Max Reitz  writes:

>> +# remove QMP events from output
>> +_filter_qmp_events()
>> +{
>> +sed -e '/^{\(.*, \)"event": ".*}$/ d'
>> +}
>
> There is a pretty good reason test 067 uses -qmp-pretty (as you yourself
> say, the lines get pretty long otherwise, and if we have any change
> within, the whole line needs to be changed).

Additionally, it's a lot easier to read when indented properly,
especially with the block info containing nested dicts.

> Using the following ugly
> piece of code here instead, we would still be able to use it:
>
> tr '\n' '\t' \
>   | sed -e
> 's/{\s*"timestamp":\s*{[^}]*},\s*"event":[^,}]*\(,\s*"data":\s*{[^}]*}\)\?\s*}\s*//g'
> \
>   | tr '\t' '\n'

Nice trick. Why didn't I come up with it? ;)

It definitely is a bit ugly, though. We can't just drop the entire line
(using "d") as the entire stream now is a single line. Matching
parenthesis pairs is context sensitive, so we can't just use regular
expressions to aggregate results into lines. And before I start
implementing a JSON indenter in awk, I'd rather rewrite the whole test
in Python. So if we stay with the shell test for now, we need something
like your incantation above. It's not perfect, but good enough for now
and I can't think of anything significantly simpler right now either.

Will test your version and send a v2. Thanks for the suggestion!

Sascha
-- 
Softwareentwicklung Sascha Silbe, Niederhofenstraße 5/1, 71229 Leonberg
https://se-silbe.de/
USt-IdNr. DE281696641




Re: [Qemu-devel] [PATCH v2] linux-user: add option to intercept execve() syscalls

2016-02-10 Thread Laurent Vivier


Le 27/01/2016 09:49, Petros Angelatos a écrit :
> From: Petros Angelatos 
> 
> In order for one to use QEMU user mode emulation under a chroot, it is
> required to use binfmt_misc. This can be avoided by QEMU never doing a
> raw execve() to the host system.
> 
> Introduce a new option, -execve, that uses the current QEMU interpreter
> to intercept execve().
> 
> qemu_execve() will prepend the interpreter path , similar to what
> binfmt_misc would do, and then pass the modified execve() to the host.
> 
> It is necessary to parse hashbang scripts in that function otherwise
> the kernel will try to run the interpreter of a script without QEMU and
> get an invalid exec format error.
> 
> Signed-off-by: Petros Angelatos 
> ---

You can put here the difference between v1 and v2.

>  linux-user/main.c|  36 
>  linux-user/qemu.h|   1 +
>  linux-user/syscall.c | 117 
> ++-
>  3 files changed, 153 insertions(+), 1 deletion(-)
> 
> diff --git a/linux-user/main.c b/linux-user/main.c
> index ee12035..751eafa 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 

This line needs to be rebased.

Otherwise:

Tested-by: Laurent Vivier 
Reviewed-by: Laurent Vivier 

>  #include 
>  #include 
>  #include 
> @@ -79,6 +80,7 @@ static void usage(int exitcode);
>  
>  static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
>  const char *qemu_uname_release;
> +const char *qemu_execve_path;
>  
>  /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
> we allocate a bigger stack. Need a better solution, for example
> @@ -3828,6 +3830,38 @@ static void handle_arg_guest_base(const char *arg)
>  have_guest_base = 1;
>  }
>  
> +static void handle_arg_execve(const char *arg)
> +{
> +const char *execfn;
> +char buf[PATH_MAX];
> +char *ret;
> +int len;
> +
> +/* try getauxval() */
> +execfn = (const char *) getauxval(AT_EXECFN);
> +
> +if (execfn != 0) {
> +ret = realpath(execfn, buf);
> +
> +if (ret != NULL) {
> +qemu_execve_path = strdup(buf);
> +return;
> +}
> +}
> +
> +/* try /proc/self/exe */
> +len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
> +
> +if (len != -1) {
> +buf[len] = '\0';
> +qemu_execve_path = strdup(buf);
> +return;
> +}
> +
> +fprintf(stderr, "qemu_execve: unable to determine intepreter's path\n");
> +exit(EXIT_FAILURE);
> +}
> +
>  static void handle_arg_reserved_va(const char *arg)
>  {
>  char *p;
> @@ -3913,6 +3947,8 @@ static const struct qemu_argument arg_table[] = {
>   "uname",  "set qemu uname release string to 'uname'"},
>  {"B",  "QEMU_GUEST_BASE",  true,  handle_arg_guest_base,
>   "address","set guest_base address to 'address'"},
> +{"execve", "QEMU_EXECVE",  false, handle_arg_execve,
> + "",   "use this interpreter when a process calls execve()"},
>  {"R",  "QEMU_RESERVED_VA", true,  handle_arg_reserved_va,
>   "size",   "reserve 'size' bytes for guest virtual address space"},
>  {"d",  "QEMU_LOG", true,  handle_arg_log,
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index bd90cc3..0d9b058 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -140,6 +140,7 @@ void init_task_state(TaskState *ts);
>  void task_settid(TaskState *);
>  void stop_all_tasks(void);
>  extern const char *qemu_uname_release;
> +extern const char *qemu_execve_path;
>  extern unsigned long mmap_min_addr;
>  
>  /* ??? See if we can avoid exposing so much of the loader internals.  */
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 0cbace4..4755978 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -111,6 +111,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "linux_loop.h"
>  #include "uname.h"
>  
> @@ -5854,6 +5855,118 @@ static target_timer_t get_timer_id(abi_long arg)
>  return timerid;
>  }
>  
> +/* qemu_execve() Must return target values and target errnos. */
> +static abi_long qemu_execve(char *filename, char *argv[],
> +  char *envp[])
> +{
> +char *i_arg = NULL, *i_name = NULL;
> +char **new_argp;
> +int argc, fd, ret, i, offset = 3;
> +char *cp;
> +char buf[BINPRM_BUF_SIZE];
> +
> +/* normal execve case */
> +if (qemu_execve_path == NULL || *qemu_execve_path == 0) {
> +return get_errno(execve(filename, argv, envp));
> +}
> +
> +for (argc = 0; argv[argc] != NULL; argc++) {
> +/* nothing */ ;
> +}
> +
> +fd = open(filename, O_RDONLY);
> +if (fd == -1) {
> +return get_errno(fd);
> +}
> +
> +ret = read(fd, buf, 

[Qemu-devel] [PULL 04/11] ide: replace blk_drain_all by blk_drain

2016-02-10 Thread John Snow
Target the drain for just one device.

Signed-off-by: John Snow 
Reported-by: Kevin Wolf 
Reviewed-by: Stefan Hajnoczi 
Message-id: 1453225191-11871-5-git-send-email-js...@redhat.com
---
 hw/ide/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 5cafcf5..40b6cc8 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -649,7 +649,7 @@ void ide_cancel_dma_sync(IDEState *s)
 #ifdef DEBUG_IDE
 printf("%s: draining all remaining requests", __func__);
 #endif
-blk_drain_all();
+blk_drain(s->blk);
 assert(s->bus->dma->aiocb == NULL);
 }
 }
-- 
2.4.3




[Qemu-devel] [PULL 03/11] ide: move buffered DMA cancel to core

2016-02-10 Thread John Snow
Buffered DMA cancellation was added to ATAPI devices and implemented
for the BMDMA HBA. Move the code over to common IDE code and allow
it to be used for any HBA.

Signed-off-by: John Snow 
Reported-by: Kevin Wolf 
Reviewed-by: Stefan Hajnoczi 
Message-id: 1453225191-11871-4-git-send-email-js...@redhat.com
---
 hw/ide/core.c | 45 +
 hw/ide/internal.h |  1 +
 hw/ide/pci.c  | 36 +---
 3 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 37d1058..5cafcf5 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -609,6 +609,51 @@ BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t 
sector_num,
 return aioreq;
 }
 
+/**
+ * Cancel all pending DMA requests.
+ * Any buffered DMA requests are instantly canceled,
+ * but any pending unbuffered DMA requests must be waited on.
+ */
+void ide_cancel_dma_sync(IDEState *s)
+{
+IDEBufferedRequest *req;
+
+/* First invoke the callbacks of all buffered requests
+ * and flag those requests as orphaned. Ideally there
+ * are no unbuffered (Scatter Gather DMA Requests or
+ * write requests) pending and we can avoid to drain. */
+QLIST_FOREACH(req, >buffered_requests, list) {
+if (!req->orphaned) {
+#ifdef DEBUG_IDE
+printf("%s: invoking cb %p of buffered request %p with"
+   " -ECANCELED\n", __func__, req->original_cb, req);
+#endif
+req->original_cb(req->original_opaque, -ECANCELED);
+}
+req->orphaned = true;
+}
+
+/*
+ * We can't cancel Scatter Gather DMA in the middle of the
+ * operation or a partial (not full) DMA transfer would reach
+ * the storage so we wait for completion instead (we beahve
+ * like if the DMA was completed by the time the guest trying
+ * to cancel dma with bmdma_cmd_writeb with BM_CMD_START not
+ * set).
+ *
+ * In the future we'll be able to safely cancel the I/O if the
+ * whole DMA operation will be submitted to disk with a single
+ * aio operation with preadv/pwritev.
+ */
+if (s->bus->dma->aiocb) {
+#ifdef DEBUG_IDE
+printf("%s: draining all remaining requests", __func__);
+#endif
+blk_drain_all();
+assert(s->bus->dma->aiocb == NULL);
+}
+}
+
 static void ide_sector_read(IDEState *s);
 
 static void ide_sector_read_cb(void *opaque, int ret)
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 2d1e2d2..86bde26 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -586,6 +586,7 @@ BlockAIOCB *ide_issue_trim(BlockBackend *blk,
 BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque);
+void ide_cancel_dma_sync(IDEState *s);
 
 /* hw/ide/atapi.c */
 void ide_atapi_cmd(IDEState *s);
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index fa5b63b..92ffee7 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -234,41 +234,7 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val)
 /* Ignore writes to SSBM if it keeps the old value */
 if ((val & BM_CMD_START) != (bm->cmd & BM_CMD_START)) {
 if (!(val & BM_CMD_START)) {
-/* First invoke the callbacks of all buffered requests
- * and flag those requests as orphaned. Ideally there
- * are no unbuffered (Scatter Gather DMA Requests or
- * write requests) pending and we can avoid to drain. */
-IDEBufferedRequest *req;
-IDEState *s = idebus_active_if(bm->bus);
-QLIST_FOREACH(req, >buffered_requests, list) {
-if (!req->orphaned) {
-#ifdef DEBUG_IDE
-printf("%s: invoking cb %p of buffered request %p with"
-   " -ECANCELED\n", __func__, req->original_cb, req);
-#endif
-req->original_cb(req->original_opaque, -ECANCELED);
-}
-req->orphaned = true;
-}
-/*
- * We can't cancel Scatter Gather DMA in the middle of the
- * operation or a partial (not full) DMA transfer would reach
- * the storage so we wait for completion instead (we beahve
- * like if the DMA was completed by the time the guest trying
- * to cancel dma with bmdma_cmd_writeb with BM_CMD_START not
- * set).
- *
- * In the future we'll be able to safely cancel the I/O if the
- * whole DMA operation will be submitted to disk with a single
- * aio operation with preadv/pwritev.
- */
-if (bm->bus->dma->aiocb) {
-#ifdef DEBUG_IDE
-printf("%s: draining all remaining requests", __func__);
-#endif
-blk_drain_all();
-assert(bm->bus->dma->aiocb == NULL);
-  

[Qemu-devel] [PULL 02/11] ide: code motion

2016-02-10 Thread John Snow
Shuffle the reset function upwards.

Signed-off-by: John Snow 
Reported-by: Kevin Wolf 
Reviewed-by: Stefan Hajnoczi 
Message-id: 1453225191-11871-3-git-send-email-js...@redhat.com
---
 hw/ide/core.c | 116 +-
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 88d5fab..37d1058 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1175,6 +1175,64 @@ void ide_ioport_write(void *opaque, uint32_t addr, 
uint32_t val)
 }
 }
 
+static void ide_reset(IDEState *s)
+{
+#ifdef DEBUG_IDE
+printf("ide: reset\n");
+#endif
+
+if (s->pio_aiocb) {
+blk_aio_cancel(s->pio_aiocb);
+s->pio_aiocb = NULL;
+}
+
+if (s->drive_kind == IDE_CFATA)
+s->mult_sectors = 0;
+else
+s->mult_sectors = MAX_MULT_SECTORS;
+/* ide regs */
+s->feature = 0;
+s->error = 0;
+s->nsector = 0;
+s->sector = 0;
+s->lcyl = 0;
+s->hcyl = 0;
+
+/* lba48 */
+s->hob_feature = 0;
+s->hob_sector = 0;
+s->hob_nsector = 0;
+s->hob_lcyl = 0;
+s->hob_hcyl = 0;
+
+s->select = 0xa0;
+s->status = READY_STAT | SEEK_STAT;
+
+s->lba48 = 0;
+
+/* ATAPI specific */
+s->sense_key = 0;
+s->asc = 0;
+s->cdrom_changed = 0;
+s->packet_transfer_size = 0;
+s->elementary_transfer_size = 0;
+s->io_buffer_index = 0;
+s->cd_sector_size = 0;
+s->atapi_dma = 0;
+s->tray_locked = 0;
+s->tray_open = 0;
+/* ATA DMA state */
+s->io_buffer_size = 0;
+s->req_nb_sectors = 0;
+
+ide_set_signature(s);
+/* init the transfer handler so that 0x is returned on data
+   accesses */
+s->end_transfer_func = ide_dummy_transfer_stop;
+ide_dummy_transfer_stop(s);
+s->media_changed = 0;
+}
+
 static bool cmd_nop(IDEState *s, uint8_t cmd)
 {
 return true;
@@ -2183,64 +2241,6 @@ static void ide_dummy_transfer_stop(IDEState *s)
 s->io_buffer[3] = 0xff;
 }
 
-static void ide_reset(IDEState *s)
-{
-#ifdef DEBUG_IDE
-printf("ide: reset\n");
-#endif
-
-if (s->pio_aiocb) {
-blk_aio_cancel(s->pio_aiocb);
-s->pio_aiocb = NULL;
-}
-
-if (s->drive_kind == IDE_CFATA)
-s->mult_sectors = 0;
-else
-s->mult_sectors = MAX_MULT_SECTORS;
-/* ide regs */
-s->feature = 0;
-s->error = 0;
-s->nsector = 0;
-s->sector = 0;
-s->lcyl = 0;
-s->hcyl = 0;
-
-/* lba48 */
-s->hob_feature = 0;
-s->hob_sector = 0;
-s->hob_nsector = 0;
-s->hob_lcyl = 0;
-s->hob_hcyl = 0;
-
-s->select = 0xa0;
-s->status = READY_STAT | SEEK_STAT;
-
-s->lba48 = 0;
-
-/* ATAPI specific */
-s->sense_key = 0;
-s->asc = 0;
-s->cdrom_changed = 0;
-s->packet_transfer_size = 0;
-s->elementary_transfer_size = 0;
-s->io_buffer_index = 0;
-s->cd_sector_size = 0;
-s->atapi_dma = 0;
-s->tray_locked = 0;
-s->tray_open = 0;
-/* ATA DMA state */
-s->io_buffer_size = 0;
-s->req_nb_sectors = 0;
-
-ide_set_signature(s);
-/* init the transfer handler so that 0x is returned on data
-   accesses */
-s->end_transfer_func = ide_dummy_transfer_stop;
-ide_dummy_transfer_stop(s);
-s->media_changed = 0;
-}
-
 void ide_bus_reset(IDEBus *bus)
 {
 bus->unit = 0;
-- 
2.4.3




[Qemu-devel] [PULL 01/11] ide: Prohibit RESET on IDE drives

2016-02-10 Thread John Snow
This command is meant for ATAPI devices only, prohibit acknowledging it with
a command aborted response when an IDE device is busy.

Signed-off-by: John Snow 
Reported-by: Kevin Wolf 
Reviewed-by: Stefan Hajnoczi 
Message-id: 1453225191-11871-2-git-send-email-js...@redhat.com
---
 hw/ide/core.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 4c46453..88d5fab 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1877,9 +1877,13 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 return;
 }
 
-/* Only DEVICE RESET is allowed while BSY or/and DRQ are set */
-if ((s->status & (BUSY_STAT|DRQ_STAT)) && val != WIN_DEVICE_RESET)
-return;
+/* Only RESET is allowed while BSY and/or DRQ are set,
+ * and only to ATAPI devices. */
+if (s->status & (BUSY_STAT|DRQ_STAT)) {
+if (val != WIN_DEVICE_RESET || s->drive_kind != IDE_CD) {
+return;
+}
+}
 
 if (!ide_cmd_permitted(s, val)) {
 ide_abort_command(s);
-- 
2.4.3




[Qemu-devel] [PATCH 3/4] hw/ppc/spapr: Implement the h_set_xdabr hypercall

2016-02-10 Thread Thomas Huth
The H_SET_XDABR hypercall is similar to H_SET_DABR, but also sets
the extended DABR (DABRX) register.

Signed-off-by: Thomas Huth 
---
 hw/ppc/spapr_hcall.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 6b9d512..f14f849 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -366,6 +366,27 @@ static target_ulong h_set_dabr(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 return H_SUCCESS;
 }
 
+static target_ulong h_set_xdabr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+target_ulong opcode, target_ulong *args)
+{
+CPUState *cs = CPU(cpu);
+target_ulong dabrx = args[1];
+
+if (!has_spr(cpu, SPR_DABR) || !has_spr(cpu, SPR_DABRX)) {
+return H_HARDWARE;
+}
+
+if ((dabrx & ~0xfULL) != 0 || (dabrx & H_DABRX_HYPERVISOR) != 0
+|| (dabrx & (H_DABRX_KERNEL | H_DABRX_USER)) == 0) {
+return H_PARAMETER;
+}
+
+set_spr(cs, SPR_DABRX, dabrx, -1L);
+set_spr(cs, SPR_DABR, args[0], -1L);
+
+return H_SUCCESS;
+}
+
 #define FLAGS_REGISTER_VPA 0x2000ULL
 #define FLAGS_REGISTER_DTL 0x4000ULL
 #define FLAGS_REGISTER_SLBSHADOW   0x6000ULL
@@ -1024,6 +1045,7 @@ static void hypercall_register_types(void)
 /* processor register resource access h-calls */
 spapr_register_hypercall(H_SET_SPRG0, h_set_sprg0);
 spapr_register_hypercall(H_SET_DABR, h_set_dabr);
+spapr_register_hypercall(H_SET_XDABR, h_set_xdabr);
 spapr_register_hypercall(H_SET_MODE, h_set_mode);
 
 /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
-- 
1.8.3.1




[Qemu-devel] [PATCH 2/4] hw/ppc/spapr: Implement h_set_dabr

2016-02-10 Thread Thomas Huth
According to LoPAPR, h_set_dabr should simply set DABRX to 3
(if the register is available), and load the parameter into DABR.
If DABRX is not available, the hypervisor has to check the
"Breakpoint Translation" bit of the DABR register first.

Signed-off-by: Thomas Huth 
---
 hw/ppc/spapr_hcall.c | 26 +-
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 58103ef..6b9d512 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -38,6 +38,12 @@ static void set_spr(CPUState *cs, int spr, target_ulong 
value,
 run_on_cpu(cs, do_spr_sync, );
 }
 
+static bool has_spr(PowerPCCPU *cpu, int spr)
+{
+/* We can test whether the SPR is defined by checking for a valid name */
+return cpu->env.spr_cb[spr].name != NULL;
+}
+
 static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index)
 {
 /*
@@ -344,8 +350,20 @@ static target_ulong h_set_sprg0(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
 {
-/* FIXME: actually implement this */
-return H_HARDWARE;
+CPUState *cs = CPU(cpu);
+
+if (!has_spr(cpu, SPR_DABR)) {
+return H_HARDWARE;  /* DABR register not available */
+}
+
+if (has_spr(cpu, SPR_DABRX)) {
+set_spr(cs, SPR_DABRX, 0x3, -1L);
+} else if (!(args[0] & 0x4)) {  /* Breakpoint Translation set? */
+return H_RESERVED_DABR;
+}
+
+set_spr(cs, SPR_DABR, args[0], -1L);
+return H_SUCCESS;
 }
 
 #define FLAGS_REGISTER_VPA 0x2000ULL
@@ -999,15 +1017,13 @@ static void hypercall_register_types(void)
 /* hcall-bulk */
 spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
 
-/* hcall-dabr */
-spapr_register_hypercall(H_SET_DABR, h_set_dabr);
-
 /* hcall-splpar */
 spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
 spapr_register_hypercall(H_CEDE, h_cede);
 
 /* processor register resource access h-calls */
 spapr_register_hypercall(H_SET_SPRG0, h_set_sprg0);
+spapr_register_hypercall(H_SET_DABR, h_set_dabr);
 spapr_register_hypercall(H_SET_MODE, h_set_mode);
 
 /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH] linux-user: Don't assert if guest tries shmdt(0)

2016-02-10 Thread Laurent Vivier


Le 09/02/2016 16:57, Peter Maydell a écrit :
> Our implementation of shmat() and shmdt() for linux-user was
> using "zero guest address" as its marker for "entry in the
> shm_regions[] array is not in use". This meant that if the
> guest did a shmdt(0) we would match on an unused array entry

Is shmdt(0) valid ?

I mean, if shmat() is called with shmaddr equal to 0:
"the system chooses a suitable (unused) address at which
to attach the segment."

and

"The to-be-detached segment must be currently attached with shmaddr
equal to the value returned by the attaching shmat() call."

Did you check shmat() can return 0 ?
(I think our mmap_find_vma() cannot return 0)

Why don't you fail on shmdt(0) (EINVAL) ?

> and call page_set_flags() with both start and end addresses zero,
> which causes an assertion failure.
> 
> Use an explicit in_use flag to manage the shm_regions[] array,
> so that we avoid this problem.
> 
> Signed-off-by: Peter Maydell 
> Reported-by: Pavel Shamis 
> ---
>  linux-user/syscall.c | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 54ce14a..f46abf7 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -2598,8 +2598,9 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
>  #define N_SHM_REGIONS32
>  
>  static struct shm_region {
> -abi_ulongstart;
> -abi_ulongsize;
> +abi_ulong start;
> +abi_ulong size;
> +bool in_use;
>  } shm_regions[N_SHM_REGIONS];
>  
>  struct target_semid_ds
> @@ -3291,7 +3292,8 @@ static inline abi_ulong do_shmat(int shmid, abi_ulong 
> shmaddr, int shmflg)
> ((shmflg & SHM_RDONLY)? 0 : PAGE_WRITE));
>  
>  for (i = 0; i < N_SHM_REGIONS; i++) {
> -if (shm_regions[i].start == 0) {
> +if (!shm_regions[i].in_use) {
> +shm_regions[i].in_use = true;
>  shm_regions[i].start = raddr;
>  shm_regions[i].size = shm_info.shm_segsz;
>  break;
> @@ -3308,8 +3310,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr)
>  int i;
>  
>  for (i = 0; i < N_SHM_REGIONS; ++i) {
> -if (shm_regions[i].start == shmaddr) {
> -shm_regions[i].start = 0;
> +if (shm_regions[i].in_use && shm_regions[i].start == shmaddr) {
> +shm_regions[i].in_use = false;
>  page_set_flags(shmaddr, shmaddr + shm_regions[i].size, 0);
>  break;
>  }
> 



[Qemu-devel] [PATCH v6 06/16] nbd: convert to using I/O channels for actual socket I/O

2016-02-10 Thread Daniel P. Berrange
Now that all callers are converted to use I/O channels for
initial connection setup, it is possible to switch the core
NBD protocol handling core over to use QIOChannel APIs for
actual sockets I/O.

Signed-off-by: Daniel P. Berrange 
---
 block/nbd-client.c  |  19 +++
 blockdev-nbd.c  |   6 +--
 include/block/nbd.h |  20 ---
 nbd/client.c|  40 +++---
 nbd/common.c|  68 ++--
 nbd/nbd-internal.h  |  18 +++
 nbd/server.c| 150 +---
 qemu-nbd.c  |  10 ++--
 8 files changed, 180 insertions(+), 151 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index c4379a9..75bb2d9 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -80,7 +80,7 @@ static void nbd_reply_ready(void *opaque)
  * that another thread has done the same thing in parallel, so
  * the socket is not readable anymore.
  */
-ret = nbd_receive_reply(s->sioc->fd, >reply);
+ret = nbd_receive_reply(s->ioc, >reply);
 if (ret == -EAGAIN) {
 return;
 }
@@ -131,6 +131,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
 }
 }
 
+g_assert(qemu_in_coroutine());
 assert(i < MAX_NBD_REQUESTS);
 request->handle = INDEX_TO_HANDLE(s, i);
 
@@ -146,17 +147,17 @@ static int nbd_co_send_request(BlockDriverState *bs,
nbd_reply_ready, nbd_restart_write, bs);
 if (qiov) {
 qio_channel_set_cork(s->ioc, true);
-rc = nbd_send_request(s->sioc->fd, request);
+rc = nbd_send_request(s->ioc, request);
 if (rc >= 0) {
-ret = qemu_co_sendv(s->sioc->fd, qiov->iov, qiov->niov,
-offset, request->len);
+ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
+   offset, request->len, 0);
 if (ret != request->len) {
 rc = -EIO;
 }
 }
 qio_channel_set_cork(s->ioc, false);
 } else {
-rc = nbd_send_request(s->sioc->fd, request);
+rc = nbd_send_request(s->ioc, request);
 }
 aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, NULL, bs);
@@ -180,8 +181,8 @@ static void nbd_co_receive_reply(NbdClientSession *s,
 reply->error = EIO;
 } else {
 if (qiov && reply->error == 0) {
-ret = qemu_co_recvv(s->sioc->fd, qiov->iov, qiov->niov,
-offset, request->len);
+ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
+   offset, request->len, 1);
 if (ret != request->len) {
 reply->error = EIO;
 }
@@ -388,7 +389,7 @@ void nbd_client_close(BlockDriverState *bs)
 return;
 }
 
-nbd_send_request(client->sioc->fd, );
+nbd_send_request(client->ioc, );
 
 nbd_teardown_connection(bs);
 }
@@ -403,7 +404,7 @@ int nbd_client_init(BlockDriverState *bs, QIOChannelSocket 
*sioc,
 logout("session init %s\n", export);
 qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
-ret = nbd_receive_negotiate(sioc->fd, export,
+ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
 >nbdflags, >size, errp);
 if (ret < 0) {
 logout("Failed to negotiate with the NBD server\n");
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 9baf883..2148753 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -27,7 +27,6 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition 
condition,
gpointer opaque)
 {
 QIOChannelSocket *cioc;
-int fd;
 
 cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
  NULL);
@@ -35,10 +34,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition 
condition,
 return TRUE;
 }
 
-fd = dup(cioc->fd);
-if (fd >= 0) {
-nbd_client_new(NULL, fd, nbd_client_put);
-}
+nbd_client_new(NULL, cioc, nbd_client_put);
 object_unref(OBJECT(cioc));
 return TRUE;
 }
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 7eccb41..1080ef8 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -23,6 +23,7 @@
 
 #include "qemu-common.h"
 #include "qemu/option.h"
+#include "io/channel-socket.h"
 
 struct nbd_request {
 uint32_t magic;
@@ -73,12 +74,17 @@ enum {
 /* Maximum size of a single READ/WRITE data buffer */
 #define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
 
-ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read);
-int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
+ssize_t nbd_wr_syncv(QIOChannel *ioc,
+ struct iovec *iov,
+ size_t niov,
+ size_t offset,
+ size_t length,
+ bool do_read);
+int 

[Qemu-devel] [PATCH v6 15/16] nbd: enable use of TLS with qemu-nbd server

2016-02-10 Thread Daniel P. Berrange
This modifies the qemu-nbd program so that it is possible to
request the use of TLS with the server. It simply adds a new
command line option --tls-creds which is used to provide the
ID of a QCryptoTLSCreds object previously created via the
--object command line option.

For example

  qemu-nbd --object tls-creds-x509,id=tls0,endpoint=server,\
dir=/home/berrange/security/qemutls \
   --tls-creds tls0 \
   --exportname default

TLS requires the new style NBD protocol, so if no export name
is set (via --export-name), then we use the default NBD protocol
export name ""

TLS is only supported when using an IPv4/IPv6 socket listener.
It is not possible to use with UNIX sockets, which includes
when connecting the NBD server to a host device.

Signed-off-by: Daniel P. Berrange 
---
 qemu-nbd.c| 62 ++-
 qemu-nbd.texi |  4 
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 8acd515..933ca4a 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -42,6 +42,7 @@
 #define QEMU_NBD_OPT_DISCARD   3
 #define QEMU_NBD_OPT_DETECT_ZEROES 4
 #define QEMU_NBD_OPT_OBJECT5
+#define QEMU_NBD_OPT_TLSCREDS  6
 
 static NBDExport *exp;
 static bool newproto;
@@ -54,6 +55,7 @@ static int shared = 1;
 static int nb_fds;
 static QIOChannelSocket *server_ioc;
 static int server_watch = -1;
+static QCryptoTLSCreds *tlscreds;
 
 static void usage(const char *name)
 {
@@ -342,7 +344,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition 
cond, gpointer opaque)
 nb_fds++;
 nbd_update_server_watch();
 nbd_client_new(newproto ? NULL : exp, cioc,
-   NULL, NULL, nbd_client_closed);
+   tlscreds, NULL, nbd_client_closed);
 object_unref(OBJECT(cioc));
 
 return TRUE;
@@ -402,6 +404,37 @@ static QemuOptsList qemu_object_opts = {
 };
 
 
+
+static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
+{
+Object *obj;
+QCryptoTLSCreds *creds;
+
+obj = object_resolve_path_component(
+object_get_objects_root(), id);
+if (!obj) {
+error_setg(errp, "No TLS credentials with id '%s'",
+   id);
+return NULL;
+}
+creds = (QCryptoTLSCreds *)
+object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
+if (!creds) {
+error_setg(errp, "Object with id '%s' is not TLS credentials",
+   id);
+return NULL;
+}
+
+if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+error_setg(errp,
+   "Expecting TLS credentials with a server endpoint");
+return NULL;
+}
+object_ref(obj);
+return creds;
+}
+
+
 int main(int argc, char **argv)
 {
 BlockBackend *blk;
@@ -441,6 +474,7 @@ int main(int argc, char **argv)
 { "verbose", 0, NULL, 'v' },
 { "object", 1, NULL, QEMU_NBD_OPT_OBJECT },
 { "export-name", 1, NULL, 'x' },
+{ "tls-creds", 1, NULL, QEMU_NBD_OPT_TLSCREDS },
 { NULL, 0, NULL, 0 }
 };
 int ch;
@@ -458,6 +492,7 @@ int main(int argc, char **argv)
 BlockdevDetectZeroesOptions detect_zeroes = 
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
 QDict *options = NULL;
 const char *export_name = NULL;
+const char *tlscredsid = NULL;
 
 /* The client thread uses SIGTERM to interrupt the server.  A signal
  * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
@@ -634,6 +669,9 @@ int main(int argc, char **argv)
 exit(EXIT_FAILURE);
 }
 }   break;
+case QEMU_NBD_OPT_TLSCREDS:
+tlscredsid = optarg;
+break;
 }
 }
 
@@ -650,6 +688,28 @@ int main(int argc, char **argv)
 exit(EXIT_FAILURE);
 }
 
+if (tlscredsid) {
+if (sockpath) {
+error_report("TLS is only supported with IPv4/IPv6");
+exit(EXIT_FAILURE);
+}
+if (device) {
+error_report("TLS is not supported with a host device");
+exit(EXIT_FAILURE);
+}
+if (!export_name) {
+/* Set the default NBD protocol export name, since
+ * we *must* use new style protocol for TLS */
+export_name = "";
+}
+tlscreds = nbd_get_tls_creds(tlscredsid, _err);
+if (local_err) {
+error_report("Failed to get TLS creds %s",
+ error_get_pretty(local_err));
+exit(EXIT_FAILURE);
+}
+}
+
 if (disconnect) {
 int nbdfd = open(argv[optind], O_RDWR);
 if (nbdfd < 0) {
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index 2516963..417f9c6 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -76,6 +76,10 @@ Don't exit on the last connection
 @item -x NAME, --export-name=NAME
 Set the NDB volume export name. This switches the server to use
 the new style NBD protocol negotiation
+@item 

[Qemu-devel] make distclean can fail do to a configuration check

2016-02-10 Thread John Snow
Stuff like this:

> ~/s/q/b/git> make distclean
> config-host.mak is out-of-date, running configure
>
> ERROR: sizeof(size_t) doesn't match GLIB_SIZEOF_SIZE_T.
>You probably need to set PKG_CONFIG_LIBDIR
>to point to the right pkg-config files for your
>build target
>
> Makefile:35: recipe for target 'config-host.mak' failed
> make: *** [config-host.mak] Error 1`

is obnoxious. We had patches from Fam to allow some targets to bypass
the configuration check, did those die? Did we not want them for some
reason?

--js



[Qemu-devel] [PATCH v6 13/16] nbd: implement TLS support in the protocol negotiation

2016-02-10 Thread Daniel P. Berrange
This extends the NBD protocol handling code so that it is capable
of negotiating TLS support during the connection setup. This involves
requesting the STARTTLS protocol option before any other NBD options.

Signed-off-by: Daniel P. Berrange 
---
 block/nbd-client.c  |  12 +++--
 blockdev-nbd.c  |   2 +-
 include/block/nbd.h |   8 
 nbd/client.c| 136 +++-
 nbd/common.c|  15 ++
 nbd/nbd-internal.h  |  14 ++
 nbd/server.c| 118 ++---
 qemu-nbd.c  |   4 +-
 8 files changed, 296 insertions(+), 13 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 75bb2d9..1c79e4b 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -405,7 +405,10 @@ int nbd_client_init(BlockDriverState *bs, QIOChannelSocket 
*sioc,
 qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
 ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
->nbdflags, >size, errp);
+>nbdflags,
+NULL, NULL,
+>ioc,
+>size, errp);
 if (ret < 0) {
 logout("Failed to negotiate with the NBD server\n");
 return ret;
@@ -415,8 +418,11 @@ int nbd_client_init(BlockDriverState *bs, QIOChannelSocket 
*sioc,
 qemu_co_mutex_init(>free_sema);
 client->sioc = sioc;
 object_ref(OBJECT(client->sioc));
-client->ioc = QIO_CHANNEL(sioc);
-object_ref(OBJECT(client->ioc));
+
+if (!client->ioc) {
+client->ioc = QIO_CHANNEL(sioc);
+object_ref(OBJECT(client->ioc));
+}
 
 /* Now that we're connected, set the socket to be non-blocking and
  * kick the reply mechanism.  */
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 2148753..f181840 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -34,7 +34,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition 
condition,
 return TRUE;
 }
 
-nbd_client_new(NULL, cioc, nbd_client_put);
+nbd_client_new(NULL, cioc, NULL, NULL, nbd_client_put);
 object_unref(OBJECT(cioc));
 return TRUE;
 }
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 1080ef8..b197adc 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -24,6 +24,7 @@
 #include "qemu-common.h"
 #include "qemu/option.h"
 #include "io/channel-socket.h"
+#include "crypto/tlscreds.h"
 
 struct nbd_request {
 uint32_t magic;
@@ -56,7 +57,10 @@ struct nbd_reply {
 #define NBD_REP_ACK (1) /* Data sending finished. */
 #define NBD_REP_SERVER  (2) /* Export description. */
 #define NBD_REP_ERR_UNSUP   ((UINT32_C(1) << 31) | 1) /* Unknown option. */
+#define NBD_REP_ERR_POLICY  ((UINT32_C(1) << 31) | 2) /* Server denied */
 #define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
+#define NBD_REP_ERR_TLS_REQD((UINT32_C(1) << 31) | 5) /* TLS required */
+
 
 #define NBD_CMD_MASK_COMMAND   0x
 #define NBD_CMD_FLAG_FUA   (1 << 16)
@@ -81,6 +85,8 @@ ssize_t nbd_wr_syncv(QIOChannel *ioc,
  size_t length,
  bool do_read);
 int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
+  QCryptoTLSCreds *tlscreds, const char *hostname,
+  QIOChannel **outioc,
   off_t *size, Error **errp);
 int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size);
 ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request);
@@ -106,6 +112,8 @@ void nbd_export_close_all(void);
 
 void nbd_client_new(NBDExport *exp,
 QIOChannelSocket *sioc,
+QCryptoTLSCreds *tlscreds,
+const char *tlsaclname,
 void (*close)(NBDClient *));
 void nbd_client_get(NBDClient *client);
 void nbd_client_put(NBDClient *client);
diff --git a/nbd/client.c b/nbd/client.c
index 5e47ac7..9e5b651 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -83,10 +83,18 @@ static int nbd_handle_reply_err(uint32_t opt, uint32_t 
type, Error **errp)
 error_setg(errp, "Unsupported option type %x", opt);
 break;
 
+case NBD_REP_ERR_POLICY:
+error_setg(errp, "Denied by server for option %x", opt);
+break;
+
 case NBD_REP_ERR_INVALID:
 error_setg(errp, "Invalid data length for option %x", opt);
 break;
 
+case NBD_REP_ERR_TLS_REQD:
+error_setg(errp, "TLS negotiation required before option %x", opt);
+break;
+
 default:
 error_setg(errp, "Unknown error code when asking for option %x", opt);
 break;
@@ -242,17 +250,127 @@ static int nbd_receive_query_exports(QIOChannel *ioc,
 return 0;
 }
 
+static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
+   

[Qemu-devel] [PATCH v6 08/16] nbd: make server compliant with fixed newstyle spec

2016-02-10 Thread Daniel P. Berrange
If the client does not request the fixed new style protocol,
then we should only accept NBD_OPT_EXPORT_NAME. All other
options are only valid when fixed new style has been activated.

The qemu-nbd client doesn't currently request fixed new style
protocol, but this change won't break qemu-nbd, because it
fortunately only ever uses NBD_OPT_EXPORT_NAME, so was never
triggering the non-compliant server behaviour.

Signed-off-by: Daniel P. Berrange 
---
 nbd/server.c | 69 
 1 file changed, 46 insertions(+), 23 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 15aa03d..074a1e6 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -310,6 +310,7 @@ fail:
 static int nbd_negotiate_options(NBDClient *client)
 {
 uint32_t flags;
+bool fixedNewstyle = false;
 
 /* Client sends:
 [ 0 ..   3]   client flags
@@ -332,14 +333,19 @@ static int nbd_negotiate_options(NBDClient *client)
 }
 TRACE("Checking client flags");
 be32_to_cpus();
-if (flags != 0 && flags != NBD_FLAG_C_FIXED_NEWSTYLE) {
-LOG("Bad client flags received");
+if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
+TRACE("Support supports fixed newstyle handshake");
+fixedNewstyle = true;
+flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
+}
+if (flags != 0) {
+TRACE("Unknown client flags 0x%x received", flags);
 return -EIO;
 }
 
 while (1) {
 int ret;
-uint32_t tmp, length;
+uint32_t clientflags, length;
 uint64_t magic;
 
 if (nbd_negotiate_read(client->ioc, , sizeof(magic)) !=
@@ -353,10 +359,12 @@ static int nbd_negotiate_options(NBDClient *client)
 return -EINVAL;
 }
 
-if (nbd_negotiate_read(client->ioc, , sizeof(tmp)) != sizeof(tmp)) 
{
+if (nbd_negotiate_read(client->ioc, ,
+   sizeof(clientflags)) != sizeof(clientflags)) {
 LOG("read failed");
 return -EINVAL;
 }
+clientflags = be32_to_cpu(clientflags);
 
 if (nbd_negotiate_read(client->ioc, , sizeof(length)) !=
 sizeof(length)) {
@@ -365,26 +373,41 @@ static int nbd_negotiate_options(NBDClient *client)
 }
 length = be32_to_cpu(length);
 
-TRACE("Checking option");
-switch (be32_to_cpu(tmp)) {
-case NBD_OPT_LIST:
-ret = nbd_negotiate_handle_list(client, length);
-if (ret < 0) {
-return ret;
+TRACE("Checking option 0x%x", clientflags);
+if (fixedNewstyle) {
+switch (clientflags) {
+case NBD_OPT_LIST:
+ret = nbd_negotiate_handle_list(client, length);
+if (ret < 0) {
+return ret;
+}
+break;
+
+case NBD_OPT_ABORT:
+return -EINVAL;
+
+case NBD_OPT_EXPORT_NAME:
+return nbd_negotiate_handle_export_name(client, length);
+
+default:
+TRACE("Unsupported option 0x%x", clientflags);
+nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP,
+   clientflags);
+return -EINVAL;
+}
+} else {
+/*
+ * If broken new-style we should drop the connection
+ * for anything except NBD_OPT_EXPORT_NAME
+ */
+switch (clientflags) {
+case NBD_OPT_EXPORT_NAME:
+return nbd_negotiate_handle_export_name(client, length);
+
+default:
+TRACE("Unsupported option 0x%x", clientflags);
+return -EINVAL;
 }
-break;
-
-case NBD_OPT_ABORT:
-return -EINVAL;
-
-case NBD_OPT_EXPORT_NAME:
-return nbd_negotiate_handle_export_name(client, length);
-
-default:
-tmp = be32_to_cpu(tmp);
-LOG("Unsupported option 0x%x", tmp);
-nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, tmp);
-return -EINVAL;
 }
 }
 }
-- 
2.5.0




Re: [Qemu-devel] [PULL v3 00/33] Misc patches for 2016-02-08

2016-02-10 Thread John Snow


On 02/10/2016 07:48 AM, Paolo Bonzini wrote:
> 
> 
> On 09/02/2016 17:13, Paolo Bonzini wrote:
>> The following changes since commit ac1be2ae6b2995b99430c48329eb971b0281acf1:
>>
>>   Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2016-02-09' 
>> into staging (2016-02-09 11:42:43 +)
>>
>> are available in the git repository at:
>>
>>   git://github.com/bonzini/qemu.git tags/for-upstream
>>
>> for you to fetch changes up to 150dcd1aed6f9ebcf370dbb9b666e7d7c6d908e2:
>>
>>   qemu-char, io: fix ordering of arguments for UDP socket creation 
>> (2016-02-09 17:09:15 +0100)
>>
>> 
>> * switch to C11 atomics (Alex)
>> * Coverity fixes for IPMI (Corey), i386 (Paolo), qemu-char (Paolo)
>> * at long last, fail on wrong .pc files if -m32 is in use (Daniel)
>> * qemu-char regression fix (Daniel)
>> * SAS1068 device (Paolo)
>> * memory region docs improvements (Peter)
>> * target-i386 cleanups (Richard)
>> * qemu-nbd docs improvements (Sitsofe)
>> * thread-safe memory hotplug (Stefan)
>>
>> 
>>
>> v2->v3: rebased due to semantic conflict, split MAINTAINERS patch
>>
>> Alex Bennée (1):
>>   include/qemu/atomic.h: default to __atomic functions
>>
>> Andrew Jones (1):
>>   kvm-all: trace: strerror fixup
>>
>> Corey Minyard (2):
>>   ipmi_bmc_sim: Fix off by one in check.
>>   ipmi_bmc_sim: Add break to correct watchdog NMI check
>>
>> Daniel P. Berrange (2):
>>   configure: sanity check the glib library that pkg-config finds

Appears to break clang 3.5.0 on F22;

jsnow@scv ((977a82a...)) ~/s/q/b/git> ../../configure --cxx=clang++
--cc=clang --host-cc=clang --extra-cflags=-Werror
--extra-cflags=-fsanitize=undefined
--extra-cflags=-Wno-deprecated-declarations
--extra-cflags=-fno-sanitize=float-divide-by-zero
--target-list="x86_64-softmmu"

ERROR: sizeof(size_t) doesn't match GLIB_SIZEOF_SIZE_T.
   You probably need to set PKG_CONFIG_LIBDIR
   to point to the right pkg-config files for your
   build target



Problem appears to be this:

/usr/include/glib-2.0/glib/gmem.h:76:58: error: unknown attribute
'__alloc_size__' ignored [-Werror,-Wunknown-attributes]
gpointer g_malloc (gsize n_bytes) G_GNUC_MALLOC
G_GNUC_ALLOC_SIZE(1);
^
/usr/include/glib-2.0/glib/gmacros.h:67:45: note: expanded from macro
'G_GNUC_ALLOC_SIZE'
#define G_GNUC_ALLOC_SIZE(x) __attribute__((__alloc_size__(x)))


>>   char: fix repeated registration of tcp chardev I/O handlers
>>
>> Janosch Frank (1):
>>   scripts/kvm/kvm_stat: Fix tracefs access checking
>>
>> John Snow (1):
>>   nbd: avoid unaligned uint64_t store
>>
>> Paolo Bonzini (9):
>>   memory: add early bail out from cpu_physical_memory_set_dirty_range
>>   qemu-char: Keep pty slave file descriptor open until the master is 
>> closed
>>   scsi: push WWN fields up to SCSIDevice
>>   scsi-generic: grab device and port SAS addresses from backend
>>   hw: Add support for LSI SAS1068 (mptsas) device
>>   ipmi: do not take/drop iothread lock
>>   target-i386: fix PSE36 mode
>>   get_maintainer.pl: fall back to git if only lists are found
>>   qemu-char, io: fix ordering of arguments for UDP socket creation
>>
>> Peter Maydell (1):
>>   docs/memory.txt: Improve list of different memory regions
>>
>> Richard Henderson (10):
>>   target-i386: Create gen_lea_v_seg
>>   target-i386: Introduce mo_stacksize
>>   target-i386: Use gen_lea_v_seg in gen_lea_modrm
>>   target-i386: Use gen_lea_v_seg in stack subroutines
>>   target-i386: Access segs via TCG registers
>>   target-i386: Use gen_lea_v_seg in pusha/popa
>>   target-i386: Rewrite gen_enter inline
>>   target-i386: Rewrite leave
>>   target-i386: Tidy gen_add_A0_im
>>   target-i386: Deconstruct the cpu_T array
>>
>> Sitsofe Wheeler (3):
>>   qemu-nbd: Fix unintended texi verbatim formatting
>>   qemu-nbd: Minor texi updates
>>   qemu-nbd: Fix texi sentence capitalisation
>>
>> Stefan Hajnoczi (1):
>>   memory: RCU ram_list.dirty_memory[] for safe RAM hotplug
>>
>> Stephen Warren (1):
>>   MAINTAINERS: add all-match entry for qemu-devel@
>>
>>  MAINTAINERS   |5 +
>>  configure |   24 +
>>  default-configs/pci.mak   |1 +
>>  docs/memory.txt   |   26 +-
>>  exec.c|   75 +-
>>  hw/ipmi/ipmi.c|2 -
>>  hw/ipmi/ipmi_bmc_sim.c|4 +-
>>  hw/scsi/Makefile.objs |1 +
>>  hw/scsi/mpi.h | 1153 ++
>>  hw/scsi/mptconfig.c   |  904 
>>  hw/scsi/mptendian.c   |  204 ++
>>  hw/scsi/mptsas.c  | 1441 +
>>  hw/scsi/mptsas.h  |  100 +++
>>  hw/scsi/scsi-disk.c   |   23 +-
>>  

[Qemu-devel] [PULL 07/11] fdc: always compile-check debug prints

2016-02-10 Thread John Snow
Coverity noticed that some variables are only used by debug prints, and
called them unused. Always compile the print statements. While we're
here, print to stderr as well.

Bonus: Fix a debug printf I broke in f31937aa8

Signed-off-by: John Snow 
Reviewed-by: Eric Blake 
[Touched up commit message. --js]
Message-id: 1454971529-14830-1-git-send-email-js...@redhat.com
---
 hw/block/fdc.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index a6f22ef..9838d21 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -41,14 +41,15 @@
 
 //
 /* debug Floppy devices */
-//#define DEBUG_FLOPPY
 
-#ifdef DEBUG_FLOPPY
+#define DEBUG_FLOPPY 0
+
 #define FLOPPY_DPRINTF(fmt, ...)\
-do { printf("FLOPPY: " fmt , ## __VA_ARGS__); } while (0)
-#else
-#define FLOPPY_DPRINTF(fmt, ...)
-#endif
+do {\
+if (DEBUG_FLOPPY) { \
+fprintf(stderr, "FLOPPY: " fmt , ## __VA_ARGS__);   \
+}   \
+} while (0)
 
 //
 /* Floppy drive emulation   */
@@ -353,7 +354,7 @@ static int pick_geometry(FDrive *drv)
 parse = _formats[size_match];
 FLOPPY_DPRINTF("User requested floppy drive type '%s', "
"but inserted medium appears to be a "
-   "%d sector '%s' type\n",
+   "%"PRId64" sector '%s' type\n",
FloppyDriveType_lookup[drv->drive],
nb_sectors,
FloppyDriveType_lookup[parse->drive]);
-- 
2.4.3




[Qemu-devel] [PULL 08/11] ahci: Do not unmap NULL addresses

2016-02-10 Thread John Snow
Definitely don't try to unmap a garbage address.

Reported-by: Zuozhi fzz 
Signed-off-by: John Snow 
Message-id: 1454103689-13042-2-git-send-email-js...@redhat.com
---
 hw/ide/ahci.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 7e87b18..3a95dad 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -662,6 +662,10 @@ static bool ahci_map_fis_address(AHCIDevice *ad)
 
 static void ahci_unmap_fis_address(AHCIDevice *ad)
 {
+if (ad->res_fis == NULL) {
+DPRINTF(ad->port_no, "Attempt to unmap NULL FIS address\n");
+return;
+}
 dma_memory_unmap(ad->hba->as, ad->res_fis, 256,
  DMA_DIRECTION_FROM_DEVICE, 256);
 ad->res_fis = NULL;
@@ -678,6 +682,10 @@ static bool ahci_map_clb_address(AHCIDevice *ad)
 
 static void ahci_unmap_clb_address(AHCIDevice *ad)
 {
+if (ad->lst == NULL) {
+DPRINTF(ad->port_no, "Attempt to unmap NULL CLB address\n");
+return;
+}
 dma_memory_unmap(ad->hba->as, ad->lst, 1024,
  DMA_DIRECTION_FROM_DEVICE, 1024);
 ad->lst = NULL;
-- 
2.4.3




[Qemu-devel] [PATCH v6 00/16] Implement TLS support to QEMU NBD server & client

2016-02-10 Thread Daniel P. Berrange
This is an update of the series previously posted:

  v1: https://lists.gnu.org/archive/html/qemu-devel/2015-11/msg06126.html
v2: https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg01580.html
  v3: https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg03440.html
v4: https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg04160.html

This series of patches implements support for TLS in the QEMU NBD
server and client code.

It is implementing the NBD_OPT_STARTTLS option that was previously
discussed here:

  https://www.redhat.com/archives/libvir-list/2014-October/msg00506.html

And is also described in the NBD spec here:

  https://github.com/yoe/nbd/blob/master/doc/proto.md

To ensure that clients always get a suitable error message from the
NBD server when it is configured with TLS, a client speaking the
new style protocol will always send NBD_OPT_LIST as the first thing
it does, so that we can see the NBD_REP_ERR_TLS_REQD response. This
should all be backwards & forwards compatible with previous QEMU
impls of NBD

Usage of TLS is described in the commit messages for each patch,
but for sake of people who don't want to explore the series, here's
the summary

Starting QEMU system emulator with a disk backed by an TLS encrypted
NBD export

  $ qemu-system-x86_64 \
 -object 
tls-creds-x509,id=tls0,endpoint=client,dir=/home/berrange/security/qemutls \
 -drive driver=nbd,host=localhost,port=9000,tls-creds=tls0

Starting a standalone NBD server providing a TLS encrypted NBD export

  $ qemu-nbd \
 --object 
tls-creds-x509,id=tls0,endpoint=server,dir=/home/berrange/security/qemutls
 --tls-creds tls0 \
 --export-name default \
 $IMAGEFILE

The --export-name is optional, if omitted, the default "" will
be used.

Starting a QEMU system emulator built-in NBD server

  $ qemu-system-x86_64 \
 -qmp unix:/tmp/qmp,server \
 -hda /home/berrange/Fedora-Server-netinst-x86_64-23.iso \
 -object 
tls-creds-x509,id=tls0,dir=/home/berrange/security/qemutls,endpoint=server

  $ qmp-shell /tmp/qmp
 (qmp) nbd-server-start addr={"host":"localhost","port":"9000"}
 tls-creds=tls0
 (qmp) nbd-server-add device=ide0-hd0

The first 2 patches are taken from this other pending patch
series in order to facilitate merge:

  https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg00296.html

The first 4 patches are the conversion to the I/O channels
framework.

The next 6 patches are general tweaks to QEMU's impl of the
NBD protocol for better compliance and/or future proofing.

The next patch provides the NBD protocol TLS implementation.

The final 3 patches allow TLS to be enabled in the QEMU NBD
client and servers.

Changed in v6:

 - Rebase to resolve conflicts with recent qapi changes (Eric)
   and recent nbd changes (Paolo) which merged
 - Add MODULE_INIT_QOM to qemu-io & qemu-img to ensure they
   can load the QIOChannel object types

Changed in v5:

 - Pulled in https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg00297.html
   and applied fixes for issues Eric mentioned in that review
 - Pulled in https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg00302.html
 - Rebased to latest git master

Changed in v4:

 - Don't pick the first export name in the list if no export
   name is provided (Paolo)
 - Set client requested export name to "" if none is provided
   by the user (Paolo)
 - Set server advertized export name to "" if TLS is enabled
   and none is provided by the user (Paolo)
 - Rename qemu-nbd --exportname to --export-name (Paolo)
 - Use iov_discard_front() to simplify iov handling (Paolo)

Changed in v3:

 - Rebase to resolve conflicts with recently merged NBD patches

Changed in v2:

 - Fix error codes used during NBD TLS option negotiate
 - Update patch with helpers for UserCreatable object types

Daniel P. Berrange (16):
  qom: add helpers for UserCreatable object types
  qemu-nbd: add support for --object command line arg
  nbd: convert block client to use I/O channels for connection setup
  nbd: convert qemu-nbd server to use I/O channels for connection setup
  nbd: convert blockdev NBD server to use I/O channels for connection
setup
  nbd: convert to using I/O channels for actual socket I/O
  nbd: invert client logic for negotiating protocol version
  nbd: make server compliant with fixed newstyle spec
  nbd: make client request fixed new style if advertized
  nbd: allow setting of an export name for qemu-nbd server
  nbd: always query export list in fixed new style protocol
  nbd: use "" as a default export name if none provided
  nbd: implement TLS support in the protocol negotiation
  nbd: enable use of TLS with NBD block driver
  nbd: enable use of TLS with qemu-nbd server
  nbd: enable use of TLS with nbd-server-start command

 Makefile|   6 +-
 block/nbd-client.c  |  91 ++---
 block/nbd-client.h  |  10 +-
 block/nbd.c | 105 --
 blockdev-nbd.c

[Qemu-devel] [PATCH v6 02/16] qemu-nbd: add support for --object command line arg

2016-02-10 Thread Daniel P. Berrange
Allow creation of user creatable object types with qemu-nbd
via a new --object command line arg. This will be used to supply
passwords and/or encryption keys to the various block driver
backends via the recently added 'secret' object type.

 # printf letmein > mypasswd.txt
 # qemu-nbd --object secret,id=sec0,file=mypasswd.txt \
  ...other nbd args...

Reviewed-by: Eric Blake 
Signed-off-by: Daniel P. Berrange 
---
 qemu-nbd.c| 34 ++
 qemu-nbd.texi |  6 ++
 2 files changed, 40 insertions(+)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index d374015..130c306 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -24,9 +24,11 @@
 #include "qemu/main-loop.h"
 #include "qemu/sockets.h"
 #include "qemu/error-report.h"
+#include "qemu/config-file.h"
 #include "block/snapshot.h"
 #include "qapi/util.h"
 #include "qapi/qmp/qstring.h"
+#include "qom/object_interfaces.h"
 
 #include 
 #include 
@@ -41,6 +43,7 @@
 #define QEMU_NBD_OPT_AIO   2
 #define QEMU_NBD_OPT_DISCARD   3
 #define QEMU_NBD_OPT_DETECT_ZEROES 4
+#define QEMU_NBD_OPT_OBJECT5
 
 static NBDExport *exp;
 static int verbose;
@@ -74,6 +77,9 @@ static void usage(const char *name)
 "  -o, --offset=OFFSET   offset into the image\n"
 "  -P, --partition=NUM   only expose partition NUM\n"
 "\n"
+"General purpose options:\n"
+"  --object type,id=ID,...   define an object such as 'secret' for providing\n"
+"passwords and/or encryption keys\n"
 #ifdef __linux__
 "Kernel NBD client support:\n"
 "  -c, --connect=DEV connect FILE to the local NBD device DEV\n"
@@ -371,6 +377,16 @@ static SocketAddress *nbd_build_socket_address(const char 
*sockpath,
 }
 
 
+static QemuOptsList qemu_object_opts = {
+.name = "object",
+.implied_opt_name = "qom-type",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+.desc = {
+{ }
+},
+};
+
+
 int main(int argc, char **argv)
 {
 BlockBackend *blk;
@@ -408,6 +424,7 @@ int main(int argc, char **argv)
 { "format", 1, NULL, 'f' },
 { "persistent", 0, NULL, 't' },
 { "verbose", 0, NULL, 'v' },
+{ "object", 1, NULL, QEMU_NBD_OPT_OBJECT },
 { NULL, 0, NULL, 0 }
 };
 int ch;
@@ -433,6 +450,8 @@ int main(int argc, char **argv)
 memset(_sigterm, 0, sizeof(sa_sigterm));
 sa_sigterm.sa_handler = termsig_handler;
 sigaction(SIGTERM, _sigterm, NULL);
+module_call_init(MODULE_INIT_QOM);
+qemu_add_opts(_object_opts);
 qemu_init_exec_dir(argv[0]);
 
 while ((ch = getopt_long(argc, argv, sopt, lopt, _ind)) != -1) {
@@ -588,6 +607,14 @@ int main(int argc, char **argv)
 case '?':
 error_report("Try `%s --help' for more information.", argv[0]);
 exit(EXIT_FAILURE);
+case QEMU_NBD_OPT_OBJECT: {
+QemuOpts *opts;
+opts = qemu_opts_parse_noisily(_object_opts,
+   optarg, true);
+if (!opts) {
+exit(EXIT_FAILURE);
+}
+}   break;
 }
 }
 
@@ -597,6 +624,13 @@ int main(int argc, char **argv)
 exit(EXIT_FAILURE);
 }
 
+if (qemu_opts_foreach(_object_opts,
+  user_creatable_add_opts_foreach,
+  NULL, _err)) {
+error_report_err(local_err);
+exit(EXIT_FAILURE);
+}
+
 if (disconnect) {
 fd = open(argv[optind], O_RDWR);
 if (fd < 0) {
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index 0027841..a56ebc3 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -18,6 +18,12 @@ Export a QEMU disk image using the NBD protocol.
 @var{dev} is an NBD device.
 
 @table @option
+@item --object type,id=@var{id},...props...
+Define a new instance of the @var{type} object class identified by @var{id}.
+See the @code{qemu(1)} manual page for full details of the properties
+supported. The common object type that it makes sense to define is the
+@code{secret} object, which is used to supply passwords and/or encryption
+keys.
 @item -p, --port=@var{port}
 The TCP port to listen on (default @samp{10809})
 @item -o, --offset=@var{offset}
-- 
2.5.0




  1   2   3   >