date:20160606



On 2016/5/26 22:55, Peter Maydell wrote:
> A half-shuffle operation takes a word with zeros in the high half:
>      ABCD EFGH IJKL MNOP
> and spreads the bits out so they are in every other bit of the word:
>  0A0B 0C0D 0E0F 0G0H 0I0J 0K0L 0M0N 0O0P
> A half-unshuffle performs the reverse operation.
> 
> Provide functions in bitops.h which implement these operations
> for 32-bit and 64-bit inputs, and add tests for them.
> 
> Signed-off-by: Peter Maydell 
> ---
>  include/qemu/bitops.h | 108 
> ++
>  tests/test-bitops.c   |  68 +++
>  2 files changed, 176 insertions(+)
> 
> diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
> index 755fdd1..15418a8 100644
> --- a/include/qemu/bitops.h
> +++ b/include/qemu/bitops.h
> @@ -428,4 +428,112 @@ static inline uint64_t deposit64(uint64_t value, int 
> start, int length,
>  return (value & ~mask) | ((fieldval << start) & mask);
>  }
>  
> +/**
> + * half_shuffle32:
> + * @value: 32-bit value (of which only the bottom 16 bits are of interest)
> + *
> + * Given an input value:
> + *      ABCD EFGH IJKL MNOP
> + * return the value where the bottom 16 bits are spread out into
> + * the odd bits in the word, and the even bits are zeroed:
> + *  0A0B 0C0D 0E0F 0G0H 0I0J 0K0L 0M0N 0O0P
> + *
> + * Any bits set in the top half of the input are ignored.
> + *
> + * Returns: the shuffled bits.
> + */
> +static inline uint32_t half_shuffle32(uint32_t x)
> +{
> +/* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling 
> Bits".
> + * It ignores any bits set in the top half of the input.
> + */
> +x = ((x & 0xFF00) << 8) | (x & 0x00FF);
> +x = ((x << 4) | x) & 0x0F0F0F0F;
> +x = ((x << 2) | x) & 0x;
> +x = ((x << 1) | x) & 0x;
> +return x;
> +}
> +
> +/**
> + * half_shuffle64:
> + * @value: 64-bit value (of which only the bottom 32 bits are of interest)
> + *
> + * Given an input value:
> + *        ABCD EFGH IJKL MNOP QRST UVWX YZab cdef
> + * return the value where the bottom 32 bits are spread out into
> + * the odd bits in the word, and the even bits are zeroed:
> + *  0A0B 0C0D 0E0F 0G0H 0I0J 0K0L 0M0N  0U0V 0W0X 0Y0Z 0a0b 0c0d 0e0f
> + *
> + * Any bits set in the top half of the input are ignored.
> + *
> + * Returns: the shuffled bits.
> + */
> +static inline uint64_t half_shuffle64(uint64_t x)
> +{
> +/* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling 
> Bits".
> + * It ignores any bits set in the top half of the input.
> + */
> +x = ((x & 0xULL) << 16) | (x & 0x);
> +x = ((x << 8) | x) & 0x00FF00FF00FF00FFULL;
> +x = ((x << 4) | x) & 0x0F0F0F0F0F0F0F0FULL;
> +x = ((x << 2) | x) & 0xULL;
> +x = ((x << 1) | x) & 0xULL;
> +return x;
> +}
> +
> +/**
> + * half_unshuffle32:
> + * @value: 32-bit value (of which only the odd bits are of interest)
> + *
> + * Given an input value:
> + *  xAxB xCxD xExF xGxH xIxJ xKxL xMxN xOxP
> + * return the value where all the odd bits are compressed down
> + * into the low half of the word, and the high half is zeroed:
> + *      ABCD EFGH IJKL MNOP
> + *
> + * Any even bits set in the input are ignored.
> + *
> + * Returns: the unshuffled bits.
> + */
> +static inline uint32_t half_unshuffle32(uint32_t x)
> +{
> +/* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling 
> Bits".
> + * where it is called an inverse half shuffle.
> + */
> +x &= 0x;
> +x = ((x >> 1) | x) & 0x;
> +x = ((x >> 2) | x) & 0x0F0F0F0F;
> +x = ((x >> 4) | x) & 0x00FF00FF;
> +x = ((x >> 8) | x) & 0x;
> +return x;
> +}
> +
> +/**
> + * half_unshuffle64:
> + * @value: 64-bit value (of which only the odd bits are of interest)
> + *
> + * Given an input value:
> + *  xAxB xCxD xExF xGxH xIxJ xKxL xMxN  xUxV xWxX xYxZ xaxb xcxd xexf
> + * return the value where all the odd bits are compressed down
> + * into the low half of the word, and the high half is zeroed:
> + *        ABCD EFGH IJKL MNOP QRST UVWX YZab cdef
> + *
> + * Any even bits set in the input are ignored.
> + *
> + * Returns: the unshuffled bits.
> + */
> +static inline uint64_t half_unshuffle64(uint64_t x)
> +{
> +/* This algorithm is from _Hacker's Delight_ section 7-2 "Shuffling 
> Bits".
> + * where it is called an inverse half shuffle.
> + */
> +x &= 0xULL;
> +x = ((x >> 1) | x) & 0xULL;
> +x = ((x >> 2) | x) & 0x0F0F0F0F0F0F0F0FULL;
> +x = ((x >> 4) | x) & 0x00FF00FF00FF00FFULL;
> +x = ((x >> 8) | x) & 0xULL;
> +x = ((x >> 16) | x) & 0xULL;
> +return x;
> +}
> +
>  #endif
> diff --git a/tests/test-bitops.c b/tests/test-bitops.c
> index 5050950..353fc98 100644
> --- a/tests/test-bitops.c

Re: [Qemu-devel] [PATCH v2 03/22] target-arm: Define new arm_is_el3_or_mon() function



On 2016/5/26 22:55, Peter Maydell wrote:
> The GICv3 system registers need to know if the CPU is AArch64
> in EL3 or AArch32 in Monitor mode. This happens to be the first
> part of the check for arm_is_secure(), so factor it out into a
> new arm_is_el3_or_mon() function that the GIC can also use.
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Shannon Zhao 

> ---
>  target-arm/cpu.h | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index c741b53..2fa1f41 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -1133,8 +1133,8 @@ static inline bool arm_is_secure_below_el3(CPUARMState 
> *env)
>  }
>  }
>  
> -/* Return true if the processor is in secure state */
> -static inline bool arm_is_secure(CPUARMState *env)
> +/* Return true if the CPU is AArch64 EL3 or AArch32 Mon */
> +static inline bool arm_is_el3_or_mon(CPUARMState *env)
>  {
>  if (arm_feature(env, ARM_FEATURE_EL3)) {
>  if (is_a64(env) && extract32(env->pstate, 2, 2) == 3) {
> @@ -1146,6 +1146,15 @@ static inline bool arm_is_secure(CPUARMState *env)
>  return true;
>  }
>  }
> +return false;
> +}
> +
> +/* Return true if the processor is in secure state */
> +static inline bool arm_is_secure(CPUARMState *env)
> +{
> +if (arm_is_el3_or_mon(env)) {
> +return true;
> +}
>  return arm_is_secure_below_el3(env);
>  }
>  
> 

-- 
Shannon

Re: [Qemu-devel] [PATCH v4 1/9] target-avr: AVR cores support is added. 1. basic CPU structure 2. registers 3. no instructions

2016-06-06 Thread Michael Rolnik

Hi Richard,

*Consider making the vm save state reflect the actual hardware format.
That way you can change the qemu internal format while retaining migration
compatibility.*

How it can be done? how can I modify a value passed to VMSTATE_UINT32?


On Mon, Jun 6, 2016 at 11:15 PM, Richard Henderson  wrote:

> On 06/06/2016 03:37 AM, Michael Rolnik wrote:
>
>> +int print_insn_avr(bfd_vma addr, disassemble_info *info)
>> +{
>> +int length  = 0;;
>> +/*  TODO*/
>> +return length;
>> +}
>>
>
> Again, delete this file.  This prohibits the default implementation from
> working.
>
> +static void avr_cpu_reset(CPUState *s)
>> +{
>> +AVRCPU *cpu = AVR_CPU(s);
>> +AVRCPUClass *mcc = AVR_CPU_GET_CLASS(cpu);
>> +CPUAVRState *env = &cpu->env;
>> +
>> +mcc->parent_reset(s);
>> +
>> +memset(env, 0, sizeof(CPUAVRState));
>>
>
> You didn't fix the memset issue I pointed out.
>
> +}
>> +AVRCPU *cpu_avr_init(const char *cpu_model)
>>
>
> Spacing.
>
> +/*
>> +NOTE:   all registers are assumed to hold 8 bit values.
>> +so all operations done on registers should preseve this
>> property
>> +*/
>> +
>> +/*
>> +NOTE:   the flags C,H,V,N,V have either 0 or 1 values
>> +NOTE:   the flag Z has inverse logic, when the value of Zf is 0 the
>> flag is assumed to be set, non zero - not set
>> +*/
>>
>
> Don't put these at the top, put them next to the declarations of the
> actual variables, where they're useful.
>
> +#define TARGET_IS_LIENDIAN 1
>>
>
> Didn't fix the typo, or really just delete this.
>
> In case you're wondering, this *doesn't* mean little-endian.
>
> TARGET_IS_BIENDIAN means that the target changes between big- and
> little-endian at runtime.
>
> One selects the endian-ness of the target in configure.  The default is
> little; set target_bigendian to select big.
>
>
> +uint32_tsregC;  /*   0x0001 1 bits */
>> +uint32_tsregZ;  /*   0x00ff 8 bits */
>>
>
> This isn't quite true; sregZ may contain 0x after adwi.
>
> +static inline int cpu_mmu_index(CPUAVRState *env, bool ifetch)
>> +{
>> +return  0;
>> +}
>>
>
> This should be
>
>   return ifetch ? CODE_INDEX : DATA_INDEX;
>
> +sreg =   (env->sregC & 0x01) << 0
>> +|   (env->sregZ & 0x01) << 1
>> +|   (env->sregN & 0x01) << 2
>> +|   (env->sregV & 0x01) << 3
>> +|   (env->sregS & 0x01) << 4
>> +|   (env->sregH & 0x01) << 5
>> +|   (env->sregT & 0x01) << 6
>> +|   (env->sregI & 0x01) << 7;
>>
>
> Didn't add the function I requested.
> Plus, there's an error in handling of sregZ.
>
> +env->sregC = (tmp >> 0) & 0x01;
>> +env->sregZ = (tmp >> 1) & 0x01;
>> +env->sregN = (tmp >> 2) & 0x01;
>> +env->sregV = (tmp >> 3) & 0x01;
>> +env->sregS = (tmp >> 4) & 0x01;
>> +env->sregH = (tmp >> 5) & 0x01;
>> +env->sregT = (tmp >> 6) & 0x01;
>> +env->sregI = (tmp >> 7) & 0x01;
>>
>
> Likewise.
>
> +VMSTATE_UINT32_ARRAY(r, CPUAVRState, 32),
>> +
>> +VMSTATE_UINT32(sregC,   CPUAVRState),
>> +VMSTATE_UINT32(sregZ,   CPUAVRState),
>> +VMSTATE_UINT32(sregN,   CPUAVRState),
>> +VMSTATE_UINT32(sregV,   CPUAVRState),
>> +VMSTATE_UINT32(sregS,   CPUAVRState),
>> +VMSTATE_UINT32(sregH,   CPUAVRState),
>> +VMSTATE_UINT32(sregT,   CPUAVRState),
>> +VMSTATE_UINT32(sregI,   CPUAVRState),
>> +
>> +VMSTATE_UINT32(rampD,   CPUAVRState),
>> +VMSTATE_UINT32(rampX,   CPUAVRState),
>> +VMSTATE_UINT32(rampY,   CPUAVRState),
>> +VMSTATE_UINT32(rampZ,   CPUAVRState),
>> +
>> +VMSTATE_UINT32(eind,CPUAVRState),
>> +VMSTATE_UINT32(sp,  CPUAVRState),
>> +VMSTATE_UINT32(pc,  CPUAVRState),
>>
>
> Consider making the vm save state reflect the actual hardware format.
> That way you can change the qemu internal format while retaining migration
> compatibility.
>
> +cpu_fprintf(f, "rampX: %02x\n", env->rampX);
>> +cpu_fprintf(f, "rampY: %02x\n", env->rampY);
>> +cpu_fprintf(f, "rampZ: %02x\n", env->rampZ);
>> +cpu_fprintf(f, "rampD: %02x\n", env->rampD);
>> +cpu_fprintf(f, "EIND:  %02x\n", env->eind);
>>
>
> This format probably isn't what you intended after shifting these values.
>
>
> r~
>



-- 
Best Regards,
Michael Rolnik

Re: [Qemu-devel] [PATCH v19 08/10] Implement new driver for block replication

2016-06-06 Thread Changlong Xie


On 05/20/2016 03:36 PM, Changlong Xie wrote:

+if (!failover) {
+/*
+ * This BDS will be closed, and the job should be completed
+ * before the BDS is closed, because we will access hidden
+ * disk, secondary disk in backup_job_completed().
+ */
+if (s->secondary_disk->bs->job) {
+block_job_cancel_sync(s->secondary_disk->bs->job);
+}
+secondary_do_checkpoint(s, errp);
+s->replication_state = BLOCK_REPLICATION_DONE;
+aio_context_release(aio_context);
+return;
+}
+
+s->replication_state = BLOCK_REPLICATION_FAILOVER;
+if (s->secondary_disk->bs->job) {
+block_job_cancel(s->secondary_disk->bs->job);
+}


Since commit b6d2e599 "block: Convert block job core to BlockBackend", 
blockjob uses BB instead of bdrv_ref(), this introduces unexpected 
Segmentation fault with COLO.


In the below backtrace, you can see that. During failover, s->target was 
changed to an illegal value "0x1e1e1e1e1e1e1e1e" in bakup_complete.
Then the active commit job what also has a pointer that refer to 
s->target will use this illegal pointer. To avoid this, we should use 
"bloc_job_cancel_sync" to ensure backup job complete synchronously.


% MALLOC_PERTURB_=$(($RANDOM % 255 + 1))
% export MALLOC_PERTURB_
% gdb --args ./tests/test-replication
(gdb) b backup_complete
(gdb) r
(gdb) n
(gdb) n
(gdb) watch s->target
(gdb) c
Old value = (BlockBackend *) 0x55f1d990
New value = (BlockBackend *) 0x1e1e1e1e1e1e1e1e
0x75a811eb in memset () from /lib64/libc.so.6
(gdb) bt
#0  0x75a811eb in memset () from /lib64/libc.so.6
#1  0x75a7500e in _int_free () from /lib64/libc.so.6
#2  0x7705bf7f in g_free () from /lib64/libglib-2.0.so.0
#3  0x5557e924 in block_job_unref (job=0x55f1d630) at 
blockjob.c:124
#4  0x5557e9da in block_job_completed_single 
(job=0x55f1d630) at blockjob.c:143
#5  0x5557ecc8 in block_job_completed (job=0x55f1d630, 
ret=0) at blockjob.c:215
#6  0x555e6d49 in backup_complete (job=0x55f1d630, 
opaque=0x55f1dd50) at block/backup.c:325
#7  0x5557f5f4 in block_job_defer_to_main_loop_bh 
(opaque=0x596e1dc0) at blockjob.c:500

#8  0x555747d7 in aio_bh_call (bh=0x596e1c30) at async.c:66
#9  0x55574899 in aio_bh_poll (ctx=0x55ce2d60) at async.c:94
#10 0x55581d4d in aio_dispatch (ctx=0x55ce2d60) at 
aio-posix.c:308
#11 0x555823cb in aio_poll (ctx=0x55ce2d60, blocking=false) 
at aio-posix.c:479
#12 0x555d639b in bdrv_drain_poll (bs=0x55dec210) at 
block/io.c:190
#13 0x555d6566 in bdrv_drained_begin (bs=0x55dec210) at 
block/io.c:240
#14 0x55577261 in bdrv_child_cb_drained_begin 
(child=0x596e1ab0) at block.c:665
#15 0x555d5e81 in bdrv_parent_drained_begin (bs=0x55f0e130) 
at block/io.c:54
#16 0x555d652b in bdrv_drained_begin (bs=0x55f0e130) at 
block/io.c:232
#17 0x55577261 in bdrv_child_cb_drained_begin 
(child=0x596e1810) at block.c:665
#18 0x555d5e81 in bdrv_parent_drained_begin (bs=0x596d7030) 
at block/io.c:54
#19 0x555d652b in bdrv_drained_begin (bs=0x596d7030) at 
block/io.c:232
#20 0x55577261 in bdrv_child_cb_drained_begin 
(child=0x55df8830) at block.c:665
#21 0x555d5e81 in bdrv_parent_drained_begin (bs=0x55df0bb0) 
at block/io.c:54

#22 0x555d66ee in bdrv_drain_all () at block/io.c:301
#23 0x55579f9f in bdrv_reopen_multiple (bs_queue=0x55f1dd30, 
errp=0x7fffd768) at block.c:1953
#24 0x5557a169 in bdrv_reopen (bs=0x55df0bb0, 
bdrv_flags=24578, errp=0x7fffd8e0) at block.c:1994
#25 0x555d54a7 in commit_active_start (bs=0x55f0e130, 
base=0x55df0bb0, speed=0, on_error=BLOCKDEV_ON_ERROR_REPORT, 
cb=0x555e8b97 ,
opaque=0x55dec210, errp=0x7fffd8e0, auto_complete=true) at 
block/mirror.c:901
#26 0x555e8d98 in replication_stop (rs=0x5746f7b0, 
failover=true, errp=0x7fffd8e0) at block/replication.c:623
#27 0x555873d1 in replication_stop_all (failover=true, 
errp=0x7fffd928) at replication.c:98
#28 0x5557406d in test_secondary_start () at 
tests/test-replication.c:403
#29 0x7707a5e1 in g_test_run_suite_internal () from 
/lib64/libglib-2.0.so.0
#30 0x7707a7a6 in g_test_run_suite_internal () from 
/lib64/libglib-2.0.so.0
#31 0x7707a7a6 in g_test_run_suite_internal () from 
/lib64/libglib-2.0.so.0

#32 0x7707ab1b in g_test_run_suite () from /lib64/libglib-2.0.so.0
#33 0x555746c8 in main (argc=1, argv=0x7fffdda8) at 
tests/test-replication.c:545

(gdb) d breakpoints
(gdb) c
Program received signal SIGSEGV, Segmentation fault.
0x555c7d6c in blk_bs (blk=0x1e1e1e1e1e1e1e1e) at 
block/block-backend.c:389

389 return blk->root ? b

Re: [Qemu-devel] [PATCH v3 1/3] IOMMU: add VTD_CAP_CM to vIOMMU capability exposed to guest

2016-06-06 Thread Huang, Kai




On 6/7/2016 3:58 PM, Alex Williamson wrote:

On Tue, 7 Jun 2016 11:20:32 +0800
Peter Xu  wrote:


On Mon, Jun 06, 2016 at 11:02:11AM -0600, Alex Williamson wrote:

On Mon, 6 Jun 2016 21:43:17 +0800
Peter Xu  wrote:


On Mon, Jun 06, 2016 at 07:11:41AM -0600, Alex Williamson wrote:

On Mon, 6 Jun 2016 13:04:07 +0800
Peter Xu  wrote:

[...]

Besides the reason that there might have guests that do not support
CM=1, will there be performance considerations? When user's
configuration does not require CM capability (e.g., generic VM
configuration, without VFIO), shall we allow user to disable the CM
bit so that we can have better IOMMU performance (avoid extra and
useless invalidations)?


With Alexey's proposed patch to have callback ops when the iommu
notifier list adds its first entry and removes its last, any of the
additional overhead to generate notifies when nobody is listening can
be avoided.  These same callbacks would be the ones that need to
generate a hw_error if a notifier is added while running in CM=0.


Not familar with Alexey's patch


https://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg00079.html


Thanks for the pointer. :)




, but is that for VFIO only?


vfio is currently the only user of the iommu notifier, but the
interface is generic, which is how it should (must) be.


Yes.




I mean, if
we configured CMbit=1, guest kernel will send invalidation request
every time it creates new entries (context entries, or iotlb
entries). Even without VFIO notifiers, guest need to trap into QEMU
and process the invalidation requests. This is avoidable if we are not
using VFIO devices at all (so no need to maintain any mappings),
right?


CM=1 only defines that not-present and invalid entries can be cached,
any changes to existing entries requires an invalidation regardless of
CM.  What you're looking for sounds more like ECAP.C:


Yes, but I guess what I was talking about is CM bit but not ECAP.C.
When we clear/replace one context entry, guest kernel will definitely
send one context entry invalidation to QEMU:

static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 
devfn)
{
if (!iommu)
return;

clear_context_table(iommu, bus, devfn);
iommu->flush.flush_context(iommu, 0, 0, 0,
   DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}

... While if we are creating a new one (like attaching a new VFIO
device?), it's an optional behavior depending on whether CM bit is
set:

static int domain_context_mapping_one(struct dmar_domain *domain,
  struct intel_iommu *iommu,
  u8 bus, u8 devfn)
{
...
/*
 * It's a non-present to present mapping. If hardware doesn't cache
 * non-present entry we only need to flush the write-buffer. If the
 * _does_ cache non-present entries, then it does so in the special
 * domain #0, which we have to flush:
 */
if (cap_caching_mode(iommu->cap)) {
iommu->flush.flush_context(iommu, 0,
   (((u16)bus) << 8) | devfn,
   DMA_CCMD_MASK_NOBIT,
   DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
} else {
iommu_flush_write_buffer(iommu);
}
...
}

Only if cap_caching_mode() is set (which is bit 7, the CM bit), we
will send these invalidations. What I meant is that, we should allow
user to specify the CM bit, so that when we are not using VFIO
devices, we can skip the above flush_content() and flush_iotlb()
etc... So, besides the truth that we have some guests do not support
CM bit (like Jailhouse), performance might be another consideration
point that we should allow user to specify the CM bit themselfs.


I'm dubious of this, IOMMU drivers are already aware that hardware
flushes are expensive and do batching to optimize it.  The queued
invalidation mechanism itself is meant to allow asynchronous
invalidations.  QEMU invalidating a virtual IOMMU might very well be
faster than hardware.


Do batching doesn't mean we can eliminate the IOTLB flush for mappings 
from non-present to present, in case of CM=1, while in case CM=0 those 
IOTLB flush are not necessary, just like the code above shows. Therefore 
generally speaking CM=0 should have better performance than CM=1, even 
for Qemu's vIOMMU.


In my understanding the purpose of exposing CM=1 is to force guest do 
IOTLB flush for each mapping change (including from non-present to 
present) so Qemu is able to emulate each mapping change from guest 
(correct me if I was wrong). If previous statement stands, CM=1 is 
really a workaround for making vfio assigned devices and vIOMMU work 
together, and unfortunately this cannot work on other vend

[Qemu-devel] [PATCH v3] spapr: Ensure all LMBs are represented in ibm, dynamic-memory

2016-06-06 Thread Bharata B Rao

Memory hotplug can fail for some combinations of RAM and maxmem when
DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends
on maximum addressable memory returned by guest and this value is currently
being calculated wrongly by the guest kernel routine memory_hotplug_max().
While there is an attempt to fix the guest kernel, this patch works
around the problem within QEMU itself.

memory_hotplug_max() routine in the guest kernel arrives at max
addressable memory by multiplying lmb-size with the lmb-count obtained
from ibm,dynamic-memory property. There are two assumptions here:

- All LMBs are part of ibm,dynamic memory: This is not true for PowerKVM
  where only hot-pluggable LMBs are present in this property.
- The memory area comprising of RAM and hotplug region is contiguous: This
  needn't be true always for PowerKVM as there can be gap between
  boot time RAM and hotplug region.

To work around this guest kernel bug, ensure that ibm,dynamic-memory
has information about all the LMBs (RMA, boot-time LMBs, future
hotpluggable LMBs, and dummy LMBs to cover the gap between RAM and
hotpluggable region).

RMA is represented separately by memory@0 node. Hence mark RMA LMBs
and also the LMBs for the gap b/n RAM and hotpluggable region as
reserved so that these LMBs are not recounted/counted by guest.

Signed-off-by: Bharata B Rao 
---
Changes in v3:

- Not touching spapr_create_lmb_dr_connectors() so that we continue
  to have DRC objects for only hotpluggable LMBs.
- Simplified the logic of creating dynamic-memory node based on comments
  from Michael Roth and David Gibson.

v2: https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg01316.html

 hw/ppc/spapr.c | 51 --
 include/hw/ppc/spapr.h |  5 +++--
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0636642..9d1d43d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -762,14 +762,17 @@ static int spapr_populate_drconf_memory(sPAPRMachineState 
*spapr, void *fdt)
 int ret, i, offset;
 uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
 uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
-uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
+uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size;
+uint32_t nr_lmbs = (spapr->hotplug_memory.base +
+   memory_region_size(&spapr->hotplug_memory.mr)) /
+   lmb_size;
 uint32_t *int_buf, *cur_index, buf_len;
 int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
 
 /*
- * Don't create the node if there are no DR LMBs.
+ * Don't create the node if there is no hotpluggable memory
  */
-if (!nr_lmbs) {
+if (machine->ram_size == machine->maxram_size) {
 return 0;
 }
 
@@ -805,24 +808,36 @@ static int spapr_populate_drconf_memory(sPAPRMachineState 
*spapr, void *fdt)
 for (i = 0; i < nr_lmbs; i++) {
 sPAPRDRConnector *drc;
 sPAPRDRConnectorClass *drck;
-uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;;
+uint64_t addr = i * lmb_size;
 uint32_t *dynamic_memory = cur_index;
 
-drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
-   addr/lmb_size);
-g_assert(drc);
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-
-dynamic_memory[0] = cpu_to_be32(addr >> 32);
-dynamic_memory[1] = cpu_to_be32(addr & 0x);
-dynamic_memory[2] = cpu_to_be32(drck->get_index(drc));
-dynamic_memory[3] = cpu_to_be32(0); /* reserved */
-dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
-if (addr < machine->ram_size ||
-memory_region_present(get_system_memory(), addr)) {
-dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
+if (i >= hotplug_lmb_start) {
+drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
+   addr / lmb_size);
+g_assert(drc);
+drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+dynamic_memory[0] = cpu_to_be32(addr >> 32);
+dynamic_memory[1] = cpu_to_be32(addr & 0x);
+dynamic_memory[2] = cpu_to_be32(drck->get_index(drc));
+dynamic_memory[3] = cpu_to_be32(0); /* reserved */
+dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
+if (memory_region_present(get_system_memory(), addr)) {
+dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
+} else {
+dynamic_memory[5] = cpu_to_be32(0);
+}
 } else {
-dynamic_memory[5] = cpu_to_be32(0);
+/*
+ * LMB information for RMA, boot time RAM and gap b/n RAM and
+ * hotplug memory region -- all these are marked as reserved.
+ */
+

Re: [Qemu-devel] [PATCH] net: mipsnet: check transmit buffer size before sending

2016-06-06 Thread P J P

+-- On Fri, 3 Jun 2016, P J P wrote --+
| +-- On Thu, 2 Jun 2016, Peter Maydell wrote --+
| | >  case MIPSNET_TX_DATA_COUNT:
| | > -   s->tx_count = (val <= MAX_ETH_FRAME_SIZE) ? val : 0;
| | > +s->tx_count = (val < MAX_ETH_FRAME_SIZE) ? val : 
MAX_ETH_FRAME_SIZE;
| | >  s->tx_written = 0;
| | 
| | This is a behaviour change -- the register will now read
| | back as MAX_ETH_FRAME_SIZE rather than 0 if written with
| | an overlarge value.
| 
|   IIUC, 's->tx_count' indicates expected packet data length to be processed. 
| Maybe if this value was zero, packet was not to be sent; not sure.
| 
| | Do we have any documentation on how this (simulated)
| | device is supposed to behave in this case?

@Jason: @Leon: ping...!
--
Prasad J Pandit / Red Hat Product Security Team
47AF CE69 3A90 54AA 9045 1053 DD13 3D32 FE5B 041F

Re: [Qemu-devel] [PATCH v3 1/3] IOMMU: add VTD_CAP_CM to vIOMMU capability exposed to guest

2016-06-06 Thread Peter Xu

On Mon, Jun 06, 2016 at 09:58:09PM -0600, Alex Williamson wrote:
> On Tue, 7 Jun 2016 11:20:32 +0800
> Peter Xu  wrote:
[...]
> > Only if cap_caching_mode() is set (which is bit 7, the CM bit), we
> > will send these invalidations. What I meant is that, we should allow
> > user to specify the CM bit, so that when we are not using VFIO
> > devices, we can skip the above flush_content() and flush_iotlb()
> > etc... So, besides the truth that we have some guests do not support
> > CM bit (like Jailhouse), performance might be another consideration
> > point that we should allow user to specify the CM bit themselfs.
> 
> I'm dubious of this, IOMMU drivers are already aware that hardware
> flushes are expensive and do batching to optimize it.  The queued
> invalidation mechanism itself is meant to allow asynchronous
> invalidations.  QEMU invalidating a virtual IOMMU might very well be
> faster than hardware.

Agree. However it seems that current Linux is still not taking this
advantage... check qi_flush_context() and qi_flush_iotlb().
qi_submit_sync() is used for both, which sends one invalidation with a
explicit wait to make sure it's sync.

> 
> > > 
> > > C: Page-walk Coherency
> > >   This field indicates if hardware access to the root, context,
> > >   extended-context and interrupt-remap tables, and second-level paging
> > >   structures for requests-without PASID, are coherent (snooped) or not.
> > > • 0: Indicates hardware accesses to remapping structures are 
> > > non-coherent.
> > > • 1: Indicates hardware accesses to remapping structures are coherent.
> > > 
> > > Without both CM=0 and C=0, our only virtualization mechanism for
> > > maintaining a hardware cache coherent with the guest view of the iommu
> > > would be to shadow all of the VT-d structures.  For purely emulated
> > > devices, maybe we can get away with that, but I doubt the current
> > > ghashes used for the iotlb are prepared for it.  
> > 
> > Actually I haven't noticed this bit yet. I see that this will decide
> > whether guest kernel need to send specific clflush() when modifying
> > IOMMU PTEs, but shouldn't we flush the memory cache always so that we
> > can sure IOMMU can see the same memory data as CPU does?
> 
> I think it would be a question of how much the g_hash code really buys
> us in the VT-d code, it might be faster to do a lookup each time if it
> means fewer flushes.  Those hashes are useless overhead for assigned
> devices, so maybe we can avoid them when we only have assigned
> devices ;)  Thanks,

Errr, I just noticed that VFIO devices do not need emulated
cache. There are indeed lots of pending works TBD on vIOMMU side...

Thanks!

-- peterx

Re: [Qemu-devel] [PATCH v19 08/10] Implement new driver for block replication

2016-06-06 Thread Changlong Xie


On 05/20/2016 03:36 PM, Changlong Xie wrote:

+
+/*
+ * Must protect backup target if backup job was stopped/cancelled
+ * unexpectedly
+ */
+bdrv_ref(s->hidden_disk->bs);
+
+backup_start(s->secondary_disk->bs, s->hidden_disk->bs, 0,
+ MIRROR_SYNC_MODE_NONE, NULL, BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT, backup_job_completed,
+ s, NULL, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+backup_job_cleanup(s);
+bdrv_unref(s->hidden_disk->bs);
+aio_context_release(aio_context);
+return;
+}
+break;
+default:
+aio_context_release(aio_context);
+abort();
+}


commit 5c438bc6 introduce BB for I/O, so we don't need protect backup 
target by ourself now.


-/*
- * Must protect backup target if backup job was stopped/cancelled
- * unexpectedly
- */
-bdrv_ref(s->hidden_disk->bs);
-
 backup_start(s->secondary_disk->bs, s->hidden_disk->bs, 0,
  MIRROR_SYNC_MODE_NONE, NULL, 
BLOCKDEV_ON_ERROR_REPORT,

  BLOCKDEV_ON_ERROR_REPORT, backup_job_completed,
@@ -508,7 +502,6 @@ static void replication_start(ReplicationState *rs, 
ReplicationMode mode,

 if (local_err) {
 error_propagate(errp, local_err);
 backup_job_cleanup(s);
-bdrv_unref(s->hidden_disk->bs);
 aio_context_release(aio_context);
 return;
 }

will update in next version.

Re: [Qemu-devel] [PATCH v2 5/5] qcow2: Implement .bdrv_co_pwritev()

On 06/06/2016 08:59 AM, Kevin Wolf wrote:
> This changes qcow2 to implement the byte-based .bdrv_co_pwritev
> interface rather than the sector-based old one.
> 
> As preallocation uses the same allocation function as normal writes, and
> the interface of that function needs to be changed, it is converted in
> the same patch.
> 
> Signed-off-by: Kevin Wolf 
> ---
>  block/qcow2-cluster.c | 13 
>  block/qcow2.c | 89 
> ---
>  block/qcow2.h |  3 +-
>  trace-events  |  6 ++--
>  4 files changed, 52 insertions(+), 59 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 1/3] IOMMU: add VTD_CAP_CM to vIOMMU capability exposed to guest

2016-06-06 Thread Alex Williamson

On Tue, 7 Jun 2016 11:20:32 +0800
Peter Xu  wrote:

> On Mon, Jun 06, 2016 at 11:02:11AM -0600, Alex Williamson wrote:
> > On Mon, 6 Jun 2016 21:43:17 +0800
> > Peter Xu  wrote:
> >   
> > > On Mon, Jun 06, 2016 at 07:11:41AM -0600, Alex Williamson wrote:  
> > > > On Mon, 6 Jun 2016 13:04:07 +0800
> > > > Peter Xu  wrote:
> > > [...]  
> > > > > Besides the reason that there might have guests that do not support
> > > > > CM=1, will there be performance considerations? When user's
> > > > > configuration does not require CM capability (e.g., generic VM
> > > > > configuration, without VFIO), shall we allow user to disable the CM
> > > > > bit so that we can have better IOMMU performance (avoid extra and
> > > > > useless invalidations)?
> > > > 
> > > > With Alexey's proposed patch to have callback ops when the iommu
> > > > notifier list adds its first entry and removes its last, any of the
> > > > additional overhead to generate notifies when nobody is listening can
> > > > be avoided.  These same callbacks would be the ones that need to
> > > > generate a hw_error if a notifier is added while running in CM=0.
> > > 
> > > Not familar with Alexey's patch  
> > 
> > https://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg00079.html  
> 
> Thanks for the pointer. :)
> 
> >   
> > >, but is that for VFIO only?  
> > 
> > vfio is currently the only user of the iommu notifier, but the
> > interface is generic, which is how it should (must) be.  
> 
> Yes.
> 
> >   
> > > I mean, if
> > > we configured CMbit=1, guest kernel will send invalidation request
> > > every time it creates new entries (context entries, or iotlb
> > > entries). Even without VFIO notifiers, guest need to trap into QEMU
> > > and process the invalidation requests. This is avoidable if we are not
> > > using VFIO devices at all (so no need to maintain any mappings),
> > > right?  
> > 
> > CM=1 only defines that not-present and invalid entries can be cached,
> > any changes to existing entries requires an invalidation regardless of
> > CM.  What you're looking for sounds more like ECAP.C:  
> 
> Yes, but I guess what I was talking about is CM bit but not ECAP.C.
> When we clear/replace one context entry, guest kernel will definitely
> send one context entry invalidation to QEMU:
> 
> static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 
> devfn)
> {
>   if (!iommu)
>   return;
> 
>   clear_context_table(iommu, bus, devfn);
>   iommu->flush.flush_context(iommu, 0, 0, 0,
>  DMA_CCMD_GLOBAL_INVL);
>   iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
> }
> 
> ... While if we are creating a new one (like attaching a new VFIO
> device?), it's an optional behavior depending on whether CM bit is
> set:
> 
> static int domain_context_mapping_one(struct dmar_domain *domain,
> struct intel_iommu *iommu,
> u8 bus, u8 devfn)
> {
> ...
>   /*
>* It's a non-present to present mapping. If hardware doesn't cache
>* non-present entry we only need to flush the write-buffer. If the
>* _does_ cache non-present entries, then it does so in the special
>* domain #0, which we have to flush:
>*/
>   if (cap_caching_mode(iommu->cap)) {
>   iommu->flush.flush_context(iommu, 0,
>  (((u16)bus) << 8) | devfn,
>  DMA_CCMD_MASK_NOBIT,
>  DMA_CCMD_DEVICE_INVL);
>   iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
>   } else {
>   iommu_flush_write_buffer(iommu);
>   }
> ...
> }
> 
> Only if cap_caching_mode() is set (which is bit 7, the CM bit), we
> will send these invalidations. What I meant is that, we should allow
> user to specify the CM bit, so that when we are not using VFIO
> devices, we can skip the above flush_content() and flush_iotlb()
> etc... So, besides the truth that we have some guests do not support
> CM bit (like Jailhouse), performance might be another consideration
> point that we should allow user to specify the CM bit themselfs.

I'm dubious of this, IOMMU drivers are already aware that hardware
flushes are expensive and do batching to optimize it.  The queued
invalidation mechanism itself is meant to allow asynchronous
invalidations.  QEMU invalidating a virtual IOMMU might very well be
faster than hardware.

> > 
> > C: Page-walk Coherency
> >   This field indicates if hardware access to the root, context,
> >   extended-context and interrupt-remap tables, and second-level paging
> >   structures for requests-without PASID, are coherent (snooped) or not.
> > • 0: Indicates hardware accesses to remapping structures are 
> > non-coherent.
> > • 1: Indicates hardware accesses to remapping structures are coherent.
> > 
> >

Re: [Qemu-devel] [PATCH v2 4/5] qcow2: Use bytes instead of sectors for QCowL2Meta

On 06/06/2016 08:59 AM, Kevin Wolf wrote:
> In preparation for implementing .bdrv_co_pwritev in qcow2.
> 
> Signed-off-by: Kevin Wolf 
> ---
>  block/qcow2-cluster.c | 32 
>  block/qcow2.h | 13 +++--
>  2 files changed, 15 insertions(+), 30 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 3/5] qcow2: Make copy_sectors() byte based

On 06/06/2016 08:59 AM, Kevin Wolf wrote:
> This will allow copy on write operations where the overwritten part of
> the cluster is not aligned to sector boundaries.
> 
> Also rename the function because it has nothing to do with sectors any
> more.
> 
> Signed-off-by: Kevin Wolf 
> ---
>  block/qcow2-cluster.c | 54 
> +--
>  1 file changed, 26 insertions(+), 28 deletions(-)
> 

>  
>  if (bs->encrypted) {
>  Error *err = NULL;
> +int sector = (cluster_offset + offset_in_cluster) >> 
> BDRV_SECTOR_BITS;

Potentially the wrong type...

>  assert(s->cipher);
> -if (qcow2_encrypt_sectors(s, start_sect + n_start,
> -  iov.iov_base, iov.iov_base, n,
> -  true, &err) < 0) {
> +assert((offset_in_cluster & BDRV_SECTOR_MASK) == 0);

Why is this one true? If I have a cluster of 4 sectors, why must
offset_in_cluster fall within only the first of those sectors?  Are you
missing a ~, to instead be asserting that offset_in_cluster is
sector-aligned?

> +assert((bytes & ~BDRV_SECTOR_MASK) == 0);

This one looks correct, stating that the number of bytes to copy is a
sector multiple.

> +if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
> +  bytes >> BDRV_SECTOR_BITS, true, &err) < 
> 0) {

...since encryption allows a 64-bit sector number for the case where the
image is larger than 2T.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH qemu v17 06/12] memory: Add reporting of supported page sizes

2016-06-06 Thread Alexey Kardashevskiy

On 06/06/16 23:31, Paolo Bonzini wrote:
> 
> 
> On 02/06/2016 05:35, David Gibson wrote:
>> On Wed, Jun 01, 2016 at 06:57:37PM +1000, Alexey Kardashevskiy wrote:
 Every IOMMU has some granularity which MemoryRegionIOMMUOps::translate
 uses when translating, however this information is not available outside
 the translate context for various checks.

 This adds a get_page_sizes callback to MemoryRegionIOMMUOps and
 a wrapper for it so IOMMU users (such as VFIO) can know the actual
 page size(s) used by an IOMMU.

 As IOMMU MR represents a guest IOMMU, this uses TARGET_PAGE_SIZE
 as fallback.

 This removes vfio_container_granularity() and uses new helper in
 memory_region_iommu_replay() when replaying IOMMU mappings on added
 IOMMU memory region.

 Signed-off-by: Alexey Kardashevskiy 
 Reviewed-by: David Gibson 
>> Paolo,
>>
>> Looks like you were left off the CC for this one.
>>
>> I think this is ready to go - do you want to merge, comment or ack and
>> we'll take it either through my tree or Alex's?
> 
> It's okay for you to merge, but the callback should be called
> "get_page_size" or "get_replay_granularity".  The plural is weird.

I'll repost next time with get_page_size() then. Thanks.

-- 
Alexey

Re: [Qemu-devel] [PATCH v2 01/22] migration: Define VMSTATE_UINT64_2DARRAY



On 2016/5/26 22:55, Peter Maydell wrote:
> Define a VMSTATE_UINT64_2DARRAY macro, to go with the ones we
> already have for other type sizes.
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Shannon Zhao 

> ---
>  include/migration/vmstate.h | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 30ecc44..aec9531 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -846,6 +846,12 @@ extern const VMStateInfo vmstate_info_bitmap;
>  #define VMSTATE_UINT64_ARRAY(_f, _s, _n)  \
>  VMSTATE_UINT64_ARRAY_V(_f, _s, _n, 0)
>  
> +#define VMSTATE_UINT64_2DARRAY(_f, _s, _n1, _n2)  \
> +VMSTATE_UINT64_2DARRAY_V(_f, _s, _n1, _n2, 0)
> +
> +#define VMSTATE_UINT64_2DARRAY_V(_f, _s, _n1, _n2, _v) \
> +VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint64, uint64_t)
> +
>  #define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \
>  VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int16, int16_t)
>  
> 

-- 
Shannon

[Qemu-devel] [PATCH 2/9] ppc: Fix tlb invalidations on 6xx/7xx/7xxx 32-bit processors

The processor only uses some bits of the address and invalidates an
entire congruence class. Some OSes such as Darwin and HelenOS take
advantage of this and occasionally invalidate the entire TLB by just
doing a series of 64 consecutive tlbie for example.

Our code tries to be too smart here only invalidating a segment
congruence class (ie, allowing more address bits to be relevant
in the invalidation), this fails miserably on those OSes.

Instead don't bother, do like ppc64 and blow the whole tlb when tlbie
is executed.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/mmu_helper.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index f5c4e69..a5e3878 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1969,6 +1969,11 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, 
target_ulong addr)
 /* XXX: this case should be optimized,
  * giving a mask to tlb_flush_page
  */
+/* This is broken, some CPUs invalidate a whole congruence
+ * class on an even smaller subset of bits and some OSes take
+ * advantage of this. Just blow the whole thing away.
+ */
+#if 0
 tlb_flush_page(cs, addr | (0x0 << 28));
 tlb_flush_page(cs, addr | (0x1 << 28));
 tlb_flush_page(cs, addr | (0x2 << 28));
@@ -1985,6 +1990,9 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, 
target_ulong addr)
 tlb_flush_page(cs, addr | (0xD << 28));
 tlb_flush_page(cs, addr | (0xE << 28));
 tlb_flush_page(cs, addr | (0xF << 28));
+#else
+tlb_flush(cs, 1);
+#endif
 break;
 #if defined(TARGET_PPC64)
 case POWERPC_MMU_64B:
-- 
2.5.5

[Qemu-devel] [PATCH 3/9] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode

This ports the existing 64-bit mechanism to 32-bit, thus series
of 64 tlbie's followed by a sync like some versions of Darwin
(ab)use will result in a single flush.

We apply a pending flush on any sync instruction though, as Darwin
doesn't use tlbsync on non-SMP systems.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/cpu.h |  2 +-
 target-ppc/helper_regs.h |  2 +-
 target-ppc/mmu_helper.c  | 44 
 target-ppc/translate.c   | 27 +--
 4 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index d8f8f7e..c2962d7 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -959,7 +959,6 @@ struct CPUPPCState {
 ppc_slb_t slb[MAX_SLB_ENTRIES];
 int32_t slb_nr;
 /* tcg TLB needs flush (deferred slb inval instruction typically) */
-uint32_t tlb_need_flush;
 #endif
 /* segment registers */
 hwaddr htab_base;
@@ -985,6 +984,7 @@ struct CPUPPCState {
 target_ulong pb[4];
 bool tlb_dirty;   /* Set to non-zero when modifying TLB  */
 bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active*/
+uint32_t tlb_need_flush; /* Delayed flush needed */
 #endif
 
 /* Other registers */
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 104b690..8fc0934 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, 
target_ulong value,
 return excp;
 }
 
-#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+#if !defined(CONFIG_USER_ONLY)
 static inline void check_tlb_flush(CPUPPCState *env)
 {
 CPUState *cs = CPU(ppc_env_get_cpu(env));
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index a5e3878..485d5b8 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
 case POWERPC_MMU_2_06a:
 case POWERPC_MMU_2_07:
 case POWERPC_MMU_2_07a:
-env->tlb_need_flush = 0;
 #endif /* defined(TARGET_PPC64) */
+env->tlb_need_flush = 0;
 tlb_flush(CPU(cpu), 1);
 break;
 default:
@@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
 void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
 {
 #if !defined(FLUSH_ALL_TLBS)
-PowerPCCPU *cpu = ppc_env_get_cpu(env);
-CPUState *cs;
-
 addr &= TARGET_PAGE_MASK;
 switch (env->mmu_model) {
 case POWERPC_MMU_SOFT_6xx:
@@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, 
target_ulong addr)
 break;
 case POWERPC_MMU_32B:
 case POWERPC_MMU_601:
-/* tlbie invalidate TLBs for all segments */
-addr &= ~((target_ulong)-1ULL << 28);
-cs = CPU(cpu);
-/* XXX: this case should be optimized,
- * giving a mask to tlb_flush_page
- */
-/* This is broken, some CPUs invalidate a whole congruence
- * class on an even smaller subset of bits and some OSes take
- * advantage of this. Just blow the whole thing away.
+/* Actual CPUs invalidate entire congruence classes based on the
+ * geometry of their TLBs and some OSes take that into account,
+ * we just mark the TLB to be flushed later (context synchronizing
+ * event or sync instruction on 32-bit).
  */
-#if 0
-tlb_flush_page(cs, addr | (0x0 << 28));
-tlb_flush_page(cs, addr | (0x1 << 28));
-tlb_flush_page(cs, addr | (0x2 << 28));
-tlb_flush_page(cs, addr | (0x3 << 28));
-tlb_flush_page(cs, addr | (0x4 << 28));
-tlb_flush_page(cs, addr | (0x5 << 28));
-tlb_flush_page(cs, addr | (0x6 << 28));
-tlb_flush_page(cs, addr | (0x7 << 28));
-tlb_flush_page(cs, addr | (0x8 << 28));
-tlb_flush_page(cs, addr | (0x9 << 28));
-tlb_flush_page(cs, addr | (0xA << 28));
-tlb_flush_page(cs, addr | (0xB << 28));
-tlb_flush_page(cs, addr | (0xC << 28));
-tlb_flush_page(cs, addr | (0xD << 28));
-tlb_flush_page(cs, addr | (0xE << 28));
-tlb_flush_page(cs, addr | (0xF << 28));
-#else
-tlb_flush(cs, 1);
-#endif
+env->tlb_need_flush = 1;
 break;
 #if defined(TARGET_PPC64)
 case POWERPC_MMU_64B:
@@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, 
target_ulong sr_num)
 
 void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)
 {
-PowerPCCPU *cpu = ppc_env_get_cpu(env);
-
 qemu_log_mask(CPU_LOG_MMU,
 "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__,
 (int)srnum, value, env->sr[srnum]);
 #if defined(TARGET_PPC64)
 if (env->mmu_model & POWERPC_MMU_64) {
+PowerPCCPU *cpu = ppc_env_get_cpu(env);
 uint64_t esid, vsid;
 
 /* ESID = srnum */
@@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong

Re: [Qemu-devel] [PATCH 0/6] Small tests/docker fixes

On Mon, 06/06 16:46, Paolo Bonzini wrote:
> Small fixes, some of them being very close to personal preference.

Looks good to me. Queued in my docker tree.

https://github.com/famz/qemu/tree/docker.next

Thanks,

Fam

[Qemu-devel] [PATCH 4/9] ppc: POWER7 had ACOP and PID registers

We only had them on POWER8, add them to POWER7 as well

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate_init.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 55f8553..ad6f2f3 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8024,6 +8024,21 @@ static void gen_spr_power8_book4(CPUPPCState *env)
 #endif
 }
 
+static void gen_spr_power7_book4(CPUPPCState *env)
+{
+/* Add a number of P7 book4 registers */
+#if !defined(CONFIG_USER_ONLY)
+spr_register_kvm(env, SPR_ACOP, "ACOP",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_ACOP, 0);
+spr_register_kvm(env, SPR_BOOKS_PID, "PID",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_PID, 0);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
 gen_spr_ne_601(env);
@@ -8066,6 +8081,9 @@ static void init_proc_book3s_64(CPUPPCState *env, int 
version)
 gen_spr_power6_common(env);
 gen_spr_power6_dbg(env);
 }
+if (version == BOOK3S_CPU_POWER7) {
+gen_spr_power7_book4(env);
+}
 if (version >= BOOK3S_CPU_POWER8) {
 gen_spr_power8_tce_address_control(env);
 gen_spr_power8_ids(env);
-- 
2.5.5

[Qemu-devel] [PATCH 8/9] ppc: Add missing slbfee. instruction on ppc64 BookS processors

Used to lookup SLB entries by address, for some reason it was missing.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/helper.h |  1 +
 target-ppc/mmu-hash64.c | 30 ++
 target-ppc/translate.c  | 26 ++
 3 files changed, 57 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 0526322..f4410a8 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -550,6 +550,7 @@ DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
 DEF_HELPER_2(load_slb_esid, tl, env, tl)
 DEF_HELPER_2(load_slb_vsid, tl, env, tl)
+DEF_HELPER_2(find_slb_vsid, tl, env, tl)
 DEF_HELPER_FLAGS_1(slbia, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(slbie, TCG_CALL_NO_RWG, void, env, tl)
 #endif
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index ea6e99a..668da5e 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -219,6 +219,24 @@ static int ppc_load_slb_vsid(PowerPCCPU *cpu, target_ulong 
rb,
 return 0;
 }
 
+static int ppc_find_slb_vsid(PowerPCCPU *cpu, target_ulong rb,
+ target_ulong *rt)
+{
+CPUPPCState *env = &cpu->env;
+ppc_slb_t *slb;
+
+if (!msr_is_64bit(env, env->msr)) {
+rb &= 0x;
+}
+slb = slb_lookup(cpu, rb);
+if (slb == NULL) {
+*rt = (target_ulong)-1ul;
+} else {
+*rt = slb->vsid;
+}
+return 0;
+}
+
 void helper_store_slb(CPUPPCState *env, target_ulong rb, target_ulong rs)
 {
 PowerPCCPU *cpu = ppc_env_get_cpu(env);
@@ -241,6 +259,18 @@ target_ulong helper_load_slb_esid(CPUPPCState *env, 
target_ulong rb)
 return rt;
 }
 
+target_ulong helper_find_slb_vsid(CPUPPCState *env, target_ulong rb)
+{
+PowerPCCPU *cpu = ppc_env_get_cpu(env);
+target_ulong rt = 0;
+
+if (ppc_find_slb_vsid(cpu, rb, &rt) < 0) {
+helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM,
+   POWERPC_EXCP_INVAL);
+}
+return rt;
+}
+
 target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb)
 {
 PowerPCCPU *cpu = ppc_env_get_cpu(env);
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 33a9223..a3de142 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4847,6 +4847,31 @@ static void gen_slbmfev(DisasContext *ctx)
  cpu_gpr[rB(ctx->opcode)]);
 #endif
 }
+
+static void gen_slbfee_(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+#else
+TCGLabel *l1, *l2;
+
+if (unlikely(ctx->pr)) {
+gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+return;
+}
+gen_helper_find_slb_vsid(cpu_gpr[rS(ctx->opcode)], cpu_env,
+ cpu_gpr[rB(ctx->opcode)]);
+l1 = gen_new_label();
+l2 = gen_new_label();
+tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rS(ctx->opcode)], -1, l1);
+tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
+tcg_gen_br(l2);
+gen_set_label(l1);
+tcg_gen_movi_tl(cpu_gpr[rS(ctx->opcode)], 0);
+gen_set_label(l2);
+#endif
+}
 #endif /* defined(TARGET_PPC64) */
 
 /***  Lookaside buffer management  ***/
@@ -9972,6 +9997,7 @@ GEN_HANDLER2(mtsrin_64b, "mtsrin", 0x1F, 0x12, 0x07, 
0x001F0001,
 GEN_HANDLER2(slbmte, "slbmte", 0x1F, 0x12, 0x0C, 0x001F0001, PPC_SEGMENT_64B),
 GEN_HANDLER2(slbmfee, "slbmfee", 0x1F, 0x13, 0x1C, 0x001F0001, 
PPC_SEGMENT_64B),
 GEN_HANDLER2(slbmfev, "slbmfev", 0x1F, 0x13, 0x1A, 0x001F0001, 
PPC_SEGMENT_64B),
+GEN_HANDLER2(slbfee_, "slbfee.", 0x1F, 0x13, 0x1E, 0x001F, 
PPC_SEGMENT_64B),
 #endif
 GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA),
 /* XXX Those instructions will need to be handled differently for
-- 
2.5.5

[Qemu-devel] [PATCH 7/9] ppc: Fix slbia decode

Since at least the 2.05 architecture, the slbia instruction takes an
IH field in the opcode to provide some control on the effect of the
slbia on the ERATs (level-1 TLB).

We can safely ignore it as we always flush the whole qemu TLB but
we should allow the bits in the decode.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 3255184..33a9223 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9980,7 +9980,7 @@ GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, 
PPC_MEM_TLBIE),
 GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
 GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
 #if defined(TARGET_PPC64)
-GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x03FFFC01, PPC_SLBI),
+GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x031FFC01, PPC_SLBI),
 GEN_HANDLER(slbie, 0x1F, 0x12, 0x0D, 0x03FF0001, PPC_SLBI),
 #endif
 GEN_HANDLER(eciwx, 0x1F, 0x16, 0x0D, 0x0001, PPC_EXTERN),
-- 
2.5.5

Re: [Qemu-devel] [PATCH] docker: Don't use eval trick on Makefile

On Mon, 06/06 12:53, Eduardo Habkost wrote:
> The eval trick for defining DOCKER_SRC_COPY doesn't do anything
> useful, as DOCKER_SRC_COPY is immediately expanded just after it
> is defined, and CUR_TIME is already defined using ":=". Simply
> define it using ":=" so it is evaluated only once.
> 
> The eval trick was also triggering an weird error on Travis builds:
>   qemu/tests/docker/Makefile.include:34: *** unterminated variable reference. 
>  Stop.
> 
> The issue is not easily reproducible (maybe it's a bug in some
> versions of Make), but it is avoided if removing the eval trick.
> 
> Signed-off-by: Eduardo Habkost 
> ---
>  tests/docker/Makefile.include | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
> index 2fd2ca3..134dc6f 100644
> --- a/tests/docker/Makefile.include
> +++ b/tests/docker/Makefile.include
> @@ -28,8 +28,7 @@ make-archive-maybe = $(if $(wildcard $1/*), \
>   "  ARCHIVE $(notdir $2)"))
>  
>  CUR_TIME := $(shell date +%Y-%m-%d-%H.%M.%S.)
> -# Makes the definition constant after the first expansion
> -DOCKER_SRC_COPY = $(eval DOCKER_SRC_COPY := 
> docker-src.$(CUR_TIME))$(DOCKER_SRC_COPY)
> +DOCKER_SRC_COPY := docker-src.$(CUR_TIME)
>  
>  $(DOCKER_SRC_COPY):
>   @mkdir $@
> -- 
> 2.5.5
> 

Applied, thanks.

Fam

Re: [Qemu-devel] [PATCH v3 1/3] IOMMU: add VTD_CAP_CM to vIOMMU capability exposed to guest

2016-06-06 Thread Peter Xu

On Mon, Jun 06, 2016 at 11:02:11AM -0600, Alex Williamson wrote:
> On Mon, 6 Jun 2016 21:43:17 +0800
> Peter Xu  wrote:
> 
> > On Mon, Jun 06, 2016 at 07:11:41AM -0600, Alex Williamson wrote:
> > > On Mon, 6 Jun 2016 13:04:07 +0800
> > > Peter Xu  wrote:  
> > [...]
> > > > Besides the reason that there might have guests that do not support
> > > > CM=1, will there be performance considerations? When user's
> > > > configuration does not require CM capability (e.g., generic VM
> > > > configuration, without VFIO), shall we allow user to disable the CM
> > > > bit so that we can have better IOMMU performance (avoid extra and
> > > > useless invalidations)?  
> > > 
> > > With Alexey's proposed patch to have callback ops when the iommu
> > > notifier list adds its first entry and removes its last, any of the
> > > additional overhead to generate notifies when nobody is listening can
> > > be avoided.  These same callbacks would be the ones that need to
> > > generate a hw_error if a notifier is added while running in CM=0.  
> > 
> > Not familar with Alexey's patch
> 
> https://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg00079.html

Thanks for the pointer. :)

> 
> >, but is that for VFIO only?
> 
> vfio is currently the only user of the iommu notifier, but the
> interface is generic, which is how it should (must) be.

Yes.

> 
> > I mean, if
> > we configured CMbit=1, guest kernel will send invalidation request
> > every time it creates new entries (context entries, or iotlb
> > entries). Even without VFIO notifiers, guest need to trap into QEMU
> > and process the invalidation requests. This is avoidable if we are not
> > using VFIO devices at all (so no need to maintain any mappings),
> > right?
> 
> CM=1 only defines that not-present and invalid entries can be cached,
> any changes to existing entries requires an invalidation regardless of
> CM.  What you're looking for sounds more like ECAP.C:

Yes, but I guess what I was talking about is CM bit but not ECAP.C.
When we clear/replace one context entry, guest kernel will definitely
send one context entry invalidation to QEMU:

static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 
devfn)
{
if (!iommu)
return;

clear_context_table(iommu, bus, devfn);
iommu->flush.flush_context(iommu, 0, 0, 0,
   DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}

... While if we are creating a new one (like attaching a new VFIO
device?), it's an optional behavior depending on whether CM bit is
set:

static int domain_context_mapping_one(struct dmar_domain *domain,
  struct intel_iommu *iommu,
  u8 bus, u8 devfn)
{
...
/*
 * It's a non-present to present mapping. If hardware doesn't cache
 * non-present entry we only need to flush the write-buffer. If the
 * _does_ cache non-present entries, then it does so in the special
 * domain #0, which we have to flush:
 */
if (cap_caching_mode(iommu->cap)) {
iommu->flush.flush_context(iommu, 0,
   (((u16)bus) << 8) | devfn,
   DMA_CCMD_MASK_NOBIT,
   DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
} else {
iommu_flush_write_buffer(iommu);
}
...
}

Only if cap_caching_mode() is set (which is bit 7, the CM bit), we
will send these invalidations. What I meant is that, we should allow
user to specify the CM bit, so that when we are not using VFIO
devices, we can skip the above flush_content() and flush_iotlb()
etc... So, besides the truth that we have some guests do not support
CM bit (like Jailhouse), performance might be another consideration
point that we should allow user to specify the CM bit themselfs.

> 
> C: Page-walk Coherency
>   This field indicates if hardware access to the root, context,
>   extended-context and interrupt-remap tables, and second-level paging
>   structures for requests-without PASID, are coherent (snooped) or not.
> • 0: Indicates hardware accesses to remapping structures are non-coherent.
> • 1: Indicates hardware accesses to remapping structures are coherent.
> 
> Without both CM=0 and C=0, our only virtualization mechanism for
> maintaining a hardware cache coherent with the guest view of the iommu
> would be to shadow all of the VT-d structures.  For purely emulated
> devices, maybe we can get away with that, but I doubt the current
> ghashes used for the iotlb are prepared for it.

Actually I haven't noticed this bit yet. I see that this will decide
whether guest kernel need to send specific clflush() when modifying
IOMMU PTEs, but shouldn't we flush the memory cache always so that we
can s

Re: [Qemu-devel] [PATCH qemu v17 07/12] vfio: spapr: Add DMA memory preregistering (SPAPR IOMMU v2)

2016-06-06 Thread Alexey Kardashevskiy

On 07/06/16 03:20, Alex Williamson wrote:
> On Mon, 6 Jun 2016 16:04:57 +1000
> Alexey Kardashevskiy  wrote:
> 
>> On 04/06/16 02:13, Alex Williamson wrote:
>>> On Wed,  1 Jun 2016 18:57:38 +1000
>>> Alexey Kardashevskiy  wrote:
>>>   
 This makes use of the new "memory registering" feature. The idea is
 to provide the userspace ability to notify the host kernel about pages
 which are going to be used for DMA. Having this information, the host
 kernel can pin them all once per user process, do locked pages
 accounting (once) and not spent time on doing that in real time with
 possible failures which cannot be handled nicely in some cases.

 This adds a prereg memory listener which listens on address_space_memory
 and notifies a VFIO container about memory which needs to be
 pinned/unpinned. VFIO MMIO regions (i.e. "skip dump" regions) are skipped.

 As there is no per-IOMMU-type release() callback anymore, this stores
 the IOMMU type in the container so vfio_listener_release() can determine
 if it needs to unregister @prereg_listener.

 The feature is only enabled for SPAPR IOMMU v2. The host kernel changes
 are required. Since v2 does not need/support VFIO_IOMMU_ENABLE, this does
 not call it when v2 is detected and enabled.

 This enforces guest RAM blocks to be host page size aligned; however
 this is not new as KVM already requires memory slots to be host page
 size aligned.

 Signed-off-by: Alexey Kardashevskiy 
 ---
 Changes:
 v17:
 * s/prereg\.c/spapr.c/
 * s/vfio_prereg_gpa_to_ua/vfio_prereg_gpa_to_vaddr/
 * vfio_prereg_listener_skipped_section does hw_error() on IOMMUs

 v16:
 * switched to 64bit math everywhere as there is no chance to see
 region_add on RAM blocks even remotely close to 1<<64bytes.

 v15:
 * banned unaligned sections
 * added an vfio_prereg_gpa_to_ua() helper

 v14:
 * s/free_container_exit/listener_release_exit/g
 * added "if memory_region_is_iommu()" to 
 vfio_prereg_listener_skipped_section
 ---
  hw/vfio/Makefile.objs |   1 +
  hw/vfio/common.c  |  38 +---
  hw/vfio/spapr.c   | 137 
 ++
  include/hw/vfio/vfio-common.h |   4 ++
  trace-events  |   2 +
  5 files changed, 172 insertions(+), 10 deletions(-)
  create mode 100644 hw/vfio/spapr.c

 diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
 index ceddbb8..c25e32b 100644
 --- a/hw/vfio/Makefile.objs
 +++ b/hw/vfio/Makefile.objs
 @@ -4,4 +4,5 @@ obj-$(CONFIG_PCI) += pci.o pci-quirks.o
  obj-$(CONFIG_SOFTMMU) += platform.o
  obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
  obj-$(CONFIG_SOFTMMU) += amd-xgbe.o
 +obj-$(CONFIG_SOFTMMU) += spapr.o
  endif
 diff --git a/hw/vfio/common.c b/hw/vfio/common.c
 index f1a12b0..770f630 100644
 --- a/hw/vfio/common.c
 +++ b/hw/vfio/common.c
 @@ -504,6 +504,9 @@ static const MemoryListener vfio_memory_listener = {
  static void vfio_listener_release(VFIOContainer *container)
  {
  memory_listener_unregister(&container->listener);
 +if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
 +memory_listener_unregister(&container->prereg_listener);
 +}
  }
  
  static struct vfio_info_cap_header *
 @@ -862,8 +865,8 @@ static int vfio_connect_container(VFIOGroup *group, 
 AddressSpace *as)
  goto free_container_exit;
  }
  
 -ret = ioctl(fd, VFIO_SET_IOMMU,
 -v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU);
 +container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : 
 VFIO_TYPE1_IOMMU;
 +ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
  if (ret) {
  error_report("vfio: failed to set iommu for container: %m");
  ret = -errno;
 @@ -888,8 +891,10 @@ static int vfio_connect_container(VFIOGroup *group, 
 AddressSpace *as)
  if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
  container->iova_pgsizes = info.iova_pgsizes;
  }
 -} else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
 +} else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
 +   ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
  struct vfio_iommu_spapr_tce_info info;
 +bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, 
 VFIO_SPAPR_TCE_v2_IOMMU);
  
  ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
  if (ret) {
 @@ -897,7 +902,9 @@ static int vfio_connect_container(VFIOGroup *group, 
 AddressSpace *as)
  ret = -errno;
  goto free_container_exit;
  }

Re: [Qemu-devel] [PATCH 2/2] Explicitly print out default vnc option in use

2016-06-06 Thread Robert Hu

On Tue, 2016-06-07 at 08:28 +0800, Robert Hu wrote:
> On Mon, 2016-06-06 at 09:28 +0200, Markus Armbruster wrote:
> > Robert Hu  writes:
> > 
> > > On Tue, 2016-05-31 at 13:17 +0200, Markus Armbruster wrote:
> > >> Robert Hu  writes:
> > >> 
> > >> > On Tue, 2016-05-31 at 09:51 +0200, Markus Armbruster wrote:
> [trim...]
> > > I don't see a './configure' option related to this '-vnc to' param. Is
> > > there any?
> > > '--help', you mean 'qemu-system_x86-64 --help'? or './configure --help'?
> [seems repeated contents, trim...]
> > > I don't see a './configure' option related to this '-vnc to' param. Is
> > > there any?
> > > '--help', you mean 'qemu-system_x86-64 --help'? or './configure --help'?
> > 
> > The former.
> > 
> > The modern way to select a display is -display.  The older -nographic,
> > -curses, -sdl are retained for backward compatibility.
> > 
> > Relevant parts of -help:
> > 
> > Display options:
> > -display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off]
> > [,window_close=on|off]|curses|none|
> > gtk[,grab_on_hover=on|off]|
> > vnc=[,]
> > select display type
> > -nographic  disable graphical output and redirect serial I/Os to 
> > console
> > -curses use a curses/ncurses interface instead of SDL
> > [...]
> > -sdlenable SDL
> > [...]
> > -vnc displaystart a VNC server on display
> > 
> > Issues:
> > 
> > * Help for -display is broken: the mutually exclusive option arguments
> >   are concatenated.  -display curses and -display none are undocumented.
> >   It should look more like this:
> > 
> > -display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off]
> > [,window_close=on|off]|curses|none|
> > -display gtk[,grab_on_hover=on|off]|
> > -display vnc=[,]
> > -display curses
> > -display none
> > select display type
> > 
> > * -display sdl,gl=on|off and -display gtk,gl=on|off are undocumented
> >(missed in commit 0b71a5d5c and 97edf3b).
> > 
> > * There is no help on the  in -display vnc=.
> > 
> > * There is no help on the default.  main() picks the default depending
> >   on configure options:
> > 
> > #if defined(CONFIG_GTK)
> > display_type = DT_GTK;
> > #elif defined(CONFIG_SDL)
> > display_type = DT_SDL;
> > #elif defined(CONFIG_COCOA)
> > display_type = DT_COCOA;
> > #elif defined(CONFIG_VNC)
> > vnc_parse("localhost:0,to=99,id=default", &error_abort);
> > show_vnc_port = 1;
> > #else
> > display_type = DT_NONE;
> > #endif
> > 
> >   Help should show the default this binary will pick.  This is what I
> >   meant by "Ideally, --help output would show the defaults for this
> >   build's configuration."
> > 
> > * Help should explain syntacic sugar:
> >   -curses is sugar for -display curses
> >   -sdl is sugar for -display sdl
> >   -vnc display is sugar for -display vnc=display
> > 
> >   -nographic is also sugar, but too complicated to explain; I'd leave it
> >   as is.
> > 
> > Non-issue
> > 
> > * Help shows options even when they're not compiled in.  That's okay,
> >   because trying to use them fails with an "FOO support is disabled"
> >   error message.
> > 
> > >> If we decide users need more information than the current "VNC server
> > >> running on" line, perhaps it should be included right in that line.
> > 
> > This would complement, but not replace better -help ouput.
> > 
> > If you would like to work on these issues, let us know.
> 
> OK, if not in a hurry and assuming this is not a huge amount of work.
> I also need to look into the build arch so that completely understand
> your 'the default this binary will pick', till now I don't.
> 
> Another concern is that I'm not a native English speaker, so those
> description words may not be that apt and concise.
> 
> Meanwhile, this is another work extended from the original patch. How
> about accept the patch 1 first? as you and Paolo both think it is OK.
> Ought I rework a version 2 of single patch 1? or not necessary?
Oh, I see Paolo already get it in. Thanks!
> 
>

[Qemu-devel] [PATCH 1/9] ppc: Properly tag the translation cache based on MMU mode

We used to always flush the TLB when changing relocation mode in
MSR:IR and MSR:DR (ie. MMU on/off for Instructions and Data).

We don't anymore since we have split mmu_idx for instruction and data.

However, since we hard code the mmu_idx in the translated code, we
now need to also make sure MSR:IR and MSR:DR are part of the hflags
used to tag translated code, so that we use different translated
code for different MMU settings.

Darwin gets hurt by this problem.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/helper_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 12af61c..104b690 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -95,7 +95,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
 /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
 hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
 (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
-(1 << MSR_LE) | (1 << MSR_VSX);
+(1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
 hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
 hreg_compute_mem_idx(env);
 env->hflags = env->msr & hflags_mask;
-- 
2.5.5

Re: [Qemu-devel] [RFC PATCH v4 1/3] Mediated device Core driver

2016-06-06 Thread Tian, Kevin

> From: Alex Williamson [mailto:alex.william...@redhat.com]
> Sent: Tuesday, June 07, 2016 3:31 AM
> 
> On Mon, 6 Jun 2016 10:44:25 -0700
> Neo Jia  wrote:
> 
> > On Mon, Jun 06, 2016 at 04:29:11PM +0800, Dong Jia wrote:
> > > On Sun, 5 Jun 2016 23:27:42 -0700
> > > Neo Jia  wrote:
> > >
> > > 2. VFIO_DEVICE_CCW_CMD_REQUEST
> > > This intends to handle an intercepted channel I/O instruction. It
> > > basically need to do the following thing:
> >
> > May I ask how and when QEMU knows that he needs to issue such VFIO ioctl at
> > first place?
> 
> Yep, this is my question as well.  It sounds a bit like there's an
> emulated device in QEMU that's trying to tell the mediated device when
> to start an operation when we probably should be passing through
> whatever i/o operations indicate that status directly to the mediated
> device. Thanks,
> 
> Alex

Below is copied from Dong's earlier post which said clear that
a guest cmd submission will trigger the whole flow:

Explanation:
Q1-Q4: Qemu side process.
K1-K6: Kernel side process.

Q1. Intercept a ssch instruction.
Q2. Translate the guest ccw program to a user space ccw program
(u_ccwchain).
Q3. Call VFIO_DEVICE_CCW_CMD_REQUEST (u_ccwchain, orb, irb).
K1. Copy from u_ccwchain to kernel (k_ccwchain).
K2. Translate the user space ccw program to a kernel space ccw
program, which becomes runnable for a real device.
K3. With the necessary information contained in the orb passed in
by Qemu, issue the k_ccwchain to the device, and wait event q
for the I/O result.
K4. Interrupt handler gets the I/O result, and wakes up the wait q.
K5. CMD_REQUEST ioctl gets the I/O result, and uses the result to
update the user space irb.
K6. Copy irb and scsw back to user space.
Q4. Update the irb for the guest.

My understanding is that such thing belongs to how device is mediated
(so device driver specific), instead of something to be abstracted in 
VFIO which manages resource but doesn't care how resource is used.

Actually we have same requirement in vGPU case, that a guest driver 
needs submit GPU commands through some MMIO register. vGPU device 
model will intercept the submission request (in its own way), do its 
necessary scan/audit to ensure correctness/security, and then submit 
to physical GPU through vendor specific interface. 

No difference with channel I/O here.

Thanks
Kevin

[Qemu-devel] [PATCH 9/9] ppc: Do not take exceptions on unknown SPRs in privileged mode

The architecture specifies that mtspr/mfspr on an unknown SPR number
should act as a nop in privileged mode.

I haven't removed the warning however as it can be useful for
diagnosing.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a3de142..dd0ecc8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4351,7 +4351,10 @@ static inline void gen_op_mfspr(DisasContext *ctx)
 qemu_log("Trying to read invalid spr %d (0x%03x) at "
  TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
 }
-gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+/* Only generate an exception in user space, otherwise this is a nop */
+if (ctx->pr) {
+gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+}
 }
 }
 
@@ -4503,7 +4506,11 @@ static void gen_mtspr(DisasContext *ctx)
 }
 fprintf(stderr, "Trying to write invalid spr %d (0x%03x) at "
 TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
-gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+
+/* Only generate an exception in user space, otherwise this is a nop */
+if (ctx->pr) {
+gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+}
 }
 }
 
-- 
2.5.5

[Qemu-devel] [PATCH 6/9] ppc: Fix mtmsr decoding

We had code to handle the L bit in the opcode but we didn't
allow it in the decode mask.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b34289f..3255184 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9944,7 +9944,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x0801, 
PPC_MISC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B),
 #endif
-GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001FF801, PPC_MISC),
+GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC),
 GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x, PPC_MISC),
 GEN_HANDLER(dcbf, 0x1F, 0x16, 0x02, 0x03C1, PPC_CACHE),
 GEN_HANDLER(dcbi, 0x1F, 0x16, 0x0E, 0x03E1, PPC_CACHE),
-- 
2.5.5

[Qemu-devel] [PATCH 5/9] ppc: POWER7 has lq/stq instructions and stq need to check ISA

The PPC_64BX instruction flag is used for a couple of newer
instructions currently on POWER8 but our implementation for
them works for POWER7 too (and already does the proper checking
of what is permitted) with one exception: stq needs to check
the ISA version.

This fixes the latter and add the instructions to POWER7

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c  | 5 -
 target-ppc/translate_init.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ab5862f..b34289f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3047,10 +3047,13 @@ static void gen_std(DisasContext *ctx)
 
 rs = rS(ctx->opcode);
 if ((ctx->opcode & 0x3) == 0x2) { /* stq */
-
 bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 
+if (!(ctx->insns_flags & PPC_64BX)) {
+gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+}
+
 if (!legal_in_user_mode && ctx->pr) {
 gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 return;
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index ad6f2f3..a1db500 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8377,7 +8377,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
PPC_MEM_SYNC | PPC_MEM_EIEIO |
PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
-   PPC_64B | PPC_64H | PPC_ALTIVEC |
+   PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD;
 pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
-- 
2.5.5

[Qemu-devel] [PATCH v5 1/3] target-arm: kvm64: set guest PMUv3 feature bit if supported

From: Shannon Zhao 

Check if kvm supports guest PMUv3. If so, set the corresponding feature
bit for vcpu.

Signed-off-by: Shannon Zhao 
Reviewed-by: Andrew Jones 
---
 target-arm/cpu.h   | 2 ++
 target-arm/kvm64.c | 5 +
 2 files changed, 7 insertions(+)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 401955f..fa6388f 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -574,6 +574,8 @@ struct ARMCPU {
 bool powered_off;
 /* CPU has security extension */
 bool has_el3;
+/* CPU has PMU (Performance Monitor Unit) */
+bool has_pmu;
 
 /* CPU has memory protection unit */
 bool has_mpu;
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index e2a34f6..75383c8 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -461,6 +461,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
 if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
 }
+if (kvm_irqchip_in_kernel() &&
+kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
+cpu->has_pmu = true;
+cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
+}
 
 /* Do KVM_ARM_VCPU_INIT ioctl */
 ret = kvm_arm_vcpu_init(cs);
-- 
2.0.4

[Qemu-devel] [PATCH v5 3/3] hw/arm/virt-acpi-build: Add PMU IRQ number in ACPI table

From: Shannon Zhao 

Add PMU IRQ number in ACPI table, then we can use PMU in guest through
ACPI.

Signed-off-by: Shannon Zhao 
Reviewed-by: Andrew Jones 
---
 hw/arm/virt-acpi-build.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 83a5420..2586769 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -539,6 +539,10 @@ build_madt(GArray *table_data, GArray *linker, 
VirtGuestInfo *guest_info)
 gicc->arm_mpidr = armcpu->mp_affinity;
 gicc->uid = i;
 gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED);
+
+if (armcpu->has_pmu) {
+gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ));
+}
 }
 
 if (guest_info->gic_version == 3) {
-- 
2.0.4

[Qemu-devel] [PATCH v5 2/3] hw/arm/virt: Add PMU node for virt machine

From: Shannon Zhao 

Add a virtual PMU device for virt machine while use PPI 7 for PMU
overflow interrupt number.

Signed-off-by: Shannon Zhao 
Reviewed-by: Andrew Jones 
---
 hw/arm/virt.c | 33 +
 include/hw/arm/virt.h |  4 
 target-arm/kvm32.c|  6 ++
 target-arm/kvm64.c| 41 +
 target-arm/kvm_arm.h  |  7 +++
 5 files changed, 91 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8e46137..f5ffe65 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -436,6 +436,37 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi, int type)
 qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
 }
 
+static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype)
+{
+CPUState *cpu;
+ARMCPU *armcpu;
+uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
+
+CPU_FOREACH(cpu) {
+armcpu = ARM_CPU(cpu);
+if (!armcpu->has_pmu ||
+!kvm_arm_pmu_create(cpu, PPI(VIRTUAL_PMU_IRQ))) {
+return;
+}
+}
+
+if (gictype == 2) {
+irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
+ GIC_FDT_IRQ_PPI_CPU_WIDTH,
+ (1 << vbi->smp_cpus) - 1);
+}
+
+armcpu = ARM_CPU(qemu_get_cpu(0));
+qemu_fdt_add_subnode(vbi->fdt, "/pmu");
+if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
+const char compat[] = "arm,armv8-pmuv3";
+qemu_fdt_setprop(vbi->fdt, "/pmu", "compatible",
+ compat, sizeof(compat));
+qemu_fdt_setprop_cells(vbi->fdt, "/pmu", "interrupts",
+   GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, 
irqflags);
+}
+}
+
 static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
 {
 int i;
@@ -1259,6 +1290,8 @@ static void machvirt_init(MachineState *machine)
 
 create_gic(vbi, pic, gic_version, vms->secure);
 
+fdt_add_pmu_nodes(vbi, gic_version);
+
 create_uart(vbi, pic, VIRT_UART, sysmem);
 
 if (vms->secure) {
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 82703d2..9650193 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -41,6 +41,10 @@
 #define ARCH_TIMER_NS_EL1_IRQ 14
 #define ARCH_TIMER_NS_EL2_IRQ 10
 
+#define VIRTUAL_PMU_IRQ 7
+
+#define PPI(irq) ((irq) + 16)
+
 enum {
 VIRT_FLASH,
 VIRT_MEM,
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index c03e3e5..c35c676 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -522,3 +522,9 @@ bool kvm_arm_hw_debug_active(CPUState *cs)
 {
 return false;
 }
+
+int kvm_arm_pmu_create(CPUState *cs, int irq)
+{
+qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
+return 0;
+}
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index 75383c8..2d6a310 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -382,6 +382,47 @@ static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, 
target_ulong addr)
 return NULL;
 }
 
+static bool kvm_arm_pmu_support_ctrl(CPUState *cs, struct kvm_device_attr 
*attr)
+{
+return kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr) == 0;
+}
+
+int kvm_arm_pmu_create(CPUState *cs, int irq)
+{
+int err;
+
+struct kvm_device_attr attr = {
+.group = KVM_ARM_VCPU_PMU_V3_CTRL,
+.addr = (intptr_t)&irq,
+.attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+.flags = 0,
+};
+
+if (!kvm_arm_pmu_support_ctrl(cs, &attr)) {
+return 0;
+}
+
+err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, &attr);
+if (err < 0) {
+fprintf(stderr, "KVM_SET_DEVICE_ATTR failed: %s\n",
+strerror(-err));
+abort();
+}
+
+attr.group = KVM_ARM_VCPU_PMU_V3_CTRL;
+attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
+attr.addr = 0;
+attr.flags = 0;
+
+err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, &attr);
+if (err < 0) {
+fprintf(stderr, "KVM_SET_DEVICE_ATTR failed: %s\n",
+strerror(-err));
+abort();
+}
+
+return 1;
+}
 
 static inline void set_feature(uint64_t *features, int feature)
 {
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index 345233c..a419368 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -194,6 +194,8 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu);
 
 int kvm_arm_vgic_probe(void);
 
+int kvm_arm_pmu_create(CPUState *cs, int irq);
+
 #else
 
 static inline int kvm_arm_vgic_probe(void)
@@ -201,6 +203,11 @@ static inline int kvm_arm_vgic_probe(void)
 return 0;
 }
 
+static inline int kvm_arm_pmu_create(CPUState *cs, int irq)
+{
+return 0;
+}
+
 #endif
 
 static inline const char *gic_class_name(void)
-- 
2.0.4

Re: [Qemu-devel] [RFC PATCH v3 3/3] VFIO Type1 IOMMU change: to support with iommu and without iommu

2016-06-06 Thread Tian, Kevin

> From: Dong Jia
> Sent: Monday, June 06, 2016 2:59 PM
> 

[...]

> Channel I/O is quite different to PCI, so I copied some more details
> here. Hope these could help.
> 
> Channel subsystem:
> The channel subsystem directs the flow of information between I/O devices
> and main storage. It relieves CPUs of the task of communicating directly
> with I/O devices and permits data processing to proceed concurrently with
> I/O processing.
> 
> Channel path:
> The channel subsystem uses one or more channel paths as the communication
> link in managing the flow of information to or from I/O devices.
> 
> Subchannel:
> Within the channel subsystem are subchannels. One subchannel of type I/O
> is provided for and dedicated to each I/O device accessible to the channel
> 
> subsystem.
> 
> Control unit:
> A control unit implements a standardized interface which translates between
> the Channel Subsystem and the actual device. It does this by adapting the
> characteristics of each device so that it can respond to the standard form
> of control provided by the channel subsystem.
> 
> Channel Path:
> The channel subsystem communicates with the I/O devices by means of
> channel paths between the channel subsystem and control units.
> 
> +---+
> | channel subsystem |
> +---+
> |   |
> |   +--+|  +--+++
> |   |subchannel|| channel path | control unit || I/O device |
> |   +---+
> |   | subchno  ||  |  ||devno   |
> |   +--+|  +--+++
> |   |
> +---+
> 
> There is no concept of ccw-device registers by the subchannel. Control
> unit will interact with the device, collect the I/O result, and inform
> the result to the subchannel.
> So it seems to me, there is no needs to provide region info for
> isolation. As mentioned above, the isolation is quite natural.
> 
> Please correct me in case I misunderstood some of the concepts in your
> questions and gave irrelevant answers. :>
> 

Thanks for above background which is very useful. Several follow-up Qs:

1) Does it mean that VFIO is managing resource in subchannel level, so Qemu
can only operate subchannels assigned to itself (then emulate as the complete
channel io sub-system to guest)?

2) How are ccw commands associated with a subchannel? Are they submitted
through a dedicated subchannel interface (so VFIO can easily map that interface)
or that subchannel is specified by a special ccw cmd (means VFIO-ccw needs
to scan cmds to avoid malicious attempts on non-assigned subchannels)?

Thanks
Kevin

[Qemu-devel] [PATCH v5 0/3] Add guest PMU in machine virt

From: Shannon Zhao 

KVM-ARM64 supports guest PMU now. This series add the support in machine
virt so that guest could use PMU.

The ACPI part is tested with below guest kernel patches.
https://lkml.org/lkml/2016/4/12/755

Changes since v4:
* fix building failure due to kvm_arm_pmu_create()
* rebase on master

Changes since v3:
* if kvm_arm_pmu_create returns a failure, don't create pmu dts node for
  guest

Changes since v2:
* address Andrew's comments on PATCH 2, thanks

Changes since v1:
* rebase on master
* Address Andrew's comments, add a macro PPI, fix code style, add
  cpu_to_le32()

Shannon Zhao (3):
  target-arm: kvm64: set guest PMUv3 feature bit if supported
  hw/arm/virt: Add PMU node for virt machine
  hw/arm/virt-acpi-build: Add PMU IRQ number in ACPI table

 hw/arm/virt-acpi-build.c |  4 
 hw/arm/virt.c| 33 +
 include/hw/arm/virt.h|  4 
 target-arm/cpu.h |  2 ++
 target-arm/kvm32.c   |  6 ++
 target-arm/kvm64.c   | 46 ++
 target-arm/kvm_arm.h |  7 +++
 7 files changed, 102 insertions(+)

-- 
2.0.4

Re: [Qemu-devel] coroutines: block: Co-routine re-entered recursively when migrating disk with iothreads

On Mon, 06/06 14:55, Jason J. Herne wrote:
> > I'll see if I can reproduce it here.
> > 
> > Fam
> > 
> 
> Hi Fam,
> Have you had any luck reproducing this?

No I cannot reproduce so far.

Re: [Qemu-devel] [PATCH 1/6] tests/docker: fix make-archive-maybe

On Mon, 06/06 16:46, Paolo Bonzini wrote:
> make-archive-maybe expects an archive path relative
> to $1, but receives a path relative to the current directory.  Redirect
> the output outside the subshell to bypass the "cd $1".
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  tests/docker/Makefile.include | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
> index 2fd2ca3..d0ad36c 100644
> --- a/tests/docker/Makefile.include
> +++ b/tests/docker/Makefile.include
> @@ -21,10 +21,10 @@ IMAGES ?= %
>  make-archive-maybe = $(if $(wildcard $1/*), \
>   $(call quiet-command, \
>   (cd $1; if git diff-index --quiet HEAD -- &>/dev/null; then \
> - git archive -1 HEAD --format=tar.gz -o $2; \
> + git archive -1 HEAD --format=tar.gz; \
>   else \
> - git archive -1 $$(git stash create) --format=tar.gz -o 
> $2; \
> - fi), \
> + git archive -1 $$(git stash create) --format=tar.gz; \
> + fi) > $2, \
>   "  ARCHIVE $(notdir $2)"))
>  
>  CUR_TIME := $(shell date +%Y-%m-%d-%H.%M.%S.)
> -- 
> 1.8.3.1
> 
> 

I think this is better than my patch because it also reduces the duplication
between the two "git archive" commands a little. So I'm dropping mine.

Fam

Re: [Qemu-devel] [PATCH v4 2/3] hw/arm/virt: Add PMU node for virt machine



On 2016/6/6 23:59, Peter Maydell wrote:
> On 26 April 2016 at 12:40, Shannon Zhao  wrote:
>> > From: Shannon Zhao 
>> >
>> > Add a virtual PMU device for virt machine while use PPI 7 for PMU
>> > overflow interrupt number.
>> >
>> > Signed-off-by: Shannon Zhao 
>> > diff --git a/stubs/kvm.c b/stubs/kvm.c
>> > index ddd6204..667e269 100644
>> > --- a/stubs/kvm.c
>> > +++ b/stubs/kvm.c
>> > @@ -6,3 +6,8 @@ int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
>> >  {
>> >  return 0;
>> >  }
>> > +
>> > +int kvm_arm_pmu_create(CPUState *cs, int irq)
>> > +{
>> > +return 0;
>> > +}
> Hi. I'm afraid this breaks compilation on 32-bit ARM hosts:
> for 32-bit ARM we don't have a real kvm_arm_pmu_create() so we
> end up pulling in the stubs/kvm.o object file from the stub
> library. Unfortunately that then means we have two versions of
> kvm_arch_irqchip_create() and the linker refuses to link.
> 
Oops, I'll fix this and resend.

-- 
Shannon

Re: [Qemu-devel] [RFC PATCH v2] spapr: Ensure all LMBs are represented in ibm, dynamic-memory

2016-06-06 Thread David Gibson

On Mon, Jun 06, 2016 at 07:46:53PM -0500, Michael Roth wrote:
> Quoting Bharata B Rao (2016-06-06 06:37:49)
> > Memory hotplug can fail for some combinations of RAM and maxmem when
> > DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends
> > on maximum addressable memory returned by guest and this value is currently
> > being calculated wrongly by the guest kernel routine memory_hotplug_max().
> > While there is an attempt to fix the guest kernel, this patch works
> > around the problem within QEMU itself.
> > 
> > memory_hotplug_max() routine in the guest kernel arrives at max
> > addressable memory by multiplying lmb-size with the lmb-count obtained
> > from ibm,dynamic-memory property. There are two assumptions here:
> > 
> > - All LMBs are part of ibm,dynamic memory: This is not true for PowerKVM
> >   where only hot-pluggable LMBs are present in this property.
> > - The memory area comprising of RAM and hotplug region is contiguous: This
> >   needn't be true always for PowerKVM as there can be gap between
> >   boot time RAM and hotplug region.
> > 
> > To work around this guest kernel bug, ensure that ibm,dynamic-memory
> > has information about all the LMBs (RMA, boot-time LMBs, future
> > hotpluggable LMBs, and dummy LMBs to cover the gap between RAM and
> > hotpluggable region).
> > 
> > RMA is represented separately by memory@0 node. Hence mark RMA LMBs
> > and also the LMBs for the gap b/n RAM and hotpluggable region as
> > reserved so that these LMBs are not recounted/counted by guest.
> > 
> > Signed-off-by: Bharata B Rao 
> > ---
> > Changes in v2:
> > 
> > - Dropped the patch that removed alignment gap b/n RAM and hotplug
> >   region, but instead populated ibm,dynamic-memory with LMBs represented
> >   as RESERVED for that gap. We create DRC objects for these LMBs
> >   as it simplifies the logic of populating ibm,dynamic-memory. There can
> >   at max be 4 such DRC objects for the gap area (1GB max) and hence it 
> > should
> >   be fine.
> > 
> > v1: https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg00627.html
> > 
> >  hw/ppc/spapr.c | 51 
> > --
> >  include/hw/ppc/spapr.h |  5 +++--
> >  2 files changed, 40 insertions(+), 16 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 0636642..0f4f7a3 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -762,18 +762,14 @@ static int 
> > spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
> >  int ret, i, offset;
> >  uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
> >  uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
> > -uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
> > +uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size;
> > +uint32_t nr_lmbs = (spapr->hotplug_memory.base +
> > +   memory_region_size(&spapr->hotplug_memory.mr)) /
> > +   lmb_size;
> >  uint32_t *int_buf, *cur_index, buf_len;
> >  int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
> > 
> >  /*
> > - * Don't create the node if there are no DR LMBs.
> > - */
> > -if (!nr_lmbs) {
> > -return 0;
> > -}
> > -
> > -/*
> >   * Allocate enough buffer size to fit in ibm,dynamic-memory
> >   * or ibm,associativity-lookup-arrays
> >   */
> > @@ -805,11 +801,18 @@ static int 
> > spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
> >  for (i = 0; i < nr_lmbs; i++) {
> >  sPAPRDRConnector *drc;
> >  sPAPRDRConnectorClass *drck;
> > -uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;;
> > +uint64_t addr;
> >  uint32_t *dynamic_memory = cur_index;
> > 
> > +if (i < hotplug_lmb_start) {
> > +addr = i * lmb_size;
> > +} else {
> > +addr = (i - hotplug_lmb_start) * lmb_size +
> > +   spapr->hotplug_memory.base;
> > +}
> 
> I think both cases reduce to addr = i * lmb_size

Yes, I think so too.

> > +
> >  drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
> > -   addr/lmb_size);
> > +   addr / lmb_size);
> >  g_assert(drc);
> >  drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > 
> > @@ -820,7 +823,14 @@ static int 
> > spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
> >  dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
> >  if (addr < machine->ram_size ||
> >  memory_region_present(get_system_memory(), addr)) {
> > -dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
> > +if (addr < spapr->rma_size) {
> > +dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED);
> > +} else {
> > +dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);

Re: [Qemu-devel] [PATCH 2/2] ppc: Fix tlb invalidations on 6xx/7xx/7xxx 32-bit processors

On Tue, 2016-06-07 at 11:40 +1000, David Gibson wrote:
> Ugh, this patch too is showing as corrupt for me.  I suspect the
> problem is on my end, but I have no idea what, yet.

No it's on mine. The latest update of evolution in Fedora broke sending
patches :-(

It unconditionally replaces 2 or more consecutive spaces with some
unicode non-breakable space character even in text/plain and preformat
mode. Ugh...

I'll resend the whole lot later today using git-send-email

> The concept looks good here, but I don't see much point to keeping the
> old broken code around under the #if 0.  I'll rewite accordingly and merge.

Don't bother, my next patch that does the batching takes it out.

Cheers,
Ben.

[Qemu-devel] [PATCH 1/6] ppc: POWER7 had ACOP and PID registers

We only had them on POWER8, add them to POWER7 as well

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate_init.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 55f8553..ad6f2f3 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8024,6 +8024,21 @@ static void gen_spr_power8_book4(CPUPPCState *env)
 #endif
 }
 
+static void gen_spr_power7_book4(CPUPPCState *env)
+{
+/* Add a number of P7 book4 registers */
+#if !defined(CONFIG_USER_ONLY)
+spr_register_kvm(env, SPR_ACOP, "ACOP",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_ACOP, 0);
+spr_register_kvm(env, SPR_BOOKS_PID, "PID",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_PID, 0);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
 gen_spr_ne_601(env);
@@ -8066,6 +8081,9 @@ static void init_proc_book3s_64(CPUPPCState *env, int 
version)
 gen_spr_power6_common(env);
 gen_spr_power6_dbg(env);
 }
+if (version == BOOK3S_CPU_POWER7) {
+gen_spr_power7_book4(env);
+}
 if (version >= BOOK3S_CPU_POWER8) {
 gen_spr_power8_tce_address_control(env);
 gen_spr_power8_ids(env);

[Qemu-devel] [PATCH 1/6] ppc: POWER7 had ACOP and PID registers

We only had them on POWER8, add them to POWER7 as well

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate_init.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 55f8553..ad6f2f3 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8024,6 +8024,21 @@ static void gen_spr_power8_book4(CPUPPCState *env)
 #endif
 }
 
+static void gen_spr_power7_book4(CPUPPCState *env)
+{
+/* Add a number of P7 book4 registers */
+#if !defined(CONFIG_USER_ONLY)
+spr_register_kvm(env, SPR_ACOP, "ACOP",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_ACOP, 0);
+spr_register_kvm(env, SPR_BOOKS_PID, "PID",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_PID, 0);
+#endif
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
 gen_spr_ne_601(env);
@@ -8066,6 +8081,9 @@ static void init_proc_book3s_64(CPUPPCState *env, int 
version)
 gen_spr_power6_common(env);
 gen_spr_power6_dbg(env);
 }
+if (version == BOOK3S_CPU_POWER7) {
+gen_spr_power7_book4(env);
+}
 if (version >= BOOK3S_CPU_POWER8) {
 gen_spr_power8_tce_address_control(env);
 gen_spr_power8_ids(env);

Re: [Qemu-devel] [PATCH 2/2] ppc: Fix tlb invalidations on 6xx/7xx/7xxx 32-bit processors

2016-06-06 Thread David Gibson

On Mon, Jun 06, 2016 at 07:52:48PM +1000, Benjamin Herrenschmidt wrote:
> The processor only uses some bits of the address and invalidates an
> entire congruence class. Some OSes such as Darwin and HelenOS take
> advantage of this and occasionally invalidate the entire TLB by just
> doing a series of 64 consecutive tlbie for example.
> 
> Our code tries to be too smart here only invalidating a segment
> congruence class (ie, allowing more address bits to be relevant
> in the invalidation), this fails miserably on those OSes.
> 
> Instead don't bother, do like ppc64 and blow the whole tlb when tlbie
> is executed.
> 
> Signed-off-by: Benjamin Herrenschmidt 

Ugh, this patch too is showing as corrupt for me.  I suspect the
problem is on my end, but I have no idea what, yet.


The concept looks good here, but I don't see much point to keeping the
old broken code around under the #if 0.  I'll rewite accordingly and merge.

> ---
>  target-ppc/mmu_helper.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index f5c4e69..a5e3878 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -1969,6 +1969,11 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, 
> target_ulong addr)
>  /* XXX: this case should be optimized,
>   * giving a mask to tlb_flush_page
>   */
> +/* This is broken, some CPUs invalidate a whole congruence
> + * class on an even smaller subset of bits and some OSes take
> + * advantage of this. Just blow the whole thing away.
> + */
> +#if 0
>  tlb_flush_page(cs, addr | (0x0 << 28));
>  tlb_flush_page(cs, addr | (0x1 << 28));
>  tlb_flush_page(cs, addr | (0x2 << 28));
> @@ -1985,6 +1990,9 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, 
> target_ulong addr)
>  tlb_flush_page(cs, addr | (0xD << 28));
>  tlb_flush_page(cs, addr | (0xE << 28));
>  tlb_flush_page(cs, addr | (0xF << 28));
> +#else
> +tlb_flush(cs, 1);
> +#endif
>  break;
>  #if defined(TARGET_PPC64)
>  case POWERPC_MMU_64B:
> 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 1/2] ppc: Properly tag the translation cache based on MMU mode

2016-06-06 Thread David Gibson

On Mon, Jun 06, 2016 at 07:52:44PM +1000, Benjamin Herrenschmidt wrote:
> We used to always flush the TLB when changing relocation mode in
> MSR:IR and MSR:DR (ie. MMU on/off for Instructions and Data).
> 
> We don't anymore since we have split mmu_idx for instruction and data.
> 
> However, since we hard code the mmu_idx in the translated code, we
> now need to also make sure MSR:IR and MSR:DR are part of the hflags
> used to tag translated code, so that we use different translated
> code for different MMU settings.
> 
> Darwin gets hurt by this problem.
> 
> Signed-off-by: Benjamin Herrenschmidt 

Bizarrely, patch complained about a malformed diff on this one for
reasons I couldn't figure out.  I recreated manually and applied to
ppc-for-2.7.

> ---
>  target-ppc/helper_regs.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 12af61c..104b690 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -95,7 +95,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
>  /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
>  hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
>  (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
> -(1 << MSR_LE) | (1 << MSR_VSX);
> +(1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
>  hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
>  hreg_compute_mem_idx(env);
>  env->hflags = env->msr & hflags_mask;
> 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] scsi-disk: fix reads from scsi-disk devices

On Fri, 2016-06-03 at 06:17 +0100, Mark Cave-Ayland wrote:
> Commit fcaafb1001b9c42817714dd3b2aadcfdb997b53d accidentally broke
> reads from
> scsi-disk devices when being updated from its original form to use
> the new
> byte-based block functions. Add the extra missing sector to offset
> conversion
> in order to restore read functionality.
> 
> Signed-off-by: Mark Cave-Ayland 

This fixes powerpc PAPR booting

Tested-by: Benjamin Herrenschmidt 

> ---
>  hw/scsi/scsi-disk.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index ace65e0..ab7cf9c 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -347,7 +347,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
>  scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
>  block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
>   r->qiov.size, BLOCK_ACCT_READ);
> -r->req.aiocb = sdc->dma_readv(r->sector, &r->qiov,
> +r->req.aiocb = sdc->dma_readv(r->sector << BDRV_SECTOR_BITS,
> &r->qiov,
>    scsi_read_complete, r, r);
>  }
>

[Qemu-devel] [PATCH 6/6] ppc: Do not take exceptions on unknown SPRs in privileged mode

The architecture specifies that mtspr/mfspr on an unknown SPR number
should act as a nop in privileged mode.

I haven't removed the warning however as it can be useful for
diagnosing.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a3de142..dd0ecc8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4351,7 +4351,10 @@ static inline void gen_op_mfspr(DisasContext *ctx)
 qemu_log("Trying to read invalid spr %d (0x%03x) at "
  TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
 }
-gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+/* Only generate an exception in user space, otherwise this is a nop */
+if (ctx->pr) {
+gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+}
 }
 }
 
@@ -4503,7 +4506,11 @@ static void gen_mtspr(DisasContext *ctx)
 }
 fprintf(stderr, "Trying to write invalid spr %d (0x%03x) at "
 TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
-gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+
+/* Only generate an exception in user space, otherwise this is a nop */
+if (ctx->pr) {
+gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
+}
 }
 }

[Qemu-devel] [PATCH 4/6] ppc: Fix slbia decode

Since at least the 2.05 architecture, the slbia instruction takes an
IH field in the opcode to provide some control on the effect of the
slbia on the ERATs (level-1 TLB).

We can safely ignore it as we always flush the whole qemu TLB but
we should allow the bits in the decode.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 3255184..33a9223 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9980,7 +9980,7 @@ GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, 
PPC_MEM_TLBIE),
 GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
 GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
 #if defined(TARGET_PPC64)
-GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x03FFFC01, PPC_SLBI),
+GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x031FFC01, PPC_SLBI),
 GEN_HANDLER(slbie, 0x1F, 0x12, 0x0D, 0x03FF0001, PPC_SLBI),
 #endif
 GEN_HANDLER(eciwx, 0x1F, 0x16, 0x0D, 0x0001, PPC_EXTERN),

[Qemu-devel] [PATCH 5/6] ppc: Add missing slbfee. instruction on ppc64 BookS processors

Used to lookup SLB entries by address, for some reason it was missing.

Signed-off-by: Benjamin Herrenschmidt 
---

A version of this was in my earlier powernv series but had a bug, this
one should be correct.

 target-ppc/helper.h |  1 +
 target-ppc/mmu-hash64.c | 30 ++
 target-ppc/translate.c  | 26 ++
 3 files changed, 57 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 0526322..f4410a8 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -550,6 +550,7 @@ DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
 DEF_HELPER_2(load_slb_esid, tl, env, tl)
 DEF_HELPER_2(load_slb_vsid, tl, env, tl)
+DEF_HELPER_2(find_slb_vsid, tl, env, tl)
 DEF_HELPER_FLAGS_1(slbia, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(slbie, TCG_CALL_NO_RWG, void, env, tl)
 #endif
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index ea6e99a..668da5e 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -219,6 +219,24 @@ static int ppc_load_slb_vsid(PowerPCCPU *cpu, target_ulong 
rb,
 return 0;
 }
 
+static int ppc_find_slb_vsid(PowerPCCPU *cpu, target_ulong rb,
+ target_ulong *rt)
+{
+CPUPPCState *env = &cpu->env;
+ppc_slb_t *slb;
+
+if (!msr_is_64bit(env, env->msr)) {
+rb &= 0x;
+}
+slb = slb_lookup(cpu, rb);
+if (slb == NULL) {
+*rt = (target_ulong)-1ul;
+} else {
+*rt = slb->vsid;
+}
+return 0;
+}
+
 void helper_store_slb(CPUPPCState *env, target_ulong rb, target_ulong rs)
 {
 PowerPCCPU *cpu = ppc_env_get_cpu(env);
@@ -241,6 +259,18 @@ target_ulong helper_load_slb_esid(CPUPPCState *env, 
target_ulong rb)
 return rt;
 }
 
+target_ulong helper_find_slb_vsid(CPUPPCState *env, target_ulong rb)
+{
+PowerPCCPU *cpu = ppc_env_get_cpu(env);
+target_ulong rt = 0;
+
+if (ppc_find_slb_vsid(cpu, rb, &rt) < 0) {
+helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM,
+   POWERPC_EXCP_INVAL);
+}
+return rt;
+}
+
 target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb)
 {
 PowerPCCPU *cpu = ppc_env_get_cpu(env);
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 33a9223..a3de142 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4847,6 +4847,31 @@ static void gen_slbmfev(DisasContext *ctx)
  cpu_gpr[rB(ctx->opcode)]);
 #endif
 }
+
+static void gen_slbfee_(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+#else
+TCGLabel *l1, *l2;
+
+if (unlikely(ctx->pr)) {
+gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
+return;
+}
+gen_helper_find_slb_vsid(cpu_gpr[rS(ctx->opcode)], cpu_env,
+ cpu_gpr[rB(ctx->opcode)]);
+l1 = gen_new_label();
+l2 = gen_new_label();
+tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rS(ctx->opcode)], -1, l1);
+tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
+tcg_gen_br(l2);
+gen_set_label(l1);
+tcg_gen_movi_tl(cpu_gpr[rS(ctx->opcode)], 0);
+gen_set_label(l2);
+#endif
+}
 #endif /* defined(TARGET_PPC64) */
 
 /***  Lookaside buffer management  ***/
@@ -9972,6 +9997,7 @@ GEN_HANDLER2(mtsrin_64b, "mtsrin", 0x1F, 0x12, 0x07, 
0x001F0001,
 GEN_HANDLER2(slbmte, "slbmte", 0x1F, 0x12, 0x0C, 0x001F0001, PPC_SEGMENT_64B),
 GEN_HANDLER2(slbmfee, "slbmfee", 0x1F, 0x13, 0x1C, 0x001F0001, 
PPC_SEGMENT_64B),
 GEN_HANDLER2(slbmfev, "slbmfev", 0x1F, 0x13, 0x1A, 0x001F0001, 
PPC_SEGMENT_64B),
+GEN_HANDLER2(slbfee_, "slbfee.", 0x1F, 0x13, 0x1E, 0x001F, 
PPC_SEGMENT_64B),
 #endif
 GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA),
 /* XXX Those instructions will need to be handled differently for

[Qemu-devel] [PATCH 3/6] ppc: Fix mtmsr decoding

We had code to handle the L bit in the opcode but we didn't
allow it in the decode mask.

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b34289f..3255184 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9944,7 +9944,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x0801, 
PPC_MISC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B),
 #endif
-GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001FF801, PPC_MISC),
+GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC),
 GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x, PPC_MISC),
 GEN_HANDLER(dcbf, 0x1F, 0x16, 0x02, 0x03C1, PPC_CACHE),
 GEN_HANDLER(dcbi, 0x1F, 0x16, 0x0E, 0x03E1, PPC_CACHE),

[Qemu-devel] [PATCH 2/6] ppc: POWER7 has lq/stq instructions and stq need to check ISA version

The PPC_64BX instruction flag is used for a couple of newer
instructions currently on POWER8 but our implementation for
them works for POWER7 too (and already does the proper checking
of what is permitted) with one exception: stq needs to check
the ISA version.

This fixes the latter and add the instructions to POWER7

Signed-off-by: Benjamin Herrenschmidt 
---
 target-ppc/translate.c  | 5 -
 target-ppc/translate_init.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ab5862f..b34289f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3047,10 +3047,13 @@ static void gen_std(DisasContext *ctx)
 
 rs = rS(ctx->opcode);
 if ((ctx->opcode & 0x3) == 0x2) { /* stq */
-
 bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 bool le_is_supported = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
 
+if (!(ctx->insns_flags & PPC_64BX)) {
+gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+}
+
 if (!legal_in_user_mode && ctx->pr) {
 gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 return;
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index ad6f2f3..a1db500 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8377,7 +8377,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
PPC_MEM_SYNC | PPC_MEM_EIEIO |
PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
-   PPC_64B | PPC_64H | PPC_ALTIVEC |
+   PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD;
 pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |

[Qemu-devel] [Bug 1580459] Re: Windows (10?) guest freezes entire host on shutdown if using PCI passthrough

2016-06-06 Thread Jimi

I've been not using USB host passthrough this whole time, as my PCI USB3
card covers that need pretty well. Speaking of those cards, for those of
you who also use one, does it work perfectly? If so, I'd like to know
its model so I can go buy it, because while my card works, about 50% of
the time I try to use it, I get some bad output when I run "dmesg | grep
-i vfio" (the standard spam when a device doesn't get passed through
properly that's full of messages related to power management) and the VM
doesn't seem to have any access to it. When this happens, I have to
restart the whole host to get another 50% chance at using the card.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1580459

Title:
  Windows (10?) guest freezes entire host on shutdown if using PCI
  passthrough

Status in libvirt:
  New
Status in QEMU:
  New
Status in Arch Linux:
  New
Status in Debian:
  New
Status in Fedora:
  New

Bug description:
  Problem: after leaving a Windows VM that uses PCI passthrough (as we
  do for gaming graphics cards, sound cards, and in my case, a USB card)
  running for some amount of time between 1 and 2 hours (it's not
  consistent with exactly how long), and for any amount of time longer
  than that, shutting down that guest will, right as it finishes
  shutting down, freeze the host computer, making it require a hard
  reboot. Unbinding (or in the other user's case, unbinding and THEN
  binding) any PCI device in sysfs, even one that has nothing to do with
  the VM, also has the same effect as shutting down the VM (if the VM
  has been running long enough). So, it's probably an issue related to
  unbinding and binding PCI devices.

  There's a lot of info on this problem over at 
https://bbs.archlinux.org/viewtopic.php?id=206050
  Here's a better-organized list of main details:
  -at least 2 confirmed victims of this bug; 2 (including me) have provided 
lots of info in the link
  -I'm on Arch Linux and the other one is on Gentoo (distro-nonspecific)
  -issue affects my Windows 10 guest and others' Windows guests, but not my 
Arch Linux guest (the others don't have non-Windows guests to test)
  -I'm using libvirt but the other user is not, so it's not an issue with 
libvirt
  -It seems to be version non-specific, too. I first noticed it at, or when 
testing versions still had the issue at (whichever version is lower), Linux 4.1 
and qemu 2.4.0. It still persists in all releases of both since, including the 
newest ones.
  -I can't track down exactly what package downgrade can fix it, as downgrading 
further than Linux 4.1 and qemu 2.4.0 requires Herculean and system-destroying 
changes such as downgrading ncurses, meaning I don't know whether it's a bug in 
QEMU, the Linux kernel, or some weird seemingly unrelated thing.
  -According to the other user, "graphics intensive gameplay (GTA V) can cause 
the crash to happen sooner," as soon as "15 minutes"
  -Also, "bringing up a second passthrough VM with separate hardware will cause 
the same crash," and "bringing up another VM before the two-hour mark will not 
result in a crash," further cementing that it's triggered by the un/binding of 
PCI devices.
  -This is NOT related to the very similar bug that can be worked around by not 
passing through the HDMI device or sound card. Even when we removed all traces 
of any sort of sound card from the VM, it still had the same behavior.

To manage notifications about this bug go to:
https://bugs.launchpad.net/libvirt/+bug/1580459/+subscriptions

Re: [Qemu-devel] [PATCH v6 08/15] qdist: add module to represent frequency distributions of data

2016-06-06 Thread Emilio G. Cota

On Sat, May 28, 2016 at 21:15:06 +0300, Sergey Fedorov wrote:
> On 25/05/16 04:13, Emilio G. Cota wrote:
> > diff --git a/util/qdist.c b/util/qdist.c
> > new file mode 100644
> > index 000..3343640
> > --- /dev/null
> > +++ b/util/qdist.c
> > @@ -0,0 +1,386 @@
> (snip)
> > +
> > +void qdist_add(struct qdist *dist, double x, long count)
> > +{
> > +struct qdist_entry *entry = NULL;
> > +
> > +if (dist->entries) {
> > +struct qdist_entry e;
> > +
> > +e.x = x;
> > +entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
> > +}
> > +
> > +if (entry) {
> > +entry->count += count;
> > +return;
> > +}
> > +
> > +dist->entries = g_realloc(dist->entries,
> > +  sizeof(*dist->entries) * (dist->n + 1));
> 
> Repeated doubling?

Can you please elaborate?

> > +dist->n++;
> > +entry = &dist->entries[dist->n - 1];
> 
> What if we combine the above two lines:
> 
> entry = &dist->entries[dist->n++];
> 
> or just reverse them:
> 
> entry = &dist->entries[dist->n];
> dist->n++;

I have less trouble understanding the original.

> > +entry->x = x;
> > +entry->count = count;
> > +qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
> > +}
> > +
> (snip)
> > +static char *qdist_pr_internal(const struct qdist *dist)
> > +{
> > +double min, max, step;
> > +GString *s = g_string_new("");
> > +size_t i;
> > +
> > +/* if only one entry, its printout will be either full or empty */
> > +if (dist->n == 1) {
> > +if (dist->entries[0].count) {
> > +g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 
> > 1]);
> > +} else {
> > +g_string_append_c(s, ' ');
> > +}
> > +goto out;
> > +}
> > +
> > +/* get min and max counts */
> > +min = dist->entries[0].count;
> > +max = min;
> > +for (i = 0; i < dist->n; i++) {
> > +struct qdist_entry *e = &dist->entries[i];
> > +
> > +if (e->count < min) {
> > +min = e->count;
> > +}
> > +if (e->count > max) {
> > +max = e->count;
> > +}
> > +}
> > +
> > +/* floor((count - min) * step) will give us the block index */
> > +step = (QDIST_NR_BLOCK_CODES - 1) / (max - min);
> > +
> > +for (i = 0; i < dist->n; i++) {
> > +struct qdist_entry *e = &dist->entries[i];
> > +int index;
> > +
> > +/* make an exception with 0; instead of using block[0], print a 
> > space */
> > +if (e->count) {
> > +index = (int)((e->count - min) * step);
> 
> So "e->count == min" gives us one eighth block instead of just space?

Yes, only 0 can print a space.

> > +g_string_append_unichar(s, qdist_blocks[index]);
> > +} else {
> > +g_string_append_c(s, ' ');
> > +}
> > +}
> > + out:
> > +return g_string_free(s, FALSE);
> > +}
> > +
> > +/*
> > + * Bin the distribution in @from into @n bins of consecutive, 
> > non-overlapping
> > + * intervals, copying the result to @to.
> > + *
> > + * This function is internal to qdist: only this file and test code should
> > + * ever call it.
> > + *
> > + * Note: calling this function on an already-binned qdist is a bug.
> > + *
> > + * If @n == 0 or @from->n == 1, use @from->n.
> > + */
> > +void qdist_bin__internal(struct qdist *to, const struct qdist *from, 
> > size_t n)
> > +{
> > +double xmin, xmax;
> > +double step;
> > +size_t i, j, j_min;
> > +
> > +qdist_init(to);
> > +
> > +if (!from->entries) {
> > +return;
> > +}
> > +if (!n || from->n == 1) {
> > +n = from->n;
> > +}
> > +
> > +/* set equally-sized bins between @from's left and right */
> > +xmin = qdist_xmin(from);
> > +xmax = qdist_xmax(from);
> > +step = (xmax - xmin) / n;
> > +
> > +if (n == from->n) {
> > +/* if @from's entries are equally spaced, no need to re-bin */
> > +for (i = 0; i < from->n; i++) {
> > +if (from->entries[i].x != xmin + i * step) {
> > +goto rebin;
> 
> static inline function instead of goto?

It would have quite a few arguments, I think the goto is fine.

> > +}
> > +}
> > +/* they're equally spaced, so copy the dist and bail out */
> > +to->entries = g_malloc(sizeof(*to->entries) * from->n);
> 
> g_new()?

Changed.

> > +to->n = from->n;
> > +memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
> > +return;
> > +}
> > +
> > + rebin:
> > +j_min = 0;
> > +for (i = 0; i < n; i++) {
> > +double x;
> > +double left, right;
> > +
> > +left = xmin + i * step;
> > +right = xmin + (i + 1) * step;
> > +
> > +/* Add x, even if it might not get any counts later */
> > +x = left;
> 
> This way we round down to the left margin of each bin like this:
> 
> xm

Re: [Qemu-devel] [RFC PATCH v2] spapr: Ensure all LMBs are represented in ibm, dynamic-memory

2016-06-06 Thread Michael Roth

Quoting Bharata B Rao (2016-06-06 06:37:49)
> Memory hotplug can fail for some combinations of RAM and maxmem when
> DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends
> on maximum addressable memory returned by guest and this value is currently
> being calculated wrongly by the guest kernel routine memory_hotplug_max().
> While there is an attempt to fix the guest kernel, this patch works
> around the problem within QEMU itself.
> 
> memory_hotplug_max() routine in the guest kernel arrives at max
> addressable memory by multiplying lmb-size with the lmb-count obtained
> from ibm,dynamic-memory property. There are two assumptions here:
> 
> - All LMBs are part of ibm,dynamic memory: This is not true for PowerKVM
>   where only hot-pluggable LMBs are present in this property.
> - The memory area comprising of RAM and hotplug region is contiguous: This
>   needn't be true always for PowerKVM as there can be gap between
>   boot time RAM and hotplug region.
> 
> To work around this guest kernel bug, ensure that ibm,dynamic-memory
> has information about all the LMBs (RMA, boot-time LMBs, future
> hotpluggable LMBs, and dummy LMBs to cover the gap between RAM and
> hotpluggable region).
> 
> RMA is represented separately by memory@0 node. Hence mark RMA LMBs
> and also the LMBs for the gap b/n RAM and hotpluggable region as
> reserved so that these LMBs are not recounted/counted by guest.
> 
> Signed-off-by: Bharata B Rao 
> ---
> Changes in v2:
> 
> - Dropped the patch that removed alignment gap b/n RAM and hotplug
>   region, but instead populated ibm,dynamic-memory with LMBs represented
>   as RESERVED for that gap. We create DRC objects for these LMBs
>   as it simplifies the logic of populating ibm,dynamic-memory. There can
>   at max be 4 such DRC objects for the gap area (1GB max) and hence it should
>   be fine.
> 
> v1: https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg00627.html
> 
>  hw/ppc/spapr.c | 51 
> --
>  include/hw/ppc/spapr.h |  5 +++--
>  2 files changed, 40 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 0636642..0f4f7a3 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -762,18 +762,14 @@ static int 
> spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
>  int ret, i, offset;
>  uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
>  uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
> -uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
> +uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size;
> +uint32_t nr_lmbs = (spapr->hotplug_memory.base +
> +   memory_region_size(&spapr->hotplug_memory.mr)) /
> +   lmb_size;
>  uint32_t *int_buf, *cur_index, buf_len;
>  int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
> 
>  /*
> - * Don't create the node if there are no DR LMBs.
> - */
> -if (!nr_lmbs) {
> -return 0;
> -}
> -
> -/*
>   * Allocate enough buffer size to fit in ibm,dynamic-memory
>   * or ibm,associativity-lookup-arrays
>   */
> @@ -805,11 +801,18 @@ static int 
> spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
>  for (i = 0; i < nr_lmbs; i++) {
>  sPAPRDRConnector *drc;
>  sPAPRDRConnectorClass *drck;
> -uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;;
> +uint64_t addr;
>  uint32_t *dynamic_memory = cur_index;
> 
> +if (i < hotplug_lmb_start) {
> +addr = i * lmb_size;
> +} else {
> +addr = (i - hotplug_lmb_start) * lmb_size +
> +   spapr->hotplug_memory.base;
> +}

I think both cases reduce to addr = i * lmb_size

> +
>  drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
> -   addr/lmb_size);
> +   addr / lmb_size);
>  g_assert(drc);
>  drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> 
> @@ -820,7 +823,14 @@ static int 
> spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
>  dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
>  if (addr < machine->ram_size ||
>  memory_region_present(get_system_memory(), addr)) {
> -dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
> +if (addr < spapr->rma_size) {
> +dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED);
> +} else {
> +dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
> +}
> +} else if (addr >= machine->ram_size &&
> +   addr < spapr->hotplug_memory.base) {
> +dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED);
>  } else {
>  dynamic_memory[5] = cpu_to_be32(0);
>  }
> @@ -1652,16 +166

Re: [Qemu-devel] [PATCH 2/2] Explicitly print out default vnc option in use

2016-06-06 Thread Robert Hu

On Mon, 2016-06-06 at 09:28 +0200, Markus Armbruster wrote:
> Robert Hu  writes:
> 
> > On Tue, 2016-05-31 at 13:17 +0200, Markus Armbruster wrote:
> >> Robert Hu  writes:
> >> 
> >> > On Tue, 2016-05-31 at 09:51 +0200, Markus Armbruster wrote:
[trim...]
> > I don't see a './configure' option related to this '-vnc to' param. Is
> > there any?
> > '--help', you mean 'qemu-system_x86-64 --help'? or './configure --help'?
[seems repeated contents, trim...]
> > I don't see a './configure' option related to this '-vnc to' param. Is
> > there any?
> > '--help', you mean 'qemu-system_x86-64 --help'? or './configure --help'?
> 
> The former.
> 
> The modern way to select a display is -display.  The older -nographic,
> -curses, -sdl are retained for backward compatibility.
> 
> Relevant parts of -help:
> 
> Display options:
> -display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off]
> [,window_close=on|off]|curses|none|
> gtk[,grab_on_hover=on|off]|
> vnc=[,]
> select display type
> -nographic  disable graphical output and redirect serial I/Os to 
> console
> -curses use a curses/ncurses interface instead of SDL
> [...]
> -sdlenable SDL
> [...]
> -vnc displaystart a VNC server on display
> 
> Issues:
> 
> * Help for -display is broken: the mutually exclusive option arguments
>   are concatenated.  -display curses and -display none are undocumented.
>   It should look more like this:
> 
> -display sdl[,frame=on|off][,alt_grab=on|off][,ctrl_grab=on|off]
> [,window_close=on|off]|curses|none|
> -display gtk[,grab_on_hover=on|off]|
> -display vnc=[,]
> -display curses
> -display none
> select display type
> 
> * -display sdl,gl=on|off and -display gtk,gl=on|off are undocumented
>(missed in commit 0b71a5d5c and 97edf3b).
> 
> * There is no help on the  in -display vnc=.
> 
> * There is no help on the default.  main() picks the default depending
>   on configure options:
> 
> #if defined(CONFIG_GTK)
> display_type = DT_GTK;
> #elif defined(CONFIG_SDL)
> display_type = DT_SDL;
> #elif defined(CONFIG_COCOA)
> display_type = DT_COCOA;
> #elif defined(CONFIG_VNC)
> vnc_parse("localhost:0,to=99,id=default", &error_abort);
> show_vnc_port = 1;
> #else
> display_type = DT_NONE;
> #endif
> 
>   Help should show the default this binary will pick.  This is what I
>   meant by "Ideally, --help output would show the defaults for this
>   build's configuration."
> 
> * Help should explain syntacic sugar:
>   -curses is sugar for -display curses
>   -sdl is sugar for -display sdl
>   -vnc display is sugar for -display vnc=display
> 
>   -nographic is also sugar, but too complicated to explain; I'd leave it
>   as is.
> 
> Non-issue
> 
> * Help shows options even when they're not compiled in.  That's okay,
>   because trying to use them fails with an "FOO support is disabled"
>   error message.
> 
> >> If we decide users need more information than the current "VNC server
> >> running on" line, perhaps it should be included right in that line.
> 
> This would complement, but not replace better -help ouput.
> 
> If you would like to work on these issues, let us know.

OK, if not in a hurry and assuming this is not a huge amount of work.
I also need to look into the build arch so that completely understand
your 'the default this binary will pick', till now I don't.

Another concern is that I'm not a native English speaker, so those
description words may not be that apt and concise.

Meanwhile, this is another work extended from the original patch. How
about accept the patch 1 first? as you and Paolo both think it is OK.
Ought I rework a version 2 of single patch 1? or not necessary?

[Qemu-devel] [Bug 1580459] Re: Windows (10?) guest freezes entire host on shutdown if using PCI passthrough

2016-06-06 Thread James Newman

So guys, new information.

I was having trouble getting the HTC Vive passed through in host mode.
The thing shows up as 10+ devices! I've also some logitech webcams that
don't seem to work via usb host passthrough. So I gave windows my entire
usb controller (only 1 for all my ports on this mobo). Since then, I
haven't noticed an issue. Furthermore, wy more stable overall. I
used to get random blue screens.

I'm going to order a usb3 pcie card for my other windows host. For now,
I'm using a remote desktop connection to it for IO.

Anyway, still tinkering. I'm curious if anyone having the issues would
try with no usb 'host' passthrough?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1580459

Title:
  Windows (10?) guest freezes entire host on shutdown if using PCI
  passthrough

Status in libvirt:
  New
Status in QEMU:
  New
Status in Arch Linux:
  New
Status in Debian:
  New
Status in Fedora:
  New

Bug description:
  Problem: after leaving a Windows VM that uses PCI passthrough (as we
  do for gaming graphics cards, sound cards, and in my case, a USB card)
  running for some amount of time between 1 and 2 hours (it's not
  consistent with exactly how long), and for any amount of time longer
  than that, shutting down that guest will, right as it finishes
  shutting down, freeze the host computer, making it require a hard
  reboot. Unbinding (or in the other user's case, unbinding and THEN
  binding) any PCI device in sysfs, even one that has nothing to do with
  the VM, also has the same effect as shutting down the VM (if the VM
  has been running long enough). So, it's probably an issue related to
  unbinding and binding PCI devices.

  There's a lot of info on this problem over at 
https://bbs.archlinux.org/viewtopic.php?id=206050
  Here's a better-organized list of main details:
  -at least 2 confirmed victims of this bug; 2 (including me) have provided 
lots of info in the link
  -I'm on Arch Linux and the other one is on Gentoo (distro-nonspecific)
  -issue affects my Windows 10 guest and others' Windows guests, but not my 
Arch Linux guest (the others don't have non-Windows guests to test)
  -I'm using libvirt but the other user is not, so it's not an issue with 
libvirt
  -It seems to be version non-specific, too. I first noticed it at, or when 
testing versions still had the issue at (whichever version is lower), Linux 4.1 
and qemu 2.4.0. It still persists in all releases of both since, including the 
newest ones.
  -I can't track down exactly what package downgrade can fix it, as downgrading 
further than Linux 4.1 and qemu 2.4.0 requires Herculean and system-destroying 
changes such as downgrading ncurses, meaning I don't know whether it's a bug in 
QEMU, the Linux kernel, or some weird seemingly unrelated thing.
  -According to the other user, "graphics intensive gameplay (GTA V) can cause 
the crash to happen sooner," as soon as "15 minutes"
  -Also, "bringing up a second passthrough VM with separate hardware will cause 
the same crash," and "bringing up another VM before the two-hour mark will not 
result in a crash," further cementing that it's triggered by the un/binding of 
PCI devices.
  -This is NOT related to the very similar bug that can be worked around by not 
passing through the HDMI device or sound card. Even when we removed all traces 
of any sort of sound card from the VM, it still had the same behavior.

To manage notifications about this bug go to:
https://bugs.launchpad.net/libvirt/+bug/1580459/+subscriptions

Re: [Qemu-devel] [PATCH v6 08/15] qdist: add module to represent frequency distributions of data

2016-06-06 Thread Emilio G. Cota

On Fri, Jun 03, 2016 at 20:46:07 +0300, Sergey Fedorov wrote:
> On 03/06/16 20:29, Sergey Fedorov wrote:
> > On 03/06/16 20:22, Emilio G. Cota wrote:
> >> On Sat, May 28, 2016 at 21:15:06 +0300, Sergey Fedorov wrote:
> >>> On 25/05/16 04:13, Emilio G. Cota wrote:
> >>> (snip)
>  +double qdist_avg(const struct qdist *dist)
>  +{
>  +unsigned long count;
>  +size_t i;
>  +double ret = 0;
>  +
>  +count = qdist_sample_count(dist);
>  +if (!count) {
>  +return NAN;
>  +}
>  +for (i = 0; i < dist->n; i++) {
>  +struct qdist_entry *e = &dist->entries[i];
>  +
>  +ret += e->x * e->count / count;
> >>> Please use Welford’s method or something like that, see
> >>> http://stackoverflow.com/a/1346890.
> >> Yes, the way the mean is computed right now, we might suffer
> >> from underflow if count is huge. But I'd rather take that, than the
> >> perf penalty of an iterative method (such as the one used
> >> in Welford's). Note that we might have huge amounts of
> >> items, e.g. one item per head bucket in qht's occupancy qdist
> >> (and 0.5M head buckets is easy to achieve).
> >>
> >> If we were to use an iterative method, we'd need to do something
> >> like:
> >>
> >> double qdist_avg(const struct qdist *dist)
> >> {
> >> size_t i, j;
> >> double ret = 0;
> >>
> >> if (!qdist_sample_count(dist)) {
> >> return NAN;
> >> }
> >> /* compute moving average to prevent under/overflow */
> >> for (i = 0; i < dist->n; i++) {
> >> struct qdist_entry *e = &dist->entries[i];
> >>
> >> for (j = 0; j < e->count; j++) {
> >>
> >> ret += (e->x - ret) / (i + j + 1);
> >> }
> >> }
> >> return ret;
> >> }
> >>
> >> Note that skipping the inner loop would be incorrect.
> > Ah, it's a shame. I'm wondering if there is some other algorithm that
> > could work for us?
> 
> Maybe something like
> https://en.wikipedia.org/wiki/Kahan_summation_algorithm could help?

That algorithm is overkill for what we're doing. Pairwise summation
should suffice:

diff --git a/util/qdist.c b/util/qdist.c
index 3343640..909bd2b 100644
--- a/util/qdist.c
+++ b/util/qdist.c
@@ -367,20 +367,34 @@ unsigned long qdist_sample_count(const struct qdist *dist)
 return count;
 }
 
+static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
+ size_t n, unsigned long count)
+{
+if (n <= 2) {
+size_t i;
+double ret = 0;
+
+for (i = 0; i < n; i++) {
+struct qdist_entry *e = &dist->entries[index + i];
+
+ret += e->x * e->count / count;
+}
+return ret;
+} else {
+size_t n2 = n / 2;
+
+return qdist_pairwise_avg(dist, index, n2, count) +
+   qdist_pairwise_avg(dist, index + n2, n - n2, count);
+}
+}
+
 double qdist_avg(const struct qdist *dist)
 {
 unsigned long count;
-size_t i;
-double ret = 0;
 
 count = qdist_sample_count(dist);
 if (!count) {
 return NAN;
 }
-for (i = 0; i < dist->n; i++) {
-struct qdist_entry *e = &dist->entries[i];
-
-ret += e->x * e->count / count;
-}
-return ret;
+return qdist_pairwise_avg(dist, 0, dist->n, count);
 }

Emilio

Re: [Qemu-devel] [PATCH v4 8/9] target-avr: adding instruction translation


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

+int avr_translate_AND(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)
+{
+TCGv Rd = cpu_r[AND_Rd(opcode)];
+TCGv Rr = cpu_r[AND_Rr(opcode)];
+TCGv R = tcg_temp_new_i32();
+
+/*  op  */
+tcg_gen_and_tl(R, Rd, Rr); /*  Rd = Rd and Rr  */
+
+/*  Vf  */
+tcg_gen_movi_tl(cpu_Vf, 0x00);  /*  Vf = 0  */
+
+/*  Zf  */
+tcg_gen_mov_tl(cpu_Zf, R); /*  Zf = R  */
+
+gen_ZNSf(R);


Double setting of Zf.


+int avr_translate_BRBC(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)

...

+int avr_translate_BRBS(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)


You could share some code between these two functions.
See e.g. tcg_invert_cond.


+TCGv H = cpu_rampD;
+
+tcg_gen_mov_tl(addr, H); /*  addr = H:M:L  */
+tcg_gen_shli_tl(addr, addr, 16);


rampD is already shifted.



r~

Re: [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode

On Mon, 2016-06-06 at 23:36 +0100, Mark Cave-Ayland wrote:
> 
> After another run of the OpenBIOS tests with this patch applied on top
> of the previous 2 patches, I see no regressions introduced. Like Cédric
> I don't get the feeling that the Mac machines necessarily run faster,
> however the overall experience does feel smoother and more responsive.
> 
> Tested-by: Mark Cave-Ayland 

Thanks !

Cheers,
Ben.

Re: [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode

2016-06-06 Thread Mark Cave-Ayland

On 06/06/16 11:23, Benjamin Herrenschmidt wrote:

> This ports the existing 64-bit mechanism to 32-bit, thus series
> of 64 tlbie's followed by a sync like some versions of Darwin
> (ab)use will result in a single flush.
> 
> We apply a pending flush on any sync instruction though, as Darwin
> doesn't use tlbsync on non-SMP systems.
> 
> Signed-off-by: Benjamin Herrenschmidt 
> ---
> 
> Note: I haven't done any performance impact measurements with this
> one ... feel free to let me know what it does for you :-)
> 
>  target-ppc/cpu.h |  2 +-
>  target-ppc/helper_regs.h |  2 +-
>  target-ppc/mmu_helper.c  | 44 
>  target-ppc/translate.c   | 27 +--
>  4 files changed, 31 insertions(+), 44 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index d8f8f7e..c2962d7 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -959,7 +959,6 @@ struct CPUPPCState {
>  ppc_slb_t slb[MAX_SLB_ENTRIES];
>  int32_t slb_nr;
>  /* tcg TLB needs flush (deferred slb inval instruction typically) */
> -uint32_t tlb_need_flush;
>  #endif
>  /* segment registers */
>  hwaddr htab_base;
> @@ -985,6 +984,7 @@ struct CPUPPCState {
>  target_ulong pb[4];
>  bool tlb_dirty;   /* Set to non-zero when modifying TLB  
> */
>  bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active
> */
> +uint32_t tlb_need_flush; /* Delayed flush needed */
>  #endif
>  
>  /* Other registers */
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 104b690..8fc0934 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, 
> target_ulong value,
>  return excp;
>  }
>  
> -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +#if !defined(CONFIG_USER_ONLY)
>  static inline void check_tlb_flush(CPUPPCState *env)
>  {
>  CPUState *cs = CPU(ppc_env_get_cpu(env));
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index a5e3878..485d5b8 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
>  case POWERPC_MMU_2_06a:
>  case POWERPC_MMU_2_07:
>  case POWERPC_MMU_2_07a:
> -env->tlb_need_flush = 0;
>  #endif /* defined(TARGET_PPC64) */
> +env->tlb_need_flush = 0;
>  tlb_flush(CPU(cpu), 1);
>  break;
>  default:
> @@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
>  void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
>  {
>  #if !defined(FLUSH_ALL_TLBS)
> -PowerPCCPU *cpu = ppc_env_get_cpu(env);
> -CPUState *cs;
> -
>  addr &= TARGET_PAGE_MASK;
>  switch (env->mmu_model) {
>  case POWERPC_MMU_SOFT_6xx:
> @@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, 
> target_ulong addr)
>  break;
>  case POWERPC_MMU_32B:
>  case POWERPC_MMU_601:
> -/* tlbie invalidate TLBs for all segments */
> -addr &= ~((target_ulong)-1ULL << 28);
> -cs = CPU(cpu);
> -/* XXX: this case should be optimized,
> - * giving a mask to tlb_flush_page
> - */
> -/* This is broken, some CPUs invalidate a whole congruence
> - * class on an even smaller subset of bits and some OSes take
> - * advantage of this. Just blow the whole thing away.
> +/* Actual CPUs invalidate entire congruence classes based on the
> + * geometry of their TLBs and some OSes take that into account,
> + * we just mark the TLB to be flushed later (context synchronizing
> + * event or sync instruction on 32-bit).
>   */
> -#if 0
> -tlb_flush_page(cs, addr | (0x0 << 28));
> -tlb_flush_page(cs, addr | (0x1 << 28));
> -tlb_flush_page(cs, addr | (0x2 << 28));
> -tlb_flush_page(cs, addr | (0x3 << 28));
> -tlb_flush_page(cs, addr | (0x4 << 28));
> -tlb_flush_page(cs, addr | (0x5 << 28));
> -tlb_flush_page(cs, addr | (0x6 << 28));
> -tlb_flush_page(cs, addr | (0x7 << 28));
> -tlb_flush_page(cs, addr | (0x8 << 28));
> -tlb_flush_page(cs, addr | (0x9 << 28));
> -tlb_flush_page(cs, addr | (0xA << 28));
> -tlb_flush_page(cs, addr | (0xB << 28));
> -tlb_flush_page(cs, addr | (0xC << 28));
> -tlb_flush_page(cs, addr | (0xD << 28));
> -tlb_flush_page(cs, addr | (0xE << 28));
> -tlb_flush_page(cs, addr | (0xF << 28));
> -#else
> -tlb_flush(cs, 1);
> -#endif
> +env->tlb_need_flush = 1;
>  break;
>  #if defined(TARGET_PPC64)
>  case POWERPC_MMU_64B:
> @@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, 
> target_ulong sr_num)
>  
>  void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong 
> value)
>  {
> -PowerPCCPU *cpu

[Qemu-devel] [Bug 1580459] Re: Windows (10?) guest freezes entire host on shutdown if using PCI passthrough

2016-06-06 Thread Chris McCarron

SYSLINUX.CFG

default /syslinux/menu.c32
menu title Lime Technology, Inc.
prompt 0
timeout 50
label unRAID OS
  kernel /bzimage
  append isolcpus=4,16,5,17,6,18,7,19,8,20,9,21,10,22,11,23 
pci-stub.ids=1b6f:7052,10de:13c2,10de:0fbb intel_iommu=on iommu=pt 
vfio_iommu_type1.allow_unsafe_interrupts=1 pcie_acs_override=downstream 
initrd=/bzroot
label unRAID OS GUI Mode
  menu default
  kernel /bzimage
  append isolcpus=4,16,5,17,6,18,7,19,8,20,9,21,10,22,11,23 
pci-stub-ids=1b6f:7052,10de:13c2,10de:0fbb intel_iommu=on iommu=pt 
vfio_iommu_type1.allow_unsafe_interrupts=1 pcie_acs_override=downstream 
initrd=/bzroot,/bzroot-gui
label unRAID OS Safe Mode (no plugins, no GUI)
  kernel /bzimage
  append initrd=/bzroot unraidsafemode
label Memtest86+
  kernel /memtest

pci-stub-ids=1b6f:7052,10de:13c2,10de:0fbb
1b6f:7052 = Etron Technology, Inc. EJ188/EJ198 USB 3.0 Host Controller
10de:13c2 = NVIDIA Corporation GM204 [GeForce GTX 970]
10de:0fbb = NVIDIA Corporation GM204 High Definition Audio Controller

** Attachment added: "W10 VM Devices.PNG"
   
https://bugs.launchpad.net/qemu/+bug/1580459/+attachment/4678446/+files/W10%20VM%20Devices.PNG

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1580459

Title:
  Windows (10?) guest freezes entire host on shutdown if using PCI
  passthrough

Status in libvirt:
  New
Status in QEMU:
  New
Status in Arch Linux:
  New
Status in Debian:
  New
Status in Fedora:
  New

Bug description:
  Problem: after leaving a Windows VM that uses PCI passthrough (as we
  do for gaming graphics cards, sound cards, and in my case, a USB card)
  running for some amount of time between 1 and 2 hours (it's not
  consistent with exactly how long), and for any amount of time longer
  than that, shutting down that guest will, right as it finishes
  shutting down, freeze the host computer, making it require a hard
  reboot. Unbinding (or in the other user's case, unbinding and THEN
  binding) any PCI device in sysfs, even one that has nothing to do with
  the VM, also has the same effect as shutting down the VM (if the VM
  has been running long enough). So, it's probably an issue related to
  unbinding and binding PCI devices.

  There's a lot of info on this problem over at 
https://bbs.archlinux.org/viewtopic.php?id=206050
  Here's a better-organized list of main details:
  -at least 2 confirmed victims of this bug; 2 (including me) have provided 
lots of info in the link
  -I'm on Arch Linux and the other one is on Gentoo (distro-nonspecific)
  -issue affects my Windows 10 guest and others' Windows guests, but not my 
Arch Linux guest (the others don't have non-Windows guests to test)
  -I'm using libvirt but the other user is not, so it's not an issue with 
libvirt
  -It seems to be version non-specific, too. I first noticed it at, or when 
testing versions still had the issue at (whichever version is lower), Linux 4.1 
and qemu 2.4.0. It still persists in all releases of both since, including the 
newest ones.
  -I can't track down exactly what package downgrade can fix it, as downgrading 
further than Linux 4.1 and qemu 2.4.0 requires Herculean and system-destroying 
changes such as downgrading ncurses, meaning I don't know whether it's a bug in 
QEMU, the Linux kernel, or some weird seemingly unrelated thing.
  -According to the other user, "graphics intensive gameplay (GTA V) can cause 
the crash to happen sooner," as soon as "15 minutes"
  -Also, "bringing up a second passthrough VM with separate hardware will cause 
the same crash," and "bringing up another VM before the two-hour mark will not 
result in a crash," further cementing that it's triggered by the un/binding of 
PCI devices.
  -This is NOT related to the very similar bug that can be worked around by not 
passing through the HDMI device or sound card. Even when we removed all traces 
of any sort of sound card from the VM, it still had the same behavior.

To manage notifications about this bug go to:
https://bugs.launchpad.net/libvirt/+bug/1580459/+subscriptions

Re: [Qemu-devel] [PATCH 1/2] ppc: Properly tag the translation cache based on MMU mode

2016-06-06 Thread Mark Cave-Ayland

On 06/06/16 10:52, Benjamin Herrenschmidt wrote:

> We used to always flush the TLB when changing relocation mode in
> MSR:IR and MSR:DR (ie. MMU on/off for Instructions and Data).
> 
> We don't anymore since we have split mmu_idx for instruction and data.
> 
> However, since we hard code the mmu_idx in the translated code, we
> now need to also make sure MSR:IR and MSR:DR are part of the hflags
> used to tag translated code, so that we use different translated
> code for different MMU settings.
> 
> Darwin gets hurt by this problem.
> 
> Signed-off-by: Benjamin Herrenschmidt 
> ---
>  target-ppc/helper_regs.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 12af61c..104b690 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -95,7 +95,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
>  /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
>  hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
>  (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
> -(1 << MSR_LE) | (1 << MSR_VSX);
> +(1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
>  hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
>  hreg_compute_mem_idx(env);
>  env->hflags = env->msr & hflags_mask;
> 
> 
> 

I've run through my complete set of OpenBIOS boot tests with both this
and patch 2 applied on top of Cédric's recent fixes and I no longer see
any regressions with g3beige/mac99 under TCG:

Tested-by: Mark Cave-Ayland 


ATB,

Mark.

[Qemu-devel] [Bug 1580459] Re: Windows (10?) guest freezes entire host on shutdown if using PCI passthrough

2016-06-06 Thread Chris McCarron

Current VM Config


  csmccarronwx00
  82c5e4f6-6991-cd5f-8207-49db04386cc9
  csmccarronwx00 i440fx-2.5 OVMF
  

  
  10485760
  10485760
  

  
  12
  













  
  
/machine
  
  
hvm
/usr/share/qemu/ovmf-x64/OVMF_CODE-pure-efi.fd

/etc/libvirt/qemu/nvram/82c5e4f6-6991-cd5f-8207-49db04386cc9_VARS-pure-efi.fd



  
  



  
  
  
  

  
  

  
  


  
  destroy
  restart
  restart
  
/usr/local/sbin/qemu

  
  
  
  
  
  


  
  
  
  
  
  


  
  


  
  
  


  
  
  


  
  
  


  


  
  


  
  


  
  
  
  
  
  


  
  
  


  
  
  


  
  
  
  


  
  

  
  
  


  
  

  
  
  


  
  

  


-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1580459

Title:
  Windows (10?) guest freezes entire host on shutdown if using PCI
  passthrough

Status in libvirt:
  New
Status in QEMU:
  New
Status in Arch Linux:
  New
Status in Debian:
  New
Status in Fedora:
  New

Bug description:
  Problem: after leaving a Windows VM that uses PCI passthrough (as we
  do for gaming graphics cards, sound cards, and in my case, a USB card)
  running for some amount of time between 1 and 2 hours (it's not
  consistent with exactly how long), and for any amount of time longer
  than that, shutting down that guest will, right as it finishes
  shutting down, freeze the host computer, making it require a hard
  reboot. Unbinding (or in the other user's case, unbinding and THEN
  binding) any PCI device in sysfs, even one that has nothing to do with
  the VM, also has the same effect as shutting down the VM (if the VM
  has been running long enough). So, it's probably an issue related to
  unbinding and binding PCI devices.

  There's a lot of info on this problem over at 
https://bbs.archlinux.org/viewtopic.php?id=206050
  Here's a better-organized list of main details:
  -at least 2 confirmed victims of this bug; 2 (including me) have provided 
lots of info in the link
  -I'm on Arch Linux and the other one is on Gentoo (distro-nonspecific)
  -issue affects my Windows 10 guest and others' Windows guests, but not my 
Arch Linux guest (the others don't have non-Windows guests to test)
  -I'm using libvirt but the other user is not, so it's not an issue with 
libvirt
  -It seems to be version non-specific, too. I first noticed it at, or when 
testing versions still had the issue at (whichever version is lower), Linux 4.1 
and qemu 2.4.0. It still persists in all releases of both since, including the 
newest ones.
  -I can't track down exactly what package downgrade can fix it, as downgrading 
further than Linux 4.1 and qemu 2.4.0 requires Herculean and system-destroying 
changes such as downgrading ncurses, meaning I don't know whether it's a bug in 
QEMU, the Linux kernel, or some weird seemingly unrelated thing.
  -According to the other user, "graphics intensive gameplay (GTA V) can cause 
the crash to happen sooner," as soon as "15 minutes"
  -Also, "bringing up a second passthrough VM with separate hardware will cause 
the same crash," and "bringing up another VM before the two-hour mark will not 
result in a crash," further cementing that it's triggered by the un/binding of 
PCI devices.
  -This is NOT related to the very similar bug that can be worked around by not 
passing through the HDMI device or sound card. Even when we removed all traces 
of any sort of sound card from the VM, it still had the same behavior.

To manage notifications about this bug go to:
https://bugs.launchpad.net/libvirt/+bug/1580459/+subscriptions

[Qemu-devel] [Bug 1580459] Re: Windows (10?) guest freezes entire host on shutdown if using PCI passthrough

2016-06-06 Thread Chris McCarron

Well for now my issue is resolved. This morning when I was shutting
down my unRaid server to blacklist the intel sound module, snd-hda-
intel, I first stopped my ubuntu vm and my two dockers then logged out
of unraid. I then proceeded to shutdown my Windows 10 VM and like magic
it shutdown nicely without locking up the entire system. Also, I found
out from unRaid tech support that the unRaid kernel does not include any
sound modules and it was not necessary to blacklist them.

So this is what I have changed since the last lockup last Thursday
night.

1. Removed the NVIDIA Audio hardware from the VM Setup. I did this because
the sound was lagging horribly and I could not figure out how to fix it. So I
removed the sound hardware and I am now using a USB sound card that is plugged
into the USB3 PCI-Express card that is being passed to the VM.
2. I enabled MSI Interrupts on the GPU using this URL as my reference.

http://lime-technology.com/wiki/index.php/UnRAID_6/VM_Guest_Support#Enable_MSI_for_Interrupts_to_Fix_HDMI_Audio_Support

I should also mention that while I have the system NIC, USB1, and USB2
virtual modules mapped, they are disabled in the VM. I did this to
improve latency issues inside the VM. I am using a wireless NIC plugged
into the USB3 PCI-Express card and I do not require USB1 or USB2. These
changes where made on Thursday prior to the last lockup, so while I do
believe they have helped overall latency they had no effect on the
system locking up.

USB3 card is handling Logitech G910 keyboard, WOW MMO Legendary Gaming
Mouse, ASUS XONARU3 Sound Card, ASUS USB-AC56 Wireless NIC, and a USB
Mouse.

I still would like to add the NVIDIA Sound card back into the VM and
when I do I will enable MSI Interrupts. My goal is not not have to use
the USB Sound card.

See next post for current VM setup.

--
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1580459

Title:
Windows (10?) guest freezes entire host on shutdown if using PCI
passthrough

Status in libvirt:
New
Status in QEMU:
New
Status in Arch Linux:
New
Status in Debian:
New
Status in Fedora:
New

Bug description:
Problem: after leaving a Windows VM that uses PCI passthrough (as we
do for gaming graphics cards, sound cards, and in my case, a USB card)
running for some amount of time between 1 and 2 hours (it's not
consistent with exactly how long), and for any amount of time longer
than that, shutting down that guest will, right as it finishes
shutting down, freeze the host computer, making it require a hard
reboot. Unbinding (or in the other user's case, unbinding and THEN
binding) any PCI device in sysfs, even one that has nothing to do with
the VM, also has the same effect as shutting down the VM (if the VM
has been running long enough). So, it's probably an issue related to
unbinding and binding PCI devices.

There's a lot of info on this problem over at
https://bbs.archlinux.org/viewtopic.php?id=206050
Here's a better-organized list of main details:
-at least 2 confirmed victims of this bug; 2 (including me) have provided
lots of info in the link
-I'm on Arch Linux and the other one is on Gentoo (distro-nonspecific)
-issue affects my Windows 10 guest and others' Windows guests, but not my
Arch Linux guest (the others don't have non-Windows guests to test)
-I'm using libvirt but the other user is not, so it's not an issue with
libvirt
-It seems to be version non-specific, too. I first noticed it at, or when
testing versions still had the issue at (whichever version is lower), Linux 4.1
and qemu 2.4.0. It still persists in all releases of both since, including the
newest ones.
-I can't track down exactly what package downgrade can fix it, as downgrading
further than Linux 4.1 and qemu 2.4.0 requires Herculean and system-destroying
changes such as downgrading ncurses, meaning I don't know whether it's a bug in
QEMU, the Linux kernel, or some weird seemingly unrelated thing.
-According to the other user, "graphics intensive gameplay (GTA V) can cause
the crash to happen sooner," as soon as "15 minutes"
-Also, "bringing up a second passthrough VM with separate hardware will cause
the same crash," and "bringing up another VM before the two-hour mark will not
result in a crash," further cementing that it's triggered by the un/binding of
PCI devices.
-This is NOT related to the very similar bug that can be worked around by not
passing through the HDMI device or sound card. Even when we removed all traces
of any sort of sound card from the VM, it still had the same behavior.

To manage notifications about this bug go to:
https://bugs.launchpad.net/libvirt/+bug/1580459/+subscriptions

Re: [Qemu-devel] [PATCH v4 1/9] target-avr: AVR cores support is added. 1. basic CPU structure 2. registers 3. no instructions

On 6 June 2016 at 11:37, Michael Rolnik  wrote:
> Signed-off-by: Michael Rolnik 
> ---
>  arch_init.c |   2 +
>  configure   |   5 +
>  default-configs/avr-softmmu.mak |   1 +
>  disas/Makefile.objs |   1 +
>  disas/avr.c |  10 ++
>  include/disas/bfd.h |   7 +
>  include/sysemu/arch_init.h  |   1 +
>  target-avr/Makefile.objs|   3 +
>  target-avr/cpu-qom.h|  80 +++
>  target-avr/cpu.c| 288 
> 
>  target-avr/cpu.h| 134 +++
>  target-avr/gdbstub.c|  99 ++
>  target-avr/helper.c |  85 
>  target-avr/helper.h |  21 +++
>  target-avr/machine.c|  54 
>  target-avr/machine.h|  21 +++
>  target-avr/translate.c  | 288 
> 
>  17 files changed, 1100 insertions(+)

(This would probably benefit from being split up a bit too
in my view, but Richard's reviewing this part, not me.)

> diff --git a/disas/avr.c b/disas/avr.c
> new file mode 100644
> index 000..f916e72
> --- /dev/null
> +++ b/disas/avr.c
> @@ -0,0 +1,10 @@

Please provide a header comment stating copyright and
license for all new files, even really small ones and
Makefiles. This is one of those things we don't get right
for everything in the tree right now but would like to
maintain for anything new we add. (You don't, at least
as far as we're concerned, have to use the full three
paragraph form if that seems overly bulky; something like

 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.

is also fine with us.)

> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +#include "disas/bfd.h"
> +
> +int print_insn_avr(bfd_vma addr, disassemble_info *info)
> +{
> +int length  = 0;;
> +/*  TODO*/
> +return length;
> +}

thanks
-- PMM

Re: [Qemu-devel] [PATCH v4 5/9] target-avr: adding AVR interrupt handling

On 6 June 2016 at 22:44, Richard Henderson  wrote:
> On 06/06/2016 03:37 AM, Michael Rolnik wrote:
>>
>> +if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
>> +stb_phys(cs->as, env->sp--, (ret & 0xff));
>> +stb_phys(cs->as, env->sp--, (ret & 0x00ff00) >>  8);
>> +stb_phys(cs->as, env->sp--, (ret & 0xff) >> 16);
>> +} else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
>> +stb_phys(cs->as, env->sp--, (ret & 0xff));
>> +stb_phys(cs->as, env->sp--, (ret & 0x00ff00) >>  8);
>> +} else {
>> +stb_phys(cs->as, env->sp--, (ret & 0xff));
>> +}
>
>
> It would be better to use cpu_stb_data.

Or address_space_stb() if you want to care about getting
the right behaviour if the store fails (we don't really
care about this on many targets at the moment though).

thanks
-- PMM

Re: [Qemu-devel] [PATCH v4 4/9] target-avr: adding instructions encodings

On 6 June 2016 at 22:38, Richard Henderson  wrote:
> On 06/06/2016 03:37 AM, Michael Rolnik wrote:
>>
>> Signed-off-by: Michael Rolnik 
>> ---
>>  target-avr/translate-inst.h | 730
>> 
>>  1 file changed, 730 insertions(+)
>>  create mode 100644 target-avr/translate-inst.h
>
>
> Reviewed-by: Richard Henderson 
>
> Although of course it would be better to have the generator
> and source instead of the generated code.

Yes, definitely. (Especially since the GPL wants the 'preferred form
of the work for making modifications'...)

thanks
-- PMM

Re: [Qemu-devel] [PATCH v4 7/9] target-avr: adding instruction decoder


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

+void avr_decode(uint32_t pc, uint32_t *length, uint32_t code, 
translate_function_t *translate)
+{
+uint32_t opcode  = extract32(code, 0, 16);
+
+switch (opcode & 0xd000) {
+case0x: {
+uint32_t opcode  = extract32(code, 0, 16);
+switch (opcode & 0x2c00) {
+case0x: {
+uint32_t opcode  = extract32(code, 0, 16);


I thought you said you fixed these shadowing declarations.


r~

[Qemu-devel] [PATCH] crypto: aes: always rename internal symbols

2016-06-06 Thread Mike Frysinger

From: Mike Frysinger 

OpenSSL's libcrypto always defines AES symbols with the same names as
qemu's local aes code.  This is problematic when enabling at least curl
as that frequently also uses libcrypto.  It might not be noticed when
running, but if you try to statically link, everything falls down.

An example snippet:
  LINK  qemu-nbd
.../libcrypto.a(aes-x86_64.o): In function 'AES_encrypt':
(.text+0x460): multiple definition of 'AES_encrypt'
crypto/aes.o:aes.c:(.text+0x670): first defined here
.../libcrypto.a(aes-x86_64.o): In function 'AES_decrypt':
(.text+0x9f0): multiple definition of 'AES_decrypt'
crypto/aes.o:aes.c:(.text+0xb30): first defined here
.../libcrypto.a(aes-x86_64.o): In function 'AES_cbc_encrypt':
(.text+0xf90): multiple definition of 'AES_cbc_encrypt'
crypto/aes.o:aes.c:(.text+0xff0): first defined here
collect2: error: ld returned 1 exit status
.../qemu-2.6.0/rules.mak:105: recipe for target 'qemu-nbd' failed
make: *** [qemu-nbd] Error 1

The aes.h header has redefines already for FreeBSD, but go ahead and
enable that for everyone since there's no real good reason to not use
a namespace all the time.

Signed-off-by: Mike Frysinger 
---
 include/crypto/aes.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index a006da2224a9..12fb321b89de 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -10,14 +10,13 @@ struct aes_key_st {
 };
 typedef struct aes_key_st AES_KEY;
 
-/* FreeBSD has its own AES_set_decrypt_key in -lcrypto, avoid conflicts */
-#ifdef __FreeBSD__
+/* FreeBSD/OpenSSL have their own AES functions with the same names in -lcrypto
+ * (which might be pulled in via curl), so redefine to avoid conflicts. */
 #define AES_set_encrypt_key QEMU_AES_set_encrypt_key
 #define AES_set_decrypt_key QEMU_AES_set_decrypt_key
 #define AES_encrypt QEMU_AES_encrypt
 #define AES_decrypt QEMU_AES_decrypt
 #define AES_cbc_encrypt QEMU_AES_cbc_encrypt
-#endif
 
 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
AES_KEY *key);
-- 
2.8.2

Re: [Qemu-devel] [PATCH v4 6/9] target-avr: adding helpers for IN, OUT, SLEEP, WBR & unsupported instructions


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

Signed-off-by: Michael Rolnik 
---
 target-avr/helper.c | 153 
 target-avr/helper.h |   5 ++
 2 files changed, 147 insertions(+), 11 deletions(-)

diff --git a/target-avr/helper.c b/target-avr/helper.c
index e798dd9..f514ed6 100644
--- a/target-avr/helper.c
+++ b/target-avr/helper.c
@@ -31,7 +31,7 @@

 bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
-CPUClass*cc  = CPU_GET_CLASS(cs);
+CPUClass*cc = CPU_GET_CLASS(cs);
 AVRCPU  *cpu = AVR_CPU(cs);
 CPUAVRState *env = &cpu->env;

@@ -49,7 +49,7 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 }
 if (interrupt_request & CPU_INTERRUPT_HARD) {
 if (cpu_interrupts_enabled(env) && env->intsrc != 0) {
-int index   = ctz32(env->intsrc);
+int index = ctz32(env->intsrc);
 cs->exception_index = EXCP_INT(index);
 cc->do_interrupt(cs);

@@ -64,18 +64,18 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)

 void avr_cpu_do_interrupt(CPUState *cs)
 {
-AVRCPU *cpu = AVR_CPU(cs);
-CPUAVRState*env = &cpu->env;
+AVRCPU *cpu = AVR_CPU(cs);
+CPUAVRState *env = &cpu->env;

-uint32_tret = env->pc;
-int vector;
-int size= avr_feature(env, AVR_FEATURE_JMP_CALL) ? 2 : 1;
-int base= 0;/* TODO: where to get it */
+uint32_t ret = env->pc;
+int vector = 0;
+int size = avr_feature(env, AVR_FEATURE_JMP_CALL) ? 2 : 1;
+int base = 0;/* TODO: where to get it */

 if (cs->exception_index == EXCP_RESET) {
-vector  = 0;
+vector = 0;
 } else if (env->intsrc != 0) {
-vector  = ctz32(env->intsrc) + 1;
+vector = ctz32(env->intsrc) + 1;
 }

 if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
@@ -90,7 +90,7 @@ void avr_cpu_do_interrupt(CPUState *cs)
 }

 env->pc = base + vector * size;
-env->sregI  = 0;/*  clear Global Interrupt Flag */
+env->sregI = 0;/*  clear Global Interrupt Flag */

 cs->exception_index = -1;
 }



Fold all of these whitespace changes into the original patch.



 }
+void helper_sleep(CPUAVRState *env)


Spacing.


+}
+void helper_unsupported(CPUAVRState *env)


Spacing.


+{
+CPUState *cs = CPU(avr_env_get_cpu(env));
+
+cs->exception_index = EXCP_DEBUG;
+cpu_dump_state(cs, stderr, fprintf, 0);


  if (qemu_loglevel_mask(LOG_UNIMP))
{
  qemu_log("UNSUPPORTED\n");
  cpu_dump_state(cs, qemu_logfile, fprintf, 0);
}

You also should be using something other than EXCP_DEBUG.


+void helper_wdr(CPUAVRState *env)
+{
+CPUState *cs = CPU(avr_env_get_cpu(env));
+
+cs->exception_index = EXCP_DEBUG;


Likewise.


+sreg =   (env->sregC & 0x01) << 0
+| (env->sregZ & 0x01) << 1
+| (env->sregN & 0x01) << 2
+| (env->sregV & 0x01) << 3
+| (env->sregS & 0x01) << 4
+| (env->sregH & 0x01) << 5
+| (env->sregT & 0x01) << 6
+| (env->sregI & 0x01) << 7;


Same bug with sregZ.


+case0x3d: { /*  SPL */
+if (avr_feature(env, AVR_FEATURE_2_BYTE_SP)) {
+env->sp = (env->sp & 0xff00) | (data);
+}
+break;


Surely you should write to the low byte of SP all the time.  How else do you 
change SPL when you have a one-byte stack pointer?



+case0x3f: { /*  SREG */
+env->sregC = (data >> 0) & 0x01;
+env->sregZ = (data >> 1) & 0x01;
+env->sregN = (data >> 2) & 0x01;
+env->sregV = (data >> 3) & 0x01;
+env->sregS = (data >> 4) & 0x01;
+env->sregH = (data >> 5) & 0x01;
+env->sregT = (data >> 6) & 0x01;
+env->sregI = (data >> 7) & 0x01;


Same bug with sregZ.


r~

Re: [Qemu-devel] [PATCH v4 3/9] target-avr: adding a sample AVR board

On 6 June 2016 at 11:37, Michael Rolnik  wrote:
> Signed-off-by: Michael Rolnik 
> ---
>  hw/Makefile.objs |   1 +
>  hw/avr/Makefile.objs |   1 +
>  hw/avr/sample-io.c   | 217 
> +++
>  hw/avr/sample.c  | 118 
>  4 files changed, 337 insertions(+)
>  create mode 100644 hw/avr/Makefile.objs
>  create mode 100644 hw/avr/sample-io.c
>  create mode 100644 hw/avr/sample.c

You're probably better off having the device in one
patch and the board model in another, rather than combining them.

> diff --git a/hw/Makefile.objs b/hw/Makefile.objs
> index 4a07ed4..262ca15 100644
> --- a/hw/Makefile.objs
> +++ b/hw/Makefile.objs
> @@ -33,6 +33,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += watchdog/
>  devices-dirs-$(CONFIG_SOFTMMU) += xen/
>  devices-dirs-$(CONFIG_MEM_HOTPLUG) += mem/
>  devices-dirs-$(CONFIG_SMBIOS) += smbios/
> +devices-dirs-$(CONFIG_SOFTMMU) += avr/

No other target uses this for their hw/ directory,
which is a clue that you don't need it. Makefile.target adds
hw/$(TARGET_BASE_ARCH) automatically.

>  devices-dirs-y += core/
>  common-obj-y += $(devices-dirs-y)
>  obj-y += $(devices-dirs-y)
> diff --git a/hw/avr/Makefile.objs b/hw/avr/Makefile.objs
> new file mode 100644
> index 000..9f6be2f
> --- /dev/null
> +++ b/hw/avr/Makefile.objs
> @@ -0,0 +1 @@
> +obj-y   += sample.o sample-io.o
> diff --git a/hw/avr/sample-io.c b/hw/avr/sample-io.c
> new file mode 100644
> index 000..7bf5e48
> --- /dev/null
> +++ b/hw/avr/sample-io.c

Generally, device models don't live in hw/, only board
models. Put the device model in the appropriate subdirectory
of hw/, which is 'misc' for this one.

> @@ -0,0 +1,217 @@
> +/*
> + *  QEMU AVR CPU
> + *
> + *  Copyright (c) 2016 Michael Rolnik
> + *
> + *  This library is free software; you can redistribute it and/or
> + *  modify it under the terms of the GNU Lesser General Public
> + *  License as published by the Free Software Foundation; either
> + *  version 2.1 of the License, or (at your option) any later version.
> + *
> + *  This library is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + *  Lesser General Public License for more details.
> + *
> + *  You should have received a copy of the GNU Lesser General Public
> + *  License along with this library; if not, see
> + *  
> + */

So what actually is this device? Is it something that
corresponds to real hardware, or to some other emulator's
debug/test device, or something we've just made up?
This is a good place to put a comment answering this kind of
question (with links or references to documentation if relevant).

> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qemu-common.h"
> +#include "cpu.h"
> +#include "include/hw/sysbus.h"
> +
> +#define TYPE_SAMPLEIO   "SampleIO"
> +#define SAMPLEIO(obj)   OBJECT_CHECK(SAMPLEIOState, (obj), TYPE_SAMPLEIO)
> +
> +#ifndef DEBUG_SAMPLEIO
> +#define DEBUG_SAMPLEIO 1

Don't enable debug by default.

> +#endif
> +
> +#define DPRINTF(fmt, args...)
>  \
> +do { 
>  \
> +if (DEBUG_SAMPLEIO) {
>  \
> +fprintf(stderr, "[%s]%s: " fmt , TYPE_SAMPLEIO, __func__, 
> ##args);\
> +}
>  \
> +}
>  \
> +while (0)

You might want to consider using tracepoints rather than
a raw debug macro. I don't insist on it, but they're pretty neat.
(You list your trace points in the trace-events file and then
that automatically generates functions trace_whatever that you
call at the relevant points in your code. There are various
backends but by default you should be able to enable them
at runtime with '-d trace:some_glob_pattern' (eg
'-d trace:avr-sample-*'). Example device doing this:
hw/intc/aspeed_vic.c.

> +
> +#define AVR_IO_CPU_REGS_SIZE0x0020
> +#define AVR_IO_CPU_IO_SIZE  0x0040
> +#define AVR_IO_EXTERN_IO_SIZE   0x00a0
> +#define AVR_IO_SIZE (AVR_IO_CPU_REGS_SIZE   \
> ++ AVR_IO_CPU_IO_SIZE\
> ++ AVR_IO_EXTERN_IO_SIZE)
> +
> +#define AVR_IO_CPU_REGS_BASE0x
> +#define AVR_IO_CPU_IO_BASE  (AVR_IO_CPU_REGS_BASE   \
> ++ AVR_IO_CPU_REGS_SIZE)
> +#define AVR_IO_EXTERN_IO_BASE   (AVR_IO_CPU_IO_BASE \
> ++ AVR_IO_CPU_IO_SIZE)
> +
> +
> +typedef struct SAMPLEIOState {
> +SysBusDeviceparent;
> +
> +MemoryRegioniomem;
> +
> +AVRCPU *cpu;
> +
> +uint8_t io[0x40];
> +ui

Re: [Qemu-devel] [RFC v2 PATCH 01/13] Introduce TCGOpcode for memory barrier

2016-06-06 Thread Sergey Fedorov

On 07/06/16 00:00, Peter Maydell wrote:
> On 6 June 2016 at 21:30, Sergey Fedorov  wrote:
>> On 06/06/16 22:28, Pranith Kumar wrote:
>>> On Mon, Jun 6, 2016 at 3:23 PM, Richard Henderson  wrote:
 On 06/06/2016 10:11 AM, Pranith Kumar wrote:
> If I read it correctly TCG_BAR_SYNC is equivalent to OR of all the
> other four barriers. I am not sure if we can just construct SYNC like
> this or if we need to define it explicitly though.
 AFAICS, sparc membar #sync is stronger.
>>> I tried looking it up but it's not clear. How is it stronger? And do
>>> we need those strong guarantees in our front-end/back-end?
>> That is not clear for me either :( AFAIU, PPC's lwsync does allow stores
>> to be reordered after loads but hwsync - not.
> Yes, from the PoV of the other CPU. That is, for write-then-read by
> CPU 0, CPU 0 will always read what it wrote, but other CPUs don't
> necessarily see the write before the read is satisfied.
> https://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test7.pdf
> describes the difference in sections 3.2 and 3.3, and has an
> example in section 6 of a situation which requires a full
> (hw)sync and an lwsync is not sufficient.
>
>> I suspect Sparc's membar
>> #Sync is used to ensure that some system operations are complete before
>> proceeding with execution. I'm not sure we need to introduce this into
>> TCG. It needs to be clear what is it and how to use it.
> My reading of the manual is that the SPARC "membar #Sync" is like ARM
> ISB -- it enforces that any instruction (whether a memory access or not)
> before it must finish before anything after it can start. It only
> affects the CPU that issues it (assuming you didn't also specify
> any of the bits requesting memory barriers!) Since TCG doesn't attempt
> to reorder instructions, we likely don't need to do anything except
> maybe end the current TB. Also if we're still trying to do TLB
> operations on other CPUs asynchronously we need to wait for them to
> finish; I forget what the conclusion was on that idea.
> PPC equivalent insn is isync I think.
>

Thanks for commenting this, Peter. AFAIU, a sequential consistency
barrier is stronger than a release-aquire barrier because it provides
"transitivity/commutativity" [1]. This is what general barrier
guarantees in Linux [2]. I especially like this piece of description
from [2]:

... if this example runs on a system where CPUs 1 and 2 share a
store buffer or a level of cache, CPU 2 might have early access to
CPU 1's writes. General barriers are therefore required to ensure
that all CPUs agree on the combined order of CPU 1's and CPU 2's
accesses.

Current Linux kernel implements Sparc's smp_mb()/__smp_mb()/mb() with
"membar #StoreLoad" [3]. So we'll probably be fine with just RR, RW, WR,
and WW bits in the TCG memory barrier operation attribute.

Kind regards,
Sergey

[1] https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync#Overall_Summary
[2]
http://lxr.free-electrons.com/source/Documentation/memory-barriers.txt#L1268
[3]
http://lxr.free-electrons.com/source/arch/sparc/include/asm/barrier_64.h#L36

Re: [Qemu-devel] [PATCH v2 05/19] linux-user: Define macro for size of host kernel sigset_t



Le 27/05/2016 à 16:51, Peter Maydell a écrit :
> Some host syscalls take an argument specifying the size of a
> host kernel's sigset_t (which isn't necessarily the same as
> that of the host libc's type of that name). Instead of hardcoding
> _NSIG / 8 where we do this, define and use a SIGSET_T_SIZE macro.
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Laurent Vivier 

> ---
>  linux-user/syscall.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index df70255..e4b7404 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -119,6 +119,10 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>  #define  VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct 
> linux_dirent [2])
>  #define  VFAT_IOCTL_READDIR_SHORT_IOR('r', 2, struct 
> linux_dirent [2])
>  
> +/* This is the size of the host kernel's sigset_t, needed where we make
> + * direct system calls that take a sigset_t pointer and a size.
> + */
> +#define SIGSET_T_SIZE (_NSIG / 8)
>  
>  #undef _syscall0
>  #undef _syscall1
> @@ -7221,7 +7225,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
> arg1,
>  /* Extract the two packed args for the sigset */
>  if (arg6) {
>  sig_ptr = &sig;
> -sig.size = _NSIG / 8;
> +sig.size = SIGSET_T_SIZE;
>  
>  arg7 = lock_user(VERIFY_READ, arg6, sizeof(*arg7) * 2, 1);
>  if (!arg7) {
> @@ -8275,7 +8279,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
> arg1,
>  set = NULL;
>  }
>  
> -ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts, set, 
> _NSIG/8));
> +ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts,
> +  set, SIGSET_T_SIZE));
>  
>  if (!is_error(ret) && arg3) {
>  host_to_target_timespec(arg3, timeout_ts);
>

Re: [Qemu-devel] [PATCH v4 5/9] target-avr: adding AVR interrupt handling


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

+if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
+stb_phys(cs->as, env->sp--, (ret & 0xff));
+stb_phys(cs->as, env->sp--, (ret & 0x00ff00) >>  8);
+stb_phys(cs->as, env->sp--, (ret & 0xff) >> 16);
+} else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
+stb_phys(cs->as, env->sp--, (ret & 0xff));
+stb_phys(cs->as, env->sp--, (ret & 0x00ff00) >>  8);
+} else {
+stb_phys(cs->as, env->sp--, (ret & 0xff));
+}


It would be better to use cpu_stb_data.


r~

Re: [Qemu-devel] [PATCH v2 04/19] linux-user: Factor out uses of do_sigprocmask() from sigreturn code



Le 27/05/2016 à 16:51, Peter Maydell a écrit :
> All the architecture specific handlers for sigreturn include calls
> to do_sigprocmask(SIGSETMASK, &set, NULL) to set the signal mask
> from the uc_sigmask in the context being restored. Factor these
> out into calls to a set_sigmask() function. The next patch will
> want to add code which is not run when setting the signal mask
> via do_sigreturn, and this change allows us to separate the two
> cases.
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Laurent Vivier 

> ---
>  linux-user/signal.c | 55 
> +++--
>  1 file changed, 32 insertions(+), 23 deletions(-)
> 
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 5069c3f..1b86a85 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -239,6 +239,15 @@ int do_sigprocmask(int how, const sigset_t *set, 
> sigset_t *oldset)
>  return ret;
>  }
>  
> +#if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \
> +!defined(TARGET_X86_64)
> +/* Just set the guest's signal mask to the specified value */
> +static void set_sigmask(const sigset_t *set)
> +{
> +do_sigprocmask(SIG_SETMASK, set, NULL);
> +}
> +#endif
> +
>  /* siginfo conversion */
>  
>  static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
> @@ -1093,7 +1102,7 @@ long do_sigreturn(CPUX86State *env)
>  }
>  
>  target_to_host_sigset_internal(&set, &target_set);
> -do_sigprocmask(SIG_SETMASK, &set, NULL);
> +set_sigmask(&set);
>  
>  /* restore registers */
>  if (restore_sigcontext(env, &frame->sc))
> @@ -1118,7 +1127,7 @@ long do_rt_sigreturn(CPUX86State *env)
>  if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1))
>  goto badframe;
>  target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
> -do_sigprocmask(SIG_SETMASK, &set, NULL);
> +set_sigmask(&set);
>  
>  if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
>  goto badframe;
> @@ -1258,7 +1267,7 @@ static int target_restore_sigframe(CPUARMState *env,
>  uint64_t pstate;
>  
>  target_to_host_sigset(&set, &sf->uc.tuc_sigmask);
> -do_sigprocmask(SIG_SETMASK, &set, NULL);
> +set_sigmask(&set);
>  
>  for (i = 0; i < 31; i++) {
>  __get_user(env->xregs[i], &sf->uc.tuc_mcontext.regs[i]);
> @@ -1900,7 +1909,7 @@ static long do_sigreturn_v1(CPUARMState *env)
>  }
>  
>  target_to_host_sigset_internal(&host_set, &set);
> -do_sigprocmask(SIG_SETMASK, &host_set, NULL);
> +set_sigmask(&host_set);
>  
>  if (restore_sigcontext(env, &frame->sc)) {
>  goto badframe;
> @@ -1981,7 +1990,7 @@ static int do_sigframe_return_v2(CPUARMState *env, 
> target_ulong frame_addr,
>  abi_ulong *regspace;
>  
>  target_to_host_sigset(&host_set, &uc->tuc_sigmask);
> -do_sigprocmask(SIG_SETMASK, &host_set, NULL);
> +set_sigmask(&host_set);
>  
>  if (restore_sigcontext(env, &uc->tuc_mcontext))
>  return 1;
> @@ -2077,7 +2086,7 @@ static long do_rt_sigreturn_v1(CPUARMState *env)
>  }
>  
>  target_to_host_sigset(&host_set, &frame->uc.tuc_sigmask);
> -do_sigprocmask(SIG_SETMASK, &host_set, NULL);
> +set_sigmask(&host_set);
>  
>  if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
>  goto badframe;
> @@ -2453,7 +2462,7 @@ long do_sigreturn(CPUSPARCState *env)
>  }
>  
>  target_to_host_sigset_internal(&host_set, &set);
> -do_sigprocmask(SIG_SETMASK, &host_set, NULL);
> +set_sigmask(&host_set);
>  
>  if (err) {
>  goto segv_and_exit;
> @@ -2576,7 +2585,7 @@ void sparc64_set_context(CPUSPARCState *env)
>  }
>  }
>  target_to_host_sigset_internal(&set, &target_set);
> -do_sigprocmask(SIG_SETMASK, &set, NULL);
> +set_sigmask(&set);
>  }
>  env->pc = pc;
>  env->npc = npc;
> @@ -2993,7 +3002,7 @@ long do_sigreturn(CPUMIPSState *regs)
>  }
>  
>  target_to_host_sigset_internal(&blocked, &target_set);
> -do_sigprocmask(SIG_SETMASK, &blocked, NULL);
> +set_sigmask(&blocked);
>  
>  restore_sigcontext(regs, &frame->sf_sc);
>  
> @@ -3097,7 +3106,7 @@ long do_rt_sigreturn(CPUMIPSState *env)
>  }
>  
>  target_to_host_sigset(&blocked, &frame->rs_uc.tuc_sigmask);
> -do_sigprocmask(SIG_SETMASK, &blocked, NULL);
> +set_sigmask(&blocked);
>  
>  restore_sigcontext(env, &frame->rs_uc.tuc_mcontext);
>  
> @@ -3371,7 +3380,7 @@ long do_sigreturn(CPUSH4State *regs)
>  goto badframe;
>  
>  target_to_host_sigset_internal(&blocked, &target_set);
> -do_sigprocmask(SIG_SETMASK, &blocked, NULL);
> +set_sigmask(&blocked);
>  
>  restore_sigcontext(regs, &frame->sc);
>  
> @@ -3397,7 +3406,7 @@ long do_rt_sigreturn(CPUSH4State *regs)
>  }
>  
>  target_to_host_sigset(&blocked, &frame->uc.tuc_sigmask);
> -do_sigprocmask(SIG_SETMASK, &blocked, NULL);
> +set_sigmask(&blocked);
>  
>  restore_sigconte

Re: [Qemu-devel] [PATCH v2 03/19] linux-user: Fix stray tab-indent



Le 27/05/2016 à 16:51, Peter Maydell a écrit :
> Fix a stray tab-indented linux in linux-user/signal.c.
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Laurent Vivier 

> ---
>  linux-user/signal.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 5f98c71..5069c3f 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -5847,8 +5847,9 @@ static void handle_pending_signal(CPUArchState 
> *cpu_env, int sig)
>  else
>  setup_frame(sig, sa, &target_old_set, cpu_env);
>  #endif
> - if (sa->sa_flags & TARGET_SA_RESETHAND)
> +if (sa->sa_flags & TARGET_SA_RESETHAND) {
>  sa->_sa_handler = TARGET_SIG_DFL;
> +}
>  }
>  if (q != &k->info)
>  free_sigqueue(cpu_env, q);
>

Re: [Qemu-devel] [PATCH v2 02/19] linux-user: Move handle_pending_signal() to avoid need for declaration



Le 27/05/2016 à 16:51, Peter Maydell a écrit :
> Move the handle_pending_signal() function above process_pending_signals()
> to avoid the need for a forward declaration. (Whitespace only change.)
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Laurent Vivier 

> ---
>  linux-user/signal.c | 44 +---
>  1 file changed, 21 insertions(+), 23 deletions(-)
> 
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index a9ac491..5f98c71 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -5765,29 +5765,6 @@ long do_rt_sigreturn(CPUArchState *env)
>  
>  #endif
>  
> -static void handle_pending_signal(CPUArchState *cpu_env, int sig);
> -
> -void process_pending_signals(CPUArchState *cpu_env)
> -{
> -CPUState *cpu = ENV_GET_CPU(cpu_env);
> -int sig;
> -TaskState *ts = cpu->opaque;
> -
> -if (!ts->signal_pending)
> -return;
> -
> -/* FIXME: This is not threadsafe.  */
> -for(sig = 1; sig <= TARGET_NSIG; sig++) {
> -if (ts->sigtab[sig - 1].pending) {
> -handle_pending_signal(cpu_env, sig);
> -return;
> -}
> -}
> -/* if no signal is pending, just return */
> -ts->signal_pending = 0;
> -return;
> -}
> -
>  static void handle_pending_signal(CPUArchState *cpu_env, int sig)
>  {
>  CPUState *cpu = ENV_GET_CPU(cpu_env);
> @@ -5876,3 +5853,24 @@ static void handle_pending_signal(CPUArchState 
> *cpu_env, int sig)
>  if (q != &k->info)
>  free_sigqueue(cpu_env, q);
>  }
> +
> +void process_pending_signals(CPUArchState *cpu_env)
> +{
> +CPUState *cpu = ENV_GET_CPU(cpu_env);
> +int sig;
> +TaskState *ts = cpu->opaque;
> +
> +if (!ts->signal_pending)
> +return;
> +
> +/* FIXME: This is not threadsafe.  */
> +for(sig = 1; sig <= TARGET_NSIG; sig++) {
> +if (ts->sigtab[sig - 1].pending) {
> +handle_pending_signal(cpu_env, sig);
> +return;
> +}
> +}
> +/* if no signal is pending, just return */
> +ts->signal_pending = 0;
> +return;
> +}
>

Re: [Qemu-devel] [PATCH v2 01/19] linux-user: Factor out handle_signal code from process_pending_signals()



Le 27/05/2016 à 16:51, Peter Maydell a écrit :
> Factor out the code to handle a single signal from the
> process_pending_signals() function. The use of goto for flow control
> is OK currently, but would get significantly uglier if extended to
> allow running the handle_signal code multiple times.
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Laurent Vivier 

> ---
>  linux-user/signal.c | 29 ++---
>  1 file changed, 18 insertions(+), 11 deletions(-)
> 
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 8090b4d..a9ac491 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -5765,33 +5765,40 @@ long do_rt_sigreturn(CPUArchState *env)
>  
>  #endif
>  
> +static void handle_pending_signal(CPUArchState *cpu_env, int sig);
> +
>  void process_pending_signals(CPUArchState *cpu_env)
>  {
>  CPUState *cpu = ENV_GET_CPU(cpu_env);
>  int sig;
> -abi_ulong handler;
> -sigset_t set, old_set;
> -target_sigset_t target_old_set;
> -struct emulated_sigtable *k;
> -struct target_sigaction *sa;
> -struct sigqueue *q;
>  TaskState *ts = cpu->opaque;
>  
>  if (!ts->signal_pending)
>  return;
>  
>  /* FIXME: This is not threadsafe.  */
> -k = ts->sigtab;
>  for(sig = 1; sig <= TARGET_NSIG; sig++) {
> -if (k->pending)
> -goto handle_signal;
> -k++;
> +if (ts->sigtab[sig - 1].pending) {
> +handle_pending_signal(cpu_env, sig);
> +return;
> +}
>  }
>  /* if no signal is pending, just return */
>  ts->signal_pending = 0;
>  return;
> +}
> +
> +static void handle_pending_signal(CPUArchState *cpu_env, int sig)
> +{
> +CPUState *cpu = ENV_GET_CPU(cpu_env);
> +abi_ulong handler;
> +sigset_t set, old_set;
> +target_sigset_t target_old_set;
> +struct target_sigaction *sa;
> +struct sigqueue *q;
> +TaskState *ts = cpu->opaque;
> +struct emulated_sigtable *k = &ts->sigtab[sig - 1];
>  
> - handle_signal:
>  trace_user_handle_signal(cpu_env, sig);
>  /* dequeue signal */
>  q = k->first;
>

Re: [Qemu-devel] [PATCH v4 4/9] target-avr: adding instructions encodings


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

Signed-off-by: Michael Rolnik 
---
 target-avr/translate-inst.h | 730 
 1 file changed, 730 insertions(+)
 create mode 100644 target-avr/translate-inst.h


Reviewed-by: Richard Henderson 

Although of course it would be better to have the generator and source instead 
of the generated code.



r~

Re: [Qemu-devel] [PATCH v2 2/5] qcow2: Implement .bdrv_co_preadv()

On 06/06/2016 08:59 AM, Kevin Wolf wrote:
> Reading from qcow2 images is now byte granularity.
> 
> Most of the affected code in qcow2 actually gets simpler with this
> change. The only exception is encryption, which is fixed on 512 bytes
> blocks; in order to keep this working, bs->request_alignment is set for
> encrypted images.
> 
> Signed-off-by: Kevin Wolf 
> ---
>  block/qcow2-cluster.c |  27 ++---
>  block/qcow2.c | 108 
> +++---
>  block/qcow2.h |   2 +-
>  3 files changed, 72 insertions(+), 65 deletions(-)
> 

> - * on exit, *num is the number of contiguous sectors we can read.
> + * On exit, *bytes is the number of bytes staring at offset that have the 
> same

s/staring/starting/

(although "staring" does make for a rather interesting thought :)

> @@ -1389,26 +1402,24 @@ static coroutine_fn int 
> qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
>  
>  qemu_co_mutex_lock(&s->lock);
>  
> -while (remaining_sectors != 0) {
> +while (bytes != 0) {
>  
>  /* prepare next request */
> -cur_nr_sectors = remaining_sectors;
> +cur_bytes = MIN(bytes, INT_MAX);
>  if (s->cipher) {
> -cur_nr_sectors = MIN(cur_nr_sectors,
> -QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
> +cur_bytes = MIN(cur_bytes,
> +QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
>  }

Can this ever result in cur_bytes that is not aligned, even though
encryption requires aligned reads?  Do you need to round anything to a
sector boundary in this case?

>  qemu_co_mutex_unlock(&s->lock);
> -ret = bdrv_co_readv(bs->file->bs,
> -(cluster_offset >> 9) + index_in_cluster,
> -cur_nr_sectors, &hd_qiov);
> +ret = bdrv_co_preadv(bs->file->bs,
> + cluster_offset + offset_in_cluster,
> + cur_bytes, &hd_qiov, 0);
>  qemu_co_mutex_lock(&s->lock);
>  if (ret < 0) {
>  goto fail;
>  }
>  if (bs->encrypted) {
>  assert(s->cipher);
> +assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> +assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);

Hmm, you do check that later on.

So, other than the typo (easy to fix on commit),
Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2] linux-user: provide frame information in x86-64 safe_syscall


On 06/06/2016 11:56 AM, Peter Maydell wrote:

Use cfi directives in the x86-64 safe_syscall to allow gdb to get
backtraces right from within it. (In particular this will be
quite a common situation if the user interrupts QEMU while it's
in a blocked safe-syscall: at the point of the syscall insn RBP
is in use for something else, and so gdb can't find the frame then
without assistance.)

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
---
v1->v2 changes: minor tweaks as requested by rth


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [RFC v2 PATCH 01/13] Introduce TCGOpcode for memory barrier

On 6 June 2016 at 21:30, Sergey Fedorov  wrote:
> On 06/06/16 22:28, Pranith Kumar wrote:
>> On Mon, Jun 6, 2016 at 3:23 PM, Richard Henderson  wrote:
>>> On 06/06/2016 10:11 AM, Pranith Kumar wrote:
 If I read it correctly TCG_BAR_SYNC is equivalent to OR of all the
 other four barriers. I am not sure if we can just construct SYNC like
 this or if we need to define it explicitly though.
>>>
>>> AFAICS, sparc membar #sync is stronger.
>> I tried looking it up but it's not clear. How is it stronger? And do
>> we need those strong guarantees in our front-end/back-end?
>
> That is not clear for me either :( AFAIU, PPC's lwsync does allow stores
> to be reordered after loads but hwsync - not.

Yes, from the PoV of the other CPU. That is, for write-then-read by
CPU 0, CPU 0 will always read what it wrote, but other CPUs don't
necessarily see the write before the read is satisfied.
https://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test7.pdf
describes the difference in sections 3.2 and 3.3, and has an
example in section 6 of a situation which requires a full
(hw)sync and an lwsync is not sufficient.

> I suspect Sparc's membar
> #Sync is used to ensure that some system operations are complete before
> proceeding with execution. I'm not sure we need to introduce this into
> TCG. It needs to be clear what is it and how to use it.

My reading of the manual is that the SPARC "membar #Sync" is like ARM
ISB -- it enforces that any instruction (whether a memory access or not)
before it must finish before anything after it can start. It only
affects the CPU that issues it (assuming you didn't also specify
any of the bits requesting memory barriers!) Since TCG doesn't attempt
to reorder instructions, we likely don't need to do anything except
maybe end the current TB. Also if we're still trying to do TLB
operations on other CPUs asynchronously we need to wait for them to
finish; I forget what the conclusion was on that idea.
PPC equivalent insn is isync I think.

thanks
-- PMM

[Qemu-devel] [Bug 1581796] Re: console-gl.c:96:surface_gl_create_texture:code should not be reached

2016-06-06 Thread luigiburdo

Hi T.
yes the official git 2.6 from qemu.org 
No video come with all type of machine i had been tested:  ppc, ppc64, i386, 
x86_64 , but this only on Ubuntu 16.10
on Fedora 24 all is working right .
I think there is something broken on ubuntu 16.10.  

ah sorry i forgot ... on Fedora 24  qemu 2.6.0 with the patch is working

Luigi

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1581796

Title:
  console-gl.c:96:surface_gl_create_texture:code should not be reached

Status in QEMU:
  New

Bug description:
  Facing this if i enable gtk,gl option same is with sd2 gl options.

  PowerPc P5020 4gb ram Ubuntu Mate 16:04

  tested on
  RadeonSi 7750HD 2gb ddr3
  r600 6570 2gb ddr3

  Masa 11.2.2 and 11.3 dev
  same issue come on swrast mode

  Thanks
  Luigi

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1581796/+subscriptions

Re: [Qemu-devel] [PATCH v4 3/9] target-avr: adding a sample AVR board


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

+void write_Rx(CPUAVRState *env, int inst, uint8_t data)
+{
+env->r[inst] = data;
+}
+uint8_t read_Rx(CPUAVRState *env, int inst)


Spacing.  But more importantly...


+static
+void sample_io_write(void *opaque, hwaddr offset, uint64_t value, unsigned 
size)
+{
+SAMPLEIOState *s = SAMPLEIO(opaque);
+AVRCPU *cpu = s->cpu;
+CPUAVRState *env = &cpu->env;
+
+assert(size == 1);
+
+if (AVR_IO_CPU_REGS_BASE <= offset
+&& offset < (AVR_IO_CPU_REGS_BASE + AVR_IO_CPU_REGS_SIZE)) {
+return  write_Rx(env, offset - AVR_IO_CPU_REGS_BASE, value);


...


+static
+int sample_io_init(DeviceState *dev)
+{
+SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+SAMPLEIOState *s = SAMPLEIO(dev);
+
+assert(AVR_IO_SIZE <= TARGET_PAGE_SIZE);
+
+s->cpu = AVR_CPU(qemu_get_cpu(0));
+
+memory_region_init_io(
+&s->iomem,
+OBJECT(s),
+&sample_io_ops,
+s,
+TYPE_SAMPLEIO,
+AVR_IO_SIZE);
+sysbus_init_mmio(sbd, &s->iomem);


This is a memory-mapped device, and you can't modify cpu registers with a 
memory write.


You can read cpu registers, since all TCG globals will have been synced back to 
ENV before the memory operation.  But they'll still be live in TB, and so the 
next instruction may not see the value that you just wrote.


If this write is really something that you need to support, then you'll have to 
figure out some other way of implementing it.


My first guess at a workable solution is to keep the page for this memory 
device read-only.  Detect writes via tlb_fill and throw a special exception. 
In do_interrupt, either handle the write (probably tricky); or set a flag in 
ENV, which in turn sets a flag in TB, which in turn tells translate to use a 
function (helper_writeb?) instead of using tcg_gen_qemu_st_tl.  That helper 
would be marked such that modifications to TCG registers is expected.  Then 
clear the flag in ENV so that subsequent stores use the normal path.



r~

Re: [Qemu-devel] [RFC v2 PATCH 01/13] Introduce TCGOpcode for memory barrier

2016-06-06 Thread Sergey Fedorov

On 06/06/16 22:28, Pranith Kumar wrote:
> On Mon, Jun 6, 2016 at 3:23 PM, Richard Henderson  wrote:
>> On 06/06/2016 10:11 AM, Pranith Kumar wrote:
>>> If I read it correctly TCG_BAR_SYNC is equivalent to OR of all the
>>> other four barriers. I am not sure if we can just construct SYNC like
>>> this or if we need to define it explicitly though.
>>
>> AFAICS, sparc membar #sync is stronger.
> I tried looking it up but it's not clear. How is it stronger? And do
> we need those strong guarantees in our front-end/back-end?

That is not clear for me either :( AFAIU, PPC's lwsync does allow stores
to be reordered after loads but hwsync - not. I suspect Sparc's membar
#Sync is used to ensure that some system operations are complete before
proceeding with execution. I'm not sure we need to introduce this into
TCG. It needs to be clear what is it and how to use it.

Kind regards,
Sergey

[Qemu-devel] [PULL v2 0/9] Misc changes for 2016-06-06

2016-06-06 Thread Paolo Bonzini

The following changes since commit 76462405809d29bab65a3699686998ba124ab942:

  Merge remote-tracking branch 
'remotes/pmaydell/tags/pull-target-arm-20160606-1' into staging (2016-06-06 
17:02:42 +0100)

are available in the git repository at:

  git://github.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to 379b954bf85e3d05e6323def1736d3bdea9b2a5c:

  vnc: list the 'to' parameter of '-vnc' in the qemu man page (2016-06-06 
19:00:25 +0200)


* max-ram-below-4g improvement (Gerd)
* escc fix (xiaoqiang)
* ESP fix (Prasad)
* scsi-disk tweaks/fix (me)
* PKGVERSION improvement (Fam)
* -vnc man improvement (Robert)


Fam Zheng (3):
  tests: Rename tests/Makefile to tests/Makefile.include
  Makefile: Add a "FORCE" target
  Makefile: Derive "PKGVERSION" from "git describe" by default

Gerd Hoffmann (1):
  pc: allow raising low memory via max-ram-below-4g option

Paolo Bonzini (2):
  scsi: mark TYPE_SCSI_DISK_BASE as abstract
  scsi-disk: add missing break

Prasad J Pandit (1):
  scsi: esp: check TI buffer index before read/write

Robert Ho (1):
  vnc: list the 'to' parameter of '-vnc' in the qemu man page

xiaoqiang zhao (1):
  hw/char: QOM'ify escc.c (fix)

 Makefile | 26 +--
 hw/char/escc.c   | 12 ---
 hw/i386/pc.c |  2 +-
 hw/i386/pc_piix.c| 61 +++-
 hw/scsi/esp.c| 20 ++--
 hw/scsi/scsi-disk.c  |  2 ++
 linux-user/main.c|  1 +
 qemu-img.c   |  1 +
 qemu-options.hx  |  7 +
 qmp.c|  1 +
 scripts/create_config|  4 ---
 tests/{Makefile => Makefile.include} |  0
 vl.c |  1 +
 13 files changed, 92 insertions(+), 46 deletions(-)
 rename tests/{Makefile => Makefile.include} (100%)
-- 
2.5.5

Re: [Qemu-devel] [PATCH v4 2/9] target-avr: adding AVR CPU features/flavors


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

@@ -55,12 +55,14 @@ static void avr_cpu_reset(CPUState *s)
 AVRCPU *cpu = AVR_CPU(s);
 AVRCPUClass *mcc = AVR_CPU_GET_CLASS(cpu);
 CPUAVRState *env = &cpu->env;
+uint32_t features = env->features;

 mcc->parent_reset(s);

 memset(env, 0, sizeof(CPUAVRState));
 env->pc = 0;
 env->sregI = 1;
+env->features = features;


Didn't fix the memset issue I pointed out.


+}
+static void avr_avr6_initfn(Object *obj)


Spacing.


+}
+static void avr_xmega2_initfn(Object *obj)


Spacing.


+}
+static void avr_xmega4_initfn(Object *obj)


Spacing.


+}
+static void avr_xmega5_initfn(Object *obj)


Spacing.


+}
+static void avr_xmega7_initfn(Object *obj)


Spacing.


+static inline void  avr_del_feature(
+CPUAVRState*env,
+int feature)
+{
+env->features   &= ~(1Ul << feature);
+}


Dead code.


r~

Re: [Qemu-devel] [PATCH v4 0/9] 8bit AVR cores


On 06/06/2016 12:49 PM, Michael Rolnik wrote:

please advise.
I have the following warning. but all print_instn_XXX functions are declared
this way.
so, I am kind of confused. I can fix it, but the file won't look the same.

WARNING: space prohibited between function name and open parenthesis '('
#132: FILE: include/disas/bfd.h:424:
+int print_insn_avr  (bfd_vma, disassemble_info*);




You can simply drop this declaration, since you don't have a printer.

Recall that I *do* want you to fall back to the default print_insn_od_target. 
One can then use ./script/disas-objdump.pl to get proper disassembly.



r~

Re: [Qemu-devel] [PATCH v4 1/9] target-avr: AVR cores support is added. 1. basic CPU structure 2. registers 3. no instructions


On 06/06/2016 03:37 AM, Michael Rolnik wrote:

+int print_insn_avr(bfd_vma addr, disassemble_info *info)
+{
+int length  = 0;;
+/*  TODO*/
+return length;
+}


Again, delete this file.  This prohibits the default implementation from 
working.


+static void avr_cpu_reset(CPUState *s)
+{
+AVRCPU *cpu = AVR_CPU(s);
+AVRCPUClass *mcc = AVR_CPU_GET_CLASS(cpu);
+CPUAVRState *env = &cpu->env;
+
+mcc->parent_reset(s);
+
+memset(env, 0, sizeof(CPUAVRState));


You didn't fix the memset issue I pointed out.


+}
+AVRCPU *cpu_avr_init(const char *cpu_model)


Spacing.


+/*
+NOTE:   all registers are assumed to hold 8 bit values.
+so all operations done on registers should preseve this property
+*/
+
+/*
+NOTE:   the flags C,H,V,N,V have either 0 or 1 values
+NOTE:   the flag Z has inverse logic, when the value of Zf is 0 the flag 
is assumed to be set, non zero - not set
+*/


Don't put these at the top, put them next to the declarations of the actual 
variables, where they're useful.



+#define TARGET_IS_LIENDIAN 1


Didn't fix the typo, or really just delete this.

In case you're wondering, this *doesn't* mean little-endian.

TARGET_IS_BIENDIAN means that the target changes between big- and little-endian 
at runtime.


One selects the endian-ness of the target in configure.  The default is little; 
set target_bigendian to select big.




+uint32_tsregC;  /*   0x0001 1 bits */
+uint32_tsregZ;  /*   0x00ff 8 bits */


This isn't quite true; sregZ may contain 0x after adwi.


+static inline int cpu_mmu_index(CPUAVRState *env, bool ifetch)
+{
+return  0;
+}


This should be

  return ifetch ? CODE_INDEX : DATA_INDEX;


+sreg =   (env->sregC & 0x01) << 0
+|   (env->sregZ & 0x01) << 1
+|   (env->sregN & 0x01) << 2
+|   (env->sregV & 0x01) << 3
+|   (env->sregS & 0x01) << 4
+|   (env->sregH & 0x01) << 5
+|   (env->sregT & 0x01) << 6
+|   (env->sregI & 0x01) << 7;


Didn't add the function I requested.
Plus, there's an error in handling of sregZ.


+env->sregC = (tmp >> 0) & 0x01;
+env->sregZ = (tmp >> 1) & 0x01;
+env->sregN = (tmp >> 2) & 0x01;
+env->sregV = (tmp >> 3) & 0x01;
+env->sregS = (tmp >> 4) & 0x01;
+env->sregH = (tmp >> 5) & 0x01;
+env->sregT = (tmp >> 6) & 0x01;
+env->sregI = (tmp >> 7) & 0x01;


Likewise.


+VMSTATE_UINT32_ARRAY(r, CPUAVRState, 32),
+
+VMSTATE_UINT32(sregC,   CPUAVRState),
+VMSTATE_UINT32(sregZ,   CPUAVRState),
+VMSTATE_UINT32(sregN,   CPUAVRState),
+VMSTATE_UINT32(sregV,   CPUAVRState),
+VMSTATE_UINT32(sregS,   CPUAVRState),
+VMSTATE_UINT32(sregH,   CPUAVRState),
+VMSTATE_UINT32(sregT,   CPUAVRState),
+VMSTATE_UINT32(sregI,   CPUAVRState),
+
+VMSTATE_UINT32(rampD,   CPUAVRState),
+VMSTATE_UINT32(rampX,   CPUAVRState),
+VMSTATE_UINT32(rampY,   CPUAVRState),
+VMSTATE_UINT32(rampZ,   CPUAVRState),
+
+VMSTATE_UINT32(eind,CPUAVRState),
+VMSTATE_UINT32(sp,  CPUAVRState),
+VMSTATE_UINT32(pc,  CPUAVRState),


Consider making the vm save state reflect the actual hardware format.  That way 
you can change the qemu internal format while retaining migration compatibility.



+cpu_fprintf(f, "rampX: %02x\n", env->rampX);
+cpu_fprintf(f, "rampY: %02x\n", env->rampY);
+cpu_fprintf(f, "rampZ: %02x\n", env->rampZ);
+cpu_fprintf(f, "rampD: %02x\n", env->rampD);
+cpu_fprintf(f, "EIND:  %02x\n", env->eind);


This format probably isn't what you intended after shifting these values.


r~

Re: [Qemu-devel] [PATCH v4 0/9] 8bit AVR cores


On 06/06/2016 12:53 PM, Michael Rolnik wrote:

WARNING: architecture specific defines should be avoided
#574: FILE: target-avr/cpu.h:32:
+#if !defined(__CPU_AVR_H__)

it was not my invention, I took it from either target-alpha or target-ppc.


Let's not introduce any more mistakes.  Follow target-arm/cpu.h.


r~

[Qemu-devel] [PATCH v2 5/6] target-i386: Use "-" instead of "_" on all feature names

This makes the feature name tables in feature_word_info all match
the actual QOM property names we use.

This will make the command-line interface more consistent,
allowing the QOM property names to be used as "-cpu" arguments
directly.

Add extra feat2prop() calls to x86_cpu_parse_featurestr() to keep
compatibility with the old that had underscores.

Cc: Jiri Denemark 
Cc: libvir-l...@redhat.com
Signed-off-by: Eduardo Habkost 
---
 target-i386/cpu.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index f455d3d..b8519ba 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -189,7 +189,7 @@ static const char *feature_name[] = {
 };
 static const char *ext_feature_name[] = {
 "pni|sse3" /* Intel,AMD sse3 */, "pclmulqdq|pclmuldq", "dtes64", "monitor",
-"ds_cpl", "vmx", "smx", "est",
+"ds-cpl", "vmx", "smx", "est",
 "tm2", "ssse3", "cid", NULL,
 "fma", "cx16", "xtpr", "pdcm",
 NULL, "pcid", "dca", "sse4.1|sse4_1",
@@ -209,17 +209,17 @@ static const char *ext2_feature_name[] = {
 NULL /* mtrr */, NULL /* pge */, NULL /* mca */, NULL /* cmov */,
 NULL /* pat */, NULL /* pse36 */, NULL, NULL /* Linux mp */,
 "nx|xd", NULL, "mmxext", NULL /* mmx */,
-NULL /* fxsr */, "fxsr_opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp",
+NULL /* fxsr */, "fxsr-opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp",
 NULL, "lm|i64", "3dnowext", "3dnow",
 };
 static const char *ext3_feature_name[] = {
-"lahf_lm" /* AMD LahfSahf */, "cmp_legacy", "svm", "extapic" /* AMD 
ExtApicSpace */,
+"lahf-lm" /* AMD LahfSahf */, "cmp-legacy", "svm", "extapic" /* AMD 
ExtApicSpace */,
 "cr8legacy" /* AMD AltMovCr8 */, "abm", "sse4a", "misalignsse",
 "3dnowprefetch", "osvw", "ibs", "xop",
 "skinit", "wdt", NULL, "lwp",
-"fma4", "tce", NULL, "nodeid_msr",
-NULL, "tbm", "topoext", "perfctr_core",
-"perfctr_nb", NULL, NULL, NULL,
+"fma4", "tce", NULL, "nodeid-msr",
+NULL, "tbm", "topoext", "perfctr-core",
+"perfctr-nb", NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
 };
 
@@ -235,8 +235,8 @@ static const char *ext4_feature_name[] = {
 };
 
 static const char *kvm_feature_name[] = {
-"kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock",
-"kvm_asyncpf", "kvm_steal_time", "kvm_pv_eoi", "kvm_pv_unhalt",
+"kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock",
+"kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt",
 NULL, NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
@@ -246,9 +246,9 @@ static const char *kvm_feature_name[] = {
 };
 
 static const char *svm_feature_name[] = {
-"npt", "lbrv", "svm_lock", "nrip_save",
-"tsc_scale", "vmcb_clean",  "flushbyasid", "decodeassists",
-NULL, NULL, "pause_filter", NULL,
+"npt", "lbrv", "svm-lock", "nrip-save",
+"tsc-scale", "vmcb-clean",  "flushbyasid", "decodeassists",
+NULL, NULL, "pause-filter", NULL,
 "pfthreshold", NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
@@ -257,7 +257,7 @@ static const char *svm_feature_name[] = {
 };
 
 static const char *cpuid_7_0_ebx_feature_name[] = {
-"fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep",
+"fsgsbase", "tsc-adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep",
 "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL,
 "avx512f", NULL, "rdseed", "adx", "smap", NULL, "pcommit", "clflushopt",
 "clwb", NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL,
@@ -1941,8 +1941,8 @@ static PropertyInfo qdev_prop_spinlocks = {
 .set   = x86_set_hv_spinlocks,
 };
 
-/* Convert all '_' in a feature string option name to '-', to make feature
- * name conform to QOM property naming rule, which uses '-' instead of '_'.
+/* Convert all '_' in a feature string option name to '-', to keep 
compatibility
+ * with old feature names that used "_" instead of "-".
  */
 static inline void feat2prop(char *s)
 {
@@ -1971,8 +1971,10 @@ static void x86_cpu_parse_featurestr(CPUState *cs, char 
*features,
 while (featurestr) {
 char *val;
 if (featurestr[0] == '+') {
+feat2prop(featurestr);
 add_flagname_to_bitmaps(featurestr + 1, plus_features, &local_err);
 } else if (featurestr[0] == '-') {
+feat2prop(featurestr);
 add_flagname_to_bitmaps(featurestr + 1, minus_features, 
&local_err);
 } else if ((val = strchr(featurestr, '='))) {
 *val = 0; val++;
@@ -3180,11 +3182,9 @@ static void x86_cpu_register_feature_bit_props(X86CPU 
*cpu,
 
 names = g_strsplit(fi->feat_names[bitnr], "|", 0);
 
-feat2prop(names[0]);
 x86_cpu_register_bit_prop(cpu, names[0], &cpu->env.features[w], bitnr);
 
 for (i = 1; names[i]; i++) {
-feat2prop(names[i]);
 object_property_add_alias(obj, names[i], obj, names[0],
   &error_

[Qemu-devel] [PATCH v2 3/6] target-i386: Define CPUID filtering functions before x86_cpu_list()

Just move code to another place so the it can be reused by the
query-cpu-definitions code.

Signed-off-by: Eduardo Habkost 
---
 target-i386/cpu.c | 68 +++
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index bd815f3..f455d3d 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2018,6 +2018,40 @@ static void x86_cpu_parse_featurestr(CPUState *cs, char 
*features,
 }
 }
 
+/*
+ * Filters CPU feature words based on host availability of each feature.
+ *
+ * Returns: 0 if all flags are supported by the host, non-zero otherwise.
+ */
+static int x86_cpu_filter_features(X86CPU *cpu)
+{
+CPUX86State *env = &cpu->env;
+FeatureWord w;
+int rv = 0;
+
+for (w = 0; w < FEATURE_WORDS; w++) {
+uint32_t host_feat =
+x86_cpu_get_supported_feature_word(w, cpu->migratable);
+uint32_t requested_features = env->features[w];
+env->features[w] &= host_feat;
+cpu->filtered_features[w] = requested_features & ~env->features[w];
+if (cpu->filtered_features[w]) {
+rv = 1;
+}
+}
+
+return rv;
+}
+
+static void x86_cpu_report_filtered_features(X86CPU *cpu)
+{
+FeatureWord w;
+
+for (w = 0; w < FEATURE_WORDS; w++) {
+report_unavailable_features(w, cpu->filtered_features[w]);
+}
+}
+
 /* Print all cpuid feature names in featureset
  */
 static void listflags(FILE *f, fprintf_function print, const char **featureset)
@@ -2147,40 +2181,6 @@ static uint32_t 
x86_cpu_get_supported_feature_word(FeatureWord w,
 return r;
 }
 
-/*
- * Filters CPU feature words based on host availability of each feature.
- *
- * Returns: 0 if all flags are supported by the host, non-zero otherwise.
- */
-static int x86_cpu_filter_features(X86CPU *cpu)
-{
-CPUX86State *env = &cpu->env;
-FeatureWord w;
-int rv = 0;
-
-for (w = 0; w < FEATURE_WORDS; w++) {
-uint32_t host_feat =
-x86_cpu_get_supported_feature_word(w, cpu->migratable);
-uint32_t requested_features = env->features[w];
-env->features[w] &= host_feat;
-cpu->filtered_features[w] = requested_features & ~env->features[w];
-if (cpu->filtered_features[w]) {
-rv = 1;
-}
-}
-
-return rv;
-}
-
-static void x86_cpu_report_filtered_features(X86CPU *cpu)
-{
-FeatureWord w;
-
-for (w = 0; w < FEATURE_WORDS; w++) {
-report_unavailable_features(w, cpu->filtered_features[w]);
-}
-}
-
 static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props)
 {
 PropValue *pv;
-- 
2.5.5

[Qemu-devel] [PATCH v2 4/6] qmp: Add runnability information to query-cpu-definitions

Extend query-cpu-definitions schema to allow it to return two new
optional fields: "runnable" and "unavailable-features".
"runnable" will tell if the CPU model can be run in the current
host. "unavailable-features" will contain a list of CPU
properties that are preventing the CPU model from running in the
current host.

Cc: David Hildenbrand 
Cc: Michael Mueller 
Cc: Christian Borntraeger 
Cc: Cornelia Huck 
Cc: Jiri Denemark 
Cc: libvir-l...@redhat.com
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Remove @runnable field, non-empty @unavailable-features is
  enough to report CPU model as not runnable.
* Documentation updates:
  * Changed to "(since 2.7)";
  * Add more details about the exact meaning of
unavailable-features, and what it would mean to see
read-only QOM properties in the list, and that
implementations can return "type" if there's
no extra information available;
---
 qapi-schema.json | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 8483bdf..43478e9 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3005,11 +3005,32 @@
 # Virtual CPU definition.
 #
 # @name: the name of the CPU definition
+# @unavailable-features: #optional List of properties that prevent
+#the CPU model from running in the current
+#host. (since 2.7)
+#
+# @unavailable-features is a list of QOM property names that
+# represent CPU model attributes that prevent the CPU from running.
+# If the QOM property is read-only, that means there's no known
+# way to make the CPU model run in the current host. If
+# absolutely no extra information will be returned to explain why
+# the CPU model is not runnable, implementations may simply
+# return "type" as the property name.
+# If the property is read-write, it means that it MAY be possible
+# to run the CPU model in the current host if that property is
+# changed. Management software can use it as hints to suggest or
+# choose an alternative for the user, or just to generate meaningful
+# error messages explaining why the CPU model can't be used.
+# If @unavailable-features is an empty list, the CPU model is
+# runnable using the current host and machine-type.
+# If @unavailable-features is not present, runnability
+# information for the CPU is not available.
 #
 # Since: 1.2.0
 ##
 { 'struct': 'CpuDefinitionInfo',
-  'data': { 'name': 'str' } }
+  'data': { 'name': 'str',
+'*unavailable-features': [ 'str' ] } }
 
 ##
 # @query-cpu-definitions:
-- 
2.5.5

[Qemu-devel] [PATCH v2 6/6] target-i386: Return runnability information on query-cpu-definitions

Fill the "unavailable-features" field on the x86 implementation
of query-cpu-definitions.

Cc: Jiri Denemark 
Cc: libvir-l...@redhat.com
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Updated to the new schema: no @runnable field, and
  always report @unavailable-features as present
---
 target-i386/cpu.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index b8519ba..65d1ffc 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2054,6 +2054,45 @@ static void x86_cpu_report_filtered_features(X86CPU *cpu)
 }
 }
 
+/* Check for missing features that may prevent the CPU class from
+ * running using the current machine and accelerator.
+ */
+static void x86_cpu_class_check_missing_features(X86CPUClass *xcc,
+ strList **missing_feats)
+{
+X86CPU *xc;
+FeatureWord w;
+
+if (xcc->kvm_required && !kvm_enabled()) {
+strList *new = g_new0(strList, 1);
+new->value = g_strdup("kvm");;
+*missing_feats = new;
+return;
+}
+
+xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc;
+if (x86_cpu_filter_features(xc)) {
+for (w = 0; w < FEATURE_WORDS; w++) {
+FeatureWordInfo *fi = &feature_word_info[w];
+uint32_t filtered = xc->filtered_features[w];
+int i;
+for (i = 0; i < 32; i++) {
+if (filtered & (1UL << i)) {
+char **parts = g_strsplit(fi->feat_names[i], "|", 2);
+strList *new = g_new0(strList, 1);
+new->value = g_strdup(parts[0]);
+feat2prop(new->value);
+new->next = *missing_feats;
+*missing_feats = new;
+g_strfreev(parts);
+}
+}
+}
+}
+
+object_unref(OBJECT(xc));
+}
+
 /* Print all cpuid feature names in featureset
  */
 static void listflags(FILE *f, fprintf_function print, const char **featureset)
@@ -2146,6 +2185,8 @@ static void x86_cpu_definition_entry(gpointer data, 
gpointer user_data)
 
 info = g_malloc0(sizeof(*info));
 info->name = x86_cpu_class_get_model_name(cc);
+x86_cpu_class_check_missing_features(cc, &info->unavailable_features);
+info->has_unavailable_features = true;
 
 entry = g_malloc0(sizeof(*entry));
 entry->value = info;
-- 
2.5.5

[Qemu-devel] [PATCH v2 2/6] target-i386: Move warning code outside x86_cpu_filter_features()

x86_cpu_filter_features() will be reused by code that shouldn't
print any warning. Move the warning code to a new
x86_cpu_report_filtered_features() function, and call it from
x86_cpu_realizefn().

Signed-off-by: Eduardo Habkost 
---
 target-i386/cpu.c | 28 +++-
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index aaa239a..bd815f3 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2165,9 +2165,6 @@ static int x86_cpu_filter_features(X86CPU *cpu)
 env->features[w] &= host_feat;
 cpu->filtered_features[w] = requested_features & ~env->features[w];
 if (cpu->filtered_features[w]) {
-if (cpu->check_cpuid || cpu->enforce_cpuid) {
-report_unavailable_features(w, cpu->filtered_features[w]);
-}
 rv = 1;
 }
 }
@@ -2175,6 +2172,15 @@ static int x86_cpu_filter_features(X86CPU *cpu)
 return rv;
 }
 
+static void x86_cpu_report_filtered_features(X86CPU *cpu)
+{
+FeatureWord w;
+
+for (w = 0; w < FEATURE_WORDS; w++) {
+report_unavailable_features(w, cpu->filtered_features[w]);
+}
+}
+
 static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props)
 {
 PropValue *pv;
@@ -2979,12 +2985,16 @@ static void x86_cpu_realizefn(DeviceState *dev, Error 
**errp)
 env->cpuid_level = 7;
 }
 
-if (x86_cpu_filter_features(cpu) && cpu->enforce_cpuid) {
-error_setg(&local_err,
-   kvm_enabled() ?
-   "Host doesn't support requested features" :
-   "TCG doesn't support requested features");
-goto out;
+if (x86_cpu_filter_features(cpu) &&
+(cpu->check_cpuid || cpu->enforce_cpuid)) {
+x86_cpu_report_filtered_features(cpu);
+if (cpu->enforce_cpuid) {
+error_setg(&local_err,
+   kvm_enabled() ?
+   "Host doesn't support requested features" :
+   "TCG doesn't support requested features");
+goto out;
+}
 }
 
 /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on
-- 
2.5.5

[Qemu-devel] [PATCH v2 1/6] target-i386: List CPU models using subclass list

Instead of using the builtin_x86_defs array, use the QOM subclass list
to list CPU models on "-cpu ?" and "query-cpu-definitions".

Signed-off-by: Andreas Färber 
[ehabkost: copied code from a patch by Andreas:
 "target-i386: QOM'ify CPU", from March 2012]
Signed-off-by: Eduardo Habkost 
---
 target-i386/cpu-qom.h |   4 ++
 target-i386/cpu.c | 111 +-
 2 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index 5dde658..e724004 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -63,6 +63,10 @@ typedef struct X86CPUClass {
 
 bool kvm_required;
 
+/* Optional description of CPU model.
+ * If unavailable, cpu_def->model_id is used */
+const char *model_description;
+
 DeviceRealize parent_realize;
 void (*parent_reset)(CPUState *cpu);
 } X86CPUClass;
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index a62d731..aaa239a 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -678,6 +678,14 @@ static ObjectClass *x86_cpu_class_by_name(const char 
*cpu_model)
 return oc;
 }
 
+static char *x86_cpu_class_get_model_name(X86CPUClass *cc)
+{
+const char *class_name = object_class_get_name(OBJECT_CLASS(cc));
+assert(g_str_has_suffix(class_name, X86_CPU_TYPE_SUFFIX));
+return g_strndup(class_name,
+ strlen(class_name) - strlen(X86_CPU_TYPE_SUFFIX));
+}
+
 struct X86CPUDefinition {
 const char *name;
 uint32_t level;
@@ -1531,6 +1539,9 @@ static void host_x86_cpu_class_init(ObjectClass *oc, void 
*data)
 cpu_x86_fill_model_id(host_cpudef.model_id);
 
 xcc->cpu_def = &host_cpudef;
+xcc->model_description =
+"KVM processor with all supported host features "
+"(only available in KVM mode)";
 
 /* level, xlevel, xlevel2, and the feature words are initialized on
  * instance_init, because they require KVM to be initialized.
@@ -2022,23 +2033,62 @@ static void listflags(FILE *f, fprintf_function print, 
const char **featureset)
 }
 }
 
-/* generate CPU information. */
+/* Sort alphabetically by type name, listing kvm_required models last. */
+static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b)
+{
+ObjectClass *class_a = (ObjectClass *)a;
+ObjectClass *class_b = (ObjectClass *)b;
+X86CPUClass *cc_a = X86_CPU_CLASS(class_a);
+X86CPUClass *cc_b = X86_CPU_CLASS(class_b);
+const char *name_a, *name_b;
+
+if (cc_a->kvm_required != cc_b->kvm_required) {
+/* kvm_required items go last */
+return cc_a->kvm_required ? 1 : -1;
+} else {
+name_a = object_class_get_name(class_a);
+name_b = object_class_get_name(class_b);
+return strcmp(name_a, name_b);
+}
+}
+
+static GSList *get_sorted_cpu_model_list(void)
+{
+GSList *list = object_class_get_list(TYPE_X86_CPU, false);
+list = g_slist_sort(list, x86_cpu_list_compare);
+return list;
+}
+
+static void x86_cpu_list_entry(gpointer data, gpointer user_data)
+{
+ObjectClass *oc = data;
+X86CPUClass *cc = X86_CPU_CLASS(oc);
+CPUListState *s = user_data;
+char *name = x86_cpu_class_get_model_name(cc);
+const char *desc = cc->model_description;
+if (!desc) {
+desc = cc->cpu_def->model_id;
+}
+
+(*s->cpu_fprintf)(s->file, "x86 %16s  %-48s\n",
+  name, desc);
+g_free(name);
+}
+
+/* list available CPU models and flags */
 void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 {
-X86CPUDefinition *def;
-char buf[256];
 int i;
+CPUListState s = {
+.file = f,
+.cpu_fprintf = cpu_fprintf,
+};
+GSList *list;
 
-for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
-def = &builtin_x86_defs[i];
-snprintf(buf, sizeof(buf), "%s", def->name);
-(*cpu_fprintf)(f, "x86 %16s  %-48s\n", buf, def->model_id);
-}
-#ifdef CONFIG_KVM
-(*cpu_fprintf)(f, "x86 %16s  %-48s\n", "host",
-   "KVM processor with all supported host features "
-   "(only available in KVM mode)");
-#endif
+(*cpu_fprintf)(f, "Available CPUs:\n");
+list = get_sorted_cpu_model_list();
+g_slist_foreach(list, x86_cpu_list_entry, &s);
+g_slist_free(list);
 
 (*cpu_fprintf)(f, "\nRecognized CPUID flags:\n");
 for (i = 0; i < ARRAY_SIZE(feature_word_info); i++) {
@@ -2050,26 +2100,29 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 }
 }
 
-CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
+static void x86_cpu_definition_entry(gpointer data, gpointer user_data)
 {
-CpuDefinitionInfoList *cpu_list = NULL;
-X86CPUDefinition *def;
-int i;
-
-for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
-CpuDefinitionInfoList *entry;
-CpuDefinitionInfo *info;
+ObjectClass *oc = data;
+X86CPUClass *cc = X86_CPU_CLASS(oc);
+CpuDefinitionInfoList **cpu_list = user_data;
+

[Qemu-devel] [PATCH v2 0/6] Add runnability info to query-cpu-definitions