date:20131112

Re: [Qemu-devel] [PATCH] qemu-img create: set nocow flag by default

2013-11-12 Thread Chunyan Liu

2013/11/12 Fam Zheng f...@redhat.com

 On 2013年11月07日 16:08, Chunyan Liu wrote:

 Set NOCOW flag to newly created images to solve performance issues on
 btrfs.

 Btrfs has terrible performance when hosting VM images, even more when the
 guest
 in those VM are also using btrfs as file system. One way to mitigate this
 bad
 performance is to turn off COW attributes on VM files (since having copy
 on
 write for this kind of data is not useful).

 Signed-off-by: Chunyan Liu cy...@suse.com
 ---
   qemu-img.c |   15 +++
   1 files changed, 15 insertions(+), 0 deletions(-)

 diff --git a/qemu-img.c b/qemu-img.c
 index bf3fb4f..d43e8f1 100644
 --- a/qemu-img.c
 +++ b/qemu-img.c
 @@ -34,11 +34,17 @@
   #include getopt.h
   #include stdio.h
   #include stdarg.h
 +#include linux/fs.h
 +#include sys/ioctl.h


 This should be in #ifdef to not break build on Windows.


#ifdef _WIN32
   #include windows.h
   #endif

 +#ifndef FS_NOCOW_FL
 +#define FS_NOCOW_FL 0x0080 /* Do not cow file */
 +#endif
 +
   typedef struct img_cmd_t {
   const char *name;
   int (*handler)(int argc, char **argv);
 @@ -340,6 +346,7 @@ static int img_create(int argc, char **argv)
   char *options = NULL;
   Error *local_err = NULL;
   bool quiet = false;
 +int fd, attr;

   for(;;) {
   c = getopt(argc, argv, F:b:f:he6o:q);
 @@ -417,6 +424,14 @@ static int img_create(int argc, char **argv)
   return 1;
   }

 +/* set NOCOW by default to solve performance issue on btrfs */
 +fd = qemu_open(filename, O_RDONLY|O_NONBLOCK);
 +if (fd = 0) {
 +attr = FS_NOCOW_FL;
 +ioctl(fd, FS_IOC_SETFLAGS, attr);
 +qemu_close(fd);
 +}
 +
   return 0;
   }


  man chattr says:
 ... For btrfs, the 'C' flag should be set on new or empty files.  If it is
 set on a file which already has data blocks, it is undefined when the
 blocks assigned to the file will be fully stable...

 But you are setting the attr after image creation, so what's the
 difference here? just want to make sure this does what's expected.


I expected such question. Following the man page, I should add changes in
each related block driver's drv_create, that seems too much changes. Just
to make the change as little as possible, I asked in #btrfs irc about the
reason why must empty and if it affects in our case. Maybe I misunderstand
the reply, but now I check btrfs/ioctl.c, I should say I made a mistake
here. If the file size is not 0, the COW flag won't be removed. Sorry!
Still need to change in block driver's drv_create function. I'll revise.
Thanks for correction.



 Thanks,
 Fam

Re: [Qemu-devel] audit needed for signal handlers

2013-11-12 Thread Gerd Hoffmann

On Mo, 2013-11-11 at 18:47 +0100, Paolo Bonzini wrote:
 Il 11/11/2013 18:13, Peter Maydell ha scritto:
   That said, aren't all signals in QEMU (except SIG_IPI) caught with
   signalfd and the handlers run synchronously in the iothread?
  Eric specifically points out one which is not.
  (I'm pretty sure that 'reinstall signal handler at
  end of signal handler' is ancient voodoo that we don't
  want either, incidentally.)
 
 Yeah, I was convinced it was---I still cannot find a reason why SIGWINCH
 needs to be handled synchronously.

There is zero need.  And changing that is actually the correct fix IMHO:
Just set a flag in the signal handler (i.e. no syscalls which then could
corrupt errno), then handle it the next time we update the screen.

cheers,
  Gerd

[Qemu-devel] [PATCH] curses: fixup SIGWINCH handler mess

2013-11-12 Thread Gerd Hoffmann

Don't run code in the signal handler, only set a flag.
Use sigaction(2) to avoid non-portable signal(2) semantics.
Make #ifdefs less messy.

Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 ui/curses.c | 44 
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/ui/curses.c b/ui/curses.c
index 289a955..c33acff 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -106,9 +106,9 @@ static void curses_resize(DisplayChangeListener *dcl,
 curses_calc_pad();
 }
 
-#ifndef _WIN32
-#if defined(SIGWINCH)  defined(KEY_RESIZE)
-static void curses_winch_handler(int signum)
+#if !defined(_WIN32)  defined(SIGWINCH)  defined(KEY_RESIZE)
+static bool got_sigwinch;
+static void curses_winch_check(void)
 {
 struct winsize {
 unsigned short ws_row;
@@ -117,18 +117,34 @@ static void curses_winch_handler(int signum)
 unsigned short ws_ypixel;   /* unused */
 } ws;
 
-/* terminal size changed */
-if (ioctl(1, TIOCGWINSZ, ws) == -1)
+if (!got_sigwinch) {
+return;
+}
+got_sigwinch = false;
+
+if (ioctl(1, TIOCGWINSZ, ws) == -1) {
 return;
+}
 
 resize_term(ws.ws_row, ws.ws_col);
-curses_calc_pad();
 invalidate = 1;
+}
 
-/* some systems require this */
-signal(SIGWINCH, curses_winch_handler);
+static void curses_winch_handler(int signum)
+{
+got_sigwinch = true;
 }
-#endif
+
+static void curses_winch_init(void)
+{
+struct sigaction old, winch = {
+.sa_handler  = curses_winch_handler,
+};
+sigaction(SIGWINCH, winch, old);
+}
+#else
+static void curses_winch_check(void) {}
+static void curses_winch_init(void) {}
 #endif
 
 static void curses_cursor_position(DisplayChangeListener *dcl,
@@ -163,6 +179,8 @@ static void curses_refresh(DisplayChangeListener *dcl)
 {
 int chr, nextchr, keysym, keycode, keycode_alt;
 
+curses_winch_check();
+
 if (invalidate) {
 clear();
 refresh();
@@ -349,13 +367,7 @@ void curses_display_init(DisplayState *ds, int full_screen)
 curses_keyboard_setup();
 atexit(curses_atexit);
 
-#ifndef _WIN32
-#if defined(SIGWINCH)  defined(KEY_RESIZE)
-/* some curses implementations provide a handler, but we
- * want to be sure this is handled regardless of the library */
-signal(SIGWINCH, curses_winch_handler);
-#endif
-#endif
+curses_winch_init();
 
 dcl = (DisplayChangeListener *) g_malloc0(sizeof(DisplayChangeListener));
 dcl-ops = dcl_ops;
-- 
1.8.3.1

Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard

2013-11-12 Thread Peter Lieven


On 11.11.2013 14:20, Kevin Wolf wrote:

Am 24.10.2013 um 12:06 hat Peter Lieven geschrieben:

Reviewed-by: Eric Blake ebl...@redhat.com
Signed-off-by: Peter Lieven p...@kamp.de
---
  block.c |   37 -
  1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 0c0b0ac..b28dd42 100644
--- a/block.c
+++ b/block.c
@@ -4234,6 +4234,11 @@ static void coroutine_fn bdrv_discard_co_entry(void 
*opaque)
  rwco-ret = bdrv_co_discard(rwco-bs, rwco-sector_num, rwco-nb_sectors);
  }
  
+/* if no limit is specified in the BlockLimits use a default

+ * of 32768 512-byte sectors (16 MiB) per request.
+ */
+#define MAX_DISCARD_DEFAULT 32768
+
  int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
   int nb_sectors)
  {
@@ -4255,7 +4260,37 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, 
int64_t sector_num,
  }
  
  if (bs-drv-bdrv_co_discard) {

-return bs-drv-bdrv_co_discard(bs, sector_num, nb_sectors);
+int max_discard = bs-bl.max_discard ?
+  bs-bl.max_discard : MAX_DISCARD_DEFAULT;
+
+while (nb_sectors  0) {
+int ret;
+int num = nb_sectors;
+
+/* align request */
+if (bs-bl.discard_alignment 
+num = bs-bl.discard_alignment 
+sector_num % bs-bl.discard_alignment) {
+if (num  bs-bl.discard_alignment) {
+num = bs-bl.discard_alignment;
+}
+num -= sector_num % bs-bl.discard_alignment;
+}
+
+/* limit request size */
+if (num  max_discard) {
+num = max_discard;
+}
+
+ret = bs-drv-bdrv_co_discard(bs, sector_num, num);
+if (ret) {
+return ret;
+}
+
+sector_num += num;
+nb_sectors -= num;
+}
+return 0;
  } else if (bs-drv-bdrv_aio_discard) {
  BlockDriverAIOCB *acb;
  CoroutineIOCompletion co = {

You're only optimising drivers which have a .bdrv_co_discard()
implementation, but not those with .bdrv_aio_discard(). Not very nice,
and it would have been easy to avoid this by putting the loop around the
whole if statement instead of inside the 'then' branch.

Good point. I wonder noone noticed before ;-)

Do you want me to respin or is ok to send a follow up patch?
Stefan has it already in block-next. This patch doesn't make
the situation worse and we need follow up patches for
all the drivers to supply alignment information anyway.

Peter

Re: [Qemu-devel] [PATCH] qga: Fix shutdown command of guest agent to work with SysV

2013-11-12 Thread whitearchey


What about patch? Will it be applied? Or should it be rewritten?

On Thu, 07 Nov 2013 11:45:25 +0900, whitearchey whitearc...@gmail.com wrote:


On Wed, 06 Nov 2013 18:20:36 +0900, Michael Tokarev m...@tls.msk.ru wrote:


06.11.2013 05:54, whitearchey wrote:

For now guest agent uses following command to shutdown system:
shutdown -P +0 blabla
but this syntax works only with shutdown command from systemd or upstart,
because SysV shutdown requires -h switch.

Following patch changes the command so it works with systemd, upstart and SysV


While the patch is one-liner indeed, it is a bit more than trivial.
Because it changes things in a non-obvious way, especially when
multiple systems are concerned.  Cc'ing qemu-devel@ due to this.


--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
-execle(/sbin/shutdown, shutdown, shutdown_flag, +0,
+execle(/sbin/shutdown, shutdown, -h, shutdown_flag, +0,


Note that shutdown command is not in POSIX, despite the fact
that this is put into commands-posix.c

Note also that even shutdown from sysvinit on linux has another
option, -P, which mean poweroff, while -h means halt OR poweroff
(the latter is a bit unclear).

With upstart/systemd qga use one of thee commands, depending on 'mode' 
parameter:
   shutdown -P +0 ...
   shutdown -H +0 ...
   shutdown -r +0 ...
SysV equivalents for these are:
   shutdown -h -P +0 ...
   shutdown -h -H +0 ...
   shutdown -h -r +0 ...
and these retain their meaning with upstart/systemd.



Does the same work on other *nixes?  *BSD?  Solaris?


No, it doesn't. But existing code also does not work.
According to FreeBSD manpages, shutdown does not accept -P and -H options. 
Commands should be:
   shutdown -p +0 ...
   shutdown -h +0 ...
   shutdown -r +0 ...

shutdown in Solaris does not accept any of -hHpPr and does not accept time in 
+0 format


Thanks,

/mjt



--
Using Opera's revolutionary email client: http://www.opera.com/mail/

Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 09:53, Peter Lieven ha scritto:
 Good point. I wonder noone noticed before ;-)
 
 Do you want me to respin or is ok to send a follow up patch?
 Stefan has it already in block-next. This patch doesn't make
 the situation worse and we need follow up patches for
 all the drivers to supply alignment information anyway.

I can do it too, since I'm working on follow-up patches anyway.

Paolo

Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard

2013-11-12 Thread Peter Lieven


On 12.11.2013 10:19, Paolo Bonzini wrote:

Il 12/11/2013 09:53, Peter Lieven ha scritto:

Good point. I wonder noone noticed before ;-)

Do you want me to respin or is ok to send a follow up patch?
Stefan has it already in block-next. This patch doesn't make
the situation worse and we need follow up patches for
all the drivers to supply alignment information anyway.

I can do it too, since I'm working on follow-up patches anyway.

That would be great if Kevin does not insist on a respin.

Thank you.

Re: [Qemu-devel] [PATCHv7 10/17] block: honour BlockLimits in bdrv_co_discard

2013-11-12 Thread Kevin Wolf

Am 12.11.2013 um 10:21 hat Peter Lieven geschrieben:
 On 12.11.2013 10:19, Paolo Bonzini wrote:
 Il 12/11/2013 09:53, Peter Lieven ha scritto:
 Good point. I wonder noone noticed before ;-)
 
 Do you want me to respin or is ok to send a follow up patch?
 Stefan has it already in block-next. This patch doesn't make
 the situation worse and we need follow up patches for
 all the drivers to supply alignment information anyway.
 I can do it too, since I'm working on follow-up patches anyway.
 That would be great if Kevin does not insist on a respin.

A follow-up patch is fine.

Kevin

Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments

2013-11-12 Thread Igor Mammedov

On Tue, 12 Nov 2013 10:49:58 +1100
Alexey Kardashevskiy a...@ozlabs.ru wrote:

 On 11/12/2013 01:25 AM, Igor Mammedov wrote:
  On Mon, 11 Nov 2013 13:41:05 +0100
  Andreas Färber afaer...@suse.de wrote:
  
  Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy:
  This converts +foo/-foo to foo=on/foo=off respectively when
  QEMU parser is used for the command line options.
 
  -cpu parsers in x86 and other architectures should be unaffected
  by this change.
 
  Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
  ---
   util/qemu-option.c | 6 ++
   1 file changed, 6 insertions(+)
 
  diff --git a/util/qemu-option.c b/util/qemu-option.c
  index efcb5dc..6c8667c 100644
  --- a/util/qemu-option.c
  +++ b/util/qemu-option.c
  @@ -890,6 +890,12 @@ static int opts_do_parse(QemuOpts *opts, const char 
  *params,
   if (strncmp(option, no, 2) == 0) {
   memmove(option, option+2, strlen(option+2)+1);
   pstrcpy(value, sizeof(value), off);
  +} else if (strncmp(option, -, 1) == 0) {
  +memmove(option, option+1, strlen(option+1)+1);
  +pstrcpy(value, sizeof(value), off);
  +} else if (strncmp(option, +, 1) == 0) {
  +memmove(option, option+1, strlen(option+1)+1);
  +pstrcpy(value, sizeof(value), on);
   } else {
   pstrcpy(value, sizeof(value), on);
   }
 
  This looks like an interesting idea! However this is much too big a
  change to just CC ppc folks on...
 
  Jan, I wonder if this might break slirp's hostfwd option?
 
  Not sure what other options potentially starting with '-' might be
  affected. Test cases would be a helpful way of demonstrating that this
  change does not have undesired side effects.
  on x86 there is several value fixups for compatibility reason and a manual
  value parsing in cpu_x86_parse_featurestr(), so above won't just work there.
 
 
 What particular x86 CPU option cannot be handled the way as PPC's VSX is
 handled two patches below? As I see, even static properties will work there
 fine.
There is legacy code that is kept for CLI compatibility reasons.
Please, look at following features in cpu_x86_parse_featurestr():
  xlevel, tsc-freq hv-spinlocks
the rest feature flags on x86 should be handled just fine by your patch,
once x86properties series is applied. 

that's why we are talking about parser hook that could be overridden
by target if necessary.

PS:
extending QemuOpts to parsing +/-opts format, seems like good workaround
above problem. But I was under impression that general movement was to convert
custom formats to canonical format prop=value.

 
 
 -- 
 Alexey
 


-- 
Regards,
  Igor

Re: [Qemu-devel] [RFC PATCH 2/4] block/raw-posix: implement bdrv_zero_init

2013-11-12 Thread Kevin Wolf

Am 12.11.2013 um 08:47 hat Hu Tao geschrieben:
 Implement bdrv_zero_init using posix_fallocate.
 
 Signed-off-by: Hu Tao hu...@cn.fujitsu.com
 ---
  block/raw-posix.c | 13 +
  1 file changed, 13 insertions(+)
 
 diff --git a/block/raw-posix.c b/block/raw-posix.c
 index f6d48bb..8798599 100644
 --- a/block/raw-posix.c
 +++ b/block/raw-posix.c
 @@ -1190,6 +1190,18 @@ static int64_t coroutine_fn 
 raw_co_get_block_status(BlockDriverState *bs,
  return ret;
  }
  
 +static int raw_zero_init(BlockDriverState *bs, int64_t offset, int64_t 
 length)
 +{
 +BDRVRawState *s = bs-opaque;
 +int64_t len = bdrv_getlength(bs);
 +
 +if (offset + length  0 || offset + length  len) {
 +return -1;
 +}
 +
 +return posix_fallocate(s-fd, offset, length);
 +}

This doesn't really initialise anything to zero. It merely preallocates
those parts of a file that aren't allocated yet (and they happen to be
zeroed in this case), but leaves already existing parts untouched.

I wonder if this would be a correct implementation for a bdrv_anchor(),
though. I also wouldn't call that full preallocation, but it might be
useful anyway.

Kevin

Re: [Qemu-devel] [RFC PATCH 4/4] qcow2: Add full image preallocation option

2013-11-12 Thread Kevin Wolf

Am 12.11.2013 um 08:47 hat Hu Tao geschrieben:
 This adds a preallocation=full mode to qcow2 image creation, which
 creates a non-sparse image file.
 
 Signed-off-by: Hu Tao hu...@cn.fujitsu.com
 ---
  block/qcow2.c | 28 ++--
  1 file changed, 22 insertions(+), 6 deletions(-)
 
 diff --git a/block/qcow2.c b/block/qcow2.c
 index 359030f..d3ca6cf 100644
 --- a/block/qcow2.c
 +++ b/block/qcow2.c
 @@ -1385,7 +1385,13 @@ static int qcow2_change_backing_file(BlockDriverState 
 *bs,
  return qcow2_update_header(bs);
  }
  
 -static int preallocate(BlockDriverState *bs)
 +enum prealloc_mode {
 +PREALLOC_OFF = 0,
 +PREALLOC_METADATA,
 +PREALLOC_FULL,
 +};
 +
 +static int preallocate(BlockDriverState *bs, enum prealloc_mode mode)
  {
  uint64_t nb_sectors;
  uint64_t offset;
 @@ -1394,9 +1400,12 @@ static int preallocate(BlockDriverState *bs)
  int ret;
  QCowL2Meta *meta;
  
 +assert(mode != PREALLOC_OFF);
 +
  nb_sectors = bdrv_getlength(bs)  9;
  offset = 0;
  
 +/* First allocate metadata in _really_ big chunks */
  while (nb_sectors) {
  num = MIN(nb_sectors, INT_MAX  9);
  ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, num,
 @@ -1424,6 +1433,11 @@ static int preallocate(BlockDriverState *bs)
  offset += num  9;
  }
  
 +/* Then write zeros to the cluster data, if requested */
 +if (mode == PREALLOC_FULL) {
 +bdrv_zero_init(bs-file, offset, bdrv_getlength(bs));
 +}

bdrv_zero_init() is completely optional. It is only implemented for
raw-posix and errors are not checked. You can't assume that after this
point anything is preallocated.

Kevin

Re: [Qemu-devel] [PATCH v2] block: per caller dirty bitmap

2013-11-12 Thread Kevin Wolf

Am 04.11.2013 um 10:30 hat Fam Zheng geschrieben:
 Previously a BlockDriverState has only one dirty bitmap, so only one
 caller (e.g. a block job) can keep track of writing. This changes the
 dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the
 lifecycle is managed with these new functions:
 
 bdrv_create_dirty_bitmap
 bdrv_release_dirty_bitmap
 
 Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap.
 
 In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument
 is added to these functions, since each caller has its own dirty bitmap:
 
 bdrv_get_dirty
 bdrv_dirty_iter_init
 bdrv_get_dirty_count
 
 bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will
 internally walk the list of all dirty bitmaps and set them one by one.
 
 Signed-off-by: Fam Zheng f...@redhat.com

 diff --git a/block/qapi.c b/block/qapi.c
 index 5880b3e..6b0cdcf 100644
 --- a/block/qapi.c
 +++ b/block/qapi.c
 @@ -204,14 +204,6 @@ void bdrv_query_info(BlockDriverState *bs,
  info-io_status = bs-iostatus;
  }
  
 -if (bs-dirty_bitmap) {
 -info-has_dirty = true;
 -info-dirty = g_malloc0(sizeof(*info-dirty));
 -info-dirty-count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
 -info-dirty-granularity =
 - ((int64_t) BDRV_SECTOR_SIZE  
 hbitmap_granularity(bs-dirty_bitmap));
 -}
 -
  if (bs-drv) {
  info-has_inserted = true;
  info-inserted = g_malloc0(sizeof(*info-inserted));

The dirty field should probably be removed from qapi-schema.json if it
never gets set any more.

It was optional, so perhaps removing it doesn't cause any trouble
indeed, but I'd like to hear Eric on this matter before merging the
patch. Though if libvirt does make use of it, we have a problem because
it doesn't really make sense any more after these changes.

Kevin

Re: [Qemu-devel] [RFC] target-arm: provide skeleton for a64 insn decoding

2013-11-12 Thread Alex Bennée


claudio.font...@linaro.org writes:

 provide a skeleton for a64 instruction decoding in translate-a64.c,
 by dividing instructions into the classes defined by the
 ARM Architecture Reference Manual(DDI0487A_a) C3

 Signed-off-by: Claudio Fontana claudio.font...@linaro.org
 ---
 The following patch has been started during Linaro Connect
 by me and Alex Bennee.
snip

With the proviso of Richard's decode comment you can add:

Reviewed-by: Alex Bennée a...@bennee.com
Signed-of-by: Alex Bennée a...@bennee.com

-- 
Alex Bennée

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 11 November 2013 23:21, Anthony Liguori anth...@codemonkey.ws wrote:
 We're not talking about something obscure here.  It's eliminating an
 if(0) block.

No, we're not talking about a simple if (0) expression.
What we had in this case was
 if (!(env-features[FEAT_1_ECX]  CPUID_EXT_XSAVE) || !kvm_enabled()) {
break;
 }
 [stuff using kvm_arch_get_supported_cpuid()]

[where kvm_enabled() is #defined to constant-0].

For the compiler to eliminate this we are relying on:
 * dead-code elimination of code following a 'break'
   statement in a case block
 * constant-folding of something || 1 to 1
 * the compiler having done enough reasoning to be
   sure that env is not NULL

Self-evidently, not all compilers will provide
all of the above. Andreas' patch swaps the order
of the if() conditionals, which seems to work for
a wider set of compilers, but there's no guarantee
that will always work.

So exactly how much do we require our compiler's
constant-folding to handle? For example,

   unsigned char a,b,c;
   [...]
   if (a != 0  b != 0  c != 0
a * a * a + b * b * b == c * c * c) {

is compile-time provable to be always false; can
we rely on the branch being eliminated?

My position here is straightforward: the only thing
we can rely on is what the compilers document that
they will guarantee to do, which is nothing. I
don't see that relying on dead-code elimination
gains us anything significant at all, and adding
an extra stub or two (that won't even be linked
in if your compiler does happen to optimise) is
totally trivial overhead.

You and Paolo have both mentioned doing checks
at compile time but why is this particular
detail of our code worth checking in that way
at the expense of reliably being able to compile?

As it happens, having a stub function that returns
0 would simplify several bits of code that currently
do:

if (kvm_enabled()  cpu-enable_pmu) {
KVMState *s = cs-kvm_state;

*eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
*ebx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EBX);
*ecx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_ECX);
*edx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EDX);
} else {
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
}

because you could get rid of the else block.

Or you could #ifdef CONFIG_KVM this code section,
as we already do for some of the more complicated
bits of code that call this function. That would
work too.

-- PMM

Re: [Qemu-devel] [PATCH v3] qmp: access the local QemuOptsLists for drive option

2013-11-12 Thread Kevin Wolf

Am 09.11.2013 um 05:15 hat Amos Kong geschrieben:
 Currently we have three QemuOptsList (qemu_common_drive_opts,
 qemu_legacy_drive_opts, and qemu_drive_opts), only qemu_drive_opts
 is added to vm_config_groups[].
 
 This patch changes query-command-line-options to access three local
 QemuOptsLists for drive option, and merge the description items
 together.
 
 Signed-off-by: Amos Kong ak...@redhat.com

Thanks, applied to the block branch.

Kevin

Re: [Qemu-devel] [PATCH v5 1/2] target-arm: Move call to disas_vfp_insn out of disas_coproc_insn.

2013-11-12 Thread Will Newton

On 15 October 2013 16:09, Will Newton will.new...@linaro.org wrote:

 Floating point is an extension to the instruction set rather than
 a coprocessor, so call it directly from the ARM and Thumb decode
 functions.

 Signed-off-by: Will Newton will.new...@linaro.org
 ---
  target-arm/translate.c | 29 -
  1 file changed, 24 insertions(+), 5 deletions(-)

 Changes in v5:
  - Check for high bits set in disas_vfp_insn

Ping?

-- 
Will Newton
Toolchain Working Group, Linaro

Re: [Qemu-devel] [PATCH 0/7] virtio endian-ambivalent target fixes.

2013-11-12 Thread Thomas Huth

On Thu, 17 Oct 2013 14:23:35 +1030
Rusty Russell ru...@rustcorp.com.au wrote:

 This is a re-transmit of the core of the virtio endian code.  Since
 there seems to be some interest in ARM BE virtio, I've separated this from
 the direct problem I was solving: PowerPC LE.
 
 Please apply!
 Rusty.
 
 Rusty Russell (7):
   virtio_get_byteswap: function for endian-ambivalent targets using
 virtio.
   virtio: allow byte swapping for vring and config access
   hw/net/virtio-net: use virtio wrappers to access headers.
   hw/net/virtio-balloon: use virtio wrappers to access page frame
 numbers.
   hw/block/virtio-blk: use virtio wrappers to access headers.
   hw/scsi/virtio-scsi: use virtio wrappers to access headers.
   hw/char/virtio-serial-bus: use virtio wrappers to access headers.

 Hi Rusty!

May I ask what's the current status of your virtio endian patches? We
likely need something similar when we enable Virtio v1.0 for S390
virtio-ccw since we then have to byteswap the virtio stuff there, too.
So I recently started to have a look at this... However, in your
patches, the byteswapping seems to be activated/disabled globally, with
the virtio_byteswap variable. But with Virtio v1.0, the guest can
decide on a per-device basis whether it wants to drive the device in
v1.0 mode (-- byteswap on S390) or in v0.9 legacy mode (-- no
byteswap), depending on whether it sets the VIRTIO_F_VERSION_1 feature
bit or not. I guess other architectures will have the same problem with
Virtio 1.0, too, when the guests are not running in little endian mode.
So I wonder whether it would it be feasible to change the code so that
the decision of byteswapping or not is done on a per-device basis
instead? What do you think?

 Regards,
  Thomas

Re: [Qemu-devel] [PATCH] curses: fixup SIGWINCH handler mess

2013-11-12 Thread Laszlo Ersek

comments below

On 11/12/13 09:43, Gerd Hoffmann wrote:
 Don't run code in the signal handler, only set a flag.
 Use sigaction(2) to avoid non-portable signal(2) semantics.
 Make #ifdefs less messy.
 
 Signed-off-by: Gerd Hoffmann kra...@redhat.com
 ---
  ui/curses.c | 44 
  1 file changed, 28 insertions(+), 16 deletions(-)
 
 diff --git a/ui/curses.c b/ui/curses.c
 index 289a955..c33acff 100644
 --- a/ui/curses.c
 +++ b/ui/curses.c
 @@ -106,9 +106,9 @@ static void curses_resize(DisplayChangeListener *dcl,
  curses_calc_pad();
  }
  
 -#ifndef _WIN32
 -#if defined(SIGWINCH)  defined(KEY_RESIZE)
 -static void curses_winch_handler(int signum)
 +#if !defined(_WIN32)  defined(SIGWINCH)  defined(KEY_RESIZE)
 +static bool got_sigwinch;

The type volatile sig_atomic_t would be more pedantic
http://pubs.opengroup.org/onlinepubs/9699919799//functions/V2_chap02.html#tag_15_04_03_03.

 +static void curses_winch_check(void)
  {
  struct winsize {
  unsigned short ws_row;
 @@ -117,18 +117,34 @@ static void curses_winch_handler(int signum)
  unsigned short ws_ypixel;   /* unused */
  } ws;
  
 -/* terminal size changed */
 -if (ioctl(1, TIOCGWINSZ, ws) == -1)
 +if (!got_sigwinch) {
 +return;
 +}
 +got_sigwinch = false;
 +
 +if (ioctl(1, TIOCGWINSZ, ws) == -1) {
  return;
 +}
  
  resize_term(ws.ws_row, ws.ws_col);
 -curses_calc_pad();

Are you removing this call because we're setting invalidate below, and
the (new) caller of this function, curses_refresh(), calls
curses_calc_pad() on nonzero invalidate anyway?

  invalidate = 1;
 +}

Also, I grepped the source for SIGWINCH, and I think it is never masked
with pthread_sigmask(), or -- while the process is single-threaded
initially -- with sigprocmask(). Hence this signal can be delivered at
any time and interrupt interruptible functions (which is good
justification for the patch.) My point though is that after this patch a
narrow window seems to exist where you can lose a signal, namely between
checking got_sigwinch and resetting it.

(SIGWINCH is not a standard signal but I do think it it's not a realtime
one, hence it doesn't queue up; it can only be pending or not.)

For ultimate pedantry we could maybe write

bool local;

/* block the signal with pthread_sigmask()
 * for atomic retrieval and reset
 */
local = got_sigwinch;
got_sigwinch = false;
/* unblock the signal */

if (!local) {
return;
}

but it's likely overkill.

  
 -/* some systems require this */
 -signal(SIGWINCH, curses_winch_handler);
 +static void curses_winch_handler(int signum)
 +{
 +got_sigwinch = true;
  }
 -#endif
 +
 +static void curses_winch_init(void)
 +{
 +struct sigaction old, winch = {
 +.sa_handler  = curses_winch_handler,
 +};
 +sigaction(SIGWINCH, winch, old);
 +}

You don't need old here (you don't use it to restore the handler),
just pass NULL.

 +#else
 +static void curses_winch_check(void) {}
 +static void curses_winch_init(void) {}
  #endif
  
  static void curses_cursor_position(DisplayChangeListener *dcl,
 @@ -163,6 +179,8 @@ static void curses_refresh(DisplayChangeListener *dcl)
  {
  int chr, nextchr, keysym, keycode, keycode_alt;
  
 +curses_winch_check();
 +
  if (invalidate) {
  clear();
  refresh();
 @@ -349,13 +367,7 @@ void curses_display_init(DisplayState *ds, int 
 full_screen)
  curses_keyboard_setup();
  atexit(curses_atexit);
  
 -#ifndef _WIN32
 -#if defined(SIGWINCH)  defined(KEY_RESIZE)
 -/* some curses implementations provide a handler, but we
 - * want to be sure this is handled regardless of the library */
 -signal(SIGWINCH, curses_winch_handler);
 -#endif
 -#endif
 +curses_winch_init();
  
  dcl = (DisplayChangeListener *) g_malloc0(sizeof(DisplayChangeListener));
  dcl-ops = dcl_ops;
 

If you think a v2 is not warranted for (and because the patch is
strictly an improvement):

Reviewed-by: Laszlo Ersek ler...@redhat.com

Re: [Qemu-devel] audit needed for signal handlers

2013-11-12 Thread Laszlo Ersek

On 11/11/13 18:47, Paolo Bonzini wrote:
 Il 11/11/2013 18:13, Peter Maydell ha scritto:
 That said, aren't all signals in QEMU (except SIG_IPI) caught with
 signalfd and the handlers run synchronously in the iothread?
 Eric specifically points out one which is not.
 (I'm pretty sure that 'reinstall signal handler at
 end of signal handler' is ancient voodoo that we don't
 want either, incidentally.)
 
 Yeah, I was convinced it was---I still cannot find a reason why SIGWINCH
 needs to be handled synchronously.
 
 resize_term is definitely not signal safe; the man page reflects
 10-year-old (or more) signal handling lore: While these functions are
 intended to be used to support a signal handler (i.e., for SIGWINCH),
 care should be taken to avoid invoking them in a context where malloc or
 realloc may have been interrupted, since it uses those functions.
 
 Calling malloc/realloc from a signal handler is taboo these days...

The problem is when you call a non-async-signal-safe from the handler
and you've interrupted another non-async-signal-safe function. Ie.
there's trouble *only* if both interruptor and interruptee are
non-async-signal-safe.

If at least one is async-signal-safe, then there's no problem. The more
frequent case for this is when the handler is async-signal-safe (like
when it sets a volatile sig_atomic_t flag, and/or uses local variables
exclusively and possibly calls other async-signal-safe functions).

But, the reverse setup exists as well, when you do whatever you like in
the signal handler, just restrict it (ie. the signal delivery) to
portions of the normal source where no non-async-signal-safe functions
can run. One example is when you block the signal for the entire event
loop body, and you unblock it only atomically in pselect() or ppoll().
If there has been a signal pending, the (non-async-signal-safe) handler
body will run *inside* pselect(), but that's fine.

(Of course you still need to care about *other* signals interrupting the
handler, see also sa_mask.)

It's by far the best to do what Eric suggests (and I usually combine
that even with the block it everywhere idea, because I hate EINTR --
in a correct event loop no function call should block anyway, and EINTR
is just a nuisance).

I probably glossed over a thousand details and I'll get shredded for
that, but that's OK. :)

Done ranting :)

Thanks,
Laszlo

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 12:07, Peter Maydell ha scritto:
 On 11 November 2013 23:21, Anthony Liguori anth...@codemonkey.ws wrote:
 We're not talking about something obscure here.  It's eliminating an
 if(0) block.
 
 No, we're not talking about a simple if (0) expression.
 What we had in this case was
  if (!(env-features[FEAT_1_ECX]  CPUID_EXT_XSAVE) || !kvm_enabled()) {
 break;
  }
  [stuff using kvm_arch_get_supported_cpuid()]
 
 [where kvm_enabled() is #defined to constant-0].
 
 For the compiler to eliminate this we are relying on:
  * dead-code elimination of code following a 'break'
statement in a case block
  * constant-folding of something || 1 to 1
  * the compiler having done enough reasoning to be
sure that env is not NULL

Yes, it's not trivial, but there are simpler ways to do it.

For example there is no need to make sure that env is non-NULL, only to
see that something || 1 is never zero and thus if (x) y; is just
(void)x; y;.  This seems easier to me than DCE after break which
clang is able to do.

Indeed, NULL checks (except perhaps very simple checks like x != NULL)
are not something I'd expect the compiler to optimize out at -O0.

 You and Paolo have both mentioned doing checks
 at compile time but why is this particular
 detail of our code worth checking in that way
 at the expense of reliably being able to compile?
 
 As it happens, having a stub function that returns
 0 would simplify several bits of code that currently
 do:
 
 if (kvm_enabled()  cpu-enable_pmu) {
 KVMState *s = cs-kvm_state;
 
 *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
 *ebx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EBX);
 *ecx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_ECX);
 *edx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EDX);
 } else {
 *eax = 0;
 *ebx = 0;
 *ecx = 0;
 *edx = 0;
 }
 
 because you could get rid of the else block.

No, you couldn't because the else branch runs for kvm_enabled() 
!cpu-enable_pmu too.  Relying on kvm_arch_get_supported_cpuid to
return 0 would only make code harder to review, and the above example
shows this fact very well.

kvm_arch_get_supported_cpuid abort()-ing would be fine, but it would
also make code harder to review, because you cannot rely anymore on the
compiler-linker combo to filter out the most obvious cases of forgetting
about TCG's existence.

 Or you could #ifdef CONFIG_KVM this code section,
 as we already do for some of the more complicated
 bits of code that call this function. That would
 work too.

Yes, but it wouldn't be _right_.  Most of the code below the break is
potentially applicable to TCG, even if it is not used now.

Paolo

Re: [Qemu-devel] [PATCH] curses: fixup SIGWINCH handler mess

2013-11-12 Thread Gerd Hoffmann

  Hi,

  +static bool got_sigwinch;
 
 The type volatile sig_atomic_t would be more pedantic
 http://pubs.opengroup.org/onlinepubs/9699919799//functions/V2_chap02.html#tag_15_04_03_03.

Will do for v2.

Hmm, checkpatch barfs on volatile, with a reference which looks linux
kernel related ...

   
   resize_term(ws.ws_row, ws.ws_col);
  -curses_calc_pad();
 
 Are you removing this call because we're setting invalidate below, and
 the (new) caller of this function, curses_refresh(), calls
 curses_calc_pad() on nonzero invalidate anyway?

Exactly.

 justification for the patch.) My point though is that after this patch a
 narrow window seems to exist where you can lose a signal, namely between
 checking got_sigwinch and resetting it.

Doesn't matter.  The signal just says terminal size has changed,
typically as result of a xterm window resize.  Even if we get that twice
we have to handle it only once, we just have to make sure this happens
after the second signal came in.  Which is the case, as we reset
got_sigwinch before going to handle it (query new size, tell curses).

cheers,
  Gerd

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 12 November 2013 12:09, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 12/11/2013 12:07, Peter Maydell ha scritto:
 For the compiler to eliminate this we are relying on:
  * dead-code elimination of code following a 'break'
statement in a case block
  * constant-folding of something || 1 to 1
  * the compiler having done enough reasoning to be
sure that env is not NULL

 Yes, it's not trivial, but there are simpler ways to do it.

 For example there is no need to make sure that env is non-NULL, only to
 see that something || 1 is never zero and thus if (x) y; is just
 (void)x; y;.  This seems easier to me than DCE after break which
 clang is able to do.

You seem to be trying to reason about what the compiler
might choose to do or how it might be implemented internally.
I think this is fundamentally misguided. -O0 means reduce
compile time and make debugging produce expected results,
not reduce compile time, make debugging produce expected
results and also run these two optimization passes which
my codebase implicitly relies on happening. gcc currently
happens to do DCE and constant-folding even at -O0 because
it turns out to be faster to do that than not to; if in
future the compilation-speed tradeoff swings the other
way they're free to decide to not do those passes, or to
do cut-down versions that fold less or eliminate less.

I find this argument confusing because to me it's a
completely simple choice with one obviously right
and one obviously wrong approach :-(

-- PMM

Re: [Qemu-devel] [PATCH] curses: fixup SIGWINCH handler mess

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 12:53, Laszlo Ersek ha scritto:
 Also, I grepped the source for SIGWINCH, and I think it is never masked
 with pthread_sigmask(), or -- while the process is single-threaded
 initially -- with sigprocmask(). Hence this signal can be delivered at
 any time and interrupt interruptible functions (which is good
 justification for the patch.) My point though is that after this patch a
 narrow window seems to exist where you can lose a signal, namely between
 checking got_sigwinch and resetting it.

You don't really lose it, it is delayed to the next refresh.

 (SIGWINCH is not a standard signal but I do think it it's not a realtime
 one, hence it doesn't queue up; it can only be pending or not.)

Indeed.

 For ultimate pedantry we could maybe write
 
 bool local;
 
 /* block the signal with pthread_sigmask()
  * for atomic retrieval and reset
  */
 local = got_sigwinch;
 got_sigwinch = false;
 /* unblock the signal */
 
 if (!local) {
 return;
 }
 
 but it's likely overkill.

Or just use an atomic function:

if (!atomic_xchg(got_sigwinch, false)) {
return;
}

Paolo

Re: [Qemu-devel] audit needed for signal handlers

2013-11-12 Thread Laszlo Ersek

On 11/11/13 19:03, Max Filippov wrote:
 On Mon, Nov 11, 2013 at 8:50 PM, Eric Blake ebl...@redhat.com wrote:
 Quick - identify the bug in this code (from ui/curses.c):

 static void curses_winch_handler(int signum)
 {
 struct winsize {
 unsigned short ws_row;
 unsigned short ws_col;
 unsigned short ws_xpixel;   /* unused */
 unsigned short ws_ypixel;   /* unused */
 } ws;

 /* terminal size changed */
 if (ioctl(1, TIOCGWINSZ, ws) == -1)
 
 An unsafe function is called in a signal. See man 7 signal,
 section 'Async-signal-safe functions'. This should be avoided.
 
 return;

 resize_term(ws.ws_row, ws.ws_col);
 curses_calc_pad();
 invalidate = 1;

 /* some systems require this */
 signal(SIGWINCH, curses_winch_handler);
 }

 Here's a hint: ioctl() can clobber errno.
 
 I believe it cannot, at least in linux, as technically the signal
 handler is always called in a new thread, specifically created
 to only handle that signal, and errno should be thread-local.

That's incorrect (*). The handler runs on a new *stack frame*, but
inside the same thread. You can specify an alternate stack for signal
handlers to run on in advance (see SA_ONSTACK / sigaltstack()), in which
case the new stack frame will be allocated there. Otherwise the system
will just use the normal stack. The handler indeed runs like an
unexpected, out-of-the-blue normal function call.

It is actually pretty vital that the handler is run by the specific
thread that the signal has been delivered to.

(*) Example code (not a correct/portable program due to the race on
errno, but it does disprove the idea that errno is protected on Linux):

  #define _XOPEN_SOURCE 600

  #include unistd.h
  #include errno.h
  #include signal.h
  #include stdio.h

  static void ringring(int signum)
  {
errno = -12;
  }

  int main(void)
  {
sigaction(SIGALRM,
  (struct sigaction){ .sa_handler = ringring },
  NULL);
alarm(1);

errno = 0;
/* pause() would clobber errno */
while (errno == 0)
  ;

printf(%d\n, errno);
return 0;
  }

Thanks
Laszlo

Re: [Qemu-devel] [PATCH] curses: fixup SIGWINCH handler mess

2013-11-12 Thread Laszlo Ersek

On 11/12/13 13:16, Gerd Hoffmann wrote:

 justification for the patch.) My point though is that after this patch a
 narrow window seems to exist where you can lose a signal, namely between
 checking got_sigwinch and resetting it.
 
 Doesn't matter.  The signal just says terminal size has changed,
 typically as result of a xterm window resize.  Even if we get that twice
 we have to handle it only once, we just have to make sure this happens
 after the second signal came in.  Which is the case, as we reset
 got_sigwinch before going to handle it (query new size, tell curses).

Ah, correct. We do lose the second signal (the one which was delivered
between we retrieve the flag set by the first instance, and resetting
the flag), but at that point we're going to handle *one* resize event
anyway. Supposing that the second signal was generated *after* the
terminal size had changed for the second time, we'll see a recent enough
terminal size when we query it. So, we lose the second signal and the
*first* terminal size. Good.

Thanks!
Laszlo

Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments

2013-11-12 Thread Alexey Kardashevskiy

On 12.11.2013 20:58, Igor Mammedov wrote:
 On Tue, 12 Nov 2013 10:49:58 +1100
 Alexey Kardashevskiy a...@ozlabs.ru wrote:
 
 On 11/12/2013 01:25 AM, Igor Mammedov wrote:
 On Mon, 11 Nov 2013 13:41:05 +0100
 Andreas Färber afaer...@suse.de wrote:

 Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy:
 This converts +foo/-foo to foo=on/foo=off respectively when
 QEMU parser is used for the command line options.

 -cpu parsers in x86 and other architectures should be unaffected
 by this change.

 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
  util/qemu-option.c | 6 ++
  1 file changed, 6 insertions(+)

 diff --git a/util/qemu-option.c b/util/qemu-option.c
 index efcb5dc..6c8667c 100644
 --- a/util/qemu-option.c
 +++ b/util/qemu-option.c
 @@ -890,6 +890,12 @@ static int opts_do_parse(QemuOpts *opts, const char 
 *params,
  if (strncmp(option, no, 2) == 0) {
  memmove(option, option+2, strlen(option+2)+1);
  pstrcpy(value, sizeof(value), off);
 +} else if (strncmp(option, -, 1) == 0) {
 +memmove(option, option+1, strlen(option+1)+1);
 +pstrcpy(value, sizeof(value), off);
 +} else if (strncmp(option, +, 1) == 0) {
 +memmove(option, option+1, strlen(option+1)+1);
 +pstrcpy(value, sizeof(value), on);
  } else {
  pstrcpy(value, sizeof(value), on);
  }

 This looks like an interesting idea! However this is much too big a
 change to just CC ppc folks on...

 Jan, I wonder if this might break slirp's hostfwd option?

 Not sure what other options potentially starting with '-' might be
 affected. Test cases would be a helpful way of demonstrating that this
 change does not have undesired side effects.
 on x86 there is several value fixups for compatibility reason and a manual
 value parsing in cpu_x86_parse_featurestr(), so above won't just work there.


 What particular x86 CPU option cannot be handled the way as PPC's VSX is
 handled two patches below? As I see, even static properties will work there
 fine.
 There is legacy code that is kept for CLI compatibility reasons.
 Please, look at following features in cpu_x86_parse_featurestr():
   xlevel, tsc-freq hv-spinlocks

Ok, I do not know for sure if static properties support setters/getters
(they do not if I remember correct) but what does prevent these x86
properties from being _dynamic_?


 the rest feature flags on x86 should be handled just fine by your patch,
 once x86properties series is applied. 
 
 that's why we are talking about parser hook that could be overridden
 by target if necessary.

This part confuses me the most. I thought I added the hook and I did not
change other than PPC archs so my patches should have gone quite easily
to upstream but instead I was told (I think I was but I could
misunderstand) that other folks may be unhappy that my stuff does not
support +foo/-foo (which could be added later).

Could you please point me to the x86properties patch(es) which everybody
is waiting for? Thanks!

 PS:
 extending QemuOpts to parsing +/-opts format, seems like good workaround
 above problem. But I was under impression that general movement was to convert
 custom formats to canonical format prop=value.

Heh. I do not understand movements in the qemu project most of the time
:) I thought I could have added compat to PowerPC CPU as others did
but I was so wrong :)



-- 
With best regards

Alexey Kardashevskiy -- icq: 52150396

Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments

2013-11-12 Thread Andreas Färber

Am 12.11.2013 13:39, schrieb Alexey Kardashevskiy:
 On 12.11.2013 20:58, Igor Mammedov wrote:
 PS:
 extending QemuOpts to parsing +/-opts format, seems like good workaround
 above problem. But I was under impression that general movement was to 
 convert
 custom formats to canonical format prop=value.
 
 Heh. I do not understand movements in the qemu project most of the time
 :) I thought I could have added compat to PowerPC CPU as others did
 but I was so wrong :)

Hey, I instructed you how to do exactly that, with a const char *
argument, but you chose rather to experiment more with QemuOpts. ;)
Don't blame us! :-)

Cheers,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

Re: [Qemu-devel] i386: pc: align gpa-hpa on 1GB boundary (v5)

2013-11-12 Thread Igor Mammedov

On Sun, 10 Nov 2013 18:47:53 -0200
Marcelo Tosatti mtosa...@redhat.com wrote:

[...]

 @@ -1177,10 +1182,50 @@ FWCfgState *pc_memory_init(MemoryRegion 
 *system_memory,
  e820_add_entry(0, below_4g_mem_size, E820_RAM);
  if (above_4g_mem_size  0) {
  ram_above_4g = g_malloc(sizeof(*ram_above_4g));
it is a memory leak when ram-above-4g is not created

 -memory_region_init_alias(ram_above_4g, NULL, ram-above-4g, ram,
 - below_4g_mem_size, above_4g_mem_size);
 -memory_region_add_subregion(system_memory, 0x1ULL,
 +/*
 + *
 + * If 1GB hugepages are used to back guest RAM, map guest address
 + * space in the range [ramsize,ramsize+holesize] to the ram block
 + * range [holestart, 4GB]
 + *
 + *  0  h 4G 
 [ramsize,ramsize+holesize]
 + *
 + * guest-addr-space [  ] [  ][xxx]
 + */--/
 + * contiguous-ram-block [  ][xxx][ ]
 + *
 + * So that memory beyond 4GB is aligned on a 1GB boundary,
 + * at the host physical address space.
 + *
 + */
 +if (guest_info-gb_align) {
 +uint64_t holesize = 0x1ULL - below_4g_mem_size;
 +uint64_t piecetwosize = holesize - align_offset;
 +
 +assert(piecetwosize = holesize);
 +
 +if ((above_4g_mem_size - piecetwosize)  0) {
here is integer overflow,
reproducable with:  -mem-path /var/lib/hugetlbfs/global/pagesize-1GB -m 3600


 +memory_region_init_alias(ram_above_4g, NULL, ram-above-4g,
 + ram, 0x1ULL,
 + above_4g_mem_size - piecetwosize);
 +memory_region_add_subregion(system_memory, 0x1ULL,
 + ram_above_4g);
 +}
 +
 +ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo));
 +memory_region_init_alias(ram_above_4g_piecetwo, NULL,
 + ram-above-4g-piecetwo, ram,
 + 0x1ULL - holesize, 
 piecetwosize);
 +memory_region_add_subregion(system_memory,
 +0x1ULL +
 +above_4g_mem_size - piecetwosize,
is there a guaranty that ram-above-4g-piecetwo will be mapped immediately
after ram-above-4g without any gap?

if there is no then you might need to change how e820_add_entry() for high ram
is handled and possibly CMOS value as well. 

 +ram_above_4g_piecetwo);
 +} else {
 +memory_region_init_alias(ram_above_4g, NULL, ram-above-4g, ram,
 +below_4g_mem_size, above_4g_mem_size);
 +memory_region_add_subregion(system_memory, 0x1ULL,
  ram_above_4g);
 +}
  e820_add_entry(0x1ULL, above_4g_mem_size, E820_RAM);
  }
[...]
-- 
Regards,
  Igor

[Qemu-devel] [PATCH 1.7] qcow2: fix possible corruption when reading multiple clusters

2013-11-12 Thread Peter Lieven

if multiple sectors spanning multiple clusters are read the
function count_contiguous_clusters should ensure that the
cluster type should not change between the clusters.

Especially the for-loop should break when we have one
or more normal clusters followed by a compressed cluster.

Unfortunately the wrong macro was used in the mask to
compare the flags.

This was discovered while debugging a data corruption
issue when converting a compressed qcow2 image to raw.
qemu-img reads 2MB chunks which span multiple clusters.

CC: qemu-sta...@nongnu.org
Signed-off-by: Peter Lieven p...@kamp.de
---
 block/qcow2-cluster.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 6d7d1ac..11f9c50 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -290,7 +290,7 @@ static int count_contiguous_clusters(uint64_t nb_clusters, 
int cluster_size,
 uint64_t *l2_table, uint64_t stop_flags)
 {
 int i;
-uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW2_CLUSTER_COMPRESSED;
+uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
 uint64_t first_entry = be64_to_cpu(l2_table[0]);
 uint64_t offset = first_entry  mask;
 
-- 
1.7.9.5

Re: [Qemu-devel] [PATCH v2] block: per caller dirty bitmap

2013-11-12 Thread Eric Blake

On 11/12/2013 03:46 AM, Kevin Wolf wrote:

 +++ b/block/qapi.c
 @@ -204,14 +204,6 @@ void bdrv_query_info(BlockDriverState *bs,
  info-io_status = bs-iostatus;
  }
  
 -if (bs-dirty_bitmap) {
 -info-has_dirty = true;
 -info-dirty = g_malloc0(sizeof(*info-dirty));
 -info-dirty-count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
 -info-dirty-granularity =
 - ((int64_t) BDRV_SECTOR_SIZE  
 hbitmap_granularity(bs-dirty_bitmap));
 -}
 -
  if (bs-drv) {
  info-has_inserted = true;
  info-inserted = g_malloc0(sizeof(*info-inserted));
 
 The dirty field should probably be removed from qapi-schema.json if it
 never gets set any more.
 
 It was optional, so perhaps removing it doesn't cause any trouble
 indeed, but I'd like to hear Eric on this matter before merging the
 patch. Though if libvirt does make use of it, we have a problem because
 it doesn't really make sense any more after these changes.

Removing an optional output-only field is okay (it is the same as
stating that we are ALWAYS taking the option of not including it).
Since it has always been marked optional, management can't be relying on
it; more specifically, libvirt does not have any code checking for it.
So I'm okay with removing it from the schema as a backwards-compatible
change.

[Removing an optional input field is wrong, but BlockInfo is an output
type, not an input type.]

Eventually, libvirt would like to use persistent dirty bitmaps, in order
to resume long-running operations such as drive-mirror even across
migration between different qemu processes, but I don't think we are
quite there yet, and I also don't think that removal of 'dirty' from
BlockInfo impedes in that goal.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 4/6] qemu-option: support +foo/-foo command line agruments

2013-11-12 Thread Igor Mammedov

On Tue, 12 Nov 2013 23:39:27 +1100
Alexey Kardashevskiy a...@ozlabs.ru wrote:

 On 12.11.2013 20:58, Igor Mammedov wrote:
  On Tue, 12 Nov 2013 10:49:58 +1100
  Alexey Kardashevskiy a...@ozlabs.ru wrote:
  
  On 11/12/2013 01:25 AM, Igor Mammedov wrote:
  On Mon, 11 Nov 2013 13:41:05 +0100
  Andreas Färber afaer...@suse.de wrote:
 
  Am 11.11.2013 08:44, schrieb Alexey Kardashevskiy:
  This converts +foo/-foo to foo=on/foo=off respectively when
  QEMU parser is used for the command line options.
 
  -cpu parsers in x86 and other architectures should be unaffected
  by this change.
 
  Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
  ---
   util/qemu-option.c | 6 ++
   1 file changed, 6 insertions(+)
 
  diff --git a/util/qemu-option.c b/util/qemu-option.c
  index efcb5dc..6c8667c 100644
  --- a/util/qemu-option.c
  +++ b/util/qemu-option.c
  @@ -890,6 +890,12 @@ static int opts_do_parse(QemuOpts *opts, const 
  char *params,
   if (strncmp(option, no, 2) == 0) {
   memmove(option, option+2, strlen(option+2)+1);
   pstrcpy(value, sizeof(value), off);
  +} else if (strncmp(option, -, 1) == 0) {
  +memmove(option, option+1, strlen(option+1)+1);
  +pstrcpy(value, sizeof(value), off);
  +} else if (strncmp(option, +, 1) == 0) {
  +memmove(option, option+1, strlen(option+1)+1);
  +pstrcpy(value, sizeof(value), on);
   } else {
   pstrcpy(value, sizeof(value), on);
   }
 
  This looks like an interesting idea! However this is much too big a
  change to just CC ppc folks on...
 
  Jan, I wonder if this might break slirp's hostfwd option?
 
  Not sure what other options potentially starting with '-' might be
  affected. Test cases would be a helpful way of demonstrating that this
  change does not have undesired side effects.
  on x86 there is several value fixups for compatibility reason and a manual
  value parsing in cpu_x86_parse_featurestr(), so above won't just work 
  there.
 
 
  What particular x86 CPU option cannot be handled the way as PPC's VSX is
  handled two patches below? As I see, even static properties will work there
  fine.
  There is legacy code that is kept for CLI compatibility reasons.
  Please, look at following features in cpu_x86_parse_featurestr():
xlevel, tsc-freq hv-spinlocks
 
 Ok, I do not know for sure if static properties support setters/getters
 (they do not if I remember correct) but what does prevent these x86
 properties from being _dynamic_?
nothing, except of:
 * it's better to keep CPU device model clean from legacy hacks so that legacy
   silent fixups of invalid values won't be available via other interfaces
   except of CLI. That will force users to use correct property names/values
   and not break old users that use legacy CLI options.


 
  the rest feature flags on x86 should be handled just fine by your patch,
  once x86properties series is applied. 
  
  that's why we are talking about parser hook that could be overridden
  by target if necessary.
 
 This part confuses me the most. I thought I added the hook and I did not
 change other than PPC archs so my patches should have gone quite easily
 to upstream but instead I was told (I think I was but I could
 misunderstand) that other folks may be unhappy that my stuff does not
 support +foo/-foo (which could be added later).
 
 Could you please point me to the x86properties patch(es) which everybody
 is waiting for? Thanks!
latest is available at
https://github.com/imammedo/qemu/tree/x86-cpu-properties.v10.1
which basically is a rebase with fixed conflicts of v9
http://comments.gmane.org/gmane.comp.emulators.qemu/84

  PS:
  extending QemuOpts to parsing +/-opts format, seems like good workaround
  above problem. But I was under impression that general movement was to 
  convert
  custom formats to canonical format prop=value.
 
 Heh. I do not understand movements in the qemu project most of the time
 :) I thought I could have added compat to PowerPC CPU as others did
 but I was so wrong :)
 
 
 
 -- 
 With best regards
 
 Alexey Kardashevskiy -- icq: 52150396


-- 
Regards,
  Igor

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 13:16, Peter Maydell ha scritto:
 On 12 November 2013 12:09, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 12/11/2013 12:07, Peter Maydell ha scritto:
 For the compiler to eliminate this we are relying on:
  * dead-code elimination of code following a 'break'
statement in a case block
  * constant-folding of something || 1 to 1
  * the compiler having done enough reasoning to be
sure that env is not NULL

 Yes, it's not trivial, but there are simpler ways to do it.

 For example there is no need to make sure that env is non-NULL, only to
 see that something || 1 is never zero and thus if (x) y; is just
 (void)x; y;.  This seems easier to me than DCE after break which
 clang is able to do.
 
 You seem to be trying to reason about what the compiler
 might choose to do or how it might be implemented internally.

I'm not reasoning about that in general (I was in the context of the
message you quoted).

I'm saying it's *reasonable* to expect that -O0 means reduce compile
time, make debugging produce expected results, and try (not too hard) to
not break what works at -O2.  It's a simple QoI argument based on the
fact that people *will* switch back and forth between -O2 and -O0.  Of
course not everything can be kept to work, since the compilers do pretty
surprising optimizations (not counting the ones that break your code of
course...).  But I think a limited amount of dead code elimination
*should* be expected because most people are now preferring if to
#ifdef for compiling out code.

If -O0 does not do that, let's move debug builds to -O1.

Paolo

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 12 November 2013 13:12, Paolo Bonzini pbonz...@redhat.com wrote:
 I'm saying it's *reasonable* to expect that -O0 means reduce compile
 time, make debugging produce expected results, and try (not too hard) to
 not break what works at -O2.

And that's what we've got. There's no requirement, even
at -O2, to eliminate all dead code. It's just a QoI
issue for optimization. If your code is busted it's
busted and that might show up only at -O0 or only at -O2
(the latter in fact is pretty common because of the
tendency to only do dataflow analysis at higher levels).

  It's a simple QoI argument based on the
 fact that people *will* switch back and forth between -O2 and -O0.  Of
 course not everything can be kept to work, since the compilers do pretty
 surprising optimizations (not counting the ones that break your code of
 course...).  But I think a limited amount of dead code elimination
 *should* be expected because most people are now preferring if to
 #ifdef for compiling out code.

If your code isn't wrong then if (0) works just fine
for compiling out code that isn't used. In an optimized
build it gets eliminated; in an non-optimized build you
don't care if it gets eliminated or not, it's no big deal.
The reason for using if (0) rather than #ifdefs is
an optimizing compiler will make this no less efficient
for our release builds; that doesn't mean you can rely
on it being optimal when you've specifically asked for
a non-optimizing build, and it doesn't mean you can
put code in the if (0) that's broken. (Similarly,
you can put code that's a syntax error inside #if 0,
but that won't work inside an if (0). The solution
is not to do that.)

-- PMM

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Gleb Natapov

On Tue, Nov 12, 2013 at 02:12:56PM +0100, Paolo Bonzini wrote:
 Il 12/11/2013 13:16, Peter Maydell ha scritto:
  On 12 November 2013 12:09, Paolo Bonzini pbonz...@redhat.com wrote:
  Il 12/11/2013 12:07, Peter Maydell ha scritto:
  For the compiler to eliminate this we are relying on:
   * dead-code elimination of code following a 'break'
 statement in a case block
   * constant-folding of something || 1 to 1
   * the compiler having done enough reasoning to be
 sure that env is not NULL
 
  Yes, it's not trivial, but there are simpler ways to do it.
 
  For example there is no need to make sure that env is non-NULL, only to
  see that something || 1 is never zero and thus if (x) y; is just
  (void)x; y;.  This seems easier to me than DCE after break which
  clang is able to do.
  
  You seem to be trying to reason about what the compiler
  might choose to do or how it might be implemented internally.
 
 I'm not reasoning about that in general (I was in the context of the
 message you quoted).
 
 I'm saying it's *reasonable* to expect that -O0 means reduce compile
 time, make debugging produce expected results, and try (not too hard) to
 not break what works at -O2.  It's a simple QoI argument based on the
 fact that people *will* switch back and forth between -O2 and -O0.  Of
 course not everything can be kept to work, since the compilers do pretty
 surprising optimizations (not counting the ones that break your code of
 course...).  But I think a limited amount of dead code elimination
 *should* be expected because most people are now preferring if to
 #ifdef for compiling out code.
 
 If -O0 does not do that, let's move debug builds to -O1.
 
Why not enable dce with -fdce?

--
Gleb.

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Gleb Natapov

On Tue, Nov 12, 2013 at 01:21:51PM +, Peter Maydell wrote:
  (Similarly,
 you can put code that's a syntax error inside #if 0,
 but that won't work inside an if (0). The solution
 is not to do that.)
 
That's the advantage of using if (0) instead of  #if 0. You do not
need to enable insane amount of options to check that your change does
not break complication for any of them.

--
Gleb.

[Qemu-devel] [RFC v2] target-arm: provide skeleton for a64 insn decoding

2013-11-12 Thread Claudio Fontana

provide a skeleton for a64 instruction decoding in translate-a64.c,
by dividing instructions into the classes defined by the
ARM Architecture Reference Manual(DDI0487A_a) C3

Signed-off-by: Claudio Fontana claudio.font...@linaro.org
Signed-off-by: Alex Bennée a...@bennee.com
Reviewed-by: Alex Bennée a...@bennee.com
---
 target-arm/translate-a64.c | 368 -
 1 file changed, 360 insertions(+), 8 deletions(-)

For the rationale, see v1 of the RFC at
http://lists.gnu.org/archive/html/qemu-devel/2013-11/msg01312.html

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index f120088..c4b2bb5 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -107,17 +107,346 @@ static void gen_exception_insn(DisasContext *s, int 
offset, int excp)
 s-is_jmp = DISAS_JUMP;
 }
 
-static void real_unallocated_encoding(DisasContext *s)
+static void unallocated_encoding(DisasContext *s)
 {
-fprintf(stderr, Unknown instruction: %#x\n, s-insn);
 gen_exception_insn(s, 4, EXCP_UDEF);
 }
 
-#define unallocated_encoding(s) do { \
-fprintf(stderr, unallocated encoding at line: %d\n, __LINE__); \
-real_unallocated_encoding(s); \
-} while (0)
+#define unsupported_encoding(s, insn)\
+do { \
+qemu_log_mask(LOG_UNIMP,\
+  %s:%d: unsupported instruction encoding 0x%08x, \
+  __FILE__, __LINE__, insn);\
+unallocated_encoding(s);\
+} while (0);
 
+/*
+ * the instruction disassembly implemented here matches
+ * the instruction encoding classifications in chapter 3 (C3)
+ * of the ARM Architecture Reference Manual (DDI0487A_a)
+ */
+
+/* Unconditional branch (immediate) */
+static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Compare  branch (immediate) */
+static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Test  branch (immediate) */
+static void disas_test_b_imm(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Conditional branch (immediate) */
+static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* System */
+static void disas_sys(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Exception generation */
+static void disas_exc(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Unconditional branch (register) */
+static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* C3.2 Branches, exception generating and system instructions */
+static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
+{
+switch (extract32(insn, 25, 7)) {
+case 0x0a: case 0x0b:
+case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
+disas_uncond_b_imm(s, insn);
+break;
+case 0x1a: case 0x5a: /* Compare  branch (immediate) */
+disas_comp_b_imm(s, insn);
+break;
+case 0x1b: case 0x5b: /* Test  branch (immediate) */
+disas_test_b_imm(s, insn);
+break;
+case 0x2a: /* Conditional branch (immediate) */
+disas_cond_b_imm(s, insn);
+break;
+case 0x6a: /* Exception generation / System */
+if (insn  (1  24)) {
+disas_sys(s, insn);
+} else {
+disas_exc(s, insn);
+}
+break;
+case 0x6b: /* Unconditional branch (register) */
+disas_uncond_b_reg(s, insn);
+break;
+default:
+unallocated_encoding(s);
+break;
+}
+}
+
+/* Load/store exclusive */
+static void disas_ldst_excl(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Load register (literal) */
+static void disas_ld_lit(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Load/store pair (all forms) */
+static void disas_ldst_pair(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* Load/store register (all forms) */
+static void disas_ldst_reg(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* AdvSIMD load/store multiple structures */
+static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* AdvSIMD load/store single structure */
+static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
+{
+unsupported_encoding(s, insn);
+}
+
+/* C3.3 Loads and stores */
+static void disas_ldst(DisasContext *s, uint32_t insn)
+{
+switch (extract32(insn, 24, 6)) {
+case 0x08: /* Load/store exclusive */
+disas_ldst_excl(s, insn);
+break;
+case 0x18: case 0x1c: /* Load register (literal) */
+

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 14:23, Gleb Natapov ha scritto:
 If -O0 does not do that, let's move debug builds to -O1.

 Why not enable dce with -fdce?

First, because clang doesn't have fine-tuned optimization options (at
least I couldn't find them and -fdce doesn't work).

Second, because most optimization options are no-ops at -O0 (try -fdce
-fdump-tree-all with GCC.

Paolo

[Qemu-devel] [PATCH for-1.8 0/2 v3] pc: inform SeaBIOS where 64-bit PCI hole begins

2013-11-12 Thread Igor Mammedov

* simplify PCI address space mapping into system address space,
  replacing code duplication in piix/q53 PCs with a helper function

* add fw_cfg 'etc/reserved-memory-end' to allow QEMU reserve
  additional address space before 64-bit PCI hole. Which will be
  need for reserving memory hotplug region in highmem.

v3:
 *  rebased on top of current master (1.7-rc0), wich includes dependency 
memory: Change MemoryRegion priorities from unsigned to signed
 *  s/pcimem64-minimum-address/reserved-memory-end/ per Michael suggestion

v2:
 *  use negative priority to map PCI address space under RAM memory
regions which allows simplify code by removing pci_hole 
pci_hole64 memory region aliases

Igor Mammedov (1):
  pc: add 'etc/reserved-memory-end' fw_cfg interface for SeaBIOS

Michael S. Tsirkin (1):
  pc: map PCI address space as catchall region for not mapped addresses

 hw/i386/pc.c  | 28 
 hw/i386/pc_piix.c |  2 --
 hw/pci-host/piix.c| 27 +--
 hw/pci-host/q35.c | 28 ++--
 include/hw/i386/pc.h  | 15 +++
 include/hw/pci-host/q35.h |  2 --
 6 files changed, 30 insertions(+), 72 deletions(-)

-- 
1.8.3.1

[Qemu-devel] [PATCH 1/2] pc: map PCI address space as catchall region for not mapped addresses

2013-11-12 Thread Igor Mammedov

From: Michael S. Tsirkin m...@redhat.com

With a help of negative memory region priority PCI address space
is mapped underneath RAM regions effectively catching every access
to addresses not mapped by any other region.
It simplifies PCI address space mapping into system address space.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Igor Mammedov imamm...@redhat.com
---
 hw/i386/pc.c  | 20 ++--
 hw/i386/pc_piix.c |  2 --
 hw/pci-host/piix.c| 26 --
 hw/pci-host/q35.c | 27 +--
 include/hw/i386/pc.h  | 14 ++
 include/hw/pci-host/q35.h |  2 --
 6 files changed, 17 insertions(+), 74 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 12c436e..6c82ada 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1093,21 +1093,13 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t 
below_4g_mem_size,
 return guest_info;
 }
 
-void pc_init_pci64_hole(PcPciInfo *pci_info, uint64_t pci_hole64_start,
-uint64_t pci_hole64_size)
+/* setup pci memory address space mapping into system address space */
+void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
+MemoryRegion *pci_address_space)
 {
-if ((sizeof(hwaddr) == 4) || (!pci_hole64_size)) {
-return;
-}
-/*
- * BIOS does not set MTRR entries for the 64 bit window, so no need to
- * align address to power of two.  Align address at 1G, this makes sure
- * it can be exactly covered with a PAT entry even when using huge
- * pages.
- */
-pci_info-w64.begin = ROUND_UP(pci_hole64_start, 0x1ULL  30);
-pci_info-w64.end = pci_info-w64.begin + pci_hole64_size;
-assert(pci_info-w64.begin = pci_info-w64.end);
+/* Set to lower priority than RAM */
+memory_region_add_subregion_overlap(system_memory, 0x0,
+pci_address_space, -1);
 }
 
 void pc_acpi_init(const char *default_dsdt)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4fdb7b6..23e4857 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -150,8 +150,6 @@ static void pc_init1(QEMUMachineInitArgs *args,
 if (pci_enabled) {
 pci_bus = i440fx_init(i440fx_state, piix3_devfn, isa_bus, gsi,
   system_memory, system_io, args-ram_size,
-  below_4g_mem_size,
-  0x1ULL - below_4g_mem_size,
   above_4g_mem_size,
   pci_memory, ram_memory);
 } else {
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index bad3953..5d4e290 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -102,8 +102,6 @@ struct PCII440FXState {
 MemoryRegion *system_memory;
 MemoryRegion *pci_address_space;
 MemoryRegion *ram_memory;
-MemoryRegion pci_hole;
-MemoryRegion pci_hole_64bit;
 PAMMemoryRegion pam_regions[13];
 MemoryRegion smram_region;
 uint8_t smm_enabled;
@@ -312,8 +310,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
 MemoryRegion *address_space_mem,
 MemoryRegion *address_space_io,
 ram_addr_t ram_size,
-hwaddr pci_hole_start,
-hwaddr pci_hole_size,
 ram_addr_t above_4g_mem_size,
 MemoryRegion *pci_address_space,
 MemoryRegion *ram_memory)
@@ -326,7 +322,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
 PCII440FXState *f;
 unsigned i;
 I440FXState *i440fx;
-uint64_t pci_hole64_size;
 
 dev = qdev_create(NULL, TYPE_I440FX_PCI_HOST_BRIDGE);
 s = PCI_HOST_BRIDGE(dev);
@@ -354,23 +349,10 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
 i440fx-pci_info.w32.begin = 0xe000;
 }
 
-memory_region_init_alias(f-pci_hole, OBJECT(d), pci-hole, 
f-pci_address_space,
- pci_hole_start, pci_hole_size);
-memory_region_add_subregion(f-system_memory, pci_hole_start, 
f-pci_hole);
-
-pci_hole64_size = pci_host_get_hole64_size(i440fx-pci_hole64_size);
-
-pc_init_pci64_hole(i440fx-pci_info, 0x1ULL + above_4g_mem_size,
-   pci_hole64_size);
-memory_region_init_alias(f-pci_hole_64bit, OBJECT(d), pci-hole64,
- f-pci_address_space,
- i440fx-pci_info.w64.begin,
- pci_hole64_size);
-if (pci_hole64_size) {
-memory_region_add_subregion(f-system_memory,
-i440fx-pci_info.w64.begin,
-f-pci_hole_64bit);
-}
+/* setup pci memory mapping */
+pc_pci_as_mapping_init(OBJECT(f), f-system_memory,
+   f-pci_address_space);
+
 memory_region_init_alias(f-smram_region, OBJECT(d), smram-region,

[Qemu-devel] [PATCH 2/2] pc: add 'etc/reserved-memory-end' fw_cfg interface for SeaBIOS

2013-11-12 Thread Igor Mammedov

'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
BARs mapping could safely start in high memory.

Allowing BIOS to start mapping 64-bit PCI BARs at address where it
wouldn't conflict with other mappings QEMU might place before it.

That permits QEMU to reserve extra address space before
64-bit PCI hole for memory hotplug.

Signed-off-by: Igor Mammedov imamm...@redhat.com
---
 hw/i386/pc.c | 14 +-
 hw/pci-host/piix.c   |  3 ++-
 hw/pci-host/q35.c|  3 ++-
 include/hw/i386/pc.h |  3 ++-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 6c82ada..b504047 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1095,11 +1095,23 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t 
below_4g_mem_size,
 
 /* setup pci memory address space mapping into system address space */
 void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
-MemoryRegion *pci_address_space)
+MemoryRegion *pci_address_space,
+uint64_t reserved_memory_end)
 {
+uint64_t *val;
+FWCfgState *fw_cfg = fw_cfg_find();
+
 /* Set to lower priority than RAM */
 memory_region_add_subregion_overlap(system_memory, 0x0,
 pci_address_space, -1);
+g_assert(fw_cfg);
+/*
+ *  Align address at 1G, this makes sure it can be exactly covered
+ *  with a PAT entry even when using huge pages.
+ */
+val = g_malloc(sizeof(*val));
+*val = cpu_to_le64(ROUND_UP(reserved_memory_end, 0x1ULL  30));
+fw_cfg_add_file(fw_cfg, etc/reserved-memory-end, val, sizeof(*val));
 }
 
 void pc_acpi_init(const char *default_dsdt)
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 5d4e290..16205e7 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -351,7 +351,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
 
 /* setup pci memory mapping */
 pc_pci_as_mapping_init(OBJECT(f), f-system_memory,
-   f-pci_address_space);
+   f-pci_address_space,
+   0x1ULL + above_4g_mem_size);
 
 memory_region_init_alias(f-smram_region, OBJECT(d), smram-region,
  f-pci_address_space, 0xa, 0x2);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index d1792de..1293353 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -353,7 +353,8 @@ static int mch_init(PCIDevice *d)
 
 /* setup pci memory mapping */
 pc_pci_as_mapping_init(OBJECT(mch), mch-system_memory,
-   mch-pci_address_space);
+   mch-pci_address_space,
+   0x1ULL + mch-above_4g_mem_size);
 
 /* smram */
 cpu_smm_register(mch_set_smm, mch);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 8b3be3c..2663046 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -130,7 +130,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t 
below_4g_mem_size,
 
 
 void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
-MemoryRegion *pci_address_space);
+MemoryRegion *pci_address_space,
+uint64_t reserved_memory_end);
 
 FWCfgState *pc_memory_init(MemoryRegion *system_memory,
const char *kernel_filename,
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 12 November 2013 13:12, Paolo Bonzini pbonz...@redhat.com wrote:
 If -O0 does not do that, let's move debug builds to -O1.

Isn't this going to sacrifice debuggability? That also seems
like the wrong tradeoff to me.

-- PMM

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Gleb Natapov

On Tue, Nov 12, 2013 at 02:57:49PM +0100, Paolo Bonzini wrote:
 Il 12/11/2013 14:23, Gleb Natapov ha scritto:
  If -O0 does not do that, let's move debug builds to -O1.
 
  Why not enable dce with -fdce?
 
 First, because clang doesn't have fine-tuned optimization options (at
 least I couldn't find them and -fdce doesn't work).
 
-O1 then for clang.

 Second, because most optimization options are no-ops at -O0 (try -fdce
 -fdump-tree-all with GCC.
 
Strange. Is this by design? We can do -O1 and bunch of -fno- to
disable most of optimizations -O1 enables, but this is ugly.
  
--
Gleb.

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 12 November 2013 14:09, Gleb Natapov g...@redhat.com wrote:
 On Tue, Nov 12, 2013 at 02:57:49PM +0100, Paolo Bonzini wrote:
 Il 12/11/2013 14:23, Gleb Natapov ha scritto:
  If -O0 does not do that, let's move debug builds to -O1.
 
  Why not enable dce with -fdce?

 First, because clang doesn't have fine-tuned optimization options (at
 least I couldn't find them and -fdce doesn't work).

 -O1 then for clang.

No thanks. --enable-debug should give the maximum
debuggability. That means -O0.

-- PMM

[Qemu-devel] ARM testing image

2013-11-12 Thread Xin Tong

Hi

I would like to know where i get can 32bit ARM image with appropriate
network driver.

thank you,
Xin

Re: [Qemu-devel] ARM testing image

2013-11-12 Thread Peter Maydell

On 12 November 2013 14:27, Xin Tong trent.t...@gmail.com wrote:
 Hi

 I would like to know where i get can 32bit ARM image

For which board?

 with appropriate network driver.

Appropriate for what?

http://people.debian.org/~aurel32/qemu/armel/
has a simple versatile kernel/initrd/filesystem
(it's a little old, though).

-- PMM

[Qemu-devel] [RFC V2 0/3] Giving names to BlockDriverState graph nodes

2013-11-12 Thread Benoît Canet

This series fix the issues of V1 and add the skeletton of a new QMP command
useful to communicate a bs graph to libvirt.
The minimum is exposed to the management to avoid binding it too much with QEMU.

Tell me if you think it's worth implementing.

Best regards

Benoît

v2:
   s/give/gives/ [Eric]
   s/see/sees/   [Eric]
   prevent duplicate node_name [Eric]
   drop undefined and use  [Eric, Kevin, Jeff]
   remove from graph list on bdrv_make_anon [Jeff]
   comment the two chains [Fam]
   Add new command stub to retrieve the graph from libvirt [Benoît]

Benoît Canet (3):
  block: Add bs-node_name to hold the name of a bs node of the bs
graph.
  block: Allow the user to define node-name option.
  qapi: Add skeletton of command to query a drive bs graph.

 block.c   | 88 +--
 block/blkverify.c |  2 +-
 block/iscsi.c |  2 +-
 block/vmdk.c  |  2 +-
 block/vvfat.c |  4 +--
 blockdev.c| 16 ++---
 hw/block/xen_disk.c   |  2 +-
 include/block/block.h |  3 +-
 include/block/block_int.h |  9 -
 qapi-schema.json  | 32 +
 qemu-img.c|  6 ++--
 qemu-io.c |  2 +-
 qemu-nbd.c|  2 +-
 13 files changed, 134 insertions(+), 36 deletions(-)

-- 
1.8.3.2

[Qemu-devel] [RFC V2 2/3] block: Allow the user to define node-name option.

2013-11-12 Thread Benoît Canet

Signed-off-by: Benoit Canet ben...@irqsave.net
---
 block.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index c397ee9..6690e3d 100644
--- a/block.c
+++ b/block.c
@@ -872,6 +872,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char 
*filename,
 const char *drvname;
 bool allow_protocol_prefix = false;
 Error *local_err = NULL;
+const char *node_name = NULL;
 int ret;
 
 /* NULL means an empty set of options */
@@ -879,7 +880,14 @@ int bdrv_file_open(BlockDriverState **pbs, const char 
*filename,
 options = qdict_new();
 }
 
-bs = bdrv_new(, );
+node_name = qdict_get_try_str(options, node-name);
+if (node_name  bdrv_find_node(node_name)) {
+error_setg(errp, Duplicate node name);
+return -EINVAL;
+}
+bs = bdrv_new(, node_name ? node_name : );
+qdict_del(options, node-name);
+
 bs-options = options;
 options = qdict_clone_shallow(options);
 
@@ -979,6 +987,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 int back_flags, ret;
 BlockDriver *back_drv = NULL;
 Error *local_err = NULL;
+const char *node_name = NULL;
 
 if (bs-backing_hd != NULL) {
 QDECREF(options);
@@ -1001,7 +1010,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
sizeof(backing_filename));
 }
 
-bs-backing_hd = bdrv_new(, );
+node_name = qdict_get_try_str(options, node-name);
+if (node_name  bdrv_find_node(node_name)) {
+error_setg(errp, Duplicate node name);
+QDECREF(options);
+return -EINVAL;
+}
+bs-backing_hd = bdrv_new(, node_name ? node_name : );
+qdict_del(options, node-name);
 
 if (bs-backing_format[0] != '\0') {
 back_drv = bdrv_find_format(bs-backing_format);
-- 
1.8.3.2

[Qemu-devel] [RFC V2 1/3] block: Add bs-node_name to hold the name of a bs node of the bs graph.

2013-11-12 Thread Benoît Canet

Add the minimum of code to prepare the followings patches.

Signed-off-by: Benoit Canet ben...@irqsave.net
---
 block.c   | 72 ++-
 block/blkverify.c |  2 +-
 block/iscsi.c |  2 +-
 block/vmdk.c  |  2 +-
 block/vvfat.c |  4 +--
 blockdev.c|  8 +++---
 hw/block/xen_disk.c   |  2 +-
 include/block/block.h |  3 +-
 include/block/block_int.h |  9 +-
 qemu-img.c|  6 ++--
 qemu-io.c |  2 +-
 qemu-nbd.c|  2 +-
 12 files changed, 78 insertions(+), 36 deletions(-)

diff --git a/block.c b/block.c
index fd05a80..c397ee9 100644
--- a/block.c
+++ b/block.c
@@ -89,6 +89,9 @@ static int coroutine_fn 
bdrv_co_do_write_zeroes(BlockDriverState *bs,
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
 QTAILQ_HEAD_INITIALIZER(bdrv_states);
 
+static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
+QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
+
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
 QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
@@ -318,14 +321,20 @@ void bdrv_register(BlockDriver *bdrv)
 }
 
 /* create a new block device (by default it is empty) */
-BlockDriverState *bdrv_new(const char *device_name)
+BlockDriverState *bdrv_new(const char *device_name, const char *node_name)
 {
 BlockDriverState *bs;
 
+assert(node_name);
+
 bs = g_malloc0(sizeof(BlockDriverState));
 pstrcpy(bs-device_name, sizeof(bs-device_name), device_name);
 if (device_name[0] != '\0') {
-QTAILQ_INSERT_TAIL(bdrv_states, bs, list);
+QTAILQ_INSERT_TAIL(bdrv_states, bs, device_list);
+}
+pstrcpy(bs-node_name, sizeof(bs-node_name), node_name);
+if (node_name[0] != '\0') {
+QTAILQ_INSERT_TAIL(graph_bdrv_states, bs, node_list);
 }
 bdrv_iostatus_disable(bs);
 notifier_list_init(bs-close_notifiers);
@@ -870,7 +879,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char 
*filename,
 options = qdict_new();
 }
 
-bs = bdrv_new();
+bs = bdrv_new(, );
 bs-options = options;
 options = qdict_clone_shallow(options);
 
@@ -992,7 +1001,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
sizeof(backing_filename));
 }
 
-bs-backing_hd = bdrv_new();
+bs-backing_hd = bdrv_new(, );
 
 if (bs-backing_format[0] != '\0') {
 back_drv = bdrv_find_format(bs-backing_format);
@@ -1062,7 +1071,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, 
QDict *options,
instead of opening 'filename' directly */
 
 /* if there is a backing file, use it */
-bs1 = bdrv_new();
+bs1 = bdrv_new(, );
 ret = bdrv_open(bs1, filename, NULL, 0, drv, local_err);
 if (ret  0) {
 bdrv_unref(bs1);
@@ -1495,7 +1504,7 @@ void bdrv_close_all(void)
 {
 BlockDriverState *bs;
 
-QTAILQ_FOREACH(bs, bdrv_states, list) {
+QTAILQ_FOREACH(bs, bdrv_states, device_list) {
 bdrv_close(bs);
 }
 }
@@ -1524,7 +1533,7 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
 static bool bdrv_requests_pending_all(void)
 {
 BlockDriverState *bs;
-QTAILQ_FOREACH(bs, bdrv_states, list) {
+QTAILQ_FOREACH(bs, bdrv_states, device_list) {
 if (bdrv_requests_pending(bs)) {
 return true;
 }
@@ -1554,7 +1563,7 @@ void bdrv_drain_all(void)
 /* FIXME: We do not have timer support here, so this is effectively
  * a busy wait.
  */
-QTAILQ_FOREACH(bs, bdrv_states, list) {
+QTAILQ_FOREACH(bs, bdrv_states, device_list) {
 if (bdrv_start_throttled_reqs(bs)) {
 busy = true;
 }
@@ -1565,14 +1574,18 @@ void bdrv_drain_all(void)
 }
 }
 
-/* make a BlockDriverState anonymous by removing from bdrv_state list.
+/* make a BlockDriverState anonymous by removing from bdrv_state and
+ * graph_bdrv_state list.
Also, NULL terminate the device_name to prevent double remove */
 void bdrv_make_anon(BlockDriverState *bs)
 {
 if (bs-device_name[0] != '\0') {
-QTAILQ_REMOVE(bdrv_states, bs, list);
+QTAILQ_REMOVE(bdrv_states, bs, device_list);
 }
 bs-device_name[0] = '\0';
+if (bs-node_name[0] != '\0') {
+QTAILQ_REMOVE(graph_bdrv_states, bs, node_list);
+}
 }
 
 static void bdrv_rebind(BlockDriverState *bs)
@@ -1626,7 +1639,12 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 /* keep the same entry in bdrv_states */
 pstrcpy(bs_dest-device_name, sizeof(bs_dest-device_name),
 bs_src-device_name);
-bs_dest-list = bs_src-list;
+bs_dest-device_list = bs_src-device_list;
+
+/* keep the same entry in graph_bdrv_states */
+pstrcpy(bs_dest-node_name, sizeof(bs_dest-node_name),
+bs_src-node_name);
+bs_dest-node_list   = bs_src-node_list;
 }

[Qemu-devel] [RFC V2 3/3] qapi: Add skeletton of command to query a drive bs graph.

2013-11-12 Thread Benoît Canet

---
 blockdev.c   |  8 
 qapi-schema.json | 32 
 2 files changed, 40 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index 911ee7e..bfaeda0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1938,6 +1938,14 @@ void qmp_drive_backup(const char *device, const char 
*target,
 }
 }
 
+BlockGraphNode *qmp_query_drive_graph(const char *device, Error **errp)
+{
+/* the implementation of this function would recurse through the
+ * BlockDriverState graph to build it's result
+ */
+return NULL;
+}
+
 #define DEFAULT_MIRROR_BUF_SIZE   (10  20)
 
 void qmp_drive_mirror(const char *device, const char *target,
diff --git a/qapi-schema.json b/qapi-schema.json
index 60f3fd1..bca3d5a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1983,6 +1983,38 @@
 { 'command': 'drive-backup', 'data': 'DriveBackup' }
 
 ##
+# @BlockGraphNode
+#
+# Information about a node of the block driver state graph
+#
+# @node-name: the name of the node in the graph
+#
+# @drv: the name of the block format used by this node as described in
+#   @BlockDeviceInfo.
+#
+# @children: a list of @BlockGraphNode being the children of this node
+#
+# Since 1.8
+##
+{ 'type': 'BlockGraphNode',
+  'data': { 'node-name': 'str', 'drv': 'str', 'children': ['BlockGraphNode'] } 
}
+
+##
+# @query-drive-graph
+#
+# Get the block driver states graph for a given drive
+#
+# @device: the name of the device to get the graph from
+#
+# Returns: the root @BlockGraphNode
+#
+# Since 1.8
+##
+{ 'command': 'query-drive-graph',
+  'data': { 'device': 'str' },
+  'returns': 'BlockGraphNode' }
+
+##
 # @drive-mirror
 #
 # Start mirroring a block device's writes to a new destination.
-- 
1.8.3.2

Re: [Qemu-devel] KVM call agenda for 2013-11-12

2013-11-12 Thread Juan Quintela

Juan Quintela quint...@redhat.com wrote:
 Hi

 Please, send any topic that you are interested in covering.

 Thanks, Juan.

As there are no topics,  call is cancelled.

Have a nice day,  Juan.

Re: [Qemu-devel] [PATCH v2 2/4] apic: QOM'ify apic icc_bus

2013-11-12 Thread Andreas Färber

Resending yesterday's message since it hasn't arrived on qemu-devel...

Am 11.11.2013 04:58, schrieb 赵小强:
 于 11/05/2013 04:51 PM, 赵小强 写道:
 于 2013年11月05日 16:25, Chen Fan 写道:
 On Tue, 2013-11-05 at 15:55 +0800, xiaoqiang zhao wrote:
[...]
 diff --git a/include/hw/cpu/icc_bus.h b/include/hw/cpu/icc_bus.h
 index b550070..b32a549 100644
 --- a/include/hw/cpu/icc_bus.h
 +++ b/include/hw/cpu/icc_bus.h
 @@ -66,7 +66,8 @@ typedef struct ICCDeviceClass {
   DeviceClass parent_class;
   /* public */
   -int (*init)(ICCDevice *dev); /* TODO replace with QOM realize */
 +/* QOM realize */
 +DeviceRealize realize;
 Maybe adding 'realize' in ICCDeviceClass is redundant, because parent
 class has included 'realize' field.

   } ICCDeviceClass;
 #define TYPE_ICC_DEVICE icc-device
 diff --git a/include/hw/i386/apic_internal.h
 b/include/hw/i386/apic_internal.h
 index 1b0a7fb..bd3a5fc 100644
 --- a/include/hw/i386/apic_internal.h
 +++ b/include/hw/i386/apic_internal.h
 @@ -79,8 +79,9 @@ typedef struct APICCommonState APICCommonState;
   typedef struct APICCommonClass
   {
   ICCDeviceClass parent_class;
 -
 -void (*init)(APICCommonState *s);
 +
 +/* QOM realize */
 +DeviceRealize realize;
 as above.

 Thanks,
 Chen
   void (*set_base)(APICCommonState *s, uint64_t val);
   void (*set_tpr)(APICCommonState *s, uint8_t val);
   uint8_t (*get_tpr)(APICCommonState *s);

 Thanks for your review!!
 Hi, Chen Fan:
 
 In my understanding, If we use only one 'realize'(which in
 'DeviceClass'), I think all the initialization work must be done in the
 leaf child.  if we add 'redundant' realize to each parent, then we can
 call the initialization chain from DeviceClass down to leaf child's
 parent, with each parent complete a bit further(of cause, a parent can
 do nothing and pass to it's child directly).
 
 What do you think?

Your analysis is correct. v2 is like I requested you to do it and v3
still does iirc, so let's stick with that for consistency. Bad word in
this context, I know. ;)

If you have some time, it would be nice if you could check whether these
devices (the non-KVM versions at least) are covered by make check. For
ICC bus I am certain that it is. In particular I'm wondering if we need
certain -cpu arguments to enable the *APIC devices and have the realize
functions actually exercised? (My assumption is no, but a confirmation
would save time.)

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 15:09, Gleb Natapov ha scritto:
 On Tue, Nov 12, 2013 at 02:57:49PM +0100, Paolo Bonzini wrote:
 Il 12/11/2013 14:23, Gleb Natapov ha scritto:
 If -O0 does not do that, let's move debug builds to -O1.

 Why not enable dce with -fdce?

 First, because clang doesn't have fine-tuned optimization options (at
 least I couldn't find them and -fdce doesn't work).

 -O1 then for clang.

We can even test in configure for the exact optimizations we want, in
fact.  But I think -O1 doesn't sacrifice debuggability that much:

   http://www.redhat.com/magazine/011sep05/features/gcc/

-O0 Barely any transformations are done to the code, just code
generation. At this level, the target code can be debugged with
no loss of information.

-O1 Some transformations that preserve execution ordering.
Debuggability of the generated code is hardly affected. User
variables should not disappear and function inlining is not
done.

-O2 More aggressive transformations that may affect execution
ordering and usually provide faster code. Debuggability may be
somewhat compromised by disappearing user variables and
function bodies.

Not very recent, but things have remained roughly the same and gdb also
has improved.

Hmm...  I just found out that GCC has a shiny new -Og option to
optimize for debuggability and still producing good code.  Using -Og
if it is present, and -O1 otherwise, seems like a good idea to me for
1.8.  For 1.7 it can just be -O1.

 Second, because most optimization options are no-ops at -O0 (try -fdce
 -fdump-tree-all with GCC.

 Strange. Is this by design? We can do -O1 and bunch of -fno- to
 disable most of optimizations -O1 enables, but this is ugly.

Yes, some data structures (I'm not up to date as to which exactly) are
not even built at -O0 to make compilation faster, and they're required
for most optimizations.

Paolo

Re: [Qemu-devel] [ANNOUNCE] Key Signing Party at KVM Forum 2013

2013-11-12 Thread Peter Maydell

On 24 July 2013 13:50, Anthony Liguori anth...@codemonkey.ws wrote:

 I will be hosting a key signing party at this year's KVM Forum.

 http://wiki.qemu.org/KeySigningParty2013

Can somebody provide known-good instructions for how to
sign and return keys? I looked on the web and found four
different possible ways to do this (most notably, there
seems to be a split between just send keys back to
the keyserver and email something to the keyowner),
and as usual gpg's UI is hopelessly opaque and confusing :-(

thanks
-- PMM

[Qemu-devel] [edk2 PATCH 1/1] OvmfPkg/AcpiPlatformDxe: download ACPI tables from QEMU

2013-11-12 Thread Laszlo Ersek

Qemu v1.7.0-rc0 features an ACPI linker/loader interface, available over
fw_cfg, written by Michael Tsirkin.

Qemu composes all ACPI tables on the host side, according to the target
hardware configuration, and makes the tables available to any guest
firmware over fw_cfg.

The feature moves the burden of keeping ACPI tables up-to-date from boot
firmware to qemu (which is the source of hardware configuration anyway).

This patch adds client code for this feature.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Laszlo Ersek ler...@redhat.com
---
 OvmfPkg/AcpiPlatformDxe/AcpiPlatform.h |   7 +-
 OvmfPkg/AcpiPlatformDxe/AcpiPlatform.c |  12 +-
 OvmfPkg/AcpiPlatformDxe/Qemu.c | 204 +
 3 files changed, 215 insertions(+), 8 deletions(-)

diff --git a/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.h 
b/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.h
index 21107cd..c643fa1 100644
--- a/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.h
+++ b/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.h
@@ -10,7 +10,7 @@
   THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN AS IS BASIS,
   WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
 
-**/ 
+**/
 
 #ifndef _ACPI_PLATFORM_H_INCLUDED_
 #define _ACPI_PLATFORM_H_INCLUDED_
@@ -61,5 +61,10 @@ InstallXenTables (
   IN   EFI_ACPI_TABLE_PROTOCOL   *AcpiProtocol
   );
 
+EFI_STATUS
+EFIAPI
+InstallQemuLinkedTables (
+  IN   EFI_ACPI_TABLE_PROTOCOL   *AcpiProtocol
+  );
 #endif
 
diff --git a/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.c 
b/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.c
index 6e0b610..084c393 100644
--- a/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.c
+++ b/OvmfPkg/AcpiPlatformDxe/AcpiPlatform.c
@@ -256,16 +256,14 @@ AcpiPlatformEntryPoint (
 
   if (XenDetected ()) {
 Status = InstallXenTables (AcpiTable);
-if (EFI_ERROR (Status)) {
-  Status = FindAcpiTablesInFv (AcpiTable);
-}
   } else {
+Status = InstallQemuLinkedTables (AcpiTable);
+  }
+
+  if (EFI_ERROR (Status)) {
 Status = FindAcpiTablesInFv (AcpiTable);
   }
-  if (EFI_ERROR (Status)) {
-return Status;
-  }
 
-  return EFI_SUCCESS;
+  return Status;
 }
 
diff --git a/OvmfPkg/AcpiPlatformDxe/Qemu.c b/OvmfPkg/AcpiPlatformDxe/Qemu.c
index 06bd463..c572f8a 100644
--- a/OvmfPkg/AcpiPlatformDxe/Qemu.c
+++ b/OvmfPkg/AcpiPlatformDxe/Qemu.c
@@ -515,3 +515,207 @@ QemuInstallAcpiTable (
);
 }
 
+
+/**
+  Download the ACPI table data file from QEMU and interpret it.
+
+  Starting with v1.7.0-rc0, QEMU provides the following three files for 1.7+
+  machine types:
+  - etc/acpi/rsdp
+  - etc/acpi/tables
+  - etc/table-loader
+
+  etc/acpi/rsdp and etc/acpi/tables are similar, they are only kept
+  separate because they have different allocation requirements in SeaBIOS.
+
+  Both of these fw_cfg files contain preformatted ACPI payload. etc/acpi/rsdp
+  contains only the RSDP table, while etc/acpi/tables contains all other
+  tables, concatenated.
+
+  The tables in these two files have been filled in by qemu, but two kinds of
+  fields are incomplete in each table: pointers to other tables, and checksums
+  (which depend on the pointers). The pointers are pre-initialized with
+  *relative* offsets, but their final values depend on where the actual files
+  will be placed in memory by the guest. That is, the pointer fields need to be
+  relocated (incremented) by the base addresses of where /etc/acpi/rsdp and
+  /etc/acpi/tables will be placed in guest memory.
+
+  This is where the third file, /etc/table-loader comes into the picture. It
+  is a linker/loader script that has several command types:
+
+One command type instructs the guest to download the other two files.
+
+Another command type instructs the guest to increment (absolutize) a
+pointer field (having a relative initial value) in some file, with the
+dynamic base address of the same (or another) file.
+
+The third command type instructs the guest to compute checksums over
+ranges and to store them.
+
+  In edk2, EFI_ACPI_TABLE_PROTOCOL knows about table relationships -- it
+  handles linkage automatically when a table is installed. The protocol takes
+  care of checksumming too. RSDP is installed automatically. Hence we only need
+  to care about the etc/acpi/tables file, determining the boundaries of each
+  table and installing it.
+
+  @param[in] AcpiProtocol  The ACPI table protocol used to install tables.
+
+  @retval  EFI_UNSUPPORTED   Firmware configuration is unavailable.
+
+  @retval  EFI_NOT_FOUND The host doesn't export the etc/acpi/tables
+ fw_cfg file.
+
+  @retval  EFI_OUT_OF_RESOURCES  Memory allocation failed.
+
+  @returnStatus codes returned by
+ AcpiProtocol-InstallAcpiTable().
+
+**/
+
+//
+// We'll be saving the keys of installed tables so that we can roll them back
+// in case of failure. 128 tables should be enough for anyone (TM).
+//

[Qemu-devel] [edk2 PATCH 0/1] OvmfPkg: grab ACPI tables from QEMU

2013-11-12 Thread Laszlo Ersek

I'll put only some testing and usage notes in the blurb. The commit
message for the patch and the comment block in the code go into some
implementation detail.

First, this patch has no guest-visible consequences when upgrading from
qemu v1.6 or earlier to v1.7 or later, as long as the qemu machine type
(-M option) is kept intact. Qemu only makes the underlying feature
available for v1.7+ machine types available.

(The machine type is saved in the libvirt XML, so libvirt users are
automatically protected. Direct qemu users need to specify a machine
type with an exact version number.)

Second, I tested the patch under the following circumstances:
- 3.10-based host kernel,
- qemu v1.7.0-rc0, with additional patches that shrink the pci-hole
  memory range to just below system.flash (see the parallel discussion
  on qemu-devel),
- in the guest, OVMF from SVN r14831, with the following additional
  patches pending on edk2-devel:
  - [edk2] [PATCH v2 00/10] OVMF support for QEMU's PC System Flash
  - [edk2] [PATCH v3 0/2] OvmfPkg: don't restore NvVars from disk if we
have working flash
  - (this patch)

Guest OSes:
(1) RHEL-7,
(2) Windows Server 2008 R2 SP1 (using additional CSM patches for OVMF),
(3) Windows Server 2012 R2

Details:

(1) RHEL-7

No noticeable changes in behavior.

I dumped and decompiled all ACPI tables before and after the patch. I
also saved the dmesgs. I can make them available at request.

I compared these. I noticed the following differences:

- OEM strings in tables (of course).

- Other small, apparently insignificant changes (for example, in the
  MADT the OVMF built-in tables set the ID of the IO-APIC to
  num_vcpus+1, while qemu exports it as a fixed 0 value, same as SeaBIOS
  used to prepare it). Similarly C[23] latencies in FADT (=FACP). OVMF
  also used to export a bunch of fixed fluff in FADT, which is now gone
  if we take the tables from qemu.

- Important stuff like PCI interrupt routing (_PRT, LNK[A-DS]) seems
  identical, according to the guest kernel dmesg.

- One small regression where OVMF used to provide RESET_REG in FADT
  (=FACP), but qemu currently doesn't say so. Should be fixed in qemu,
  probably.

- Giant leaps forward in the DSDT/SSDT area, especially wrt. PCI
  hotplug. (I didn't actually test PCI hotplug, that's for another
  time.)

- The S3/S4 packages are functionally identical. (Of course OVMF doesn't
  support S3 yet, that's a whole different story.)

- Qemu by default exports a HPET table. The base address described
  therein (0xFED0) matches the fixed values we have in
  OvmfPkg/PlatformPei/Platform.c and
  PcAtChipsetPkg/PcAtChipsetPkg.dec. I'll return to the HPET later.

- Boot progress bar and console (efifb) continue to work (tested with
  both 1GB and 5GB guest sizes).

(2) Windows Server 2008 R2 SP1 (using additional CSM patches for OVMF)

No changes that would have been visible to me as an interactive user.
Shutdown with the ACPI power button works.

(3) Windows Server 2012 R2

Installed a brand new VM for this guest.

It worked OK but at several points that all looked like reboot attempts
(even after installation, with the live OS) the VM was just waiting and
waiting for something to happen. I looked into the KVM trace as Gleb
had taught me, and I noticed accesses to the HPET. Which made me recall
(and google, and find) recommendations to disable the HPET in general
for virtual machines.

(Again, the OVMF built-in tables don't mention HPET at all, but qemu
does export one if the -no-hpet switch is absent.)

So, I added the corresponding stanza to my libvirt XML. It helped some,
but the problem didn't go away completely.

At this point the KVM trace overwhelmed me, and I just flipped the
machine type back to pc-i440fx-1.6. (I verified in the OVMF debug log
that this had the desired effect -- no tables from qemu.)

The hangs were gone. So I flipped the machine type to pc-i440fx-1.7
again, just to triple-check -- and I couldn't reproduce the hangs any
longer!

So, my testing in this part is inconclusive. The problem may or may not
be related to the ACPI tables; it might or might not be related to
recent changes in qemu itself, etc. Someone with more time and knowledge
than I have should look into the KVM trace.

The ACPI power button initiates the shutdown sequence.

Bonus testing: verified secure boot in this guest with the Microsoft
keys. Thanks to Jordan's series OVMF support for QEMU's PC System
Flash mentioned above, key enrollment is permanent now.

Thanks,
Laszlo

Laszlo Ersek (1):
  OvmfPkg/AcpiPlatformDxe: download ACPI tables from QEMU

 OvmfPkg/AcpiPlatformDxe/AcpiPlatform.h |   7 +-
 OvmfPkg/AcpiPlatformDxe/AcpiPlatform.c |  12 +-
 OvmfPkg/AcpiPlatformDxe/Qemu.c | 204 +
 3 files changed, 215 insertions(+), 8 deletions(-)

-- 
1.8.3.1

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 12 November 2013 14:57, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 12/11/2013 15:09, Gleb Natapov ha scritto:
 On Tue, Nov 12, 2013 at 02:57:49PM +0100, Paolo Bonzini wrote:
 Il 12/11/2013 14:23, Gleb Natapov ha scritto:
 If -O0 does not do that, let's move debug builds to -O1.

 Why not enable dce with -fdce?

 First, because clang doesn't have fine-tuned optimization options (at
 least I couldn't find them and -fdce doesn't work).

 -O1 then for clang.

 We can even test in configure for the exact optimizations we want, in
 fact.  But I think -O1 doesn't sacrifice debuggability that much:

I'm afraid I still don't see why you'd want to sacrifice it
at all, when the alternative is provide a three line stub
function in a file we already have all the build machinery
to compile in the config where it's needed. I just don't
see why you'd worry about the fact that there's no longer
a compile error if you try to call this obviously kvm
specific function in a non-kvm-enabled code path, when
we already have large numbers of kvm-specific functions
that have stubs (and when in general, eg QOM APIs, we seem
to be entirely happy to have things be runtime errors rather
than compile time). To me the benefit seems entirely on the
side of have standard compliant code rather than attempt
to find various odd workarounds for the fact that some
compilers will correctly complain about our code.

-- PMM

Re: [Qemu-devel] [ANNOUNCE] Key Signing Party at KVM Forum 2013

2013-11-12 Thread Gabriel L. Somlo

Peter,

On Tue, Nov 12, 2013 at 02:57:36PM +, Peter Maydell wrote:
 Can somebody provide known-good instructions for how to
 sign and return keys? I looked on the web and found four
 different possible ways to do this (most notably, there
 seems to be a split between just send keys back to
 the keyserver and email something to the keyowner),
 and as usual gpg's UI is hopelessly opaque and confusing :-(

I've pasted my key-signing bash script below. At the (few) key signing
parties I've been to, the idea was upload to keyserver as a personal
favor to those you already know and like, email signatures encrypted
with the recipient's key to those you've only just met at the party.

Assuming a text file with one key signature per line, the bits that
are commented out were used to import keys and display fingerprints
for comparison with the stuff we had printed on paper and verified at
the party. The uncommented bits will do the signature export,
encryption with the recipient's key, and emailing.

HTH,
--Gabriel

#!/bin/bash

for F in $(cat fingerprints.txt); do
  # receive keys matching ID $F:
  #gpg --recv-keys $F
  # list fingerprint for key matching ID $F:
  #gpg --fingerprint $F
  # sign key matching ID $F:
  #gpg --sign-key $F
  # send signature to recipient matching first uid, encrypted with recipient key
  E=$(gpg --list-key $F | grep ^uid | head -1 | sed 's/.*\(.*\).*/\1/')
  gpg --armor --export $F | gpg --armor --encrypt -r $F | \
mailx -r gso...@gmail.com -s the signature you requested (by $F) $E
  echo sent signature $F $E
done

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 16:13, Peter Maydell ha scritto:
 
  -O1 then for clang.
 
  We can even test in configure for the exact optimizations we want, in
  fact.  But I think -O1 doesn't sacrifice debuggability that much:
 I'm afraid I still don't see why you'd want to sacrifice it
 at all,

Is this FUD or do you have examples of bad debuggability of -O1 code?

Personally, I've not even used -O0 for several years.  -O2 debuggability
is still awful but has improved a lot.  If it's not enough, 99% of the
time it means that tracing or printf are a better tool for the bug.

 when the alternative is provide a three line stub
 function in a file we already have all the build machinery
 to compile in the config where it's needed. I just don't
 see why you'd worry about the fact that there's no longer
 a compile error if you try to call this obviously kvm
 specific function in a non-kvm-enabled code path, when
 we already have large numbers of kvm-specific functions
 that have stubs

Most of these stubs do _not_ abort(), they return a sensible error code
or should be no-ops in the non-KVM case.

 (and when in general, eg QOM APIs, we seem
 to be entirely happy to have things be runtime errors rather
 than compile time).

We're far from having consensus on that, indeed.

Paolo

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 12 November 2013 15:21, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 12/11/2013 16:13, Peter Maydell ha scritto:
 
  -O1 then for clang.
 
  We can even test in configure for the exact optimizations we want, in
  fact.  But I think -O1 doesn't sacrifice debuggability that much:
 I'm afraid I still don't see why you'd want to sacrifice it
 at all,

 Is this FUD or do you have examples of bad debuggability of -O1 code?

The clang manpage says specifically Note that Clang debug
information works best at -O0. , and I see no reason to
disbelieve it. In particular, they don't say we definitely
will never add an optimization to -O1 that makes the debug
info much worse.

-- PMM

Re: [Qemu-devel] [PATCH v3 1/2] linux-user: create target_structsheader to place ipc_perm and shmid_dss

2013-11-12 Thread Petar Jovanovic

ping
http://patchwork.ozlabs.org/patch/287211/
http://patchwork.ozlabs.org/patch/287213/

Riku?

Regards,
Petar

Re: [Qemu-devel] [ANNOUNCE] Key Signing Party at KVM Forum 2013

2013-11-12 Thread Eric Blake

On 11/12/2013 08:18 AM, Gabriel L. Somlo wrote:
 Peter,
 
 On Tue, Nov 12, 2013 at 02:57:36PM +, Peter Maydell wrote:
 Can somebody provide known-good instructions for how to
 sign and return keys? I looked on the web and found four
 different possible ways to do this (most notably, there
 seems to be a split between just send keys back to
 the keyserver and email something to the keyowner),
 and as usual gpg's UI is hopelessly opaque and confusing :-(
 
 I've pasted my key-signing bash script below. At the (few) key signing
 parties I've been to, the idea was upload to keyserver as a personal
 favor to those you already know and like, email signatures encrypted
 with the recipient's key to those you've only just met at the party.
 
 Assuming a text file with one key signature per line, the bits that
 are commented out were used to import keys and display fingerprints
 for comparison with the stuff we had printed on paper and verified at
 the party. The uncommented bits will do the signature export,
 encryption with the recipient's key, and emailing.

Similarly, here's some advice I've used after previous key-signing
parties; I personally like how 'pius' automates the sending of
signatures to other recipients.

On 10/19/2011 09:56 AM, Jim Meyering wrote:
 You may want to know which of our colleagues have found time
 to handle their side of the key-signing deal.

 There are two interesting sets:
  - who has signed your key (either they uploaded it themselves,
  or they sent it to you and you processed it: import and upload)
  - who has uploaded your signature of their key (assuming you signed
  and mailed it to them)

 We want the complement of each set to be empty.
 I.e., each participant should do both things.
 Run the following script to list those who have not yet found the time.

 If you get stuck, reply here or ping me on IRC and I'll try to help.
 As a reminder, the recommended signing procedure was described here,
 in the Signing GPG keys section:

[replacing private URL with its contents:]

 I have a slight preference for pius over caff:
 http://www.phildev.net/pius/
 so I use it in the example below: (download sources)
 http://sourceforge.net/projects/pgpius/files/pius/2.0.9/

 Once Markus and I verified fingerprints, I did the following:

 # Download Markus' public key.
 gpg --recv EB918653

 # Create and email per-ID-signatures to each of his email addresses:
 # I specified a well-configured MTA, so that pius didn't try to send
 # directly from my desktop.  It asks for a level; I choose 3.[*]
 ./pius --mail-host=GOOD_MTA --encrypt --no-pgp-mime \
   --mail=j...@meyering.net --signer=7FD9FCCB000B EB918653
 #     
 #  my email  my key   Markus' key

 To try it first, sending mail only to myself, I could do this,
 adding the --debug and --override-email=... options on the 2nd line:

 ./pius --mail-host=GOOD_MTA --encrypt --no-pgp-mime \
 --debug --override-email=j...@meyering.net \
   --mail=j...@meyering.net --signer=7FD9FCCB000B EB918653

 The former sent two messages to Markus, who has to follow the instructions
 included in each message: decrypt the attached signature, use gpg to
 import it, and then send his just-modified (new signature) key
 out to the key servers.  It sent two messages because Markus has two
 IDs (name/email pairs) on his key, and I opted to sign both of them:

 $ gpg --fingerprint EB918653
 pub   4096R/EB918653 2011-10-07
 Key fingerprint = 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653
 uid  Markus Armbruster arm...@redhat.com
 uid  Markus Armbruster arm...@pond.sub.org
 sub   4096R/26B7449C 2011-10-07

 So once Markus receives those two messages and does the
decrypt/import/send
 dance, only *then* do my signatures of his key appear on the public key
 servers.  Since they were encrypted and sent individually, they can appear
 in public only if Markus really does control both of those addresses at
 the time of signing.  IMHO, it's better to sign all IDs, as long as they
 look reasonable.

 Jim

 [*] pius asks Have you verified this user/key, and if so, what level do
 you want to sign at? (0/1/2/3/N/q) [default: N].  IMHO, it doesn't
 matter if you use 2 or 3.  Some tools don't even ask.

[resuming first email]


 -
 Save the script below as cross-sign and make it executable.
 Then you can run it with a single argument, your gpg key ID,
 to see the gaps in the WoT, just considering the participants
 in the recent kvm/virt-devel key signing:

 ./cross-sign YOUR_GPG_KEY_ID

 To see how things look using your own key-ring, run it like this:

 env use_temp_keyring=n ./cross-sign YOUR_GPG_KEY_ID

 The only reason it'd look different with your key-ring is if you had
 signed locally and forgotten to run gpg --send-key ID for

Re: [Qemu-devel] [ANNOUNCE] Key Signing Party at KVM Forum 2013

2013-11-12 Thread Peter Maydell

On 12 November 2013 15:42, Eric Blake ebl...@redhat.com wrote:
 I personally like how 'pius' automates the sending of
 signatures to other recipients.

I had a look at 'pius' since some of the signed-key
emails I've received used it; however I couldn't find
any way to make it write the emails to a file for
sending elsewhere (my machine with the gpg key has
no external SMTP access). Similarly, 'caff' claims
to support that but doesn't actually seem to in
practice.

-- PMM

Re: [Qemu-devel] [PATCH] e1000/rtl8139: update HMP NIC when every bit is written

2013-11-12 Thread Vlad Yasevich


On 11/10/2013 07:11 AM, Michael S. Tsirkin wrote:

On Fri, Nov 08, 2013 at 02:42:27PM -0500, Vlad Yasevich wrote:

What about this approach?  This only updates the monitory when all the
bits have been written to.

-vlad



Thanks!
Some comments below.


-- 8 --
Subject: [PATCH] e1000/rtl8139: update HMP NIC when every bit is written

We currently just update the HMP NIC info when the last bit of macaddr
is written. This assumes that guest driver will write all the macaddr
from bit 0 to bit 5 when it changes the macaddr, this is the current
behavior of linux driver (e1000/rtl8139cp), but we can't do this
assumption.


I would rather say it seems better not to make this assumption.
This does look somewhat safer than what Amos proposed.



The macaddr that is used for rx-filter will be updated when every bit
is changed. This patch updates the e1000/rtl8139 nic to update HMP NIC
info when every bit has been changed. It will be same as virtio-net.

Signed-off-by: Vlad Yasevich vyase...@redhat.com
---
  hw/net/e1000.c   | 17 -
  hw/net/rtl8139.c | 14 +-
  2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 8387443..a5967ed 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -149,6 +149,10 @@ typedef struct E1000State_st {
  #define E1000_FLAG_AUTONEG (1  E1000_FLAG_AUTONEG_BIT)
  #define E1000_FLAG_MIT (1  E1000_FLAG_MIT_BIT)
  uint32_t compat_flags;
+uint32_t mac_changed;


Hmm why uint32_t? uint8_t is enough here isn't?


Yes, that should be fine.  I'll fix and find a better spot to
put it. (may be a hole in the struct).



This new state has to be migrated then, and
we need to fallback to old behaviour if migrating to/from
an old version (see compat_flags for one way to
detect this compatibility mode).


Good point.  Didn't think of that...




+#define E1000_RA0_CHANGED 0
+#define E1000_RA1_CHANGED 1
+#define E1000_RA_ALL_CHANGED (E1000_RA0_CHANGED|E1000_RA1_CHANGED)


I don't get it. So E1000_RA_ALL_CHANGED is 0 | 1 == 1.
it looks like the trigger is when E1000_RA1_CHANGED
so that's more or less equivalent.


Goofed on the bits.  That should have been (10) and (11).




  } E1000State;

  #define TYPE_E1000 e1000
@@ -402,6 +406,7 @@ static void e1000_reset(void *opaque)
  d-mac_reg[RA + 1] |= (i  2) ? macaddr[i + 4]  (8 * i) : 0;
  }
  qemu_format_nic_info_str(qemu_get_queue(d-nic), macaddr);
+d-mac_changed = 0;
  }

  static void
@@ -1106,10 +,20 @@ mac_writereg(E1000State *s, int index, uint32_t val)

  s-mac_reg[index] = val;

-if (index == RA + 1) {
+switch (index) {
+case RA:
+s-mac_changed |= E1000_RA0_CHANGED;
+break;
+case (RA + 1):
+s-mac_changed |= E1000_RA1_CHANGED;
+break;
+}
+
+if (s-mac_changed ==  E1000_RA_ALL_CHANGED) {


Some whitespace damage here.


  macaddr[0] = cpu_to_le32(s-mac_reg[RA]);
  macaddr[1] = cpu_to_le32(s-mac_reg[RA + 1]);
  qemu_format_nic_info_str(qemu_get_queue(s-nic), (uint8_t *)macaddr);
+   s-mac_changed = 0;


Need to use spaces for indent in qemu.


  }
  }

diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c


Best to split out in a separate commit.


OK




index 5329f44..6dac10c 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -476,6 +476,8 @@ typedef struct RTL8139State {

  uint16_t CpCmd;
  uint8_t  TxThresh;
+uint8_t  mac_changed;


This new state has to be migrated then, and
we need to fallback to old behaviour if migrating to/from
an old version.


+#define RTL8139_MAC_CHANGED_ALL 0x3F

  NICState *nic;
  NICConf conf;
@@ -1215,6 +1217,7 @@ static void rtl8139_reset(DeviceState *d)
  /* restore MAC address */
  memcpy(s-phys, s-conf.macaddr.a, 6);
  qemu_format_nic_info_str(qemu_get_queue(s-nic), s-phys);
+s-mac_changed = 0;

  /* reset interrupt mask */
  s-IntrStatus = 0;
@@ -2741,12 +2744,13 @@ static void rtl8139_io_writeb(void *opaque, uint8_t 
addr, uint32_t val)

  switch (addr)
  {
-case MAC0 ... MAC0+4:
-s-phys[addr - MAC0] = val;
-break;
-case MAC0+5:
+case MAC0 ... MAC0+5:
  s-phys[addr - MAC0] = val;
-qemu_format_nic_info_str(qemu_get_queue(s-nic), s-phys);
+s-mac_changed |= (1  (addr - MAC0));


Better drop the external () here otherwise it starts looking like lisp :)


Heh.  OK.

Thanks
-vlad




+if (s-mac_changed == RTL8139_MAC_CHANGED_ALL) {
+qemu_format_nic_info_str(qemu_get_queue(s-nic), s-phys);
+s-mac_changed = 0;
+}
  break;
  case MAC0+6 ... MAC0+7:
  /* reserved */
--
1.8.4.2

[Qemu-devel] [PATCH 00/11] block scsi: write_zeroes support through the whole stack

2013-11-12 Thread Paolo Bonzini

This series fixes the implementation of WRITE SAME in the SCSI emulation
layer.  On top of this, it ensures that zero writes from the guest can
be offloaded to the host device or filesystem if that's supported.
This is a relatively important part of the thin provisioning feature,
and builds on the unmap flag to write_zeroes, recently added by Peter
Lieven.  The feature works with iscsi, block device and filesystem
targets.

The block layer patches are 1-3 (adding prerequisites for using the
feature in scsi-disk) and 7-11 (which support the feature on non-iscsi
targets).

Paolo Bonzini (11):
  block: generalize BlockLimits handling to cover bdrv_aio_discard too
  block: add flags to BlockRequest
  block: add bdrv_aio_write_zeroes
  scsi-disk: catch write protection errors in UNMAP
  scsi-disk: reject ANCHOR=1 for UNMAP and WRITE SAME commands
  scsi-disk: correctly implement WRITE SAME
  block: handle ENOTSUP from discard in generic code
  raw-posix: implement write_zeroes with MAY_UNMAP for files
  raw-posix: implement write_zeroes with MAY_UNMAP for block devices
  raw-posix: add support for write_zeroes on XFS and block devices
  qemu-iotests: 033 is fast

 block.c  | 108 +
 block/raw-aio.h  |   3 +-
 block/raw-posix.c| 173 +--
 hw/scsi/scsi-disk.c  | 154 ++---
 include/block/block.h|   4 ++
 tests/qemu-iotests/group |   2 +-
 trace-events |   2 +
 7 files changed, 369 insertions(+), 77 deletions(-)

-- 
1.8.4.2

[Qemu-devel] [PATCH 06/11] scsi-disk: correctly implement WRITE SAME

2013-11-12 Thread Paolo Bonzini

The WRITE SAME command is implemented incorrectly.  WRITE SAME with the
UNMAP bit set should _not_ unmap the sectors unless the written data
matches the payload of the WRITE SAME command; currently, QEMU is not
looking at the payload at all.

Thus, fetch the data to be written from the input buffer.  If it is
all zeroes, we can use the write_zeroes call (possibly with the new
MAY_UNMAP flag).  Otherwise, do as many write cycles as needed, covering
512k at a time to avoid allocating lots of memory for the bounce
buffer.

Strictly speaking, this is still incorrect because a zero cluster should
only be written if the MAY_UNMAP flag is set.  But this is a bug in the
block layer, not here.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 hw/scsi/scsi-disk.c | 140 +++-
 1 file changed, 116 insertions(+), 24 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 7e29760..cd5116c 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -41,6 +41,7 @@ do { printf(scsi-disk:  fmt , ## __VA_ARGS__); } while (0)
 #include scsi/sg.h
 #endif
 
+#define SCSI_WRITE_SAME_MAX 524288
 #define SCSI_DMA_BUF_SIZE   131072
 #define SCSI_MAX_INQUIRY_LEN256
 #define SCSI_MAX_MODE_LEN   256
@@ -634,6 +635,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, 
uint8_t *outbuf)
 buflen = 0x40;
 memset(outbuf + 4, 0, buflen - 4);
 
+outbuf[4] = 0x1; /* wsnz */
+
 /* optimal transfer length granularity */
 outbuf[6] = (min_io_size  8)  0xff;
 outbuf[7] = min_io_size  0xff;
@@ -1589,6 +1592,111 @@ invalid_field:
 scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
 }
 
+typedef struct WriteSameCBData {
+SCSIDiskReq *r;
+int64_t sector;
+int nb_sectors;
+QEMUIOVector qiov;
+struct iovec iov;
+} WriteSameCBData;
+
+static void scsi_write_same_complete(void *opaque, int ret)
+{
+WriteSameCBData *data = opaque;
+SCSIDiskReq *r = data-r;
+SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r-req.dev);
+
+assert(r-req.aiocb != NULL);
+r-req.aiocb = NULL;
+bdrv_acct_done(s-qdev.conf.bs, r-acct);
+if (r-req.io_canceled) {
+goto done;
+}
+
+if (ret  0) {
+if (scsi_handle_rw_error(r, -ret)) {
+goto done;
+}
+}
+
+data-nb_sectors -= data-iov.iov_len / 512;
+data-sector += data-iov.iov_len / 512;
+data-iov.iov_len = MIN(data-nb_sectors * 512, data-iov.iov_len);
+if (data-iov.iov_len) {
+bdrv_acct_start(s-qdev.conf.bs, r-acct, data-iov.iov_len, 
BDRV_ACCT_WRITE);
+r-req.aiocb = bdrv_aio_writev(s-qdev.conf.bs, data-sector,
+   data-qiov, data-iov.iov_len / 512,
+   scsi_write_same_complete, r);
+return;
+}
+
+scsi_req_complete(r-req, GOOD);
+
+done:
+if (!r-req.io_canceled) {
+scsi_req_unref(r-req);
+}
+g_free (data-iov.iov_base);
+g_free (data);
+}
+
+static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
+{
+SCSIRequest *req = r-req;
+SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req-dev);
+uint32_t nb_sectors = scsi_data_cdb_length(r-req.cmd.buf);
+WriteSameCBData *data;
+uint8_t *buf;
+int i;
+
+/* Fail if PBDATA=1 or LBDATA=1 or ANCHOR=1.  */
+if (nb_sectors == 0 || (req-cmd.buf[1]  0x16)) {
+scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
+return;
+}
+
+if (bdrv_is_read_only(s-qdev.conf.bs)) {
+scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
+return;
+}
+if (!check_lba_range(s, r-req.cmd.lba, nb_sectors)) {
+scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
+return;
+}
+
+if (buffer_is_zero(inbuf, s-qdev.blocksize)) {
+int flags = (req-cmd.buf[1]  0x8) ? BDRV_REQ_MAY_UNMAP : 0;
+
+/* The request is used as the AIO opaque value, so add a ref.  */
+scsi_req_ref(r-req);
+bdrv_acct_start(s-qdev.conf.bs, r-acct, nb_sectors * 
s-qdev.blocksize,
+BDRV_ACCT_WRITE);
+r-req.aiocb = bdrv_aio_write_zeroes(s-qdev.conf.bs,
+ r-req.cmd.lba * 
(s-qdev.blocksize / 512),
+ nb_sectors * (s-qdev.blocksize / 
512),
+ flags, scsi_aio_complete, r);
+return;
+}
+
+data = g_new0(WriteSameCBData, 1);
+data-r = r;
+data-sector = r-req.cmd.lba * (s-qdev.blocksize / 512);
+data-nb_sectors = nb_sectors * (s-qdev.blocksize / 512);
+data-iov.iov_len = MIN(data-nb_sectors * 512, SCSI_WRITE_SAME_MAX);
+data-iov.iov_base = buf = g_malloc(data-iov.iov_len);
+qemu_iovec_init_external(data-qiov, data-iov, 1);
+
+for (i = 0; i  data-iov.iov_len; i += s-qdev.blocksize) {
+

[Qemu-devel] [PATCH 04/11] scsi-disk: catch write protection errors in UNMAP

2013-11-12 Thread Paolo Bonzini

This is the same that is already done for WRITE SAME.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 hw/scsi/scsi-disk.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 74e6a14..4138268 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -1543,6 +1543,7 @@ done:
 
 static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
 {
+SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r-req.dev);
 uint8_t *p = inbuf;
 int len = r-req.cmd.xfer;
 UnmapCBData *data;
@@ -1560,6 +1561,11 @@ static void scsi_disk_emulate_unmap(SCSIDiskReq *r, 
uint8_t *inbuf)
 goto invalid_param_len;
 }
 
+if (bdrv_is_read_only(s-qdev.conf.bs)) {
+scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
+return;
+}
+
 data = g_new0(UnmapCBData, 1);
 data-r = r;
 data-inbuf = p[8];
-- 
1.8.4.2

[Qemu-devel] [PATCH 01/11] block: generalize BlockLimits handling to cover bdrv_aio_discard too

2013-11-12 Thread Paolo Bonzini

bdrv_co_discard is only covering drivers which have a .bdrv_co_discard()
implementation, but not those with .bdrv_aio_discard(). Not very nice,
and easy to avoid.

Suggested-by: Kevin Wolf kw...@redhat.com
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block.c | 80 +
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/block.c b/block.c
index aba6a19..ba6872e 100644
--- a/block.c
+++ b/block.c
@@ -4281,6 +4281,8 @@ static void coroutine_fn bdrv_discard_co_entry(void 
*opaque)
 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
  int nb_sectors)
 {
+int max_discard;
+
 if (!bs-drv) {
 return -ENOMEDIUM;
 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
@@ -4298,55 +4300,55 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, 
int64_t sector_num,
 return 0;
 }
 
-if (bs-drv-bdrv_co_discard) {
-int max_discard = bs-bl.max_discard ?
-  bs-bl.max_discard : MAX_DISCARD_DEFAULT;
+if (!bs-drv-bdrv_co_discard  !bs-drv-bdrv_aio_discard) {
+return 0;
+}
 
-while (nb_sectors  0) {
-int ret;
-int num = nb_sectors;
+max_discard = bs-bl.max_discard ?  bs-bl.max_discard : 
MAX_DISCARD_DEFAULT;
+while (nb_sectors  0) {
+int ret;
+int num = nb_sectors;
 
-/* align request */
-if (bs-bl.discard_alignment 
-num = bs-bl.discard_alignment 
-sector_num % bs-bl.discard_alignment) {
-if (num  bs-bl.discard_alignment) {
-num = bs-bl.discard_alignment;
-}
-num -= sector_num % bs-bl.discard_alignment;
+/* align request */
+if (bs-bl.discard_alignment 
+num = bs-bl.discard_alignment 
+sector_num % bs-bl.discard_alignment) {
+if (num  bs-bl.discard_alignment) {
+num = bs-bl.discard_alignment;
 }
+num -= sector_num % bs-bl.discard_alignment;
+}
 
-/* limit request size */
-if (num  max_discard) {
-num = max_discard;
-}
+/* limit request size */
+if (num  max_discard) {
+num = max_discard;
+}
 
+if (bs-drv-bdrv_co_discard) {
 ret = bs-drv-bdrv_co_discard(bs, sector_num, num);
-if (ret) {
-return ret;
+} else {
+BlockDriverAIOCB *acb;
+CoroutineIOCompletion co = {
+.coroutine = qemu_coroutine_self(),
+};
+
+acb = bs-drv-bdrv_aio_discard(bs, sector_num, nb_sectors,
+bdrv_co_io_em_complete, co);
+if (acb == NULL) {
+return -EIO;
+} else {
+qemu_coroutine_yield();
+ret = co.ret;
 }
-
-sector_num += num;
-nb_sectors -= num;
 }
-return 0;
-} else if (bs-drv-bdrv_aio_discard) {
-BlockDriverAIOCB *acb;
-CoroutineIOCompletion co = {
-.coroutine = qemu_coroutine_self(),
-};
-
-acb = bs-drv-bdrv_aio_discard(bs, sector_num, nb_sectors,
-bdrv_co_io_em_complete, co);
-if (acb == NULL) {
-return -EIO;
-} else {
-qemu_coroutine_yield();
-return co.ret;
+if (ret) {
+return ret;
 }
-} else {
-return 0;
+
+sector_num += num;
+nb_sectors -= num;
 }
+return 0;
 }
 
 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
-- 
1.8.4.2

[Qemu-devel] [PATCH 10/11] raw-posix: add support for write_zeroes on XFS and block devices

2013-11-12 Thread Paolo Bonzini

The code is similar to the implementation of discard and write_zeroes
with UNMAP.  There is no fallocate flag for this operation (yet?)
so we must write XFS-specific code like xfs_discard.

With an image stored on XFS, doing this in the guest:

# sg_write_same --in /dev/zero --num=256 --lba=0 /dev/sda

will give qemu-img map output similar to the following:

{ start: 0, length: 131072, depth: 0, zero: true, data: true,
  offset: 0}

where the unwritten extents are exposed as a preallocated zero area.

The stale page cache problem can be reproduced as follows:

# modprobe scsi-debug lbpws=1 lbprz=1
# ./qemu-io /dev/sdXX
qemu-io write -P 0xcc 0 2M
qemu-io write -z 0 1M
qemu-io read -P 0x00 0 512
Pattern verification failed at offset 0, 512 bytes
qemu-io read -v 0 512
:  cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc  
...

# ./qemu-io --cache=none /dev/sdXX
qemu-io write -P 0xcc 0 2M
qemu-io write -z 0 1M
qemu-io read -P 0x00 0 512
qemu-io read -v 0 512
:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
...

And similarly with discard instead of write -z.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block/raw-aio.h   |  3 +-
 block/raw-posix.c | 87 ++-
 2 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/block/raw-aio.h b/block/raw-aio.h
index c61f159..7ad0a8a 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -21,9 +21,10 @@
 #define QEMU_AIO_IOCTL0x0004
 #define QEMU_AIO_FLUSH0x0008
 #define QEMU_AIO_DISCARD  0x0010
+#define QEMU_AIO_WRITE_ZEROES 0x0020
 #define QEMU_AIO_TYPE_MASK \
 (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
- QEMU_AIO_DISCARD)
+ QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
 
 /* AIO flags */
 #define QEMU_AIO_MISALIGNED   0x1000
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 5cb46f1..6515def 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -142,6 +142,7 @@ typedef struct BDRVRawState {
 bool is_xfs : 1;
 #endif
 bool has_discard : 1;
+bool has_write_zeroes : 1;
 bool discard_zeroes : 1;
 } BDRVRawState;
 
@@ -327,6 +328,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 #endif
 
 s-has_discard = true;
+s-has_write_zeroes = true;
 
 if (fstat(s-fd, st)  0) {
 error_setg_errno(errp, errno, Could not stat file);
@@ -335,20 +337,21 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 if (S_ISREG(st.st_mode)) {
 s-discard_zeroes = true;
 }
-#ifdef BLKDISCARDZEROES
 if (S_ISBLK(st.st_mode)) {
+#ifdef BLKDISCARDZEROES
 unsigned int arg;
 if (ioctl(s-fd, BLKDISCARDZEROES, arg) == 0  arg) {
 s-discard_zeroes = true;
 }
-}
 #endif
 #ifdef CONFIG_LINUX
/* On Linux 3.10, BLKDISCARD leaves stale data in the page cache.  Do
 * not rely on the contents of discarded blocks unless using O_DIRECT.
+* Same for BLKZEROOUT.
 */
 if (!(bs-open_flags  BDRV_O_NOCACHE)) {
 s-discard_zeroes = false;
+s-has_write_zeroes = false;
 }
 #endif
 }
@@ -704,6 +707,23 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
 }
 
 #ifdef CONFIG_XFS
+static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
+{
+struct xfs_flock64 fl;
+
+memset(fl, 0, sizeof(fl));
+fl.l_whence = SEEK_SET;
+fl.l_start = offset;
+fl.l_len = bytes;
+
+if (xfsctl(NULL, s-fd, XFS_IOC_ZERO_RANGE, fl)  0) {
+DEBUG_BLOCK_PRINT(cannot write zero range (%s)\n, strerror(errno));
+return -errno;
+}
+
+return 0;
+}
+
 static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
 {
 struct xfs_flock64 fl;
@@ -722,6 +742,42 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, 
uint64_t bytes)
 }
 #endif
 
+static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+{
+int ret = -EOPNOTSUPP;
+BDRVRawState *s = aiocb-bs-opaque;
+
+if (s-has_write_zeroes == 0) {
+return -ENOTSUP;
+}
+
+if (aiocb-aio_type  QEMU_AIO_BLKDEV) {
+#ifdef BLKZEROOUT
+do {
+uint64_t range[2] = { aiocb-aio_offset, aiocb-aio_nbytes };
+if (ioctl(aiocb-aio_fildes, BLKZEROOUT, range) == 0) {
+return 0;
+}
+} while (errno == EINTR);
+
+ret = -errno;
+#endif
+} else {
+#ifdef CONFIG_XFS
+if (s-is_xfs) {
+return xfs_write_zeroes(s, aiocb-aio_offset, aiocb-aio_nbytes);
+}
+#endif
+}
+
+if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
+ret == -ENOTTY) {
+s-has_write_zeroes = false;
+ret = -ENOTSUP;
+}
+return ret;
+}
+
 static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
 {
 int ret = -EOPNOTSUPP;
@@ -806,6 +862,9 @@

[Qemu-devel] [PATCH 07/11] block: handle ENOTSUP from discard in generic code

2013-11-12 Thread Paolo Bonzini

Similar to write_zeroes, let the generic code receive a ENOTSUP for
discard operations.  Since bdrv_discard has advisory semantics,
we can just swallow the error.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block.c   |  2 +-
 block/raw-posix.c | 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index f195a64..1e17853 100644
--- a/block.c
+++ b/block.c
@@ -4357,7 +4357,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, 
int64_t sector_num,
 ret = co.ret;
 }
 }
-if (ret) {
+if (ret  ret != -ENOTSUP) {
 return ret;
 }
 
diff --git a/block/raw-posix.c b/block/raw-posix.c
index f6d48bb..27fe47d 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -324,10 +324,10 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 }
 #endif
 
-s-has_discard = 1;
+s-has_discard = true;
 #ifdef CONFIG_XFS
 if (platform_test_xfs_fd(s-fd)) {
-s-is_xfs = 1;
+s-is_xfs = true;
 }
 #endif
 
@@ -699,8 +699,8 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
 int ret = -EOPNOTSUPP;
 BDRVRawState *s = aiocb-bs-opaque;
 
-if (s-has_discard == 0) {
-return 0;
+if (!s-has_discard) {
+return -ENOTSUP;
 }
 
 if (aiocb-aio_type  QEMU_AIO_BLKDEV) {
@@ -735,8 +735,8 @@ static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
 
 if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
 ret == -ENOTTY) {
-s-has_discard = 0;
-ret = 0;
+s-has_discard = false;
+ret = -ENOTSUP;
 }
 return ret;
 }
-- 
1.8.4.2

[Qemu-devel] [PATCH 03/11] block: add bdrv_aio_write_zeroes

2013-11-12 Thread Paolo Bonzini

This will be used by the SCSI layer.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block.c   | 11 +++
 include/block/block.h |  3 +++
 trace-events  |  1 +
 3 files changed, 15 insertions(+)

diff --git a/block.c b/block.c
index 7ea2361..f195a64 100644
--- a/block.c
+++ b/block.c
@@ -3663,6 +3663,17 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, 
int64_t sector_num,
  cb, opaque, true);
 }
 
+BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
+int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
+BlockDriverCompletionFunc *cb, void *opaque)
+{
+trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
+
+return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
+ BDRV_REQ_ZERO_WRITE | flags,
+ cb, opaque, true);
+}
+
 
 typedef struct MultiwriteCB {
 int error;
diff --git a/include/block/block.h b/include/block/block.h
index 703d875..4967ed2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -216,6 +216,9 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
 int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags);
+BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, int64_t 
sector_num,
+int nb_sectors, BdrvRequestFlags flags,
+BlockDriverCompletionFunc *cb, void 
*opaque);
 int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
 int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
diff --git a/trace-events b/trace-events
index 8695e9e..96b3974 100644
--- a/trace-events
+++ b/trace-events
@@ -60,6 +60,7 @@ bdrv_aio_discard(void *bs, int64_t sector_num, int 
nb_sectors, void *opaque) bs
 bdrv_aio_flush(void *bs, void *opaque) bs %p opaque %p
 bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) bs 
%p sector_num %PRId64 nb_sectors %d opaque %p
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) 
bs %p sector_num %PRId64 nb_sectors %d opaque %p
+bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int nb_sectors, int flags, 
void *opaque) bs %p sector_num %PRId64 nb_sectors %d flags %d opaque %p
 bdrv_lock_medium(void *bs, bool locked) bs %p locked %d
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) bs %p sector_num 
%PRId64 nb_sectors %d
 bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) bs %p 
sector_num %PRId64 nb_sectors %d
-- 
1.8.4.2

[Qemu-devel] [PATCH 08/11] raw-posix: implement write_zeroes with MAY_UNMAP for files

2013-11-12 Thread Paolo Bonzini

Writing zeroes to a file can be done by punching a hole if MAY_UNMAP
is set.

Note that in this case handle_aiocb_discard's ENOTSUP return code
is not ignored, but makes the block layer fall back to the generic
implementation.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block/raw-posix.c | 64 ++-
 trace-events  |  1 +
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 27fe47d..830e109 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -142,6 +142,7 @@ typedef struct BDRVRawState {
 bool is_xfs : 1;
 #endif
 bool has_discard : 1;
+bool discard_zeroes : 1;
 } BDRVRawState;
 
 typedef struct BDRVRawReopenState {
@@ -283,6 +284,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 Error *local_err = NULL;
 const char *filename;
 int fd, ret;
+struct stat st;
 
 opts = qemu_opts_create_nofail(raw_runtime_opts);
 qemu_opts_absorb_qdict(opts, options, local_err);
@@ -325,6 +327,15 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 #endif
 
 s-has_discard = true;
+
+if (fstat(s-fd, st)  0) {
+error_setg_errno(errp, errno, Could not stat file);
+goto fail;
+}
+if (S_ISREG(st.st_mode)) {
+s-discard_zeroes = true;
+}
+
 #ifdef CONFIG_XFS
 if (platform_test_xfs_fd(s-fd)) {
 s-is_xfs = true;
@@ -788,6 +799,29 @@ static int aio_worker(void *arg)
 return ret;
 }
 
+static int paio_submit_co(BlockDriverState *bs, int fd,
+int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+int type)
+{
+RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ThreadPool *pool;
+
+acb-bs = bs;
+acb-aio_type = type;
+acb-aio_fildes = fd;
+
+if (qiov) {
+acb-aio_iov = qiov-iov;
+acb-aio_niov = qiov-niov;
+}
+acb-aio_nbytes = nb_sectors * 512;
+acb-aio_offset = sector_num * 512;
+
+trace_paio_submit_co(sector_num, nb_sectors, type);
+pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+return thread_pool_submit_co(pool, aio_worker, acb);
+}
+
 static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -1200,6 +1234,31 @@ static coroutine_fn BlockDriverAIOCB 
*raw_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD);
 }
 
+static int coroutine_fn raw_co_write_zeroes(
+BlockDriverState *bs, int64_t sector_num,
+int nb_sectors, BdrvRequestFlags flags)
+{
+BDRVRawState *s = bs-opaque;
+
+if (!(flags  BDRV_REQ_MAY_UNMAP)) {
+return -ENOTSUP;
+}
+if (!s-discard_zeroes) {
+return -ENOTSUP;
+}
+return paio_submit_co(bs, s-fd, sector_num, NULL, nb_sectors,
+  QEMU_AIO_DISCARD);
+}
+
+static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+BDRVRawState *s = bs-opaque;
+
+bdi-unallocated_blocks_are_zero = s-discard_zeroes;
+bdi-can_write_zeroes_with_unmap = s-discard_zeroes;
+return 0;
+}
+
 static QEMUOptionParameter raw_create_options[] = {
 {
 .name = BLOCK_OPT_SIZE,
@@ -1223,6 +1282,7 @@ static BlockDriver bdrv_file = {
 .bdrv_create = raw_create,
 .bdrv_has_zero_init = bdrv_has_zero_init_1,
 .bdrv_co_get_block_status = raw_co_get_block_status,
+.bdrv_co_write_zeroes = raw_co_write_zeroes,
 
 .bdrv_aio_readv = raw_aio_readv,
 .bdrv_aio_writev = raw_aio_writev,
@@ -1231,6 +1291,7 @@ static BlockDriver bdrv_file = {
 
 .bdrv_truncate = raw_truncate,
 .bdrv_getlength = raw_getlength,
+.bdrv_get_info = raw_get_info,
 .bdrv_get_allocated_file_size
 = raw_get_allocated_file_size,
 
@@ -1586,6 +1647,7 @@ static BlockDriver bdrv_host_device = {
 
 .bdrv_truncate  = raw_truncate,
 .bdrv_getlength= raw_getlength,
+.bdrv_get_info = raw_get_info,
 .bdrv_get_allocated_file_size
 = raw_get_allocated_file_size,
 
@@ -1715,7 +1777,7 @@ static BlockDriver bdrv_host_floppy = {
 .bdrv_aio_flush= raw_aio_flush,
 
 .bdrv_truncate  = raw_truncate,
-.bdrv_getlength  = raw_getlength,
+.bdrv_getlength = raw_getlength,
 .has_variable_length = true,
 .bdrv_get_allocated_file_size
 = raw_get_allocated_file_size,
diff --git a/trace-events b/trace-events
index 96b3974..995c84a 100644
--- a/trace-events
+++ b/trace-events
@@ -128,6 +128,7 @@ thread_pool_cancel(void *req, void *opaque) req %p opaque 
%p
 
 # block/raw-win32.c
 # block/raw-posix.c
+paio_submit_co(int64_t sector_num, int nb_sectors, int type) sector_num 
%PRId64 nb_sectors %d type %d
 paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int 
type) acb %p opaque %p sector_num %PRId64 nb_sectors %d

[Qemu-devel] [PATCH 02/11] block: add flags to BlockRequest

2013-11-12 Thread Paolo Bonzini

This lets bdrv_co_do_rw receive flags, so that it can be used for
zero writes.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block.c   | 17 +++--
 include/block/block.h |  1 +
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index ba6872e..7ea2361 100644
--- a/block.c
+++ b/block.c
@@ -74,6 +74,7 @@ static BlockDriverAIOCB 
*bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
+   BdrvRequestFlags flags,
BlockDriverCompletionFunc *cb,
void *opaque,
bool is_write);
@@ -3648,7 +3649,7 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, 
int64_t sector_num,
 {
 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
 
-return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
  cb, opaque, false);
 }
 
@@ -3658,7 +3659,7 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, 
int64_t sector_num,
 {
 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
 
-return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
  cb, opaque, true);
 }
 
@@ -3830,8 +3831,10 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, 
BlockRequest *reqs, int num_reqs)
 /* Run the aio requests. */
 mcb-num_requests = num_reqs;
 for (i = 0; i  num_reqs; i++) {
-bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
-reqs[i].nb_sectors, multiwrite_cb, mcb);
+bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
+  reqs[i].nb_sectors, reqs[i].flags,
+  multiwrite_cb, mcb,
+  true);
 }
 
 return 0;
@@ -3973,10 +3976,10 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
 
 if (!acb-is_write) {
 acb-req.error = bdrv_co_do_readv(bs, acb-req.sector,
-acb-req.nb_sectors, acb-req.qiov, 0);
+acb-req.nb_sectors, acb-req.qiov, acb-req.flags);
 } else {
 acb-req.error = bdrv_co_do_writev(bs, acb-req.sector,
-acb-req.nb_sectors, acb-req.qiov, 0);
+acb-req.nb_sectors, acb-req.qiov, acb-req.flags);
 }
 
 acb-bh = qemu_bh_new(bdrv_co_em_bh, acb);
@@ -3987,6 +3990,7 @@ static BlockDriverAIOCB 
*bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
int nb_sectors,
+   BdrvRequestFlags flags,
BlockDriverCompletionFunc *cb,
void *opaque,
bool is_write)
@@ -3998,6 +4002,7 @@ static BlockDriverAIOCB 
*bdrv_co_aio_rw_vector(BlockDriverState *bs,
 acb-req.sector = sector_num;
 acb-req.nb_sectors = nb_sectors;
 acb-req.qiov = qiov;
+acb-req.flags = flags;
 acb-is_write = is_write;
 acb-done = NULL;
 
diff --git a/include/block/block.h b/include/block/block.h
index 4d9e67c..703d875 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -311,6 +311,7 @@ typedef struct BlockRequest {
 /* Fields to be filled by multiwrite caller */
 int64_t sector;
 int nb_sectors;
+int flags;
 QEMUIOVector *qiov;
 BlockDriverCompletionFunc *cb;
 void *opaque;
-- 
1.8.4.2

[Qemu-devel] [PATCH 11/11] qemu-iotests: 033 is fast

2013-11-12 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 tests/qemu-iotests/group | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index c57ff35..cdd4b00 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -39,7 +39,7 @@
 030 rw auto backing
 031 rw auto quick
 032 rw auto
-033 rw auto
+033 rw auto quick
 034 rw auto backing
 035 rw auto quick
 036 rw auto quick
-- 
1.8.4.2

[Qemu-devel] [PATCH 09/11] raw-posix: implement write_zeroes with MAY_UNMAP for block devices

2013-11-12 Thread Paolo Bonzini

See the next commit for the description of the Linux kernel problem
that is worked around in raw_open_common.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block/raw-posix.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 830e109..5cb46f1 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -335,6 +335,23 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 if (S_ISREG(st.st_mode)) {
 s-discard_zeroes = true;
 }
+#ifdef BLKDISCARDZEROES
+if (S_ISBLK(st.st_mode)) {
+unsigned int arg;
+if (ioctl(s-fd, BLKDISCARDZEROES, arg) == 0  arg) {
+s-discard_zeroes = true;
+}
+}
+#endif
+#ifdef CONFIG_LINUX
+   /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache.  Do
+* not rely on the contents of discarded blocks unless using O_DIRECT.
+*/
+if (!(bs-open_flags  BDRV_O_NOCACHE)) {
+s-discard_zeroes = false;
+}
+#endif
+}
 
 #ifdef CONFIG_XFS
 if (platform_test_xfs_fd(s-fd)) {
@@ -1587,6 +1604,26 @@ static coroutine_fn BlockDriverAIOCB 
*hdev_aio_discard(BlockDriverState *bs,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
 }
 
+static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
+int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+BDRVRawState *s = bs-opaque;
+int rc;
+
+rc = fd_open(bs);
+if (rc  0) {
+return rc;
+}
+if (!(flags  BDRV_REQ_MAY_UNMAP)) {
+return -ENOTSUP;
+}
+if (!s-discard_zeroes) {
+return -ENOTSUP;
+}
+return paio_submit_co(bs, s-fd, sector_num, NULL, nb_sectors,
+  QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
+}
+
 static int hdev_create(const char *filename, QEMUOptionParameter *options,
Error **errp)
 {
@@ -1639,6 +1676,7 @@ static BlockDriver bdrv_host_device = {
 .bdrv_reopen_abort   = raw_reopen_abort,
 .bdrv_create= hdev_create,
 .create_options = raw_create_options,
+.bdrv_co_write_zeroes = hdev_co_write_zeroes,
 
 .bdrv_aio_readv= raw_aio_readv,
 .bdrv_aio_writev   = raw_aio_writev,
-- 
1.8.4.2

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Paolo Bonzini

Il 12/11/2013 16:32, Peter Maydell ha scritto:
  Is this FUD or do you have examples of bad debuggability of -O1 code?

 The clang manpage says specifically Note that Clang debug
 information works best at -O0. , and I see no reason to
 disbelieve it. In particular, they don't say we definitely
 will never add an optimization to -O1 that makes the debug
 info much worse.

This doesn't quite answer my question.  It looks like another bug that
should be reported to clang.  -O1 is somewhere between -O0 and -O2
(quoted from the man page) is a joke, it's not documentation.

Every time I look at clang, it seems to me that they are still relying
on the buzz from their better syntax errors blog posts (undeserved
these days), and from clang-analyzer (deserved).

I don't really see a reason why QEMU should give clang more weight than
Windows or Mac OS X.

Paolo

Re: [Qemu-devel] [edk2 PATCH 0/1] OvmfPkg: grab ACPI tables from QEMU

2013-11-12 Thread Igor Mammedov

On Tue, 12 Nov 2013 16:11:49 +0100
Laszlo Ersek ler...@redhat.com wrote:


 Second, I tested the patch under the following circumstances:
 - 3.10-based host kernel,
 - qemu v1.7.0-rc0, with additional patches that shrink the pci-hole
   memory range to just below system.flash (see the parallel discussion
   on qemu-devel),
Is pc: map PCI address space as catchall region for not
mapped addresses works for you instead of shrinking?


-- 
Regards,
  Igor

Re: [Qemu-devel] [PATCH for-1.7] target-i386: Fix build by providing stub kvm_arch_get_supported_cpuid()

2013-11-12 Thread Peter Maydell

On 12 November 2013 15:58, Paolo Bonzini pbonz...@redhat.com wrote:
 I don't really see a reason why QEMU should give clang more weight than
 Windows or Mac OS X.

I'm not asking for more weight (and actually my main
reason for caring about clang is exactly MacOSX). I'm
just asking that when a bug is reported whose underlying
cause is we don't work on clang because we're relying on
undocumented behaviour of gcc with an attached patch that
fixes this by not relying on the undocumented behaviour,
that we apply the patch rather than saying why do we
care about clang...

This seems to me to be a win-win situation:
 * we improve our code by not relying on undocumented
   implentation specifics
 * we work on a platform that, while not a primary
   platform, is at least supported in the codebase and
   has people who fix it when it breaks

-- PMM

Re: [Qemu-devel] [PATCH v2 2/4] apic: QOM'ify apic icc_bus

2013-11-12 Thread Andreas Färber

Hi,

Am 12.11.2013 02:28, schrieb Chen Fan:
 [...] not long ago， I had sent a path about refactor
 apic, though, there was not commented, if you are interested in it，
 please help to review it:
 https://lists.gnu.org/archive/html/qemu-devel/2013-10/msg02823.html

You sent your series on Oct 22nd during KVM Forum, whereas I have only
recently just reviewed the previous APIC patch from Sep 27th. Sorry, but
I am lagging behind with review of several series and unless there's
reasons not to, I try to honor whomever was first with a patch.

Regards,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

Re: [Qemu-devel] [edk2 PATCH 0/1] OvmfPkg: grab ACPI tables from QEMU

2013-11-12 Thread Laszlo Ersek

On 11/12/13 17:05, Igor Mammedov wrote:
 On Tue, 12 Nov 2013 16:11:49 +0100
 Laszlo Ersek ler...@redhat.com wrote:
 
 
 Second, I tested the patch under the following circumstances:
 - 3.10-based host kernel,
 - qemu v1.7.0-rc0, with additional patches that shrink the pci-hole
   memory range to just below system.flash (see the parallel discussion
   on qemu-devel),
 Is pc: map PCI address space as catchall region for not
 mapped addresses works for you instead of shrinking?

Yes, it does. Thanks!
Laszlo

Re: [Qemu-devel] [PATCH 1/2] pc: map PCI address space as catchall region for not mapped addresses

2013-11-12 Thread Laszlo Ersek

On 11/12/13 14:58, Igor Mammedov wrote:
 From: Michael S. Tsirkin m...@redhat.com
 
 With a help of negative memory region priority PCI address space
 is mapped underneath RAM regions effectively catching every access
 to addresses not mapped by any other region.
 It simplifies PCI address space mapping into system address space.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 Signed-off-by: Igor Mammedov imamm...@redhat.com
 ---
  hw/i386/pc.c  | 20 ++--
  hw/i386/pc_piix.c |  2 --
  hw/pci-host/piix.c| 26 --
  hw/pci-host/q35.c | 27 +--
  include/hw/i386/pc.h  | 14 ++
  include/hw/pci-host/q35.h |  2 --
  6 files changed, 17 insertions(+), 74 deletions(-)

Tested with OVMF running from flash (and storing nvvars there).

Also tested with my pending series i440fx-test: check firmware
visibility (reviews welcome :)).

GTest: result: OK
GTest: run: /i440fx/firmware/bios
(MSG: qemu cmdline: -S -display none -bios /tmp/fw_blob_UAGE6W)
GTest: result: OK
GTest: run: /i440fx/firmware/pflash
(MSG: qemu cmdline: -S -display none -pflash /tmp/fw_blob_DHXN6W)
GTest: result: OK

Tested-by: Laszlo Ersek ler...@redhat.com

Thanks!
Laszlo

[Qemu-devel] [PATCH 00/16] Add an IPMI device to QEMU

2013-11-12 Thread Corey Minyard

There are two (sets of) patches to the general code beyond the IPMI
device addition.

One set adds an option to qemu-char net devices to automatically try to
reconnect if the connection disconnects.  This lets the IPMI device
connect to a remote BMC and recover if that BMC fails.

The other set allows SMBIOS table entries to be added to the tables
by drivers.

-corey

[Qemu-devel] [PATCH 04/16] qemu-char: Close fd at end of file

2013-11-12 Thread Corey Minyard

The chardev backends that used qemu_chr_open_fd did not get their
file descriptors closed at end of file or when the chardev was closed.
This could result in a file descriptor leak.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 qemu-char.c | 35 +--
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 935066d..08b29ac 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -820,6 +820,8 @@ typedef struct FDCharDriver {
 GIOChannel *fd_in, *fd_out;
 int max_size;
 QTAILQ_ENTRY(FDCharDriver) node;
+int close_fdin;
+int close_fdout;
 } FDCharDriver;
 
 static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
@@ -829,6 +831,18 @@ static int fd_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 return io_channel_send(s-fd_out, buf, len);
 }
 
+static void fd_close_fds(FDCharDriver *s)
+{
+if ((s-close_fdin != s-close_fdout)  (s-close_fdout != -1)) {
+close(s-close_fdout);
+}
+s-close_fdout = -1;
+if (s-close_fdin != -1) {
+close(s-close_fdin);
+}
+s-close_fdin = -1;
+}
+
 static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
 {
 CharDriverState *chr = opaque;
@@ -850,6 +864,7 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition 
cond, void *opaque)
  len, bytes_read, NULL);
 if (status == G_IO_STATUS_EOF) {
 remove_fd_in_watch(chr);
+   fd_close_fds(s);
 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
 return FALSE;
 }
@@ -898,19 +913,27 @@ static void fd_chr_close(struct CharDriverState *chr)
 g_io_channel_unref(s-fd_out);
 }
 
+fd_close_fds(s);
 g_free(s);
 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
 }
 
 /* open a character device to a unix fd */
 static CharDriverState *qemu_chr_open_fd(CharDriverState *chr,
- int fd_in, int fd_out)
+ int fd_in, int fd_out,
+ int close_fds_on_close)
 {
 FDCharDriver *s;
 
 s = g_malloc0(sizeof(FDCharDriver));
 s-fd_in = io_channel_from_fd(fd_in);
 s-fd_out = io_channel_from_fd(fd_out);
+if (close_fds_on_close) {
+s-close_fdin = fd_in;
+s-close_fdout = fd_out;
+} else {
+s-close_fdin = s-close_fdout = -1;
+}
 fcntl(fd_out, F_SETFL, O_NONBLOCK);
 s-chr = chr;
 chr-opaque = s;
@@ -948,7 +971,7 @@ static CharDriverState *qemu_chr_open_pipe(CharDriverState 
*chr,
 return NULL;
 }
 }
-return qemu_chr_open_fd(chr, fd_in, fd_out);
+return qemu_chr_open_fd(chr, fd_in, fd_out, TRUE);
 }
 
 /* init terminal so that we can grab keys */
@@ -1001,7 +1024,7 @@ static CharDriverState 
*qemu_chr_open_stdio(CharDriverState *chr,
 fcntl(0, F_SETFL, O_NONBLOCK);
 atexit(term_exit);
 
-qemu_chr_open_fd(chr, 0, 1);
+qemu_chr_open_fd(chr, 0, 1, FALSE);
 chr-chr_close = qemu_chr_close_stdio;
 chr-chr_set_echo = qemu_chr_set_echo_stdio;
 if (opts-has_signal) {
@@ -1407,7 +1430,7 @@ static void qemu_chr_close_tty(CharDriverState *chr)
 static CharDriverState *qemu_chr_open_tty_fd(CharDriverState *chr, int fd)
 {
 tty_serial_init(fd, 115200, 'N', 8, 1);
-qemu_chr_open_fd(chr, fd, fd);
+qemu_chr_open_fd(chr, fd, fd, TRUE);
 chr-chr_ioctl = tty_serial_ioctl;
 chr-chr_close = qemu_chr_close_tty;
 return chr;
@@ -2483,7 +2506,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, 
GIOCondition cond, void *opaque)
 #ifndef _WIN32
 CharDriverState *qemu_chr_open_eventfd(CharDriverState *chr, int eventfd)
 {
-return qemu_chr_open_fd(chr, eventfd, eventfd);
+return qemu_chr_open_fd(chr, eventfd, eventfd, FALSE);
 }
 #endif
 
@@ -3639,7 +3662,7 @@ static CharDriverState 
*qmp_chardev_open_file(CharDriverState *chr,
 }
 }
 
-return qemu_chr_open_fd(chr, in, out);
+return qemu_chr_open_fd(chr, in, out, TRUE);
 }
 
 static CharDriverState *qmp_chardev_open_serial(CharDriverState *chr,
-- 
1.8.3.1

[Qemu-devel] [PATCH 03/16] qemu-char: remove free of chr from win_stdio_close

2013-11-12 Thread Corey Minyard

This will result in a double free on close, because it's freed
in qemu_chr_delete() right after calling the close function.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 qemu-char.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/qemu-char.c b/qemu-char.c
index 23d7647..935066d 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2075,7 +2075,6 @@ static void win_stdio_close(CharDriverState *chr)
 }
 
 g_free(chr-opaque);
-g_free(chr);
 }
 
 static CharDriverState *qemu_chr_open_stdio(CharDriverState *chr,
-- 
1.8.3.1

[Qemu-devel] [PATCH 14/16] pc: Postpone adding ACPI and SMBIOS to fw_cfg

2013-11-12 Thread Corey Minyard

Postpone the addition of the ACPI and SMBIOS tables until after
device initialization.  This allows devices to add entries to these
tables.

Signed-off-by: Corey Minyard cminy...@mvsita.com
---
 hw/i386/pc.c | 38 ++
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index dee409d..765c95e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -607,8 +607,6 @@ static unsigned int pc_apic_id_limit(unsigned int max_cpus)
 static FWCfgState *bochs_bios_init(void)
 {
 FWCfgState *fw_cfg;
-uint8_t *smbios_table;
-size_t smbios_len;
 uint64_t *numa_fw_cfg;
 int i, j;
 unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
@@ -631,14 +629,8 @@ static FWCfgState *bochs_bios_init(void)
 fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit);
 fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
 fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
-fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
- acpi_tables, acpi_tables_len);
 fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override());
 
-smbios_table = smbios_get_table(smbios_len);
-if (smbios_table)
-fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES,
- smbios_table, smbios_len);
 fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
  e820_table, sizeof(e820_table));
 
@@ -1127,6 +1119,31 @@ void pc_acpi_init(const char *default_dsdt)
 }
 }
 
+struct pc_bios_post_init {
+Notifier post_init;
+void *fw_cfg;
+};
+
+/* Add the ACPI and SMBIOS tables after all the hardware has been initialized.
+ * This gives devices a chance to add to those tables.
+ */
+static void pc_bios_post_initfn(Notifier *n, void *opaque)
+{
+struct pc_bios_post_init *p = container_of(n, struct pc_bios_post_init,
+   post_init);
+uint8_t *smbios_table;
+size_t smbios_len;
+
+fw_cfg_add_bytes(p-fw_cfg, FW_CFG_ACPI_TABLES,
+ acpi_tables, acpi_tables_len);
+smbios_table = smbios_get_table(smbios_len);
+if (smbios_table)
+fw_cfg_add_bytes(p-fw_cfg, FW_CFG_SMBIOS_ENTRIES,
+ smbios_table, smbios_len);
+}
+
+static struct pc_bios_post_init post_init;
+
 FWCfgState *pc_memory_init(MemoryRegion *system_memory,
const char *kernel_filename,
const char *kernel_cmdline,
@@ -1196,6 +1213,11 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
 rom_add_option(option_rom[i].name, option_rom[i].bootindex);
 }
 guest_info-fw_cfg = fw_cfg;
+
+post_init.fw_cfg = fw_cfg;
+post_init.post_init.notify = pc_bios_post_initfn;
+qemu_add_machine_init_done_notifier(post_init.post_init);
+
 return fw_cfg;
 }
 
-- 
1.8.3.1

[Qemu-devel] [PATCH 01/16] qemu-char: Allocate CharDriverState in qemu_chr_new_from_opts

2013-11-12 Thread Corey Minyard

This allocates the CharDriverState structure and passes it in to the
open routine.  This allows a coming option to automatically attempt to
reconnect a chardev if the connection fails.  The chardev has to be
kept around so a reconnect can be done on it.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 backends/baum.c |   6 +-
 backends/msmouse.c  |   5 +-
 hw/misc/ivshmem.c   |  11 ++-
 include/sysemu/char.h   |   9 +--
 include/ui/console.h|   4 +-
 include/ui/qemu-spice.h |   6 +-
 qemu-char.c | 180 ++--
 spice-qemu-char.c   |  14 ++--
 ui/console.c|  10 +--
 9 files changed, 113 insertions(+), 132 deletions(-)

diff --git a/backends/baum.c b/backends/baum.c
index 1132899..9910a74 100644
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -561,10 +561,9 @@ static void baum_close(struct CharDriverState *chr)
 g_free(baum);
 }
 
-CharDriverState *chr_baum_init(void)
+CharDriverState *chr_baum_init(CharDriverState *chr)
 {
 BaumDriverState *baum;
-CharDriverState *chr;
 brlapi_handle_t *handle;
 #ifdef CONFIG_SDL
 SDL_SysWMinfo info;
@@ -572,7 +571,7 @@ CharDriverState *chr_baum_init(void)
 int tty;
 
 baum = g_malloc0(sizeof(BaumDriverState));
-baum-chr = chr = g_malloc0(sizeof(CharDriverState));
+baum-chr = chr;
 
 chr-opaque = baum;
 chr-chr_write = baum_write;
@@ -618,7 +617,6 @@ fail:
 brlapi__closeConnection(handle);
 fail_handle:
 g_free(handle);
-g_free(chr);
 g_free(baum);
 return NULL;
 }
diff --git a/backends/msmouse.c b/backends/msmouse.c
index c0dbfcd..b4e7a23 100644
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -63,11 +63,8 @@ static void msmouse_chr_close (struct CharDriverState *chr)
 g_free (chr);
 }
 
-CharDriverState *qemu_chr_open_msmouse(void)
+CharDriverState *qemu_chr_open_msmouse(CharDriverState *chr)
 {
-CharDriverState *chr;
-
-chr = g_malloc0(sizeof(CharDriverState));
 chr-chr_write = msmouse_chr_write;
 chr-chr_close = msmouse_chr_close;
 chr-explicit_be_open = true;
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 8d144ba..75e83c3 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -291,12 +291,17 @@ static CharDriverState* create_eventfd_chr_device(void * 
opaque, EventNotifier *
 {
 /* create a event character device based on the passed eventfd */
 IVShmemState *s = opaque;
-CharDriverState * chr;
+CharDriverState *chr, *newchr;
 int eventfd = event_notifier_get_fd(n);
 
-chr = qemu_chr_open_eventfd(eventfd);
-
+newchr = g_malloc0(sizeof(*chr));
+if (newchr == NULL) {
+fprintf(stderr, creating eventfd for eventfd %d failed\n, eventfd);
+exit(-1);
+}
+chr = qemu_chr_open_eventfd(newchr, eventfd);
 if (chr == NULL) {
+g_free(newchr);
 fprintf(stderr, creating eventfd for eventfd %d failed\n, eventfd);
 exit(-1);
 }
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index ad101d9..44baf0f 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -282,21 +282,22 @@ CharDriverState *qemu_chr_find(const char *name);
 
 QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename);
 
-void register_char_driver(const char *name, CharDriverState *(*open)(QemuOpts 
*));
+void register_char_driver(const char *name,
+CharDriverState *(*open)(CharDriverState *, QemuOpts *));
 void register_char_driver_qapi(const char *name, ChardevBackendKind kind,
 void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp));
 
 /* add an eventfd to the qemu devices that are polled */
-CharDriverState *qemu_chr_open_eventfd(int eventfd);
+CharDriverState *qemu_chr_open_eventfd(CharDriverState *chr, int eventfd);
 
 extern int term_escape_char;
 
 CharDriverState *qemu_char_get_next_serial(void);
 
 /* msmouse */
-CharDriverState *qemu_chr_open_msmouse(void);
+CharDriverState *qemu_chr_open_msmouse(CharDriverState *chr);
 
 /* baum.c */
-CharDriverState *chr_baum_init(void);
+CharDriverState *chr_baum_init(CharDriverState *chr);
 
 #endif
diff --git a/include/ui/console.h b/include/ui/console.h
index 98edf41..5e5c74d 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -300,9 +300,9 @@ void qemu_console_copy(QemuConsole *con, int src_x, int 
src_y,
 DisplaySurface *qemu_console_surface(QemuConsole *con);
 DisplayState *qemu_console_displaystate(QemuConsole *console);
 
-typedef CharDriverState *(VcHandler)(ChardevVC *vc);
+typedef CharDriverState *(VcHandler)(CharDriverState *chr, ChardevVC *vc);
 
-CharDriverState *vc_init(ChardevVC *vc);
+CharDriverState *vc_init(CharDriverState *chr, ChardevVC *vc);
 void register_vc_handler(VcHandler *handler);
 
 /* sdl.c */
diff --git a/include/ui/qemu-spice.h b/include/ui/qemu-spice.h
index 86c75c7..eadb9c9 100644
--- a/include/ui/qemu-spice.h
+++ b/include/ui/qemu-spice.h
@@ -46,9 +46,11 @@ int

[Qemu-devel] [PATCH 05/16] Add a base IPMI interface

2013-11-12 Thread Corey Minyard

Add the basic IPMI types and infrastructure to QEMU.  Low-level
interfaces and simulation interfaces will register with this; it's
kind of the go-between to tie them together.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 default-configs/i386-softmmu.mak   |   1 +
 default-configs/x86_64-softmmu.mak |   1 +
 hw/Makefile.objs   |   1 +
 hw/ipmi/Makefile.objs  |   1 +
 hw/ipmi/ipmi.c | 167 +
 hw/ipmi/ipmi.h | 241 +
 qemu-doc.texi  |   2 +
 7 files changed, 414 insertions(+)
 create mode 100644 hw/ipmi/Makefile.objs
 create mode 100644 hw/ipmi/ipmi.c
 create mode 100644 hw/ipmi/ipmi.h

diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 37ef90f..89199fb 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -10,6 +10,7 @@ CONFIG_VGA_ISA=y
 CONFIG_VGA_CIRRUS=y
 CONFIG_VMWARE_VGA=y
 CONFIG_VMMOUSE=y
+CONFIG_IPMI=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak
index 31bddce..154248a 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -10,6 +10,7 @@ CONFIG_VGA_ISA=y
 CONFIG_VGA_CIRRUS=y
 CONFIG_VMWARE_VGA=y
 CONFIG_VMMOUSE=y
+CONFIG_IPMI=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 0243d6a..eeeb3bd 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -12,6 +12,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += i2c/
 devices-dirs-$(CONFIG_SOFTMMU) += ide/
 devices-dirs-$(CONFIG_SOFTMMU) += input/
 devices-dirs-$(CONFIG_SOFTMMU) += intc/
+devices-dirs-$(CONFIG_SOFTMMU) += ipmi/
 devices-dirs-$(CONFIG_SOFTMMU) += isa/
 devices-dirs-$(CONFIG_SOFTMMU) += misc/
 devices-dirs-$(CONFIG_SOFTMMU) += net/
diff --git a/hw/ipmi/Makefile.objs b/hw/ipmi/Makefile.objs
new file mode 100644
index 000..65bde11
--- /dev/null
+++ b/hw/ipmi/Makefile.objs
@@ -0,0 +1 @@
+common-obj-$(CONFIG_IPMI) += ipmi.o
diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c
new file mode 100644
index 000..21560a9
--- /dev/null
+++ b/hw/ipmi/ipmi.c
@@ -0,0 +1,167 @@
+/*
+ * QEMU IPMI emulation
+ *
+ * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the Software), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include hw/hw.h
+#include ipmi.h
+#include sysemu/sysemu.h
+#include qmp-commands.h
+
+#ifdef DO_IPMI_THREAD
+static void *ipmi_thread(void *opaque)
+{
+IPMIInterface *s = opaque;
+int64_t wait_until;
+
+ipmi_lock(s);
+for (;;) {
+qemu_cond_wait(s-waker, s-lock);
+wait_until = 0;
+while (s-do_wake) {
+s-do_wake = 0;
+s-handle_if_event(s);
+}
+}
+ipmi_unlock(s);
+return NULL;
+}
+#endif
+
+static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly)
+{
+switch (op) {
+case IPMI_RESET_CHASSIS:
+if (checkonly) {
+return 0;
+}
+qemu_system_reset_request();
+return 0;
+
+case IPMI_POWEROFF_CHASSIS:
+if (checkonly) {
+return 0;
+}
+qemu_system_powerdown_request();
+return 0;
+
+case IPMI_SEND_NMI:
+if (checkonly) {
+return 0;
+}
+qemu_mutex_lock_iothread();
+qmp_inject_nmi(NULL);
+qemu_mutex_unlock_iothread();
+return 0;
+
+case IPMI_POWERCYCLE_CHASSIS:
+case IPMI_PULSE_DIAG_IRQ:
+case IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP:
+case IPMI_POWERON_CHASSIS:
+default:
+return IPMI_CC_COMMAND_NOT_SUPPORTED;
+}
+}
+
+static void ipmi_set_irq_enable(IPMIInterface *s, int val)
+{
+s-irqs_enabled = val;
+}
+
+void ipmi_interface_reset(IPMIInterface *s)
+{
+IPMIBmcClass *bk = IPMI_BMC_GET_CLASS(s-bmc);
+
+if (bk-handle_reset) {
+

[Qemu-devel] [PATCH 07/16] ipmi: Add a KCS low-level interface

2013-11-12 Thread Corey Minyard

This provides the simulation of the KCS hardware interface.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 default-configs/i386-softmmu.mak   |   1 +
 default-configs/x86_64-softmmu.mak |   1 +
 hw/ipmi/Makefile.objs  |   1 +
 hw/ipmi/ipmi_kcs.c | 345 +
 4 files changed, 348 insertions(+)
 create mode 100644 hw/ipmi/ipmi_kcs.c

diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index d5ee6c5..e257954 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -12,6 +12,7 @@ CONFIG_VMWARE_VGA=y
 CONFIG_VMMOUSE=y
 CONFIG_IPMI=y
 CONFIG_ISA_IPMI=y
+CONFIG_IPMI_KCS=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak
index 0b700ae..f3b2bf8 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -12,6 +12,7 @@ CONFIG_VMWARE_VGA=y
 CONFIG_VMMOUSE=y
 CONFIG_IPMI=y
 CONFIG_ISA_IPMI=y
+CONFIG_IPMI_KCS=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/hw/ipmi/Makefile.objs b/hw/ipmi/Makefile.objs
index 3494b70..3c811ae 100644
--- a/hw/ipmi/Makefile.objs
+++ b/hw/ipmi/Makefile.objs
@@ -1,2 +1,3 @@
 common-obj-$(CONFIG_IPMI) += ipmi.o
 common-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o
+common-obj-$(CONFIG_IPMI_KCS) += ipmi_kcs.o
diff --git a/hw/ipmi/ipmi_kcs.c b/hw/ipmi/ipmi_kcs.c
new file mode 100644
index 000..056a369
--- /dev/null
+++ b/hw/ipmi/ipmi_kcs.c
@@ -0,0 +1,345 @@
+/*
+ * QEMU IPMI KCS emulation
+ *
+ * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the Software), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include hw/hw.h
+#include ipmi.h
+
+#define TYPE_IPMI_INTERFACE_KCS TYPE_IPMI_INTERFACE_PREFIX kcs
+#define IPMI_INTERFACE_KCS(obj) OBJECT_CHECK(IPMIKcsInterface, (obj), \
+TYPE_IPMI_INTERFACE_KCS)
+
+#define IPMI_KCS_OBF_BIT0
+#define IPMI_KCS_IBF_BIT1
+#define IPMI_KCS_SMS_ATN_BIT2
+#define IPMI_KCS_CD_BIT 3
+
+#define IPMI_KCS_OBF_MASK  (1  IPMI_KCS_OBF_BIT)
+#define IPMI_KCS_GET_OBF(d)(((d)  IPMI_KCS_OBF_BIT)  0x1)
+#define IPMI_KCS_SET_OBF(d, v) (d) = (((d)  ~IPMI_KCS_OBF_MASK) | \
+   (((v)  1)  IPMI_KCS_OBF_BIT))
+#define IPMI_KCS_IBF_MASK  (1  IPMI_KCS_IBF_BIT)
+#define IPMI_KCS_GET_IBF(d)(((d)  IPMI_KCS_IBF_BIT)  0x1)
+#define IPMI_KCS_SET_IBF(d, v) (d) = (((d)  ~IPMI_KCS_IBF_MASK) | \
+   (((v)  1)  IPMI_KCS_IBF_BIT))
+#define IPMI_KCS_SMS_ATN_MASK  (1  IPMI_KCS_SMS_ATN_BIT)
+#define IPMI_KCS_GET_SMS_ATN(d)(((d)  IPMI_KCS_SMS_ATN_BIT)  0x1)
+#define IPMI_KCS_SET_SMS_ATN(d, v) (d) = (((d)  ~IPMI_KCS_SMS_ATN_MASK) | \
+   (((v)  1)  IPMI_KCS_SMS_ATN_BIT))
+#define IPMI_KCS_CD_MASK   (1  IPMI_KCS_CD_BIT)
+#define IPMI_KCS_GET_CD(d) (((d)  IPMI_KCS_CD_BIT)  0x1)
+#define IPMI_KCS_SET_CD(d, v)  (d) = (((d)  ~IPMI_KCS_CD_MASK) | \
+   (((v)  1)  IPMI_KCS_CD_BIT))
+
+#define IPMI_KCS_IDLE_STATE0
+#define IPMI_KCS_READ_STATE1
+#define IPMI_KCS_WRITE_STATE   2
+#define IPMI_KCS_ERROR_STATE   3
+
+#define IPMI_KCS_GET_STATE(d)(((d)  6)  0x3)
+#define IPMI_KCS_SET_STATE(d, v) ((d) = ((d)  ~0xc0) | (((v)  0x3)  6))
+
+#define IPMI_KCS_ABORT_STATUS_CMD   0x60
+#define IPMI_KCS_WRITE_START_CMD0x61
+#define IPMI_KCS_WRITE_END_CMD  0x62
+#define IPMI_KCS_READ_CMD   0x68
+
+#define IPMI_KCS_STATUS_NO_ERR  0x00
+#define IPMI_KCS_STATUS_ABORTED_ERR 0x01
+#define IPMI_KCS_STATUS_BAD_CC_ERR  0x02
+#define IPMI_KCS_STATUS_LENGTH_ERR  0x06
+
+typedef struct IPMIKcsInterface {
+IPMIInterface intf;
+
+uint8_t status_reg;
+uint8_t data_out_reg;
+
+int16_t

[Qemu-devel] [PATCH 06/16] ipmi: Add a PC ISA type structure

2013-11-12 Thread Corey Minyard

This provides the base infrastructure to tie IPMI low-level
interfaces into a PC ISA bus.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 default-configs/i386-softmmu.mak   |   1 +
 default-configs/x86_64-softmmu.mak |   1 +
 hw/ipmi/Makefile.objs  |   1 +
 hw/ipmi/isa_ipmi.c | 148 +
 include/hw/nvram/fw_cfg.h  |  11 ++-
 5 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 hw/ipmi/isa_ipmi.c

diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 89199fb..d5ee6c5 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -11,6 +11,7 @@ CONFIG_VGA_CIRRUS=y
 CONFIG_VMWARE_VGA=y
 CONFIG_VMMOUSE=y
 CONFIG_IPMI=y
+CONFIG_ISA_IPMI=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak
index 154248a..0b700ae 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -11,6 +11,7 @@ CONFIG_VGA_CIRRUS=y
 CONFIG_VMWARE_VGA=y
 CONFIG_VMMOUSE=y
 CONFIG_IPMI=y
+CONFIG_ISA_IPMI=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/hw/ipmi/Makefile.objs b/hw/ipmi/Makefile.objs
index 65bde11..3494b70 100644
--- a/hw/ipmi/Makefile.objs
+++ b/hw/ipmi/Makefile.objs
@@ -1 +1,2 @@
 common-obj-$(CONFIG_IPMI) += ipmi.o
+common-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o
diff --git a/hw/ipmi/isa_ipmi.c b/hw/ipmi/isa_ipmi.c
new file mode 100644
index 000..0242a41
--- /dev/null
+++ b/hw/ipmi/isa_ipmi.c
@@ -0,0 +1,148 @@
+/*
+ * QEMU ISA IPMI emulation
+ *
+ * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the Software), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include hw/hw.h
+#include hw/isa/isa.h
+#include hw/i386/pc.h
+#include qemu/timer.h
+#include sysemu/char.h
+#include sysemu/sysemu.h
+#include ipmi.h
+
+/* This is the type the user specifies on the -device command line */
+#define TYPE_ISA_IPMI   isa-ipmi
+#define ISA_IPMI(obj) OBJECT_CHECK(ISAIPMIDevice, (obj), \
+TYPE_ISA_IPMI)
+typedef struct ISAIPMIDevice {
+ISADevice dev;
+char *interface;
+uint32_t iobase;
+uint32_t isairq;
+uint8_t slave_addr;
+CharDriverState *chr;
+IPMIInterface *intf;
+} ISAIPMIDevice;
+
+static void ipmi_isa_realizefn(DeviceState *dev, Error **errp)
+{
+ISADevice *isadev = ISA_DEVICE(dev);
+ISAIPMIDevice *isa = ISA_IPMI(dev);
+char typename[20];
+Object *intfobj;
+IPMIInterface *intf;
+Object *bmcobj;
+IPMIBmc *bmc;
+int rc;
+
+if (!isa-interface) {
+isa-interface = g_strdup(kcs);
+}
+
+if (isa-chr) {
+bmcobj = object_new(TYPE_IPMI_BMC_EXTERN);
+} else {
+bmcobj = object_new(TYPE_IPMI_BMC_SIMULATOR);
+}
+bmc = IPMI_BMC(bmcobj);
+bmc-chr = isa-chr;
+snprintf(typename, sizeof(typename),
+ TYPE_IPMI_INTERFACE_PREFIX %s, isa-interface);
+intfobj = object_new(typename);
+intf = IPMI_INTERFACE(intfobj);
+bmc-intf = intf;
+intf-bmc = bmc;
+intf-io_base = isa-iobase;
+intf-slave_addr = isa-slave_addr;
+rc = ipmi_interface_init(intf);
+if (rc) {
+error_setg(errp, Error initializing IPMI interface: %d\n, rc);
+return;
+}
+rc = ipmi_bmc_init(bmc);
+if (rc) {
+error_setg(errp, Error initializing BMC: %d\n, rc);
+return;
+}
+
+/* These may be set by the interface. */
+isa-iobase = intf-io_base;
+isa-slave_addr = intf-slave_addr;
+
+if (isa-isairq != 0) {
+isa_init_irq(isadev, intf-irq, isa-isairq);
+intf-use_irq = 1;
+}
+
+isa-intf = intf;
+object_property_add_child(OBJECT(isadev), intf, OBJECT(intf), errp);
+if (error_is_set(errp)) {
+return;
+}
+object_property_add_child(OBJECT(isadev), bmc, OBJECT(bmc), errp);
+if

[Qemu-devel] [PATCH 11/16] ipmi: Add tests

2013-11-12 Thread Corey Minyard

Test the KCS interface with a local BMC and a BT interface with an
external BMC.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 tests/Makefile|   4 +
 tests/ipmi-bt-test.c  | 440 ++
 tests/ipmi-kcs-test.c | 294 +
 3 files changed, 738 insertions(+)
 create mode 100644 tests/ipmi-bt-test.c
 create mode 100644 tests/ipmi-kcs-test.c

diff --git a/tests/Makefile b/tests/Makefile
index fa4c9f0..044e7a8 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -65,6 +65,8 @@ check-qtest-i386-y += tests/hd-geo-test$(EXESUF)
 gcov-files-i386-y += hw/hd-geometry.c
 check-qtest-i386-y += tests/boot-order-test$(EXESUF)
 check-qtest-i386-y += tests/rtc-test$(EXESUF)
+check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF)
+check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF)
 check-qtest-i386-y += tests/i440fx-test$(EXESUF)
 check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
@@ -169,6 +171,8 @@ tests/m48t59-test$(EXESUF): tests/m48t59-test.o
 tests/endianness-test$(EXESUF): tests/endianness-test.o
 tests/fdc-test$(EXESUF): tests/fdc-test.o
 tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
+tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o
+tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o
 tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
 tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y)
diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c
new file mode 100644
index 000..c1da325
--- /dev/null
+++ b/tests/ipmi-bt-test.c
@@ -0,0 +1,440 @@
+/*
+ * IPMI BT test cases, using the external interface for checking
+ *
+ * Copyright (c) 2012 Corey Minyard cminy...@mvista.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the Software), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include sys/types.h
+#include stdint.h
+#include string.h
+#include stdio.h
+
+#include sys/socket.h
+#include netinet/in.h
+#include netinet/ip.h
+#include netinet/tcp.h
+
+#include glib.h
+
+#include libqtest.h
+#include qemu-common.h
+
+#define IPMI_IRQ5
+
+#define IPMI_BT_BASE0xe4
+
+#define IPMI_BT_CTLREG_CLR_WR_PTR  0
+#define IPMI_BT_CTLREG_CLR_RD_PTR  1
+#define IPMI_BT_CTLREG_H2B_ATN 2
+#define IPMI_BT_CTLREG_B2H_ATN 3
+#define IPMI_BT_CTLREG_SMS_ATN 4
+#define IPMI_BT_CTLREG_H_BUSY  6
+#define IPMI_BT_CTLREG_B_BUSY  7
+
+#define IPMI_BT_CTLREG_GET(b) ((bt_get_ctrlreg()  (b))  1)
+#define IPMI_BT_CTLREG_GET_H2B_ATN() IPMI_BT_CTLREG_GET(IPMI_BT_CTLREG_H2B_ATN)
+#define IPMI_BT_CTLREG_GET_B2H_ATN() IPMI_BT_CTLREG_GET(IPMI_BT_CTLREG_B2H_ATN)
+#define IPMI_BT_CTLREG_GET_SMS_ATN() IPMI_BT_CTLREG_GET(IPMI_BT_CTLREG_SMS_ATN)
+#define IPMI_BT_CTLREG_GET_H_BUSY()  IPMI_BT_CTLREG_GET(IPMI_BT_CTLREG_H_BUSY)
+#define IPMI_BT_CTLREG_GET_B_BUSY()  IPMI_BT_CTLREG_GET(IPMI_BT_CTLREG_B_BUSY)
+
+#define IPMI_BT_CTLREG_SET(b) bt_write_ctrlreg(1  (b))
+#define IPMI_BT_CTLREG_SET_CLR_WR_PTR() IPMI_BT_CTLREG_SET( \
+IPMI_BT_CTLREG_CLR_WR_PTR)
+#define IPMI_BT_CTLREG_SET_CLR_RD_PTR() IPMI_BT_CTLREG_SET( \
+IPMI_BT_CTLREG_CLR_RD_PTR)
+#define IPMI_BT_CTLREG_SET_H2B_ATN()  
IPMI_BT_CTLREG_SET(IPMI_BT_CTLREG_H2B_ATN)
+#define IPMI_BT_CTLREG_SET_B2H_ATN()  
IPMI_BT_CTLREG_SET(IPMI_BT_CTLREG_B2H_ATN)
+#define IPMI_BT_CTLREG_SET_SMS_ATN()  
IPMI_BT_CTLREG_SET(IPMI_BT_CTLREG_SMS_ATN)
+#define IPMI_BT_CTLREG_SET_H_BUSY()   IPMI_BT_CTLREG_SET(IPMI_BT_CTLREG_H_BUSY)
+
+static int bt_ints_enabled;
+
+static uint8_t bt_get_ctrlreg(void)
+{
+return inb(IPMI_BT_BASE);
+}
+
+static void bt_write_ctrlreg(uint8_t val)
+{
+outb(IPMI_BT_BASE, val);
+}
+
+static uint8_t bt_get_buf(void)
+{
+return inb(IPMI_BT_BASE + 1);
+}
+
+static void bt_write_buf(uint8_t val)
+{
+outb(IPMI_BT_BASE + 1, val);
+}
+
+static uint8_t bt_get_irqreg(void)

[Qemu-devel] [PATCH 08/16] ipmi: Add a BT low-level interface

2013-11-12 Thread Corey Minyard

This provides the simulation of the BT hardware interface for
IPMI.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 default-configs/i386-softmmu.mak   |   1 +
 default-configs/x86_64-softmmu.mak |   1 +
 hw/ipmi/Makefile.objs  |   1 +
 hw/ipmi/ipmi_bt.c  | 367 +
 4 files changed, 370 insertions(+)
 create mode 100644 hw/ipmi/ipmi_bt.c

diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index e257954..ea3a3b4 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -13,6 +13,7 @@ CONFIG_VMMOUSE=y
 CONFIG_IPMI=y
 CONFIG_ISA_IPMI=y
 CONFIG_IPMI_KCS=y
+CONFIG_IPMI_BT=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak
index f3b2bf8..89efe84 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -13,6 +13,7 @@ CONFIG_VMMOUSE=y
 CONFIG_IPMI=y
 CONFIG_ISA_IPMI=y
 CONFIG_IPMI_KCS=y
+CONFIG_IPMI_BT=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/hw/ipmi/Makefile.objs b/hw/ipmi/Makefile.objs
index 3c811ae..a8cd463 100644
--- a/hw/ipmi/Makefile.objs
+++ b/hw/ipmi/Makefile.objs
@@ -1,3 +1,4 @@
 common-obj-$(CONFIG_IPMI) += ipmi.o
 common-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o
 common-obj-$(CONFIG_IPMI_KCS) += ipmi_kcs.o
+common-obj-$(CONFIG_IPMI_BT) += ipmi_bt.o
diff --git a/hw/ipmi/ipmi_bt.c b/hw/ipmi/ipmi_bt.c
new file mode 100644
index 000..ec256f4
--- /dev/null
+++ b/hw/ipmi/ipmi_bt.c
@@ -0,0 +1,367 @@
+/*
+ * QEMU IPMI BT emulation
+ *
+ * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the Software), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include hw/hw.h
+#include ipmi.h
+
+#define TYPE_IPMI_INTERFACE_BT TYPE_IPMI_INTERFACE_PREFIX bt
+#define IPMI_INTERFACE_BT(obj) OBJECT_CHECK(IPMIBtInterface, (obj), \
+TYPE_IPMI_INTERFACE_BT)
+
+/* Control register */
+#define IPMI_BT_CLR_WR_BIT0
+#define IPMI_BT_CLR_RD_BIT1
+#define IPMI_BT_H2B_ATN_BIT2
+#define IPMI_BT_B2H_ATN_BIT3
+#define IPMI_BT_SMS_ATN_BIT4
+#define IPMI_BT_HBUSY_BIT6
+#define IPMI_BT_BBUSY_BIT7
+
+#define IPMI_BT_CLR_WR_MASK(1  IPMI_BT_CLR_WR_BIT)
+#define IPMI_BT_GET_CLR_WR(d)  (((d)  IPMI_BT_CLR_WR_BIT)  0x1)
+#define IPMI_BT_SET_CLR_WR(d, v)   (d) = (((d)  ~IPMI_BT_CLR_WR_MASK) | \
+   (((v  1)  IPMI_BT_CLR_WR_BIT)))
+
+#define IPMI_BT_CLR_RD_MASK(1  IPMI_BT_CLR_RD_BIT)
+#define IPMI_BT_GET_CLR_RD(d)  (((d)  IPMI_BT_CLR_RD_BIT)  0x1)
+#define IPMI_BT_SET_CLR_RD(d, v)   (d) = (((d)  ~IPMI_BT_CLR_RD_MASK) | \
+   (((v  1)  IPMI_BT_CLR_RD_BIT)))
+
+#define IPMI_BT_H2B_ATN_MASK   (1  IPMI_BT_H2B_ATN_BIT)
+#define IPMI_BT_GET_H2B_ATN(d) (((d)  IPMI_BT_H2B_ATN_BIT)  0x1)
+#define IPMI_BT_SET_H2B_ATN(d, v)  (d) = (((d)  ~IPMI_BT_H2B_ATN_MASK) | \
+(((v  1)  IPMI_BT_H2B_ATN_BIT)))
+
+#define IPMI_BT_B2H_ATN_MASK   (1  IPMI_BT_B2H_ATN_BIT)
+#define IPMI_BT_GET_B2H_ATN(d) (((d)  IPMI_BT_B2H_ATN_BIT)  0x1)
+#define IPMI_BT_SET_B2H_ATN(d, v)  (d) = (((d)  ~IPMI_BT_B2H_ATN_MASK) | \
+(((v  1)  IPMI_BT_B2H_ATN_BIT)))
+
+#define IPMI_BT_SMS_ATN_MASK   (1  IPMI_BT_SMS_ATN_BIT)
+#define IPMI_BT_GET_SMS_ATN(d) (((d)  IPMI_BT_SMS_ATN_BIT)  0x1)
+#define IPMI_BT_SET_SMS_ATN(d, v)  (d) = (((d)  ~IPMI_BT_SMS_ATN_MASK) | \
+(((v  1)  IPMI_BT_SMS_ATN_BIT)))
+
+#define IPMI_BT_HBUSY_MASK (1  IPMI_BT_HBUSY_BIT)
+#define IPMI_BT_GET_HBUSY(d)   (((d)  IPMI_BT_HBUSY_BIT)  0x1)
+#define IPMI_BT_SET_HBUSY(d, v)(d) = (((d)  ~IPMI_BT_HBUSY_MASK) | \
+   (((v  1)  IPMI_BT_HBUSY_BIT)))
+
+#define

[Qemu-devel] [PATCH for-1.7] acpi unit-test: add signatures and checksum verification

2013-11-12 Thread Marcel Apfelbaum

Loaded all ACPI tables from guest, making
a good environment for further unit tests.

Checked that ACPI tables are corrected pointed
within the ACPI tree using their signatures.
Verified checksum for all the tables.

Signed-off-by: Marcel Apfelbaum marce...@redhat.com
---
To be applied on top of: [PATCH v2 2/2] acpi-test: basic acpi unit-test

 tests/acpi-test.c | 272 ++
 1 file changed, 252 insertions(+), 20 deletions(-)

diff --git a/tests/acpi-test.c b/tests/acpi-test.c
index 468c4f5..d6ff66f 100644
--- a/tests/acpi-test.c
+++ b/tests/acpi-test.c
@@ -13,13 +13,28 @@
 #include string.h
 #include stdio.h
 #include glib.h
+#include qemu-common.h
 #include libqtest.h
+#include qemu/compiler.h
+#include hw/i386/acpi-defs.h
 
+/* DSDT and SSDTs format */
 typedef struct {
-const char *args;
-uint64_t expected_boot;
-uint64_t expected_reboot;
-} boot_order_test;
+AcpiTableHeader header;
+uint8_t *aml;
+int aml_len;
+} AcpiSdtTable;
+
+typedef struct {
+uint32_t rsdp_addr;
+AcpiRsdpDescriptor rsdp_table;
+AcpiRsdtDescriptorRev1 rsdt_table;
+AcpiFadtDescriptorRev1 fadt_table;
+uint32_t *rsdt_tables_addr;
+int rsdt_tables_nr;
+AcpiSdtTable dsdt_table;
+AcpiSdtTable *ssdt_tables;
+} test_data;
 
 #define LOW(x) ((x)  0xff)
 #define HIGH(x) ((x)  8)
@@ -28,6 +43,51 @@ typedef struct {
 #define SIGNATURE_OFFSET 0x10
 #define BOOT_SECTOR_ADDRESS 0x7c00
 
+#define ACPI_READ_FIELD(field, addr)   \
+do {   \
+switch (sizeof(field)) {   \
+case 1:\
+field = readb(addr);   \
+break; \
+case 2:\
+field = le16_to_cpu(readw(addr));  \
+break; \
+case 4:\
+field = le32_to_cpu(readl(addr));  \
+break; \
+case 8:\
+field = le64_to_cpu(readq(addr));  \
+break; \
+default:   \
+g_assert(false);   \
+}  \
+addr += sizeof(field);  \
+} while (0);
+
+#define ACPI_READ_ARRAY_PTR(arr, length, addr)  \
+do {\
+int idx;\
+for (idx = 0; idx  length; ++idx) {\
+ACPI_READ_FIELD(arr[idx], addr);\
+}   \
+} while (0);
+
+#define ACPI_READ_ARRAY(arr, addr)   \
+ACPI_READ_ARRAY_PTR(arr, sizeof(arr)/sizeof(arr[0]), addr)
+
+#define ACPI_READ_TABLE_HEADER(table, addr)  \
+do { \
+ACPI_READ_FIELD((table)-signature, addr);   \
+ACPI_READ_FIELD((table)-length, addr);  \
+ACPI_READ_FIELD((table)-revision, addr);\
+ACPI_READ_FIELD((table)-checksum, addr);\
+ACPI_READ_ARRAY((table)-oem_id, addr);  \
+ACPI_READ_ARRAY((table)-oem_table_id, addr);\
+ACPI_READ_FIELD((table)-oem_revision, addr);\
+ACPI_READ_ARRAY((table)-asl_compiler_id, addr); \
+ACPI_READ_FIELD((table)-asl_compiler_revision, addr);   \
+} while (0);
+
 /* Boot sector code: write SIGNATURE into memory,
  * then halt.
  */
@@ -57,6 +117,181 @@ static uint8_t boot_sector[0x200] = {
 
 static const char *disk = tests/acpi-test-disk.raw;
 
+static uint8_t acpi_checksum(const uint8_t *data, int len)
+{
+int i;
+uint8_t sum = 0;
+
+for (i = 0; i  len; i++) {
+sum += data[i];
+}
+
+return sum;
+}
+
+static void test_acpi_rsdp_address(test_data *data)
+{
+uint32_t off;
+
+/* OK, now find RSDP */
+for (off = 0xf; off  0x10; off += 0x10) {
+uint8_t sig[] = RSD PTR ;
+int i;
+
+for (i = 0; i  sizeof sig - 1; ++i) {
+sig[i] = readb(off + i);
+}
+
+if (!memcmp(sig, RSD PTR , sizeof sig)) {
+break;
+}
+}
+
+g_assert_cmphex(off, , 0x10);
+data-rsdp_addr = off;
+}
+
+static void test_acpi_rsdp_table(test_data *data)
+{
+AcpiRsdpDescriptor *rsdp_table = data-rsdp_table;
+uint32_t addr = data-rsdp_addr;
+
+ACPI_READ_FIELD(rsdp_table-signature, addr);
+g_assert_cmphex(rsdp_table-signature, ==, ACPI_RSDP_SIGNATURE);
+
+ACPI_READ_FIELD(rsdp_table-checksum, addr);
+ACPI_READ_ARRAY(rsdp_table-oem_id, addr);
+ACPI_READ_FIELD(rsdp_table-revision, addr);
+ACPI_READ_FIELD(rsdp_table-rsdt_physical_address, addr);
+

[Qemu-devel] [PATCH 15/16] smbios: Add a function to directly add an entry

2013-11-12 Thread Corey Minyard

There was no way to directly add a table entry to the SMBIOS table,
even though the BIOS supports this.  So add a function to do this.
This is in preparation for the IPMI handler adding it's SMBIOS table
entry.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 hw/i386/smbios.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/hw/i386/smbios.c b/hw/i386/smbios.c
index d3f1ee6..9c53131 100644
--- a/hw/i386/smbios.c
+++ b/hw/i386/smbios.c
@@ -277,6 +277,33 @@ static void save_opt(const char **dest, QemuOpts *opts, 
const char *name)
 }
 }
 
+int smbios_table_entry_add(struct smbios_structure_header *entry)
+{
+struct smbios_table *table;
+struct smbios_structure_header *header;
+unsigned int size = entry-length;
+
+if (!smbios_entries) {
+smbios_entries_len = sizeof(uint16_t);
+smbios_entries = g_malloc0(smbios_entries_len);
+}
+smbios_entries = g_realloc(smbios_entries, smbios_entries_len +
+  sizeof(*table) + size);
+table = (struct smbios_table *)(smbios_entries + smbios_entries_len);
+table-header.type = SMBIOS_TABLE_ENTRY;
+table-header.length = cpu_to_le16(sizeof(*table) + size);
+
+header = (struct smbios_structure_header *)(table-data);
+memcpy(header, entry, size);
+
+smbios_check_collision(header-type, SMBIOS_TABLE_ENTRY);
+
+smbios_entries_len += sizeof(*table) + size;
+(*(uint16_t *)smbios_entries) =
+   cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1);
+return 0;
+}
+
 void smbios_entry_add(QemuOpts *opts)
 {
 Error *local_err = NULL;
-- 
1.8.3.1

[Qemu-devel] [PATCH 16/16] ipmi: Add SMBIOS table entry

2013-11-12 Thread Corey Minyard

Add an IPMI table entry to the SMBIOS.
---
 hw/ipmi/isa_ipmi.c   | 29 +
 include/hw/i386/smbios.h | 14 ++
 2 files changed, 43 insertions(+)

diff --git a/hw/ipmi/isa_ipmi.c b/hw/ipmi/isa_ipmi.c
index b38c846..e40ca90 100644
--- a/hw/ipmi/isa_ipmi.c
+++ b/hw/ipmi/isa_ipmi.c
@@ -27,6 +27,7 @@
 #include qemu/timer.h
 #include sysemu/char.h
 #include sysemu/sysemu.h
+#include hw/i386/smbios.h
 #include ipmi.h
 
 /* This is the type the user specifies on the -device command line */
@@ -36,13 +37,36 @@
 typedef struct ISAIPMIDevice {
 ISADevice dev;
 char *interface;
+int intftype;
 uint32_t iobase;
 uint32_t isairq;
 uint8_t slave_addr;
+uint8_t version;
 CharDriverState *chr;
 IPMIInterface *intf;
 } ISAIPMIDevice;
 
+static void ipmi_encode_smbios(ISAIPMIDevice *info)
+{
+struct smbios_type_38 smb38;
+
+smb38.header.type = 38;
+smb38.header.length = sizeof(smb38);
+smb38.header.handle = cpu_to_le16(0x3000);
+smb38.interface_type = info-intftype;
+smb38.ipmi_spec_revision = info-version;
+smb38.i2c_slave_address = info-slave_addr;
+smb38.nv_storage_device_address = 0;
+
+/* or 1 to set it to I/O space */
+smb38.base_address = cpu_to_le64(info-iobase | 1);
+
+ /* 1-byte boundaries, addr bit0=0, level triggered irq */
+smb38.base_address_modifier = 1;
+smb38.interrupt_number = info-isairq;
+smbios_table_entry_add((struct smbios_structure_header *) smb38);
+}
+
 static void ipmi_isa_realizefn(DeviceState *dev, Error **errp)
 {
 ISADevice *isadev = ISA_DEVICE(dev);
@@ -50,6 +74,7 @@ static void ipmi_isa_realizefn(DeviceState *dev, Error **errp)
 char typename[20];
 Object *intfobj;
 IPMIInterface *intf;
+IPMIInterfaceClass *intfk;
 Object *bmcobj;
 IPMIBmc *bmc;
 int rc;
@@ -69,10 +94,13 @@ static void ipmi_isa_realizefn(DeviceState *dev, Error 
**errp)
  TYPE_IPMI_INTERFACE_PREFIX %s, isa-interface);
 intfobj = object_new(typename);
 intf = IPMI_INTERFACE(intfobj);
+intfk = IPMI_INTERFACE_GET_CLASS(intf);
 bmc-intf = intf;
 intf-bmc = bmc;
 intf-io_base = isa-iobase;
 intf-slave_addr = isa-slave_addr;
+isa-intftype = intfk-smbios_type;
+isa-version = 0x20; /* Version 2.0 */
 rc = ipmi_interface_init(intf);
 if (rc) {
 error_setg(errp, Error initializing IPMI interface: %d\n, rc);
@@ -106,6 +134,7 @@ static void ipmi_isa_realizefn(DeviceState *dev, Error 
**errp)
 qdev_set_legacy_instance_id(dev, intf-io_base, intf-io_length);
 
 isa_register_ioport(isadev, intf-io, intf-io_base);
+ipmi_encode_smbios(isa);
 }
 
 static void ipmi_isa_reset(DeviceState *dev)
diff --git a/include/hw/i386/smbios.h b/include/hw/i386/smbios.h
index b08ec71..8133faa 100644
--- a/include/hw/i386/smbios.h
+++ b/include/hw/i386/smbios.h
@@ -29,6 +29,8 @@ struct smbios_structure_header {
 uint16_t handle;
 } QEMU_PACKED;
 
+int smbios_table_entry_add(struct smbios_structure_header *entry);
+
 /* SMBIOS type 0 - BIOS Information */
 struct smbios_type_0 {
 struct smbios_structure_header header;
@@ -155,6 +157,18 @@ struct smbios_type_32 {
 uint8_t boot_status;
 } QEMU_PACKED;
 
+/* SMBIOS type 38 - IPMI */
+struct smbios_type_38 {
+struct smbios_structure_header header;
+uint8_t interface_type;
+uint8_t ipmi_spec_revision;
+uint8_t i2c_slave_address;
+uint8_t nv_storage_device_address;
+uint64_t base_address;
+uint8_t base_address_modifier;
+uint8_t interrupt_number;
+} QEMU_PACKED;
+
 /* SMBIOS type 127 -- End-of-table */
 struct smbios_type_127 {
 struct smbios_structure_header header;
-- 
1.8.3.1

[Qemu-devel] [PATCH 12/16] ipmi: Add documentation

2013-11-12 Thread Corey Minyard

Add some basic documentation for the IPMI device.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 qemu-options.hx | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 5bcfaa0..500d7c8 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -327,6 +327,41 @@ Add device @var{driver}.  @var{prop}=@var{value} sets 
driver
 properties.  Valid properties depend on the driver.  To get help on
 possible drivers and properties, use @code{-device help} and
 @code{-device @var{driver},help}.
+
+Some drivers are:
+@item -device 
isa-ipmi[,interface=kcs|bt][,iobase=@var{val}][,irq=@var{val}][,slave_addr=@var{val}][,chardev=name]
+
+Add an IPMI device.  This also adds a corresponding SMBIOS entry to the
+SMBIOS tables for x86.  The following options are handled:
+@table @option
+@item interface=kcs|bt
+Define the interface type to use.  Currently the IPMI-defined KCS and
+BT interfaces are handled.  The default is KCS.
+@item iobase=@var{val}
+Define the I/O address of the interface.  The default is 0xca0 for KCS
+and 0xe4 for BT.
+@item irq=@var{val}
+Define the interrupt to use.  The default is 5.  To disable interrupts,
+set this to 0.
+@item slave_addr=@var{val}
+The IPMI slave address to use for the BMC.  The default is 0x20.
+@item chardev=name
+If a chardev is not specified, the IPMI driver uses a built-in baseboard
+management controller (BMC) simulator.  It provides a basic BMC with a
+watchdog timer and associated sensor.
+
+If a chardev is specified, A connection is made to an external BMC
+simulator.  If you do this, it is strongly recommended that you use
+the reconnect= chardev option to reconnect to the simulator if the
+connection is lost.  Note that if this is not used carefully, it can
+be a security issue, as the interface has the ability to send resets,
+NMIs, and power off the VM.  It's best if QEMU makes a connection to
+an external simulator running on a secure port on localhost, so
+neither the simulator nor QEMU is exposed to any outside network.
+
+See the lanserv/README.vm file in the OpenIPMI library for more
+details on the external interface.
+@end table
 ETEXI
 
 DEF(name, HAS_ARG, QEMU_OPTION_name,
-- 
1.8.3.1

[Qemu-devel] [PATCH 05/11] scsi-disk: reject ANCHOR=1 for UNMAP and WRITE SAME commands

2013-11-12 Thread Paolo Bonzini

Since we report ANC_SUP==0 in VPD page B2h, we need to return
an error (ILLEGAL REQUEST/INVALID FIELD IN CDB) for all WRITE SAME
requests with ANCHOR==1.

Inspired by a similar patch to the LIO in-kernel target.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 hw/scsi/scsi-disk.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 4138268..7e29760 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -1548,6 +1548,11 @@ static void scsi_disk_emulate_unmap(SCSIDiskReq *r, 
uint8_t *inbuf)
 int len = r-req.cmd.xfer;
 UnmapCBData *data;
 
+/* Reject ANCHOR=1.  */
+if (r-req.cmd.buf[1]  0x1) {
+goto invalid_field;
+}
+
 if (len  8) {
 goto invalid_param_len;
 }
@@ -1578,6 +1583,10 @@ static void scsi_disk_emulate_unmap(SCSIDiskReq *r, 
uint8_t *inbuf)
 
 invalid_param_len:
 scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
+return;
+
+invalid_field:
+scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
 }
 
 static void scsi_disk_emulate_write_data(SCSIRequest *req)
@@ -1856,8 +1865,9 @@ static int32_t scsi_disk_emulate_command(SCSIRequest 
*req, uint8_t *buf)
 
 /*
  * We only support WRITE SAME with the unmap bit set for now.
+* Reject UNMAP=0 or ANCHOR=1.
  */
-if (!(req-cmd.buf[1]  0x8)) {
+if (!(req-cmd.buf[1]  0x8) || (req-cmd.buf[1]  0x10)) {
 goto illegal_request;
 }
 
-- 
1.8.4.2

[Qemu-devel] [PATCH 13/16] ipmi: Add migration capability to the IPMI device.

2013-11-12 Thread Corey Minyard

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 hw/ipmi/ipmi.c| 17 +
 hw/ipmi/ipmi.h|  2 ++
 hw/ipmi/ipmi_bt.c | 14 ++
 hw/ipmi/ipmi_extern.c | 42 ++
 hw/ipmi/ipmi_kcs.c| 15 +++
 hw/ipmi/ipmi_sim.c| 30 ++
 hw/ipmi/isa_ipmi.c| 12 
 7 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c
index 21560a9..5f04085 100644
--- a/hw/ipmi/ipmi.c
+++ b/hw/ipmi/ipmi.c
@@ -150,6 +150,23 @@ int ipmi_bmc_init(IPMIBmc *s)
 return 0;
 }
 
+const VMStateDescription vmstate_IPMIInterface = {
+.name = TYPE_IPMI_INTERFACE,
+.version_id = 1,
+.minimum_version_id = 1,
+.fields  = (VMStateField[]) {
+VMSTATE_BOOL(obf_irq_set, IPMIInterface),
+VMSTATE_BOOL(atn_irq_set, IPMIInterface),
+VMSTATE_BOOL(use_irq, IPMIInterface),
+VMSTATE_BOOL(irqs_enabled, IPMIInterface),
+VMSTATE_UINT32(outpos, IPMIInterface),
+VMSTATE_UINT32(outlen, IPMIInterface),
+VMSTATE_VBUFFER_UINT32(inmsg, IPMIInterface, 1, NULL, 0, inlen),
+VMSTATE_BOOL(write_end, IPMIInterface),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static TypeInfo ipmi_bmc_type_info = {
 .name = TYPE_IPMI_BMC,
 .parent = TYPE_OBJECT,
diff --git a/hw/ipmi/ipmi.h b/hw/ipmi/ipmi.h
index 8838023..a7cd4a3 100644
--- a/hw/ipmi/ipmi.h
+++ b/hw/ipmi/ipmi.h
@@ -187,6 +187,8 @@ typedef struct IPMIInterfaceClass {
unsigned char *rsp, unsigned int rsp_len);
 } IPMIInterfaceClass;
 
+extern const VMStateDescription vmstate_IPMIInterface;
+
 int ipmi_interface_init(IPMIInterface *s);
 void ipmi_interface_reset(IPMIInterface *s);
 
diff --git a/hw/ipmi/ipmi_bt.c b/hw/ipmi/ipmi_bt.c
index ec256f4..b7ffdcf 100644
--- a/hw/ipmi/ipmi_bt.c
+++ b/hw/ipmi/ipmi_bt.c
@@ -327,6 +327,19 @@ static void ipmi_bt_set_atn(IPMIInterface *s, int val, int 
irq)
 }
 }
 
+static const VMStateDescription vmstate_ipmi_bt = {
+.name = TYPE_IPMI_INTERFACE_BT,
+.version_id = 1,
+.minimum_version_id = 1,
+.fields  = (VMStateField[]) {
+VMSTATE_UINT8(control_reg, IPMIBtInterface),
+VMSTATE_UINT8(mask_reg, IPMIBtInterface),
+VMSTATE_UINT8(waiting_rsp, IPMIBtInterface),
+VMSTATE_UINT8(waiting_seq, IPMIBtInterface),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static int ipmi_bt_init(IPMIInterface *s)
 {
 IPMIBtInterface *bt = IPMI_INTERFACE_BT(s);
@@ -337,6 +350,7 @@ static int ipmi_bt_init(IPMIInterface *s)
 s-io_length = 3;
 
 memory_region_init_io(s-io, OBJECT(s), ipmi_bt_io_ops, bt, ipmi-bt, 
3);
+vmstate_register(NULL, 0, vmstate_ipmi_bt, bt);
 
 return 0;
 }
diff --git a/hw/ipmi/ipmi_extern.c b/hw/ipmi/ipmi_extern.c
index 1cd7c11..d66797d 100644
--- a/hw/ipmi/ipmi_extern.c
+++ b/hw/ipmi/ipmi_extern.c
@@ -63,10 +63,10 @@ typedef struct IPMIExternBmc {
 
 unsigned char inbuf[MAX_IPMI_MSG_SIZE + 2];
 unsigned int inpos;
-int in_escape;
-int in_too_many;
-int waiting_rsp;
-int sending_cmd;
+bool in_escape;
+bool in_too_many;
+bool waiting_rsp;
+bool sending_cmd;
 
 unsigned char outbuf[(MAX_IPMI_MSG_SIZE + 2) * 2 + 1];
 unsigned int outpos;
@@ -442,6 +442,39 @@ static void ipmi_extern_handle_reset(IPMIBmc *b)
 ipmi_unlock(s);
 }
 
+static int ipmi_extern_post_migrate(void *opaque, int version_id)
+{
+IPMIExternBmc *es = opaque;
+
+/*
+ * We don't directly restore waiting_rsp, Instead, we return an
+ * error on the interface if a response was being waited for.
+ */
+if (es-waiting_rsp) {
+IPMIInterface *s = es-parent.intf;
+IPMIInterfaceClass *k = IPMI_INTERFACE_GET_CLASS(s);
+
+es-waiting_rsp = 0;
+es-inbuf[1] = es-outbuf[1] | 0x04;
+es-inbuf[2] = es-outbuf[2];
+es-inbuf[3] = IPMI_CC_BMC_INIT_IN_PROGRESS;
+k-handle_rsp(s, es-outbuf[0], es-inbuf + 1, 3);
+}
+return 0;
+}
+
+static const VMStateDescription vmstate_ipmi_extern = {
+.name = TYPE_IPMI_BMC_EXTERN,
+.version_id = 1,
+.minimum_version_id = 1,
+.post_load = ipmi_extern_post_migrate,
+.fields  = (VMStateField[]) {
+VMSTATE_BOOL(send_reset, IPMIExternBmc),
+VMSTATE_BOOL(waiting_rsp, IPMIExternBmc),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static int ipmi_extern_init(IPMIBmc *b)
 {
 IPMIExternBmc *es = IPMI_BMC_EXTERN(b);
@@ -449,6 +482,7 @@ static int ipmi_extern_init(IPMIBmc *b)
 es-extern_timer = timer_new(QEMU_CLOCK_REALTIME, SCALE_NS,
 extern_timeout, es);
 qemu_chr_add_handlers(es-parent.chr, can_receive, receive, chr_event, es);
+vmstate_register(NULL, 0, vmstate_ipmi_extern, es);
 return 0;
 }
 
diff --git a/hw/ipmi/ipmi_kcs.c b/hw/ipmi/ipmi_kcs.c
index 056a369..81f0709 100644
--- a/hw/ipmi/ipmi_kcs.c
+++ b/hw/ipmi/ipmi_kcs.c
@@ -304,6 +304,20

Re: [Qemu-devel] [PATCH 02/16] qemu-char: Allow a chardev to reconnect if disconnected

2013-11-12 Thread Eric Blake

On 11/12/2013 09:33 AM, Corey Minyard wrote:
 Allow a socket that connects to reconnect on a periodic basis if it
 fails to connect at startup or if the connection drops while in use.
 
 Signed-off-by: Corey Minyard cminy...@mvista.com
 ---
  include/sysemu/char.h |  3 ++
  qemu-char.c   | 88 
 ---
  qemu-options.hx   | 11 +--
  3 files changed, 87 insertions(+), 15 deletions(-)
 

 +++ b/qemu-options.hx
 @@ -1780,8 +1780,9 @@ ETEXI
  DEF(chardev, HAS_ARG, QEMU_OPTION_chardev,
  -chardev null,id=id[,mux=on|off]\n
  -chardev 
 socket,id=id[,host=host],port=host[,to=to][,ipv4][,ipv6][,nodelay]\n
 - [,server][,nowait][,telnet][,mux=on|off] (tcp)\n
 --chardev socket,id=id,path=path[,server][,nowait][,telnet],[mux=on|off] 
 (unix)\n
 + [,server][,nowait][,telnet][,mux=on|off][,reconnect=seconds] 
 (tcp)\n
 +-chardev 
 socket,id=id,path=path[,server][,nowait][,telnet][,mux=on|off]\n
 + [,reconnect=seconds] (unix)\n

 +@option{reconnect} specifies that if the socket does not come up at startup,
 +or if the socket is closed for some reason (like the other end exited),
 +wait the given number of seconds and attempt to reconnect.

Sounds cool.  Are you planning on also adding the QMP counterpart for
specifying this option when doing hotplugs of a chardev?  Does reconnect
make any sense when not using server mode?

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PATCH 09/16] ipmi: Add a local BMC simulation

2013-11-12 Thread Corey Minyard

This provides a minimal local BMC, basically enough to comply with the
spec and provide a complete watchdog timer (including a sensor, SDR,
and event).

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 default-configs/i386-softmmu.mak   |1 +
 default-configs/x86_64-softmmu.mak |1 +
 hw/ipmi/Makefile.objs  |1 +
 hw/ipmi/ipmi_sim.c | 1552 
 4 files changed, 1555 insertions(+)
 create mode 100644 hw/ipmi/ipmi_sim.c

diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index ea3a3b4..fd38dc1 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -14,6 +14,7 @@ CONFIG_IPMI=y
 CONFIG_ISA_IPMI=y
 CONFIG_IPMI_KCS=y
 CONFIG_IPMI_BT=y
+CONFIG_IPMI_LOCAL=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak
index 89efe84..941b08c 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -14,6 +14,7 @@ CONFIG_IPMI=y
 CONFIG_ISA_IPMI=y
 CONFIG_IPMI_KCS=y
 CONFIG_IPMI_BT=y
+CONFIG_IPMI_LOCAL=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_I8254=y
diff --git a/hw/ipmi/Makefile.objs b/hw/ipmi/Makefile.objs
index a8cd463..2366160 100644
--- a/hw/ipmi/Makefile.objs
+++ b/hw/ipmi/Makefile.objs
@@ -2,3 +2,4 @@ common-obj-$(CONFIG_IPMI) += ipmi.o
 common-obj-$(CONFIG_ISA_IPMI) += isa_ipmi.o
 common-obj-$(CONFIG_IPMI_KCS) += ipmi_kcs.o
 common-obj-$(CONFIG_IPMI_BT) += ipmi_bt.o
+common-obj-$(CONFIG_IPMI_LOCAL) += ipmi_sim.o
diff --git a/hw/ipmi/ipmi_sim.c b/hw/ipmi/ipmi_sim.c
new file mode 100644
index 000..327a01c
--- /dev/null
+++ b/hw/ipmi/ipmi_sim.c
@@ -0,0 +1,1552 @@
+/*
+ * IPMI BMC emulation
+ *
+ * Copyright (c) 2012 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the Software), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include stdio.h
+#include string.h
+#include stdint.h
+#include qemu/timer.h
+#include ipmi.h
+
+#define IPMI_NETFN_CHASSIS0x00
+#define IPMI_NETFN_CHASSIS_MAXCMD 0x03
+
+#define IPMI_CMD_GET_CHASSIS_CAPABILITIES 0x00
+#define IPMI_CMD_GET_CHASSIS_STATUS   0x01
+#define IPMI_CMD_CHASSIS_CONTROL  0x02
+
+#define IPMI_NETFN_SENSOR_EVENT   0x04
+#define IPMI_NETFN_SENSOR_EVENT_MAXCMD0x2e
+
+#define IPMI_CMD_SET_SENSOR_EVT_ENABLE0x28
+#define IPMI_CMD_GET_SENSOR_EVT_ENABLE0x29
+#define IPMI_CMD_REARM_SENSOR_EVTS0x2a
+#define IPMI_CMD_GET_SENSOR_EVT_STATUS0x2b
+#define IPMI_CMD_GET_SENSOR_READING   0x2d
+
+/* #define IPMI_NETFN_APP 0x06 In ipmi.h */
+#define IPMI_NETFN_APP_MAXCMD 0x36
+
+#define IPMI_CMD_GET_DEVICE_ID0x01
+#define IPMI_CMD_RESET_WATCHDOG_TIMER 0x22
+#define IPMI_CMD_SET_WATCHDOG_TIMER   0x24
+#define IPMI_CMD_GET_WATCHDOG_TIMER   0x25
+#define IPMI_CMD_SET_BMC_GLOBAL_ENABLES   0x2e
+#define IPMI_CMD_GET_BMC_GLOBAL_ENABLES   0x2f
+#define IPMI_CMD_CLR_MSG_FLAGS0x30
+#define IPMI_CMD_GET_MSG_FLAGS0x31
+#define IPMI_CMD_READ_EVT_MSG_BUF 0x35
+
+#define IPMI_NETFN_STORAGE0x0a
+#define IPMI_NETFN_STORAGE_MAXCMD 0x4a
+
+#define IPMI_CMD_GET_SDR_REP_INFO 0x20
+#define IPMI_CMD_GET_SDR_REP_ALLOC_INFO   0x21
+#define IPMI_CMD_RESERVE_SDR_REP  0x22
+#define IPMI_CMD_GET_SDR  0x23
+#define IPMI_CMD_ADD_SDR  0x24
+#define IPMI_CMD_PARTIAL_ADD_SDR  0x25
+#define IPMI_CMD_DELETE_SDR   0x26
+#define IPMI_CMD_CLEAR_SDR_REP0x27
+#define IPMI_CMD_GET_SDR_REP_TIME 0x28
+#define IPMI_CMD_SET_SDR_REP_TIME 0x29
+#define IPMI_CMD_ENTER_SDR_REP_UPD_MODE   0x2A
+#define IPMI_CMD_EXIT_SDR_REP_UPD_MODE0x2B
+#define IPMI_CMD_RUN_INIT_AGENT   0x2C
+#define IPMI_CMD_GET_SEL_INFO 0x40
+#define IPMI_CMD_GET_SEL_ALLOC_INFO   0x41
+#define IPMI_CMD_RESERVE_SEL

[Qemu-devel] snapshots [was: Qemu-devel Digest, Vol 128, Issue 93]

2013-11-12 Thread Eric Blake

On 11/05/2013 08:27 PM, 宫文超 wrote:
 hello evryone,who can give me a detail explain of the qemu code of the online 
 snapshot?THX

Top-posting and replying to a digest message (especially without
changing the subject line) is poor netiquette.  Better would have been
starting a new thread.

Your question is rather broad; an online snapshot is currently done via
the 'savevm' HMP monitor command, so your best bet might be to step
through that code, or ask more specific questions.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PATCH 02/16] qemu-char: Allow a chardev to reconnect if disconnected

2013-11-12 Thread Corey Minyard

Allow a socket that connects to reconnect on a periodic basis if it
fails to connect at startup or if the connection drops while in use.

Signed-off-by: Corey Minyard cminy...@mvista.com
---
 include/sysemu/char.h |  3 ++
 qemu-char.c   | 88 ---
 qemu-options.hx   | 11 +--
 3 files changed, 87 insertions(+), 15 deletions(-)

diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index 44baf0f..3d21b5a 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -81,6 +81,9 @@ struct CharDriverState {
 guint fd_in_tag;
 QemuOpts *opts;
 QTAILQ_ENTRY(CharDriverState) next;
+GSList *backend;
+QEMUTimer *recon_timer;
+uint64_t recon_time;
 };
 
 /**
diff --git a/qemu-char.c b/qemu-char.c
index dd04cc0..23d7647 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -96,9 +96,22 @@ void qemu_chr_be_event(CharDriverState *s, int event)
 /* Keep track if the char device is open */
 switch (event) {
 case CHR_EVENT_OPENED:
+if (s-recon_timer) {
+timer_del(s-recon_timer);
+}
 s-be_open = 1;
 break;
 case CHR_EVENT_CLOSED:
+if (s-recon_timer) {
+void (*chr_close)(struct CharDriverState *chr) = s-chr_close;
+if (chr_close) {
+s-chr_close = NULL;
+chr_close(s);
+}
+timer_mod(s-recon_timer,
+ (get_clock() +
+  (s-recon_time * get_ticks_per_sec(;
+}
 s-be_open = 0;
 break;
 }
@@ -2583,6 +2596,7 @@ static void tcp_chr_close(CharDriverState *chr)
 closesocket(s-listen_fd);
 }
 g_free(s);
+chr-opaque = NULL;
 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
 }
 
@@ -2642,8 +2656,6 @@ static CharDriverState 
*qemu_chr_open_socket_fd(CharDriverState *chr,
 chr-get_msgfd = tcp_get_msgfd;
 chr-chr_add_client = tcp_chr_add_client;
 chr-chr_add_watch = tcp_chr_add_watch;
-/* be isn't opened until we get a connection */
-chr-explicit_be_open = true;
 
 if (is_listen) {
 s-listen_fd = fd;
@@ -2681,6 +2693,9 @@ static CharDriverState 
*qemu_chr_open_socket(CharDriverState *chr,
 bool do_nodelay = !qemu_opt_get_bool(opts, delay, true);
 bool is_unix= qemu_opt_get(opts, path) != NULL;
 
+/* be isn't opened until we get a connection */
+chr-explicit_be_open = true;
+
 if (is_unix) {
 if (is_listen) {
 fd = unix_listen_opts(opts, local_err);
@@ -2717,8 +2732,9 @@ static CharDriverState 
*qemu_chr_open_socket(CharDriverState *chr,
 if (fd = 0) {
 closesocket(fd);
 }
-if (chr) {
+if (chr  chr-opaque) {
 g_free(chr-opaque);
+chr-opaque = NULL;
 }
 return NULL;
 }
@@ -3163,6 +3179,20 @@ void register_char_driver_qapi(const char *name, 
ChardevBackendKind kind,
 backends = g_slist_append(backends, s);
 }
 
+static void recon_timeout(void *opaque)
+{
+CharDriverState *chr = opaque;
+CharDriver *cd = chr-backend-data;
+
+if (chr-be_open) {
+return;
+}
+
+timer_mod(chr-recon_timer,
+ (get_clock() + (chr-recon_time * get_ticks_per_sec(;
+cd-open(chr, chr-opts);
+}
+
 CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts,
 void (*init)(struct CharDriverState *s),
 Error **errp)
@@ -3245,11 +3275,36 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts,
 }
 
 chr = g_malloc0(sizeof(CharDriverState));
-chr = cd-open(chr, opts);
-if (!chr) {
-error_setg(errp, chardev: opening backend \%s\ failed,
-   qemu_opt_get(opts, backend));
-goto err;
+
+chr-backend = i;
+chr-recon_time = qemu_opt_get_number(opts, reconnect, 0);
+if (chr-recon_time) {
+if (strcmp(qemu_opt_get(opts, backend), socket) != 0) {
+g_free(chr);
+fprintf(stderr, chardev: reconnect only supported on sockets\n);
+return NULL;
+}
+if (qemu_opt_get_bool(opts, server, 0)) {
+g_free(chr);
+fprintf(stderr, chardev: server device cannot reconnect\n);
+return NULL;
+}
+chr-opts = opts;
+chr-recon_timer = timer_new(QEMU_CLOCK_REALTIME, SCALE_NS,
+recon_timeout, chr);
+
+/* Make sure it connects in time. */
+timer_mod(chr-recon_timer,
+ (get_clock() + (chr-recon_time * get_ticks_per_sec(;
+}
+
+if (!cd-open(chr, opts)) {
+if (!chr-recon_time) {
+/* Reconnect is not enabled, give up */
+fprintf(stderr, chardev: opening backend \%s\ failed\n,
+qemu_opt_get(opts, backend));
+return NULL;
+}
 }
 
 if (!chr-filename)

1 2 >

1 - 100 of 177 matches

Mail list logo