[Qemu-devel] [PATCH 11/12] hw/omap1.c : separate uart module

2010-05-29 Thread cmchao

Signed-off-by: cmchao cmc...@gmail.com
---
 Makefile.target |3 +-
 hw/omap1.c  |  170 
 hw/omap_uart.c  |  194 +++
 3 files changed, 196 insertions(+), 171 deletions(-)
 create mode 100644 hw/omap_uart.c

diff --git a/Makefile.target b/Makefile.target
index 20bcb8a..a01daa4 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -263,7 +263,8 @@ obj-arm-y += pxa2xx.o pxa2xx_pic.o pxa2xx_gpio.o 
pxa2xx_timer.o pxa2xx_dma.o
 obj-arm-y += pxa2xx_lcd.o pxa2xx_mmci.o pxa2xx_pcmcia.o pxa2xx_keypad.o
 obj-arm-y += gumstix.o
 obj-arm-y += zaurus.o ide/microdrive.o spitz.o tosa.o tc6393xb.o
-obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o 
omap_gpio.o omap_intc.o
+obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o \
+   omap_gpio.o omap_intc.o omap_uart.o
 obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o \
omap_gpmc.o omap_sdrc.o omap_spi.o omap_tap.o omap_l4.o
 obj-arm-y += omap_sx1.o palm.o tsc210x.o
diff --git a/hw/omap1.c b/hw/omap1.c
index 21c53fe..301eec5 100644
--- a/hw/omap1.c
+++ b/hw/omap1.c
@@ -1378,176 +1378,6 @@ static void omap_dpll_init(struct dpll_ctl_s *s, 
target_phys_addr_t base,
 cpu_register_physical_memory(base, 0x100, iomemtype);
 }
 
-/* UARTs */
-struct omap_uart_s {
-target_phys_addr_t base;
-SerialState *serial; /* TODO */
-struct omap_target_agent_s *ta;
-omap_clk fclk;
-qemu_irq irq;
-
-uint8_t eblr;
-uint8_t syscontrol;
-uint8_t wkup;
-uint8_t cfps;
-uint8_t mdr[2];
-uint8_t scr;
-uint8_t clksel;
-};
-
-void omap_uart_reset(struct omap_uart_s *s)
-{
-s-eblr = 0x00;
-s-syscontrol = 0;
-s-wkup = 0x3f;
-s-cfps = 0x69;
-s-clksel = 0;
-}
-
-struct omap_uart_s *omap_uart_init(target_phys_addr_t base,
-qemu_irq irq, omap_clk fclk, omap_clk iclk,
-qemu_irq txdma, qemu_irq rxdma, CharDriverState *chr)
-{
-struct omap_uart_s *s = (struct omap_uart_s *)
-qemu_mallocz(sizeof(struct omap_uart_s));
-
-s-base = base;
-s-fclk = fclk;
-s-irq = irq;
-#ifdef TARGET_WORDS_BIGENDIAN
-s-serial = serial_mm_init(base, 2, irq, omap_clk_getrate(fclk)/16,
-   chr ?: qemu_chr_open(null, null, NULL), 1,
-   1);
-#else
-s-serial = serial_mm_init(base, 2, irq, omap_clk_getrate(fclk)/16,
-   chr ?: qemu_chr_open(null, null, NULL), 1,
-   0);
-#endif
-return s;
-}
-
-static uint32_t omap_uart_read(void *opaque, target_phys_addr_t addr)
-{
-struct omap_uart_s *s = (struct omap_uart_s *) opaque;
-
-addr = 0xff;
-switch (addr) {
-case 0x20: /* MDR1 */
-return s-mdr[0];
-case 0x24: /* MDR2 */
-return s-mdr[1];
-case 0x40: /* SCR */
-return s-scr;
-case 0x44: /* SSR */
-return 0x0;
-case 0x48: /* EBLR (OMAP2) */
-return s-eblr;
-case 0x4C: /* OSC_12M_SEL (OMAP1) */
-return s-clksel;
-case 0x50: /* MVR */
-return 0x30;
-case 0x54: /* SYSC (OMAP2) */
-return s-syscontrol;
-case 0x58: /* SYSS (OMAP2) */
-return 1;
-case 0x5c: /* WER (OMAP2) */
-return s-wkup;
-case 0x60: /* CFPS (OMAP2) */
-return s-cfps;
-}
-
-OMAP_BAD_REG(addr);
-return 0;
-}
-
-static void omap_uart_write(void *opaque, target_phys_addr_t addr,
-uint32_t value)
-{
-struct omap_uart_s *s = (struct omap_uart_s *) opaque;
-
-addr = 0xff;
-switch (addr) {
-case 0x20: /* MDR1 */
-s-mdr[0] = value  0x7f;
-break;
-case 0x24: /* MDR2 */
-s-mdr[1] = value  0xff;
-break;
-case 0x40: /* SCR */
-s-scr = value  0xff;
-break;
-case 0x48: /* EBLR (OMAP2) */
-s-eblr = value  0xff;
-break;
-case 0x4C: /* OSC_12M_SEL (OMAP1) */
-s-clksel = value  1;
-break;
-case 0x44: /* SSR */
-case 0x50: /* MVR */
-case 0x58: /* SYSS (OMAP2) */
-OMAP_RO_REG(addr);
-break;
-case 0x54: /* SYSC (OMAP2) */
-s-syscontrol = value  0x1d;
-if (value  2)
-omap_uart_reset(s);
-break;
-case 0x5c: /* WER (OMAP2) */
-s-wkup = value  0x7f;
-break;
-case 0x60: /* CFPS (OMAP2) */
-s-cfps = value  0xff;
-break;
-default:
-OMAP_BAD_REG(addr);
-}
-}
-
-static CPUReadMemoryFunc * const omap_uart_readfn[] = {
-omap_uart_read,
-omap_uart_read,
-omap_badwidth_read8,
-};
-
-static CPUWriteMemoryFunc * const omap_uart_writefn[] = {
-omap_uart_write,
-omap_uart_write,
-omap_badwidth_write8,
-};
-
-struct omap_uart_s *omap2_uart_init(struct omap_target_agent_s *ta,
-qemu_irq irq, omap_clk fclk, omap_clk iclk,
-   

[Qemu-devel] [PATCH 08/12] hw/omap2.c : separate spi module

2010-05-29 Thread cmchao

Signed-off-by: cmchao cmc...@gmail.com
---
 Makefile.target |2 +-
 hw/omap.h   |2 +
 hw/omap2.c  |  323 ---
 hw/omap_spi.c   |  346 +++
 4 files changed, 349 insertions(+), 324 deletions(-)
 create mode 100644 hw/omap_spi.c

diff --git a/Makefile.target b/Makefile.target
index 9a309e2..1edec6f 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -264,7 +264,7 @@ obj-arm-y += pxa2xx_lcd.o pxa2xx_mmci.o pxa2xx_pcmcia.o 
pxa2xx_keypad.o
 obj-arm-y += gumstix.o
 obj-arm-y += zaurus.o ide/microdrive.o spitz.o tosa.o tc6393xb.o
 obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o 
omap_gpio.o omap_intc.o
-obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o 
omap_gpmc.o omap_sdrc.o
+obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o 
omap_gpmc.o omap_sdrc.o omap_spi.o
 obj-arm-y += omap_sx1.o palm.o tsc210x.o
 obj-arm-y += nseries.o blizzard.o onenand.o vga.o cbus.o tusb6010.o usb-musb.o
 obj-arm-y += mst_fpga.o mainstone.o
diff --git a/hw/omap.h b/hw/omap.h
index ea23ec9..fef495a 100644
--- a/hw/omap.h
+++ b/hw/omap.h
@@ -706,12 +706,14 @@ struct omap_uwire_s *omap_uwire_init(target_phys_addr_t 
base,
 void omap_uwire_attach(struct omap_uwire_s *s,
 uWireSlave *slave, int chipselect);
 
+/* OMAP2 spi */
 struct omap_mcspi_s;
 struct omap_mcspi_s *omap_mcspi_init(struct omap_target_agent_s *ta, int chnum,
 qemu_irq irq, qemu_irq *drq, omap_clk fclk, omap_clk iclk);
 void omap_mcspi_attach(struct omap_mcspi_s *s,
 uint32_t (*txrx)(void *opaque, uint32_t, int), void *opaque,
 int chipselect);
+void omap_mcspi_reset(struct omap_mcspi_s *s);
 
 struct omap_rtc_s;
 struct omap_rtc_s *omap_rtc_init(target_phys_addr_t base,
diff --git a/hw/omap2.c b/hw/omap2.c
index e6d1b52..ae6394e 100644
--- a/hw/omap2.c
+++ b/hw/omap2.c
@@ -27,329 +27,6 @@
 #include soc_dma.h
 #include audio/audio.h
 
-/* Multichannel SPI */
-struct omap_mcspi_s {
-qemu_irq irq;
-int chnum;
-
-uint32_t sysconfig;
-uint32_t systest;
-uint32_t irqst;
-uint32_t irqen;
-uint32_t wken;
-uint32_t control;
-
-struct omap_mcspi_ch_s {
-qemu_irq txdrq;
-qemu_irq rxdrq;
-uint32_t (*txrx)(void *opaque, uint32_t, int);
-void *opaque;
-
-uint32_t tx;
-uint32_t rx;
-
-uint32_t config;
-uint32_t status;
-uint32_t control;
-} ch[4];
-};
-
-static inline void omap_mcspi_interrupt_update(struct omap_mcspi_s *s)
-{
-qemu_set_irq(s-irq, s-irqst  s-irqen);
-}
-
-static inline void omap_mcspi_dmarequest_update(struct omap_mcspi_ch_s *ch)
-{
-qemu_set_irq(ch-txdrq,
-(ch-control  1)/* EN */
-(ch-config  (1  14)) /* DMAW */
-(ch-status  (1  1))  /* TXS */
-((ch-config  12)  3) != 1);/* TRM */
-qemu_set_irq(ch-rxdrq,
-(ch-control  1)/* EN */
-(ch-config  (1  15)) /* DMAW */
-(ch-status  (1  0))  /* RXS */
-((ch-config  12)  3) != 2);/* TRM */
-}
-
-static void omap_mcspi_transfer_run(struct omap_mcspi_s *s, int chnum)
-{
-struct omap_mcspi_ch_s *ch = s-ch + chnum;
-
-if (!(ch-control  1))/* EN */
-return;
-if ((ch-status  (1  0))  /* RXS */
-((ch-config  12)  3) != 2/* TRM */
-!(ch-config  (1  19))) /* TURBO */
-goto intr_update;
-if ((ch-status  (1  1))  /* TXS */
-((ch-config  12)  3) != 1) /* TRM */
-goto intr_update;
-
-if (!(s-control  1) ||   /* SINGLE */
-(ch-config  (1  20))) {/* FORCE */
-if (ch-txrx)
-ch-rx = ch-txrx(ch-opaque, ch-tx,  /* WL */
-1 + (0x1f  (ch-config  7)));
-}
-
-ch-tx = 0;
-ch-status |= 1  2;  /* EOT */
-ch-status |= 1  1;  /* TXS */
-if (((ch-config  12)  3) != 2) /* TRM */
-ch-status |= 1  0;  /* RXS */
-
-intr_update:
-if ((ch-status  (1  0))  /* RXS */
-((ch-config  12)  3) != 2/* TRM */
-!(ch-config  (1  19))) /* TURBO */
-s-irqst |= 1  (2 + 4 * chnum);  /* RX_FULL */
-if ((ch-status  (1  1))  /* TXS */
-((ch-config  12)  3) != 1) /* TRM */
-s-irqst |= 1  (0 + 4 * chnum);  /* TX_EMPTY */
-omap_mcspi_interrupt_update(s);
-

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Gleb Natapov
On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote:
  There is no code, because we're still at architecture design stage.
 
  Try to write test code to understand the problem better.
 
 I will.
 
Please do ASAP. This discussion shows that you don't understand what is the
problem that we are dialing with.

guests could also be assisted with special handling (like win2k
install hack), for example guest instructions could be counted
(approximately, for example using TB size or TSC) and only inject
after at least N instructions have passed.
Guest instructions cannot be easily counted in KVM (it can be done 
more
or less reliably using perf counters, may be).
  
   Aren't there any debug registers or perf counters, which can generate
   an interrupt after some number of instructions have been executed?
   Don't think debug registers have something like that and they are
   available for guest use anyway. Perf counters differs greatly from CPU
   to CPU (even between two CPUs of the same manufacturer), and we want to
   keep using them for profiling guests. And I don't see what problem it
   will solve anyway that can be solved by simple delay between irq
   reinjection.
 
  This would allow counting the executed instructions and limit it. Thus
  we could emulate a 500MHz CPU on a 2GHz CPU more accurately.
 
  Why would you want to limit number of instruction executed by guest if
  CPU has nothing else to do anyway? The problem occurs not when we have
  spare cycles so give to a guest, but in opposite case.
 
 I think one problem is that the guest has executed too much compared
 to what would happen with real HW with a lesser CPU. That explains the
 RTC frequency reprogramming case.
You think wrong. The problem is exactly opposite: the guest haven't
had enough execution time between two time interrupts. I don't know what
RTC frequency reprogramming case you are talking about here.

 
 
  
   
 And even if the rate did not matter, the APIC woult still have to 
 now
 about the fact that an IRQ is really periodic and does not only 
 appear
 as such for a certain interval. This really does not sound like
 simplifying things or even make them cleaner.
   
It would, the voodoo would be contained only in APIC, RTC would be
just like any other device. With the bidirectional irqs, this voodoo
would probably eventually spread to many other devices. The logical
conclusion of that would be a system where all devices would be
careful not to disturb the guest at wrong moment because that would
trigger a bug.
   
This voodoo will be so complex and unreliable that it will make RTC 
hack
pale in comparison (and I still don't see how you are going to make it
actually work).
  
   Implement everything inside APIC: only coalescing and reinjection.
   APIC has zero info needed to implement reinjection correctly as was
   shown to you several time in this thread and you simply keep ignoring
   it.
 
  On the contrary, APIC is actually the only source of the IRQ ack
  information. RTC hack would not work without APIC (or the
  bidirectional IRQ) passing this info to RTC.
 
  What APIC doesn't have now is the timer frequency or period info. This
  is known by RTC and also higher levels managing the clocks.
 
  So APIC has one bit of information and RTC everything else.
 
 The information known by RTC (timer period) is also known by higher levels.
 
What do you mean by higher level here? vl.c or APIC.

  The current
  approach (and proposed patch) brings this one bit of information to RTC,
  you are arguing that RTC should be able to communicate all its info to
  APIC. Sorry I don't see that your way has any advantage. Just more
  complex interface and it is much easier to get it wrong for other time
  sources.
 
 I don't think anymore that APIC should be handling this but the
 generic stuff, like vl.c or exec.c. Then there would be only
 information passing from APIC to higher levels.
Handling reinjection by general timer code makes infinitely more sense
then handling it in APIC. One thing (from the top of my head) that can't
be implemented at that level is injection of interrupt back to back (i.e
injecting next interrupt immediately after guest acknowledge previous
one to RTC).

 
  I keep ignoring the idea that the current model, where both RTC and
  APIC must somehow work together to make coalescing work, is the only
  possible just because it is committed and it happens to work in some
  cases. It would be much better to concentrate this to one place, APIC
  or preferably higher level where it may benefit other timers too.
  Provided of course that the other models can be made to work.
 
  So write the code and show us. You haven't show any evidence that RTC is
  the wrong place. RTC knows when interrupt was acknowledge to RTC, it
  know when clock frequency changes, it know when device reset happened.
  APIC knows only that 

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Gleb Natapov
On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote:
  I still don't see how the alternative is supposed to simplify our life
  or improve the efficiency of the de-coalescing workaround. It's rather
  problematic like Gleb pointed out: The de-coalescing logic needs to be
  informed about periodicity changes that can only be delivered along
  IRQs. So what to do with the backlog when the timer is stopped?
 
 What happens with the current design? Gleb only mentioned the
 frequency change, I thought that was not so big problem. But I don't
 think this case should be allowed happen at all, it can't exist on
 real HW.
 
Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP
inside QEMU, connect with gdb to QEMU process and check what frequency
RTC configured with (hint: it will be 64Hz), now run video inside the
guest and check frequency again (hint: it will be 1Khz).

--
Gleb.



Re: [Qemu-devel] [PATCH] virtio-blk: Avoid zeroing every request structure

2010-05-29 Thread Jamie Lokier
Alexander Graf wrote:
 Anthony Liguori wrote:
  I'd prefer to stick to bug fixes for stable releases.  Performance
  improvements are a good motivation for people to upgrade to 0.13 :-)
 
 In general I agree, but this one looks like a really simple one.

Besides, there are too many reported guest regressions at the moment
to upgrade if using any of them.

-- Jamie



[Qemu-devel] Re: [SeaBIOS] SMBIOS strings

2010-05-29 Thread Sebastian Herbszt

Jes Sorensen wrote:

Hi,

We were looking at the dmidecode output from qemu-kvm pre-seabios and
current qemu-kvm and noticed some of the strings have changed.

The main problem with this is that certain OSes are quite sensitive to
system changes and avoiding to change things unnecessarily would
probably be a good thing.


Which OSes do care? Windows only?


I wanted to check with the lists if there are any strong feelings about
this, and whether some of these changes were made for specific reasons?

For example:

Handle 0x, DMI type 0, 24 bytes
BIOS Information
-   Vendor: QEMU
-   Version: QEMU
+   Vendor: Bochs
+   Version: Bochs
   Release Date: 01/01/2007
   Address: 0xE8000
   Runtime Size: 96 kB


You can configure this with CONFIG_APPNAME.


and this:

Handle 0x0401, DMI type 4, 32 bytes
Processor Information
-   Socket Designation: CPU 1
+   Socket Designation: CPU01
   Type: Central Processor
   Family: Other
-   Manufacturer: QEMU
-   ID: 63 06 00 00 FD FB 8B 07
+   Manufacturer: Bochs
+   ID: 23 06 00 00 FD FB 8B 07
   Version: Not Specified
   Voltage: Unknown
   External Clock: Unknown

I guess the Socket Designation in particular might have been done for a
reason?

Otherwise, if there are no objections, I'll look at adding some patches
to make it more backwards compatible.

Cheers,
Jes


Is the different ID displayed on the same VM configuration (esp. -cpu option) ?
The value is gained by calling CPUID so it should not be different.

Which pre-seabios qemu-kvm bios are you comparing to?

Sebastian




Re: [Qemu-devel] [PATCH] target-ppc: remove useless line

2010-05-29 Thread Andreas Färber

Am 28.05.2010 um 21:00 schrieb Thomas Monjalon:


From: Thomas Monjalon tho...@monjalon.net

This line was a bit clear.
The next lines set or reset this bit (LE) depending of another bit  
(ILE).

So the first line is useless.

Signed-off-by: Thomas Monjalon tho...@monjalon.net
---
target-ppc/helper.c |1 -
1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/target-ppc/helper.c b/target-ppc/helper.c
index 3d843b5..dabf1fd 100644
--- a/target-ppc/helper.c
+++ b/target-ppc/helper.c
@@ -2591,7 +2591,6 @@ static inline void powerpc_excp(CPUState *env,  
int excp_model, int excp)

#if 0 /* Fix this: not on all targets */
new_msr = ~((target_ulong)1  MSR_PMM);
#endif
-new_msr = ~((target_ulong)1  MSR_LE);
if (msr_ile)
new_msr |= (target_ulong)1  MSR_LE;
else


Following line is
new_msr = ~((target_ulong)1  MSR_LE);
so this should be fine.

Reviewed-by: Andreas Faerber andreas.faer...@web.de

Andreas



Re: [Qemu-devel] Inquiry about qemu for Motorola 68360

2010-05-29 Thread hadi motamedi
qemu-system-m68k -cpu ?
Sorry . Where to find executable 'qemu-system-m68k.exe ?
Thank you


[Qemu-devel] Re: [PATCH 1/2] Pad iommu with an empty slot (necessary for SunOS 4.1.4)

2010-05-29 Thread Blue Swirl
On Fri, May 28, 2010 at 9:53 PM, Artyom Tarasenko
atar4q...@googlemail.com wrote:
 32m: 0x12fff394
 64m: 0x14fff394
 192m:0x1cfff394
 256m:0x20fff394

 Memory probing? It would be strange that OS would do it itself. The OS
 could just
 ask OBP how much does it have. Here is the listing where it happens:

 _swift_vac_rgnflush:            rd      %psr, %g2
 _swift_vac_rgnflush+4:          andn    %g2, 0x20, %g5
 _swift_vac_rgnflush+8:          mov     %g5, %psr
 _swift_vac_rgnflush+0xc:        nop
 _swift_vac_rgnflush+0x10:       nop
 _swift_vac_rgnflush+0x14:       mov     0x100, %g5
 _swift_vac_rgnflush+0x18:       lda     [%g5] 0x4, %g5
 _swift_vac_rgnflush+0x1c:       sll     %o2, 0x2, %g1
 _swift_vac_rgnflush+0x20:       sll     %g5, 0x4, %g5
 _swift_vac_rgnflush+0x24:       add     %g5, %g1, %g5
 _swift_vac_rgnflush+0x28:       lda     [%g5] 0x20, %g5

 _swift_vac_rgnflush+0x28: is the fatal one.

 kadb $c
 _swift_vac_rgnflush(?)
 _vac_rgnflush() + 4
 _hat_setup_kas(0xc00,0xf0447000,0x43a000,0x400,0xf043a000,0x3c0) + 70
 _startup(0xfe00,0x1000,0xfa00,0xf00e2bfc,0x10,0xdbc00) + 1414
 _main(0xf00e0fb4,0xf0007810,0x293ff49f,0xa805209c,0x200,0xf00d1d18) + 14

 Unfortunately (but not surprisingly) kadb doesn't allow debugging
 cache-flush code, so I can't check what is in
 [%g5] (aka sfar) on the real machine when this happens.

 I was telling fairy tales here and no one stopped me. [%g5] is not
 sfar, it's the context pointer,
 so the code makes much more sense!

 And I guess, SunOS 4.1.4 is buggy. I've managed to reproduce the
 complete case on the real machine. The trick is to set the breakpoint
 before the interrupts are switched off:

 kadb _swift_vac_rgnflush:b
 kadb :c
 breakpoint      _swift_vac_rgnflush:            rd      %psr, %g2
 kadb o2=X
                44000e5
 kadb $q
 Type  'go' to resume
 Type  help  for more information
 ok 100 4 spacel@ .
 3fff00

 So at _swift_vac_rgnflush+0x28 it would access (44000e52) + (3fff00
  4) = 14fff394. Which is outside of IOMMU.

 ok 14fff394 20 spacel@ .
 3fe000

 This seems to be an alias to

 ok 1404 20 spacel@ .
 3fe000

 So, it seems to be safe to pad iommu with an empty slot. I guess we
 are not missing anything more serious. Alternatively we can use your
 aliasing patch.

 What do you say?

Thanks, applied.

 P.S. What is also interesting about SunOS 4.1.4 is that only the
 single-cpu kernel (which is used during the installation) calls
 _swift_vac_rgnflush on initialization. The smp kernel just doesn't
 have this call in _hat_setup_kas. Maybe they have noticed the bug and
 corrected it?

 --
 Regards,
 Artyom Tarasenko

 solaris/sparc under qemu blog: http://tyom.blogspot.com/




Re: [Qemu-devel] Re: [PATCH v2 12/15] monitor: Add basic device state visualization

2010-05-29 Thread Markus Armbruster
Avi Kivity a...@redhat.com writes:

 On 05/23/2010 10:57 AM, Jan Kiszka wrote:
 Avi Kivity wrote:

 On 05/22/2010 11:18 AM, Jan Kiszka wrote:
  
 From: Jan Kiszkajan.kis...@siemens.com

 This introduces device_show, a monitor command that saves the vmstate of
 a qdev device and visualizes it. QMP is also supported. Buffers are cut
 after 16 byte by default, but the full content can be requested via
 '-f'. To pretty-print sub-arrays, vmstate is extended to store the start
 index name. A new qerror is introduced to signal a missing vmstate. And
 it comes with documentation.

 +
 +Dump a snapshot of the device state. Buffers are cut after 16 bytes
 unless
 +a full dump is requested.
 +
 +Arguments:
 +
 +- path: the device's qtree path or unique ID (json-string)


 This may be ambiguous.
  
 Can your elaborate what precisely is ambiguous?


 Can't the user choose the unique ID so that it aliases an unrelated
 qtree path?

 I prefer having mutually exclusive 'path' and 'ref' arguments.

Don't think that's necessary.  If the string starts with '/', it's an
absolute path.  Else, it's a relative path rooted at the node with the
ID equal to the first component.

Currently breaks down when IDs contain '/', but permitting that is a
bug.  There may be more problems; the path lookup code is way too
clever.



Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Blue Swirl
On Sat, May 29, 2010 at 10:16 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 This is - according to my current understanding - the proposed
 alternative architecture:

                                          .---.
                                          | de-coalescing |
                                          |     logic     |
                                          '---'
                                                ^   |
                                         period,|   |IRQ
                                       coalesced|   |(or tuned VM clock)
                                        (yes/no)|   v
 .---.              ..             .---.
 |  RTC  |-IRQ-| router |-IRQ| APIC  |
 '---'              ''             '---'
    |                    ^    |                   ^
    |                    |    |                   |
    '---period---'    '--period---'
 The period information is already known by the higher level clock
 management,
 The timer device models program the next event of some qemu-timer. There
 is no tag attached explaining the timer subsystem or anyone else the
 reason behind this programming.

 Yes, but why would we care? All timers in the system besides RTC
 should be affected likewise, this would in fact be the benefit from
 handling the problem at higher level.

 And how does your equation for calculating the clock slow-down look like?

How about icount_adjust()?


Re: [Qemu-devel] cg14

2010-05-29 Thread Artyom Tarasenko
2010/5/29 Bob Breuer breu...@mc.net:
 Artyom Tarasenko wrote:
 2010/5/28 Blue Swirl blauwir...@gmail.com:

 On Fri, May 28, 2010 at 7:54 AM, Bob Breuer breu...@mc.net wrote:

 Artyom Tarasenko wrote:

 2010/5/27 Bob Breuer breu...@mc.net:


 Artyom Tarasenko wrote:


 Was going to put some more empty slots into SS-10/20 (VSIMMs, SX)
 after we are done with SS-5 (due to technical limitations I can switch
 access from one real SS model to another one once a few days only).



 I have a partial implementation of the SS-20 VSIMM (cg14) that I've been
 working on.  With the Sun firmware, I have working text console, color
 boot logo, and programmable video resolutions up to 1600x1280.


 Great news! This would allow qemu booting NeXTStep! Are you planning
 to submit the patches any time soon?



 It's not in a state to be submitted yet, but I've attached a working
 patch if you want to give it a try.  I need to hook it up to qdev and
 fill in some more of the obviously incomplete switch cases before I'd
 sign off on it.

 Nice work. I have a few comments below.

 This probably needs support from OpenBIOS to be usable without OBP.


 Maybe it can be used as a second adapter without OpenBIOS support? At
 least under some OSes?


 Probably won't be used without at least being in the firmware device
 tree.  One area that OpenBIOS could enhance would be a larger memory
 size option.  The real hardware was only available in 4M and 8M options,
 but the memory map allows for 16M.  OBP will identify a 16M VSIMM but
 won't do anything else with it, and with 16M of vram it would allow for
 a potential 2560x1600 32bit resolution.
 Bob


 diff --git a/Makefile.target b/Makefile.target
 index fda5bf3..b17b3af 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -250,6 +250,7 @@ else
  obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o
  obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o
  obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o
 +obj-sparc-y += cg14.o
  endif

  obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o
 diff --git a/hw/sun4m.c b/hw/sun4m.c
 index 7ba0f76..8b23c9b 100644
 --- a/hw/sun4m.c
 +++ b/hw/sun4m.c
 @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef 
 *hwdef, ram_addr_t RAM_size,
         fprintf(stderr, qemu: Unsupported depth: %d\n, graphic_depth);
         exit (1);
     }
 +  if (hwdef-machine_id == 65) { /* SS-20 */

 hwdef structure should contain a field for cg14. If non-zero, install
 cg14. Was cg14 only available for SS-20? Was it always included? This
 is also interesting for OpenBIOS, we need to detect cg14 vs. TCX.

 The cg14 was only an option for SS-20 and the rare SS-10SX, but not the
 regular SS-10, though the SS-10 chipset may have been capable of
 supporting it.  Each cg14 vsimm takes the place of a stick of memory
 with 2 slots physically capable of holding a vsimm.

The few SS-10 OBP versions I have probe for VSIMMs. So we need to put
either empty_slot (when the -nographics option is used) or VSIMMs
there.

 Is there a way to pass the framebuffer type and/or address to OpenBIOS?
 I would be inclined to have the SS-20 machine default to cg14 instead of
 TCX, but it will blow a hole in the support of more than 2G of emulated
 system ram.
 +    /* cg14.c */
 +    void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t 
 vram_base,
 +                uint32_t vram_size);

 This should go to sun4m.h or cg14.h.


 +
 +    cg14_init(0x09c00ULL, 0x0fc00ULL, 820);
 +  } else

 Please add braces and reindent.


     tcx_init(hwdef-tcx_base, 0x0010, graphic_width, graphic_height,
              graphic_depth);

 --- /dev/null   Fri May 28 02:08:36 2010
 +++ hw/cg14.c   Fri May 28 01:58:49 2010
 @@ -0,0 +1,785 @@
 +/*
 + * QEMU CG14 Frame buffer
 + *
 + * Copyright (c) 2010 Bob Breuer
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining 
 a copy
 + * of this software and associated documentation files (the Software), 
 to deal
 + * in the Software without restriction, including without limitation the 
 rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
 sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be 
 included in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, 
 EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
 OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
 ARISING FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
 IN
 + * THE SOFTWARE.
 + */
 +
 

Re: [Qemu-devel] [PATCH] Compile dma only once

2010-05-29 Thread Blue Swirl
On Fri, May 28, 2010 at 7:34 PM, Paul Brook p...@codesourcery.com wrote:
 Use a qemu_irq to request CPU exit.

 Needing to request a CPU exit at all is just wrong. See previous discussions
 about how any use of qemu_bh_schedule_idle is fundamentally broken.

I agree for the device case. Is the attached patch then OK?

But what about other uses (with the patch applied):

User emulator signal delivery:
/src/qemu/darwin-user/signal.c:216:cpu_exit(global_env);
/src/qemu/linux-user/signal.c:507:cpu_exit(thread_env);

qemu_notify_event():
/src/qemu/cpus.c:286:cpu_exit(env);
/src/qemu/cpus.c:289:cpu_exit(next_cpu);
Is that broken too and should be removed?

cpu_signal():
/src/qemu/cpus.c:531:cpu_exit(cpu_single_env);

vm_stop():
/src/qemu/cpus.c:733:cpu_exit(cpu_single_env);

KVM IO window exit:
/src/qemu/kvm-all.c:859:cpu_exit(env);

Some exclusive ARM operation:
/src/qemu/linux-user/main.c:152:cpu_exit(other);

ARM/m68k semihosting:
/src/qemu/gdbstub.c:2296:cpu_exit(s-c_cpu);
From 12940e4bf57af4801ffc209095b6adcc0693320f Mon Sep 17 00:00:00 2001
From: Blue Swirl blauwirbel@gmail.com
Date: Sat, 29 May 2010 07:59:40 +
Subject: [PATCH] dma: remove DMA_schedule and related cpu_request_exit irq

It's wrong for devices to request cpu_exit. Remove DMA_schedule and
cpu_request_exit irq, thus partially reverting
4556bd8b2514a55d48c15b1adb17537f49657744.

Signed-off-by: Blue Swirl blauwirbel@gmail.com
---
 hw/dma.c|   19 ---
 hw/fdc.c|1 -
 hw/isa.h|3 +--
 hw/mips_jazz.c  |   13 +
 hw/mips_malta.c |   13 +
 hw/pc.c |   13 +
 hw/ppc_prep.c   |   13 +
 hw/sun4m.c  |1 -
 hw/sun4u.c  |1 -
 9 files changed, 9 insertions(+), 68 deletions(-)

diff --git a/hw/dma.c b/hw/dma.c
index 5b21521..1015b41 100644
--- a/hw/dma.c
+++ b/hw/dma.c
@@ -57,7 +57,6 @@ static struct dma_cont {
 uint8_t flip_flop;
 int dshift;
 struct dma_regs regs[4];
-qemu_irq *cpu_request_exit;
 } dma_controllers[2];
 
 enum {
@@ -442,14 +441,6 @@ int DMA_write_memory (int nchan, void *buf, int pos, int len)
 return len;
 }
 
-/* request the emulator to transfer a new DMA memory block ASAP */
-void DMA_schedule(int nchan)
-{
-struct dma_cont *d = dma_controllers[nchan  3];
-
-qemu_irq_pulse(*d-cpu_request_exit);
-}
-
 static void dma_reset(void *opaque)
 {
 struct dma_cont *d = opaque;
@@ -465,14 +456,12 @@ static int dma_phony_handler (void *opaque, int nchan, int dma_pos, int dma_len)
 
 /* dshift = 0: 8 bit DMA, 1 = 16 bit DMA */
 static void dma_init2(struct dma_cont *d, int base, int dshift,
-  int page_base, int pageh_base,
-  qemu_irq *cpu_request_exit)
+  int page_base, int pageh_base)
 {
 static const int page_port_list[] = { 0x1, 0x2, 0x3, 0x7 };
 int i;
 
 d-dshift = dshift;
-d-cpu_request_exit = cpu_request_exit;
 for (i = 0; i  8; i++) {
 register_ioport_write (base + (i  dshift), 1, 1, write_chan, d);
 register_ioport_read (base + (i  dshift), 1, 1, read_chan, d);
@@ -542,12 +531,12 @@ static const VMStateDescription vmstate_dma = {
 }
 };
 
-void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit)
+void DMA_init(int high_page_enable)
 {
 dma_init2(dma_controllers[0], 0x00, 0, 0x80,
-  high_page_enable ? 0x480 : -1, cpu_request_exit);
+  high_page_enable ? 0x480 : -1);
 dma_init2(dma_controllers[1], 0xc0, 1, 0x88,
-  high_page_enable ? 0x488 : -1, cpu_request_exit);
+  high_page_enable ? 0x488 : -1);
 vmstate_register (0, vmstate_dma, dma_controllers[0]);
 vmstate_register (1, vmstate_dma, dma_controllers[1]);
 
diff --git a/hw/fdc.c b/hw/fdc.c
index 6306496..d4505b4 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -1174,7 +1174,6 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
  * recall us...
  */
 DMA_hold_DREQ(fdctrl-dma_chann);
-DMA_schedule(fdctrl-dma_chann);
 return;
 } else {
 FLOPPY_ERROR(dma_mode=%d direction=%d\n, dma_mode, direction);
diff --git a/hw/isa.h b/hw/isa.h
index aaf0272..9681de1 100644
--- a/hw/isa.h
+++ b/hw/isa.h
@@ -40,8 +40,7 @@ int DMA_read_memory (int nchan, void *buf, int pos, int size);
 int DMA_write_memory (int nchan, void *buf, int pos, int size);
 void DMA_hold_DREQ (int nchan);
 void DMA_release_DREQ (int nchan);
-void DMA_schedule(int nchan);
-void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit);
+void DMA_init(int high_page_enable);
 void DMA_register_channel (int nchan,
DMA_transfer_handler transfer_handler,
void *opaque);
diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c
index ead3a00..6e0ec8f 100644
--- a/hw/mips_jazz.c
+++ b/hw/mips_jazz.c
@@ -114,15 

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Jan Kiszka
Blue Swirl wrote:
 This is - according to my current understanding - the proposed
 alternative architecture:

  .---.
  | de-coalescing |
  | logic |
  '---'
^   |
 period,|   |IRQ
   coalesced|   |(or tuned VM clock)
(yes/no)|   v
 .---.  .. .---.
 |  RTC  |-IRQ-| router |-IRQ| APIC  |
 '---'  '' '---'
|^|   ^
|||   |
'---period---''--period---'
 The period information is already known by the higher level clock
 management,
 The timer device models program the next event of some qemu-timer. There
 is no tag attached explaining the timer subsystem or anyone else the
 reason behind this programming.
 
 Yes, but why would we care? All timers in the system besides RTC
 should be affected likewise, this would in fact be the benefit from
 handling the problem at higher level.

And how does your equation for calculating the clock slow-down look like?

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Jan Kiszka
Gleb Natapov wrote:
 On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote:
 2010/5/28 Gleb Natapov g...@redhat.com:
 On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote:
 2010/5/27 Gleb Natapov g...@redhat.com:
 On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote:
 On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de wrote:
 Anthony Liguori wrote:
 On 05/25/2010 02:09 PM, Blue Swirl wrote:
 On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de  
 wrote:

 From: Jan Kiszkajan.kis...@siemens.com

 This allows to communicate potential IRQ coalescing during 
 delivery from
 the sink back to the source. Targets that support IRQ coalescing
 workarounds need to register handlers that return the appropriate
 QEMU_IRQ_* code, and they have to propergate the code across all 
 IRQ
 redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, it 
 can
 apply its workaround. If multiple sinks exist, the source may only
 consider an IRQ coalesced if all other sinks either report
 QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED.

 No real devices are interested whether any of their output lines are
 even connected. This would introduce a new signal type, 
 bidirectional
 multi-level, which is not correct.

 I don't think it's really an issue of correct, but I wouldn't 
 disagree
 to a suggestion that we ought to introduce a new signal type for this
 type of bidirectional feedback.  Maybe it's qemu_coalesced_irq and 
 has a
 similar interface as qemu_irq.
 A separate type would complicate the delivery of the feedback value
 across GPIO pins (as Paul requested for the RTC-HPET routing).

 I think the real solution to coalescing is put the logic inside one
 device, in this case APIC because it has the information about irq
 delivery. APIC could monitor incoming RTC irqs for frequency
 information and whether they get delivered or not. If not, an 
 internal
 timer is installed which injects the lost irqs.
 That won't fly as the IRQs will already arrive at the APIC with a
 sufficiently high jitter. At the bare minimum, you need to tell the
 interrupt controller about the fact that a particular IRQ should be
 delivered at a specific regular rate. For this, you also need a 
 generic
 interface - nothing really won.
 OK, let's simplify: just reinject at next possible chance. No need to
 monitor or tell anything.
 There are guests that won't like this (I know of one in-house, but
 others may even have more examples), specifically if you end up firing
 multiple IRQs in a row due to a longer backlog. For that reason, the RTC
 spreads the reinjection according to the current rate.
 Then reinject with a constant delay, or next CPU exit. Such buggy
 If guest's time frequency is the same as host time frequency you can't
 reinject with constant delay. That is why current code mixes two
 approaches: reinject M interrupts in a raw then delay.
 This approach can be also used by APIC-only version.

 I don't know what APIC-only version you are talking about. I haven't
 seen the code and I don't understand hand waving, sorry.
 There is no code, because we're still at architecture design stage.

 Try to write test code to understand the problem better.
 
 guests could also be assisted with special handling (like win2k
 install hack), for example guest instructions could be counted
 (approximately, for example using TB size or TSC) and only inject
 after at least N instructions have passed.
 Guest instructions cannot be easily counted in KVM (it can be done more
 or less reliably using perf counters, may be).
 Aren't there any debug registers or perf counters, which can generate
 an interrupt after some number of instructions have been executed?
 Don't think debug registers have something like that and they are
 available for guest use anyway. Perf counters differs greatly from CPU
 to CPU (even between two CPUs of the same manufacturer), and we want to
 keep using them for profiling guests. And I don't see what problem it
 will solve anyway that can be solved by simple delay between irq
 reinjection.
 This would allow counting the executed instructions and limit it. Thus
 we could emulate a 500MHz CPU on a 2GHz CPU more accurately.

 Why would you want to limit number of instruction executed by guest if
 CPU has nothing else to do anyway? The problem occurs not when we have
 spare cycles so give to a guest, but in opposite case.
 
 And even if the rate did not matter, the APIC woult still have to now
 about the fact that an IRQ is really periodic and does not only appear
 as such for a certain interval. This really does not sound like
 simplifying things or even make them cleaner.
 It would, the voodoo would be contained only in APIC, RTC would be
 just like any other device. With the bidirectional irqs, this voodoo
 would probably eventually spread to many other devices. The logical
 conclusion of that would be a 

[Qemu-devel] [PATCH 0/3] [RFC] Threaded vnc server

2010-05-29 Thread Corentin Chary
Hi,
This series add a threaded VNC server and should be applied on top on my 
previous patch set (adding tight encoding).
The first two patchs add some functions to qemu-thread. The last is the 
threaded VNC server and the changelog
explains how it works.

refs: 
http://xf.iksaif.net/blog/index.php?post/2010/05/28/QEMU%3A-Threaded-VNC-Server-results

Thanks

Corentin Chary (3):
  qemu-thread: add qemu_mutex/cond_destroy and qemu_mutex_exit
  qemu-thread: add cleanup_push() and cleanup_pop()
  vnc: threaded VNC server

 Makefile|4 +
 Makefile.objs   |7 +-
 configure   |   13 ++
 qemu-thread.c   |   22 
 qemu-thread.h   |8 ++
 vnc-jobs-sync.c |   70 
 vnc-jobs.c  |  328 +++
 vnc.c   |  161 +++
 vnc.h   |   73 
 9 files changed, 663 insertions(+), 23 deletions(-)
 create mode 100644 vnc-jobs-sync.c
 create mode 100644 vnc-jobs.c




Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Jan Kiszka
Blue Swirl wrote:
 2010/5/29 Jan Kiszka jan.kis...@web.de:
 Gleb Natapov wrote:
 On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote:
 2010/5/28 Gleb Natapov g...@redhat.com:
 On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote:
 2010/5/27 Gleb Natapov g...@redhat.com:
 On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote:
 On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de 
 wrote:
 Anthony Liguori wrote:
 On 05/25/2010 02:09 PM, Blue Swirl wrote:
 On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de  
 wrote:

 From: Jan Kiszkajan.kis...@siemens.com

 This allows to communicate potential IRQ coalescing during 
 delivery from
 the sink back to the source. Targets that support IRQ coalescing
 workarounds need to register handlers that return the appropriate
 QEMU_IRQ_* code, and they have to propergate the code across all 
 IRQ
 redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, 
 it can
 apply its workaround. If multiple sinks exist, the source may 
 only
 consider an IRQ coalesced if all other sinks either report
 QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED.

 No real devices are interested whether any of their output lines 
 are
 even connected. This would introduce a new signal type, 
 bidirectional
 multi-level, which is not correct.

 I don't think it's really an issue of correct, but I wouldn't 
 disagree
 to a suggestion that we ought to introduce a new signal type for 
 this
 type of bidirectional feedback.  Maybe it's qemu_coalesced_irq and 
 has a
 similar interface as qemu_irq.
 A separate type would complicate the delivery of the feedback value
 across GPIO pins (as Paul requested for the RTC-HPET routing).

 I think the real solution to coalescing is put the logic inside 
 one
 device, in this case APIC because it has the information about irq
 delivery. APIC could monitor incoming RTC irqs for frequency
 information and whether they get delivered or not. If not, an 
 internal
 timer is installed which injects the lost irqs.
 That won't fly as the IRQs will already arrive at the APIC with a
 sufficiently high jitter. At the bare minimum, you need to tell the
 interrupt controller about the fact that a particular IRQ should be
 delivered at a specific regular rate. For this, you also need a 
 generic
 interface - nothing really won.
 OK, let's simplify: just reinject at next possible chance. No need to
 monitor or tell anything.
 There are guests that won't like this (I know of one in-house, but
 others may even have more examples), specifically if you end up firing
 multiple IRQs in a row due to a longer backlog. For that reason, the 
 RTC
 spreads the reinjection according to the current rate.
 Then reinject with a constant delay, or next CPU exit. Such buggy
 If guest's time frequency is the same as host time frequency you can't
 reinject with constant delay. That is why current code mixes two
 approaches: reinject M interrupts in a raw then delay.
 This approach can be also used by APIC-only version.

 I don't know what APIC-only version you are talking about. I haven't
 seen the code and I don't understand hand waving, sorry.
 There is no code, because we're still at architecture design stage.

 Try to write test code to understand the problem better.

 guests could also be assisted with special handling (like win2k
 install hack), for example guest instructions could be counted
 (approximately, for example using TB size or TSC) and only inject
 after at least N instructions have passed.
 Guest instructions cannot be easily counted in KVM (it can be done more
 or less reliably using perf counters, may be).
 Aren't there any debug registers or perf counters, which can generate
 an interrupt after some number of instructions have been executed?
 Don't think debug registers have something like that and they are
 available for guest use anyway. Perf counters differs greatly from CPU
 to CPU (even between two CPUs of the same manufacturer), and we want to
 keep using them for profiling guests. And I don't see what problem it
 will solve anyway that can be solved by simple delay between irq
 reinjection.
 This would allow counting the executed instructions and limit it. Thus
 we could emulate a 500MHz CPU on a 2GHz CPU more accurately.

 Why would you want to limit number of instruction executed by guest if
 CPU has nothing else to do anyway? The problem occurs not when we have
 spare cycles so give to a guest, but in opposite case.

 And even if the rate did not matter, the APIC woult still have to now
 about the fact that an IRQ is really periodic and does not only appear
 as such for a certain interval. This really does not sound like
 simplifying things or even make them cleaner.
 It would, the voodoo would be contained only in APIC, RTC would be
 just like any other device. With the bidirectional irqs, this voodoo
 would probably eventually spread 

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Blue Swirl
On Sat, May 29, 2010 at 9:45 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 2010/5/29 Jan Kiszka jan.kis...@web.de:
 Gleb Natapov wrote:
 On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote:
 2010/5/28 Gleb Natapov g...@redhat.com:
 On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote:
 2010/5/27 Gleb Natapov g...@redhat.com:
 On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote:
 On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de 
 wrote:
 Anthony Liguori wrote:
 On 05/25/2010 02:09 PM, Blue Swirl wrote:
 On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de  
 wrote:

 From: Jan Kiszkajan.kis...@siemens.com

 This allows to communicate potential IRQ coalescing during 
 delivery from
 the sink back to the source. Targets that support IRQ coalescing
 workarounds need to register handlers that return the 
 appropriate
 QEMU_IRQ_* code, and they have to propergate the code across 
 all IRQ
 redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, 
 it can
 apply its workaround. If multiple sinks exist, the source may 
 only
 consider an IRQ coalesced if all other sinks either report
 QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED.

 No real devices are interested whether any of their output lines 
 are
 even connected. This would introduce a new signal type, 
 bidirectional
 multi-level, which is not correct.

 I don't think it's really an issue of correct, but I wouldn't 
 disagree
 to a suggestion that we ought to introduce a new signal type for 
 this
 type of bidirectional feedback.  Maybe it's qemu_coalesced_irq 
 and has a
 similar interface as qemu_irq.
 A separate type would complicate the delivery of the feedback value
 across GPIO pins (as Paul requested for the RTC-HPET routing).

 I think the real solution to coalescing is put the logic inside 
 one
 device, in this case APIC because it has the information about 
 irq
 delivery. APIC could monitor incoming RTC irqs for frequency
 information and whether they get delivered or not. If not, an 
 internal
 timer is installed which injects the lost irqs.
 That won't fly as the IRQs will already arrive at the APIC with a
 sufficiently high jitter. At the bare minimum, you need to tell the
 interrupt controller about the fact that a particular IRQ should be
 delivered at a specific regular rate. For this, you also need a 
 generic
 interface - nothing really won.
 OK, let's simplify: just reinject at next possible chance. No need 
 to
 monitor or tell anything.
 There are guests that won't like this (I know of one in-house, but
 others may even have more examples), specifically if you end up 
 firing
 multiple IRQs in a row due to a longer backlog. For that reason, the 
 RTC
 spreads the reinjection according to the current rate.
 Then reinject with a constant delay, or next CPU exit. Such buggy
 If guest's time frequency is the same as host time frequency you can't
 reinject with constant delay. That is why current code mixes two
 approaches: reinject M interrupts in a raw then delay.
 This approach can be also used by APIC-only version.

 I don't know what APIC-only version you are talking about. I haven't
 seen the code and I don't understand hand waving, sorry.
 There is no code, because we're still at architecture design stage.

 Try to write test code to understand the problem better.

 guests could also be assisted with special handling (like win2k
 install hack), for example guest instructions could be counted
 (approximately, for example using TB size or TSC) and only inject
 after at least N instructions have passed.
 Guest instructions cannot be easily counted in KVM (it can be done more
 or less reliably using perf counters, may be).
 Aren't there any debug registers or perf counters, which can generate
 an interrupt after some number of instructions have been executed?
 Don't think debug registers have something like that and they are
 available for guest use anyway. Perf counters differs greatly from CPU
 to CPU (even between two CPUs of the same manufacturer), and we want to
 keep using them for profiling guests. And I don't see what problem it
 will solve anyway that can be solved by simple delay between irq
 reinjection.
 This would allow counting the executed instructions and limit it. Thus
 we could emulate a 500MHz CPU on a 2GHz CPU more accurately.

 Why would you want to limit number of instruction executed by guest if
 CPU has nothing else to do anyway? The problem occurs not when we have
 spare cycles so give to a guest, but in opposite case.

 And even if the rate did not matter, the APIC woult still have to now
 about the fact that an IRQ is really periodic and does not only 
 appear
 as such for a certain interval. This really does not sound like
 simplifying things or even make them cleaner.
 It would, the voodoo would be contained only in APIC, RTC would be
 just like any other 

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Jan Kiszka
Blue Swirl wrote:
 On the contrary, APIC is actually the only source of the IRQ ack
 information. RTC hack would not work without APIC (or the
 bidirectional IRQ) passing this info to RTC.

 What APIC doesn't have now is the timer frequency or period info. This
 is known by RTC and also higher levels managing the clocks.

 So APIC has one bit of information and RTC everything else.
 
 The information known by RTC (timer period) is also known by higher levels.

Curious to see where you'll find this.

 
 The current
 approach (and proposed patch) brings this one bit of information to RTC,
 you are arguing that RTC should be able to communicate all its info to
 APIC. Sorry I don't see that your way has any advantage. Just more
 complex interface and it is much easier to get it wrong for other time
 sources.
 
 I don't think anymore that APIC should be handling this but the
 generic stuff, like vl.c or exec.c. Then there would be only
 information passing from APIC to higher levels.

You neglect the the information required to associate a periodic source
(e.g. RTC) with an IRQ sink (e.g. APIC). Without that, you will have a
hard time figuring out if a reported IRQ coalescing requires any
activities or should simply be welcomed (for I/O IRQs).

 
 I keep ignoring the idea that the current model, where both RTC and
 APIC must somehow work together to make coalescing work, is the only
 possible just because it is committed and it happens to work in some
 cases. It would be much better to concentrate this to one place, APIC
 or preferably higher level where it may benefit other timers too.
 Provided of course that the other models can be made to work.

 So write the code and show us. You haven't show any evidence that RTC is
 the wrong place. RTC knows when interrupt was acknowledge to RTC, it
 know when clock frequency changes, it know when device reset happened.
 APIC knows only that interrupt was coalesced. It doesn't even know that
 it may be masked by a guest in IOAPIC (interrupts delivered while they
 are masked not considered coalesced).
 
 Oh, I thought interrupt masking was the reason for coalescing! What
 exactly is the reason then?

Missing acks, ie. the IRQ is still pending when the next one arrives.
You want to filter out masked/suppressed IRQs to avoid running the
de-coalescing logic on sources that are actually cut off (like the RTC
IRQ when the HPET took over).

 
 Time source knows only when
 frequency changes and may be when device reset happens if timer is
 stopped by device on reset. So RTC is actually a sweet spot if you want
 to minimize amount of info you need to pass between various layers.

 Maybe that version would not bend backwards as much as the current to
 cater for buggy hosts.

 You mean buggy guests?
 Yes, sorry.

 What guests are not buggy in your opinion?
 Linux tries hard to be smart and as a result the only way to have stable
 clock with it is to go paravirt.
 I'm not an OS designer, but I think an OS should never crash, even if
 a burst of IRQs is received. Reprogramming the timer should consider
 the pending IRQ situation (0 or 1 with real HW). Those bugs are one
 cause of the problem.
 OS should never crash in the absence of HW bugs? I doubt you can design
 an OS that can run in a face of any HW failure. Anyway here we are
 trying to solve guests time keeping problem not crashes. Do you think
 you can design OS that can keep time accurately no matter how crazy all
 HW clock behaves?
 
 I think my OS design skills are not relevant in this discussion, but
 IIRC there are fault tolerant operating systems for extreme conditions
 so it can be done.

No one can influence the design of released OS versions anymore.

 
 The fact is that timer device is not just like any
 other device in virtual world. Any other device is easy: you just
 implement spec as close as possible and everything works. For time
 source device this is not enough. You can implement RTC+HPET to the
 letter and your guest will drift like crazy.
 It's doable: a cycle accurate emulator will not cause any drift,
 without any voodoo. The interrupts would come after executing the same
 instruction as the real HW. For emulating any sufficiently buggy
 guests in any sufficiently desperate low resource conditions, this may
 be the only option that will always work.

 Yes, but qemu and kvm are not cycle accurate emulators and don't strive
 to be one. On the contrary KVM runs at native host CPU speed most of the
 time, so any emulation done between two instruction is theoretically
 noticeable for a guest. TSC is bypassed directly to a guest too, so
 keeping all time source in perfect sync is also impossible.
 That is actually another cause of the problem. KVM gives the guest an
 illusion that the VCPU speed is equal to host speed. When they don't
 match, especially in critical code, there can be problems. It would be
 better to tell the guest a lower speed, which also can be guaranteed.

 Not possible. It's that simple. 

Re: [Qemu-devel] [PATCH] Name the default PCI bus pci.0 on all architectures

2010-05-29 Thread Markus Armbruster
Paul Brook p...@codesourcery.com writes:

 The system emulators for each arch are using inconsistent
 naming for the default PCI bus pci vs pci.0. Since it
 is conceivable we'll have multiple PCI buses in the future
 standardize on pci.0 for all architectures. This ensures
 mgmt apps can rely on a name when assigning PCI devices an
 address on the bus using eg '-device e1000,bus=pci.0,addr=3'

 No. Bus names are local to the parent device.  None of the host bridges 
 support multiple bridges, so the .0 suffix makes no sense.  The parent 
 device has no idea whether it owns the default pci bus or not.
 If you have multiple PCI busses then you can identify them by the device path.

From qbus_create_inplace():

if (name) {
/* use supplied name */
bus-name = qemu_strdup(name);
} else if (parent  parent-id) {
/* parent device has id - use it for bus name */
len = strlen(parent-id) + 16;
buf = qemu_malloc(len);
snprintf(buf, len, %s.%d, parent-id, parent-num_child_bus);
bus-name = buf;
} else {
/* no id - use lowercase bus type for bus name */
len = strlen(info-name) + 16;
buf = qemu_malloc(len);
len = snprintf(buf, len, %s.%d, info-name,
   parent ? parent-num_child_bus : 0);
for (i = 0; i  len; i++)
buf[i] = qemu_tolower(buf[i]);
bus-name = buf;
}

If appending .0 really makes no sense when the device has just one
bus, then we shouldn't append it in cases 2  3.

But I'd simply append it always.  One bus is just as countable as many.



Re: [Qemu-devel] [PATCH] qemu-io: Fix error messages

2010-05-29 Thread Christoph Hellwig
On Fri, May 28, 2010 at 08:15:04PM +0200, Kevin Wolf wrote:
 The truncate and getlength commands passed a negative error number to 
 strerror.
 They also happen to be the two functions that are lacking a newline at the end
 of their error message.
 
 Signed-off-by: Kevin Wolf kw...@redhat.com

Ok,


Reviewed-by: Christoph Hellwig h...@lst.de




Re: [Qemu-devel] cg14

2010-05-29 Thread Blue Swirl
On Sat, May 29, 2010 at 5:15 AM, Bob Breuer breu...@mc.net wrote:
 Artyom Tarasenko wrote:
 2010/5/28 Blue Swirl blauwir...@gmail.com:

 On Fri, May 28, 2010 at 7:54 AM, Bob Breuer breu...@mc.net wrote:

 Artyom Tarasenko wrote:

 2010/5/27 Bob Breuer breu...@mc.net:


 Artyom Tarasenko wrote:


 Was going to put some more empty slots into SS-10/20 (VSIMMs, SX)
 after we are done with SS-5 (due to technical limitations I can switch
 access from one real SS model to another one once a few days only).



 I have a partial implementation of the SS-20 VSIMM (cg14) that I've been
 working on.  With the Sun firmware, I have working text console, color
 boot logo, and programmable video resolutions up to 1600x1280.


 Great news! This would allow qemu booting NeXTStep! Are you planning
 to submit the patches any time soon?



 It's not in a state to be submitted yet, but I've attached a working
 patch if you want to give it a try.  I need to hook it up to qdev and
 fill in some more of the obviously incomplete switch cases before I'd
 sign off on it.

 Nice work. I have a few comments below.

 This probably needs support from OpenBIOS to be usable without OBP.


 Maybe it can be used as a second adapter without OpenBIOS support? At
 least under some OSes?


 Probably won't be used without at least being in the firmware device
 tree.  One area that OpenBIOS could enhance would be a larger memory
 size option.  The real hardware was only available in 4M and 8M options,
 but the memory map allows for 16M.  OBP will identify a 16M VSIMM but
 won't do anything else with it, and with 16M of vram it would allow for
 a potential 2560x1600 32bit resolution.

Awesome, PC with Cirrus VGA can't compete.

 Bob


 diff --git a/Makefile.target b/Makefile.target
 index fda5bf3..b17b3af 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -250,6 +250,7 @@ else
  obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o
  obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o
  obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o
 +obj-sparc-y += cg14.o
  endif

  obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o
 diff --git a/hw/sun4m.c b/hw/sun4m.c
 index 7ba0f76..8b23c9b 100644
 --- a/hw/sun4m.c
 +++ b/hw/sun4m.c
 @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef 
 *hwdef, ram_addr_t RAM_size,
         fprintf(stderr, qemu: Unsupported depth: %d\n, graphic_depth);
         exit (1);
     }
 +  if (hwdef-machine_id == 65) { /* SS-20 */

 hwdef structure should contain a field for cg14. If non-zero, install
 cg14. Was cg14 only available for SS-20? Was it always included? This
 is also interesting for OpenBIOS, we need to detect cg14 vs. TCX.

 The cg14 was only an option for SS-20 and the rare SS-10SX, but not the
 regular SS-10, though the SS-10 chipset may have been capable of
 supporting it.  Each cg14 vsimm takes the place of a stick of memory
 with 2 slots physically capable of holding a vsimm.

 Is there a way to pass the framebuffer type and/or address to OpenBIOS?

Currently there's fw_cfg and we can also set NVRAM variables.

 I would be inclined to have the SS-20 machine default to cg14 instead of
 TCX, but it will blow a hole in the support of more than 2G of emulated
 system ram.

I'm not so worried about the RAM hole, the real machines didn't have
so much RAM and we can also always add more RAM at some other
location.

But the machine default is a bit of a problem because we would not be
compatible to old versions. Adding a new machine for each new
configuration does not sound viable in longer term either. In this
case it makes a lot of sense for SS-10SX and we could deprecate SS-20
with TCX.

A generic solution for major device changes could be to pass a device
tree (FDT), like some KVM PPC machines do. Another solution, which
would give most realistic emulation, would be to add FCode ROMs which
is used to probe and add the devices by OBP or OpenBIOS.

 +    /* cg14.c */
 +    void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t 
 vram_base,
 +                uint32_t vram_size);

 This should go to sun4m.h or cg14.h.


 +
 +    cg14_init(0x09c00ULL, 0x0fc00ULL, 820);
 +  } else

 Please add braces and reindent.


     tcx_init(hwdef-tcx_base, 0x0010, graphic_width, graphic_height,
              graphic_depth);

 --- /dev/null   Fri May 28 02:08:36 2010
 +++ hw/cg14.c   Fri May 28 01:58:49 2010
 @@ -0,0 +1,785 @@
 +/*
 + * QEMU CG14 Frame buffer
 + *
 + * Copyright (c) 2010 Bob Breuer
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining 
 a copy
 + * of this software and associated documentation files (the Software), 
 to deal
 + * in the Software without restriction, including without limitation the 
 rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
 sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following 

[Qemu-devel] Re: [PATCH v3 0/3]: QMP: Commands doc

2010-05-29 Thread Jan Kiszka
Luiz Capitulino wrote:
 This new version moves the documentation to qemu-monitor.hx and now
 QMP/qmp-commands.txt is generated from there (thanks Jan!).
 
 I hope I've addressed all review comments in this version and now it should
 describe reality.
 
 Next step is to fix glitches (after this series is merged, of course).

This is a fragile series, breaking quickly when something changes in
qemu-monitor.hx (as just happened). Can we get this rebased and merged ASAP?

Thanks,
Jan

 
 changelog
 -
 
 v2 - v3
 
 
 - Rebased
 - Addressed review comments
 - Move contents to qemu-monitor.hx (Jan)
 
 v1 - v2
 
 
 - Rebased
 - Addressed Markus's comments
 - Changed indentation style
 - Minor fixes and clarifications
 
 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Blue Swirl
2010/5/28 Gleb Natapov g...@redhat.com:
 On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote:
 2010/5/28 Gleb Natapov g...@redhat.com:
  On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote:
  2010/5/27 Gleb Natapov g...@redhat.com:
   On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote:
   On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote:
Blue Swirl wrote:
On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de 
wrote:
Anthony Liguori wrote:
On 05/25/2010 02:09 PM, Blue Swirl wrote:
On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de  
wrote:
   
From: Jan Kiszkajan.kis...@siemens.com
   
This allows to communicate potential IRQ coalescing during 
delivery from
the sink back to the source. Targets that support IRQ coalescing
workarounds need to register handlers that return the 
appropriate
QEMU_IRQ_* code, and they have to propergate the code across 
all IRQ
redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, 
it can
apply its workaround. If multiple sinks exist, the source may 
only
consider an IRQ coalesced if all other sinks either report
QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED.
   
No real devices are interested whether any of their output lines 
are
even connected. This would introduce a new signal type, 
bidirectional
multi-level, which is not correct.
   
I don't think it's really an issue of correct, but I wouldn't 
disagree
to a suggestion that we ought to introduce a new signal type for 
this
type of bidirectional feedback.  Maybe it's qemu_coalesced_irq 
and has a
similar interface as qemu_irq.
A separate type would complicate the delivery of the feedback value
across GPIO pins (as Paul requested for the RTC-HPET routing).
   
I think the real solution to coalescing is put the logic inside 
one
device, in this case APIC because it has the information about 
irq
delivery. APIC could monitor incoming RTC irqs for frequency
information and whether they get delivered or not. If not, an 
internal
timer is installed which injects the lost irqs.
That won't fly as the IRQs will already arrive at the APIC with a
sufficiently high jitter. At the bare minimum, you need to tell the
interrupt controller about the fact that a particular IRQ should be
delivered at a specific regular rate. For this, you also need a 
generic
interface - nothing really won.
   
OK, let's simplify: just reinject at next possible chance. No need 
to
monitor or tell anything.
   
There are guests that won't like this (I know of one in-house, but
others may even have more examples), specifically if you end up 
firing
multiple IRQs in a row due to a longer backlog. For that reason, the 
RTC
spreads the reinjection according to the current rate.
  
   Then reinject with a constant delay, or next CPU exit. Such buggy
   If guest's time frequency is the same as host time frequency you can't
   reinject with constant delay. That is why current code mixes two
   approaches: reinject M interrupts in a raw then delay.
 
  This approach can be also used by APIC-only version.
 
  I don't know what APIC-only version you are talking about. I haven't
  seen the code and I don't understand hand waving, sorry.

 There is no code, because we're still at architecture design stage.

 Try to write test code to understand the problem better.

I will.

   guests could also be assisted with special handling (like win2k
   install hack), for example guest instructions could be counted
   (approximately, for example using TB size or TSC) and only inject
   after at least N instructions have passed.
   Guest instructions cannot be easily counted in KVM (it can be done more
   or less reliably using perf counters, may be).
 
  Aren't there any debug registers or perf counters, which can generate
  an interrupt after some number of instructions have been executed?
  Don't think debug registers have something like that and they are
  available for guest use anyway. Perf counters differs greatly from CPU
  to CPU (even between two CPUs of the same manufacturer), and we want to
  keep using them for profiling guests. And I don't see what problem it
  will solve anyway that can be solved by simple delay between irq
  reinjection.

 This would allow counting the executed instructions and limit it. Thus
 we could emulate a 500MHz CPU on a 2GHz CPU more accurately.

 Why would you want to limit number of instruction executed by guest if
 CPU has nothing else to do anyway? The problem occurs not when we have
 spare cycles so give to a guest, but in opposite case.

I think one problem is that the guest has executed too much compared
to what would happen with real HW with a lesser CPU. That explains the
RTC frequency reprogramming case.


 
  
And even if the rate 

Re: [Qemu-devel] Re: [PATCH v2 12/15] monitor: Add basic device state visualization

2010-05-29 Thread Jan Kiszka
Markus Armbruster wrote:
 Avi Kivity a...@redhat.com writes:
 
 On 05/23/2010 10:57 AM, Jan Kiszka wrote:
 Avi Kivity wrote:

 On 05/22/2010 11:18 AM, Jan Kiszka wrote:
  
 From: Jan Kiszkajan.kis...@siemens.com

 This introduces device_show, a monitor command that saves the vmstate of
 a qdev device and visualizes it. QMP is also supported. Buffers are cut
 after 16 byte by default, but the full content can be requested via
 '-f'. To pretty-print sub-arrays, vmstate is extended to store the start
 index name. A new qerror is introduced to signal a missing vmstate. And
 it comes with documentation.

 +
 +Dump a snapshot of the device state. Buffers are cut after 16 bytes
 unless
 +a full dump is requested.
 +
 +Arguments:
 +
 +- path: the device's qtree path or unique ID (json-string)


 This may be ambiguous.
  
 Can your elaborate what precisely is ambiguous?

 Can't the user choose the unique ID so that it aliases an unrelated
 qtree path?

 I prefer having mutually exclusive 'path' and 'ref' arguments.
 
 Don't think that's necessary.  If the string starts with '/', it's an
 absolute path.  Else, it's a relative path rooted at the node with the
 ID equal to the first component.

'pci.0' can be both a (badly chosen) ID as well as an abbreviated qtree
path.

 
 Currently breaks down when IDs contain '/', but permitting that is a
 bug.  There may be more problems; the path lookup code is way too
 clever.

Indeed. Less can sometimes be more. My impression is that some of the
cleverness was motivated by lacking qtree completion for the HMP.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v2 03/15] qdev: Allow device addressing via 'driver.instance'

2010-05-29 Thread Jan Kiszka
Markus Armbruster wrote:
 Jan Kiszka jan.kis...@web.de writes:
 
 From: Jan Kiszka jan.kis...@siemens.com

 Extend qbus_find_dev to allow addressing of devices without an unique id
 via an optional per-bus instance number. The new formats are
 'driver.instance' and 'alias.instance'.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  docs/qdev-device-use.txt |   12 +++-
  hw/qdev.c|   23 ++-
  2 files changed, 29 insertions(+), 6 deletions(-)

 diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt
 index 9ac1fa1..5939481 100644
 --- a/docs/qdev-device-use.txt
 +++ b/docs/qdev-device-use.txt
 @@ -1,6 +1,6 @@
  = How to convert to -device  friends =
  
 -=== Specifying Bus and Address on Bus ===
 +=== Specifying Bus, Address on Bus, and Devices ===
  
  In qdev, each device has a parent bus.  Some devices provide one or
  more buses for children.  You can specify a device's parent bus with
 @@ -24,6 +24,16 @@ Furthermore, if a device only hosts a single bus, the bus 
 name can be
  omitted in the path.  Example: /i440FX-pcihost/PIIX3 abbreviates
  /i440FX-pcihost/pci.0/PIIX3/isa.0 as none of the buses has siblings.
  
 +Existing devices can be addressed either via a unique ID if it was
 +assigned during creation or via the device tree path:
 +
 +/full_bus_address/driver_name[.instance_number]
 +or
 +abbreviated_bus_address/driver_name[.instance_number]
 +
 +Example: /i440FX-pcihost/pci.0/e1000.2 addresses the second e1000
 +adapter on the bus 'pci.0'.
 +
  Note: the USB device address can't be controlled at this time.
 
 instance number isn't defined in this document.

True, only implicitly via the example.

 
 I understand the problem you're trying to solve; I've had it myself many
 times.  But is inventing an instance number the right solution?
 
 The two e1000 devices already have a perfectly fine unique identifier on
 their bus: their bus address.  What about recognizing bus addresses in
 paths?  Requires a suitable restriction on device names and IDs to avoid
 ambiguity, say start with letter.
 
 qdev currently doesn't abstract bus addresses, but that's fixable.

You would also have to specify unique addressing scheme to those buses
that do not have it yet. E.g. what should be the address of a ISA bus
device? The base of its first ioport range? But if it does not have any
as it only injects ISA IRQs?

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Re: [PATCH v3 13/17] monitor: Allow to exclude commands from QMP

2010-05-29 Thread Jan Kiszka
Markus Armbruster wrote:
 Luiz Capitulino lcapitul...@redhat.com writes:
 
 On Sun, 23 May 2010 12:59:26 +0200
 Jan Kiszka jan.kis...@web.de wrote:

 From: Jan Kiszka jan.kis...@siemens.com

 Ported commands that are marked 'user_only' will not be considered for
 QMP monitor sessions. This allows to implement new commands that do not
 (yet) provide a sufficiently stable interface for QMP use (e.g.
 device_show).
  This is fine for me, but two things I've been wondering:

  1. Isn't a 'flags' struct member better? So that we can do (in the
 qemu-monitor.hx entry):

 .flags = MONITOR_USER_ONLY | MONITOR_HANDLER_ASYNC,

 I'm not suggesting this is an async handler, just exemplifying multiple
 flags.
 
 We also have at least one command that makes only sense in QMP:
 qmp_capabilities.  Maybe we could use separate flags controlling command
 availability in human monitor and QMP.

Sounds reasonable. Then I think I will lay the ground for this by
introducing flags already in this patch. A v4 run is required anyway.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v3 12/17] monitor: return length of printed string via monitor_[v]printf

2010-05-29 Thread Markus Armbruster
Jan Kiszka jan.kis...@web.de writes:

 From: Jan Kiszka jan.kis...@siemens.com

 This simply forwards the result of the internal vsnprintf to the callers
 of monitor_printf and monitor_vprintf. When invoked over a QMP session
 or in absence of an active monitor, -1 is returned.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

Appreciated!  Opens the door to addressing the TODOs in qemu-error.c.



Re: [Qemu-devel] Re: [PATCH v3 06/17] qdev: Allow device specification by qtree path for device_del

2010-05-29 Thread Jan Kiszka
Markus Armbruster wrote:
 Jan Kiszka jan.kis...@siemens.com writes:
 
 Markus Armbruster wrote:
 Jan Kiszka jan.kis...@web.de writes:

 Luiz Capitulino wrote:
 On Sun, 23 May 2010 12:59:19 +0200
 Jan Kiszka jan.kis...@web.de wrote:

 From: Jan Kiszka jan.kis...@siemens.com

 Allow to specify the device to be removed via device_del not only by ID
 but also by its full or abbreviated qtree path. For this purpose,
 qdev_find is introduced which combines walking the qtree with searching
 for device IDs if required.
  [...]

  Arguments:
  
 -- id: the device's ID (json-string)
 +- path: the device's qtree path or unique ID (json-string)
  
  Example:
  
 -- { execute: device_del, arguments: { id: net1 } }
 +- { execute: device_del, arguments: { path: net1 } }
  Doesn't seem like a good change to me, besides being incompatible[1] we
 shouldn't overload arguments this way in QMP as overloading leads to
 interface degradation (harder to use, understand, maintain).
 It's not overloaded, think of an ID as a (weak) symbolic link in the
 qtree filesystem. The advantage of basing everything on top of full or
 abbreviated qtree paths is that IDs are not always assigned, paths are.
 As long as your patch doesn't change the interpretation of IDs, we can
 keep the old name.

 The recent review of QMP documentation may lead to a clean up bad
 names flag day.  One more wouldn't make it worse, I guess.

  Maybe we could have both arguments as optional, but one must be passed.
 This would at least require some way to keep the proposed unified path
 specification for the human monitor (having separate arguments there is
 really unhandy).
 Correct.

 It would be nice to have device_del support paths in addition to IDs.
 I'd expect management tools to slap IDs on everything, so they won't
 care, but human users do.

 As far as I know, we have two places where we let the user name a node
 in the qtree: device_add bus=X and device_del X.  The former names a
 bus, the latter a device.  But both are nodes in the same tree, so
 consistency is in order.

 Only devices have user-specified IDs.  Buses have names assigned by the
 system.  Unique names, hopefully.
 ...but not necessarily. The bus name device_add accepts can also be a
 full, thus unambiguous path.

 If the user doesn't specify a device ID, the driver name is used
 instead.  If you put multiple instances of the same device on the same
 bus, they have the *same* path.  For instance, here's a snippet of info
 qtree after adding two usb-mouse:

   dev: piix3-usb-uhci, id 
 bus-prop: addr = 01.2
 bus-prop: romfile = null
 bus-prop: rombar = 1
 class USB controller, addr 00:01.2, pci id 8086:7020 (sub 1af4:1100)
 bar 4: i/o at 0x [0x1e]
 bus: usb.0
   type USB
   dev: usb-hub, id 
 addr 0.0, speed 12, name QEMU USB Hub, attached
   dev: usb-mouse, id no2
 addr 0.0, speed 12, name QEMU USB Mouse, attached
   dev: usb-mouse, id 
 addr 0.0, speed 12, name QEMU USB Mouse, attached

 Both devices have the same full path
 /i440FX-pcihost/pci.0/piix3-usb-uhci/usb.0/usb-mouse
 Which one does your code pick?  Shouldn't it refuse to pick?
 Patch 3 of this series resolves this as follows:

 usb-mouse[.0] - first listed instance
 usb-mouse.1   - second instance
 ...

 We should probably include this numbering in the qtree dump, I guess.

 By the way, you *can* put '/' in IDs.  I call that a bug.
 Even if we prevent this, IDs can still collide with abbreviated device
 or bus paths. Therefore I give paths precedence over IDs in patch 4.
 
 You're fixing problems in the overly complex and subtle path name lookup
 by making it more complex and subtle.  Let's make it simple and
 straightforward instead.

I have no problem with ripping out all of those abbreviations, requiring
the user to either specify a '/'-less unique ID or a full qtree path
that must start with a '/'. If paths get long, we now have interactive
completions.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Gleb Natapov
On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote:
 2010/5/29 Gleb Natapov g...@redhat.com:
  On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote:
   There is no code, because we're still at architecture design stage.
  
   Try to write test code to understand the problem better.
 
  I will.
 
  Please do ASAP. This discussion shows that you don't understand what is the
  problem that we are dialing with.
 
 Which part of the problem you think I don't understand?
 
It seams to me you don't understand how Windows uses RTC for time
keeping and how the QEMU solves the problem today.

 guests could also be assisted with special handling (like win2k
 install hack), for example guest instructions could be counted
 (approximately, for example using TB size or TSC) and only inject
 after at least N instructions have passed.
 Guest instructions cannot be easily counted in KVM (it can be done 
 more
 or less reliably using perf counters, may be).
   
Aren't there any debug registers or perf counters, which can generate
an interrupt after some number of instructions have been executed?
Don't think debug registers have something like that and they are
available for guest use anyway. Perf counters differs greatly from CPU
to CPU (even between two CPUs of the same manufacturer), and we want 
to
keep using them for profiling guests. And I don't see what problem it
will solve anyway that can be solved by simple delay between irq
reinjection.
  
   This would allow counting the executed instructions and limit it. Thus
   we could emulate a 500MHz CPU on a 2GHz CPU more accurately.
  
   Why would you want to limit number of instruction executed by guest if
   CPU has nothing else to do anyway? The problem occurs not when we have
   spare cycles so give to a guest, but in opposite case.
 
  I think one problem is that the guest has executed too much compared
  to what would happen with real HW with a lesser CPU. That explains the
  RTC frequency reprogramming case.
  You think wrong. The problem is exactly opposite: the guest haven't
  had enough execution time between two time interrupts. I don't know what
  RTC frequency reprogramming case you are talking about here.
 
 The case you told me where N pending tick IRQs exist but the guest
 wants to change the RTC frequency from 64Hz to 1024Hz.
 
 Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so
 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change
 the frequency to 1000Hz.
 
 The problem for emulation is that for the same 3 ticks, there has been
 so little execution power that the ticks have been coalesced. But
 isn't the guest cycle count then much lower than 30Mcyc?
 
 Isn't it so that the guest must be above 30Mcyc to be able to want the
 change? But if we reach that point,  the problem must have not been
 too little execution time, but too much.
 
Sorry I tried hard to understand what have you said above but failed.
What do you mean to be able to want the change? Guest sometimes wants
to get 64 timer interrupts per second and sometimes it wants to get 1024
timer interrupt per second. It wants it not as a result of time drift or
anything. It's just how guest behaves. You seams to be to fixated on
guest frequency change. It's just something you have to take into
account when you reinject interrupts.


 
 
  
   

  And even if the rate did not matter, the APIC woult still have 
  to now
  about the fact that an IRQ is really periodic and does not only 
  appear
  as such for a certain interval. This really does not sound like
  simplifying things or even make them cleaner.

 It would, the voodoo would be contained only in APIC, RTC would be
 just like any other device. With the bidirectional irqs, this 
 voodoo
 would probably eventually spread to many other devices. The 
 logical
 conclusion of that would be a system where all devices would be
 careful not to disturb the guest at wrong moment because that 
 would
 trigger a bug.

 This voodoo will be so complex and unreliable that it will make 
 RTC hack
 pale in comparison (and I still don't see how you are going to 
 make it
 actually work).
   
Implement everything inside APIC: only coalescing and reinjection.
APIC has zero info needed to implement reinjection correctly as was
shown to you several time in this thread and you simply keep ignoring
it.
  
   On the contrary, APIC is actually the only source of the IRQ ack
   information. RTC hack would not work without APIC (or the
   bidirectional IRQ) passing this info to RTC.
  
   What APIC doesn't have now is the timer frequency or period info. This
   is known by RTC and also higher levels managing the clocks.
  
   So APIC has one bit of information and RTC everything else.
 
  The information known by RTC (timer period) is also known by higher 

Re: [Qemu-devel] Re: [PATCH v3 06/17] qdev: Allow device specification by qtree path for device_del

2010-05-29 Thread Markus Armbruster
Jan Kiszka jan.kis...@siemens.com writes:

 Markus Armbruster wrote:
 Jan Kiszka jan.kis...@web.de writes:
 
 Luiz Capitulino wrote:
 On Sun, 23 May 2010 12:59:19 +0200
 Jan Kiszka jan.kis...@web.de wrote:

 From: Jan Kiszka jan.kis...@siemens.com

 Allow to specify the device to be removed via device_del not only by ID
 but also by its full or abbreviated qtree path. For this purpose,
 qdev_find is introduced which combines walking the qtree with searching
 for device IDs if required.
  [...]

  Arguments:
  
 -- id: the device's ID (json-string)
 +- path: the device's qtree path or unique ID (json-string)
  
  Example:
  
 -- { execute: device_del, arguments: { id: net1 } }
 +- { execute: device_del, arguments: { path: net1 } }
  Doesn't seem like a good change to me, besides being incompatible[1] we
 shouldn't overload arguments this way in QMP as overloading leads to
 interface degradation (harder to use, understand, maintain).
 It's not overloaded, think of an ID as a (weak) symbolic link in the
 qtree filesystem. The advantage of basing everything on top of full or
 abbreviated qtree paths is that IDs are not always assigned, paths are.
 
 As long as your patch doesn't change the interpretation of IDs, we can
 keep the old name.
 
 The recent review of QMP documentation may lead to a clean up bad
 names flag day.  One more wouldn't make it worse, I guess.
 
  Maybe we could have both arguments as optional, but one must be passed.
 This would at least require some way to keep the proposed unified path
 specification for the human monitor (having separate arguments there is
 really unhandy).
 
 Correct.
 
 It would be nice to have device_del support paths in addition to IDs.
 I'd expect management tools to slap IDs on everything, so they won't
 care, but human users do.
 
 As far as I know, we have two places where we let the user name a node
 in the qtree: device_add bus=X and device_del X.  The former names a
 bus, the latter a device.  But both are nodes in the same tree, so
 consistency is in order.
 
 Only devices have user-specified IDs.  Buses have names assigned by the
 system.  Unique names, hopefully.

 ...but not necessarily. The bus name device_add accepts can also be a
 full, thus unambiguous path.

 
 If the user doesn't specify a device ID, the driver name is used
 instead.  If you put multiple instances of the same device on the same
 bus, they have the *same* path.  For instance, here's a snippet of info
 qtree after adding two usb-mouse:
 
   dev: piix3-usb-uhci, id 
 bus-prop: addr = 01.2
 bus-prop: romfile = null
 bus-prop: rombar = 1
 class USB controller, addr 00:01.2, pci id 8086:7020 (sub 1af4:1100)
 bar 4: i/o at 0x [0x1e]
 bus: usb.0
   type USB
   dev: usb-hub, id 
 addr 0.0, speed 12, name QEMU USB Hub, attached
   dev: usb-mouse, id no2
 addr 0.0, speed 12, name QEMU USB Mouse, attached
   dev: usb-mouse, id 
 addr 0.0, speed 12, name QEMU USB Mouse, attached
 
 Both devices have the same full path
 /i440FX-pcihost/pci.0/piix3-usb-uhci/usb.0/usb-mouse
 Which one does your code pick?  Shouldn't it refuse to pick?

 Patch 3 of this series resolves this as follows:

 usb-mouse[.0] - first listed instance
 usb-mouse.1   - second instance
 ...

 We should probably include this numbering in the qtree dump, I guess.

 
 By the way, you *can* put '/' in IDs.  I call that a bug.

 Even if we prevent this, IDs can still collide with abbreviated device
 or bus paths. Therefore I give paths precedence over IDs in patch 4.

You're fixing problems in the overly complex and subtle path name lookup
by making it more complex and subtle.  Let's make it simple and
straightforward instead.



Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Blue Swirl
On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov g...@redhat.com wrote:
 On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote:
  I still don't see how the alternative is supposed to simplify our life
  or improve the efficiency of the de-coalescing workaround. It's rather
  problematic like Gleb pointed out: The de-coalescing logic needs to be
  informed about periodicity changes that can only be delivered along
  IRQs. So what to do with the backlog when the timer is stopped?

 What happens with the current design? Gleb only mentioned the
 frequency change, I thought that was not so big problem. But I don't
 think this case should be allowed happen at all, it can't exist on
 real HW.

 Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP
 inside QEMU, connect with gdb to QEMU process and check what frequency
 RTC configured with (hint: it will be 64Hz), now run video inside the
 guest and check frequency again (hint: it will be 1Khz).

You missed the key word 'stopped'. If the timer is really stopped, no
IRQs should ever come out afterwards, just like on real HW. For the
emulation, this means loss of ticks which should have been delivered
before the change.

But what if the guest changed the frequency very often, and between
changes used zero value, like 64Hz - 0Hz - 128Hz - 0Hz - 64Hz?



[Qemu-devel] [PATCH 1/3] qemu-thread: add qemu_mutex/cond_destroy and qemu_mutex_exit

2010-05-29 Thread Corentin Chary
Add some missing functions in qemu-thread. Currently qemu-thread
is only used for io-thread but it will used by the vnc server soon
and we need those functions instead of calling pthread directly.

Signed-off-by: Corentin Chary corenti...@iksaif.net
---
 qemu-thread.c |   22 ++
 qemu-thread.h |4 
 2 files changed, 26 insertions(+), 0 deletions(-)

diff --git a/qemu-thread.c b/qemu-thread.c
index 3923db7..afc9933 100644
--- a/qemu-thread.c
+++ b/qemu-thread.c
@@ -34,6 +34,15 @@ void qemu_mutex_init(QemuMutex *mutex)
 error_exit(err, __func__);
 }
 
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+int err;
+
+err = pthread_mutex_destroy(mutex-lock);
+if (err)
+error_exit(err, __func__);
+}
+
 void qemu_mutex_lock(QemuMutex *mutex)
 {
 int err;
@@ -90,6 +99,15 @@ void qemu_cond_init(QemuCond *cond)
 error_exit(err, __func__);
 }
 
+void qemu_cond_destroy(QemuCond *cond)
+{
+int err;
+
+err = pthread_cond_destroy(cond-cond);
+if (err)
+error_exit(err, __func__);
+}
+
 void qemu_cond_signal(QemuCond *cond)
 {
 int err;
@@ -161,3 +179,7 @@ int qemu_thread_equal(QemuThread *thread1, QemuThread 
*thread2)
return pthread_equal(thread1-thread, thread2-thread);
 }
 
+void qemu_thread_exit(void *retval)
+{
+pthread_exit(retval);
+}
diff --git a/qemu-thread.h b/qemu-thread.h
index 5ef4a3a..19bb30c 100644
--- a/qemu-thread.h
+++ b/qemu-thread.h
@@ -20,12 +20,14 @@ typedef struct QemuCond QemuCond;
 typedef struct QemuThread QemuThread;
 
 void qemu_mutex_init(QemuMutex *mutex);
+void qemu_mutex_destroy(QemuMutex *mutex);
 void qemu_mutex_lock(QemuMutex *mutex);
 int qemu_mutex_trylock(QemuMutex *mutex);
 int qemu_mutex_timedlock(QemuMutex *mutex, uint64_t msecs);
 void qemu_mutex_unlock(QemuMutex *mutex);
 
 void qemu_cond_init(QemuCond *cond);
+void qemu_cond_destroy(QemuCond *cond);
 void qemu_cond_signal(QemuCond *cond);
 void qemu_cond_broadcast(QemuCond *cond);
 void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex);
@@ -37,4 +39,6 @@ void qemu_thread_create(QemuThread *thread,
 void qemu_thread_signal(QemuThread *thread, int sig);
 void qemu_thread_self(QemuThread *thread);
 int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2);
+void qemu_thread_exit(void *retval);
+
 #endif
-- 
1.7.1




Re: [Qemu-devel] [PATCH v3 04/17] qdev: Give qtree names precedence over user-assigned IDs

2010-05-29 Thread Markus Armbruster
Jan Kiszka jan.kis...@web.de writes:

 From: Jan Kiszka jan.kis...@siemens.com

 As the user may specify ambiguous device IDs, let's search for their
 official names first before considering the user-supplied identifiers.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com

The problem is letting the user specify ambiguous device IDs in the
first place!  That way is madness...



Re: [Qemu-devel] [PATCH v2 03/15] qdev: Allow device addressing via 'driver.instance'

2010-05-29 Thread Markus Armbruster
Jan Kiszka jan.kis...@web.de writes:

 From: Jan Kiszka jan.kis...@siemens.com

 Extend qbus_find_dev to allow addressing of devices without an unique id
 via an optional per-bus instance number. The new formats are
 'driver.instance' and 'alias.instance'.

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  docs/qdev-device-use.txt |   12 +++-
  hw/qdev.c|   23 ++-
  2 files changed, 29 insertions(+), 6 deletions(-)

 diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt
 index 9ac1fa1..5939481 100644
 --- a/docs/qdev-device-use.txt
 +++ b/docs/qdev-device-use.txt
 @@ -1,6 +1,6 @@
  = How to convert to -device  friends =
  
 -=== Specifying Bus and Address on Bus ===
 +=== Specifying Bus, Address on Bus, and Devices ===
  
  In qdev, each device has a parent bus.  Some devices provide one or
  more buses for children.  You can specify a device's parent bus with
 @@ -24,6 +24,16 @@ Furthermore, if a device only hosts a single bus, the bus 
 name can be
  omitted in the path.  Example: /i440FX-pcihost/PIIX3 abbreviates
  /i440FX-pcihost/pci.0/PIIX3/isa.0 as none of the buses has siblings.
  
 +Existing devices can be addressed either via a unique ID if it was
 +assigned during creation or via the device tree path:
 +
 +/full_bus_address/driver_name[.instance_number]
 +or
 +abbreviated_bus_address/driver_name[.instance_number]
 +
 +Example: /i440FX-pcihost/pci.0/e1000.2 addresses the second e1000
 +adapter on the bus 'pci.0'.
 +
  Note: the USB device address can't be controlled at this time.

instance number isn't defined in this document.

I understand the problem you're trying to solve; I've had it myself many
times.  But is inventing an instance number the right solution?

The two e1000 devices already have a perfectly fine unique identifier on
their bus: their bus address.  What about recognizing bus addresses in
paths?  Requires a suitable restriction on device names and IDs to avoid
ambiguity, say start with letter.

qdev currently doesn't abstract bus addresses, but that's fixable.

[...]



Re: [Qemu-devel] [PATCH] sparc64: fix 128-bit atomic load from nucleus context v1

2010-05-29 Thread Blue Swirl
Thanks, applied.

On Fri, May 28, 2010 at 9:05 PM, Igor V. Kovalenko
igor.v.kovale...@gmail.com wrote:
 From: Igor V. Kovalenko igor.v.kovale...@gmail.com

 - change 128-bit atomic loads to reference nucleus context
 v0-v1: dropped disassembler change
 Signed-off-by: Igor V. Kovalenko igor.v.kovale...@gmail.com
 ---
  target-sparc/op_helper.c |   10 +-
  1 files changed, 5 insertions(+), 5 deletions(-)

 diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
 index edeeb44..f468e7b 100644
 --- a/target-sparc/op_helper.c
 +++ b/target-sparc/op_helper.c
 @@ -3060,19 +3060,19 @@ void helper_ldda_asi(target_ulong addr, int asi, int 
 rd)
     case 0x2c: // Nucleus quad LDD 128 bit atomic LE
         helper_check_align(addr, 0xf);
         if (rd == 0) {
 -            env-gregs[1] = ldq_kernel(addr + 8);
 +            env-gregs[1] = ldq_nucleus(addr + 8);
             if (asi == 0x2c)
                 bswap64s(env-gregs[1]);
         } else if (rd  8) {
 -            env-gregs[rd] = ldq_kernel(addr);
 -            env-gregs[rd + 1] = ldq_kernel(addr + 8);
 +            env-gregs[rd] = ldq_nucleus(addr);
 +            env-gregs[rd + 1] = ldq_nucleus(addr + 8);
             if (asi == 0x2c) {
                 bswap64s(env-gregs[rd]);
                 bswap64s(env-gregs[rd + 1]);
             }
         } else {
 -            env-regwptr[rd] = ldq_kernel(addr);
 -            env-regwptr[rd + 1] = ldq_kernel(addr + 8);
 +            env-regwptr[rd] = ldq_nucleus(addr);
 +            env-regwptr[rd + 1] = ldq_nucleus(addr + 8);
             if (asi == 0x2c) {
                 bswap64s(env-regwptr[rd]);
                 bswap64s(env-regwptr[rd + 1]);






Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Jan Kiszka
Blue Swirl wrote:
 On Sat, May 29, 2010 at 10:16 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 This is - according to my current understanding - the proposed
 alternative architecture:

  .---.
  | de-coalescing |
  | logic |
  '---'
^   |
 period,|   |IRQ
   coalesced|   |(or tuned VM clock)
(yes/no)|   v
 .---.  .. .---.
 |  RTC  |-IRQ-| router |-IRQ| APIC  |
 '---'  '' '---'
|^|   ^
|||   |
'---period---''--period---'
 The period information is already known by the higher level clock
 management,
 The timer device models program the next event of some qemu-timer. There
 is no tag attached explaining the timer subsystem or anyone else the
 reason behind this programming.
 Yes, but why would we care? All timers in the system besides RTC
 should be affected likewise, this would in fact be the benefit from
 handling the problem at higher level.
 And how does your equation for calculating the clock slow-down look like?
 
 How about icount_adjust()?

I would suggest that you implement your ideas now. Please keep us
informed about the progress as this series (and more) depends on a decision.

Jan



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [PATCH 2/3] qemu-thread: add cleanup_push() and cleanup_pop()

2010-05-29 Thread Corentin Chary
From pthread man:

 These  functions  manipulate  the  calling thread's stack of
 thread-cancellation clean-up handlers.  A clean-up handler is
 a function that is automatically executed when a thread is canceled
 [...] it might, for example, unlock a mutex so that it becomes
 available to other threads in the process.

These two functions are implemented using macros because there is no
other way to do that (pthread man, again):

 On Linux, the pthread_cleanup_push() and pthread_cleanup_pop()
 functions are implemented as macros that expand to text  containing
 '{'  and  '}',  respectively.  This means that variables declared
 within the scope of paired calls to these functions will only
 be visible within that scope.

Signed-off-by: Corentin Chary corenti...@iksaif.net
---
 qemu-thread.h |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/qemu-thread.h b/qemu-thread.h
index 19bb30c..e5006bb 100644
--- a/qemu-thread.h
+++ b/qemu-thread.h
@@ -41,4 +41,8 @@ void qemu_thread_self(QemuThread *thread);
 int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2);
 void qemu_thread_exit(void *retval);
 
+#define qemu_thread_cleanup_pop(execute) pthread_cleanup_pop(execute)
+#define qemu_thread_cleanup_push(routine, arg)  \
+pthread_cleanup_push(routine, arg)
+
 #endif
-- 
1.7.1




Re: [Qemu-devel] [PATCH v2 02/15] qdev: Fix scanning across single-bus devices

2010-05-29 Thread Markus Armbruster
[cc: kraxel]

Jan Kiszka jan.kis...@web.de writes:

 From: Jan Kiszka jan.kis...@siemens.com

 As long as we allow /dev.1 as shortcut for /dev1/bus1, we also have to
 make sure that /dev1/dev2 works for /dev1/bus1/dev2/bus2 - as long as
 there is only one child bus per device.

We auto-root a path not starting with '/' via convention first
component is ID then (I like that).  We auto-complete a path ending
with a device when we want a bus (a bit too clever).  Auto-inserting bus
names in the middle is too clever by half!

Such cleverness can be convenient in the human monitor, but all it adds
to QMP is complexity.

I'm worried about possibly ambigous interpretation of paths.  Would be
bad if a path could name different node after we add something to the
tree.  I *think* your fine print makes that impossible, but it's not
exactly obvious.  Heck, I could be wrong.

Wouldn't it be better to put the convenience into the interactive
completion function?  That way we keep it out of QMP.

Subject is misleading, it's a feature, not a bug fix.



[Qemu-devel] Re: [PATCH] savevm: Really verify if a drive supports snapshots

2010-05-29 Thread Markus Armbruster
Kevin Wolf kw...@redhat.com writes:

 Am 28.05.2010 20:18, schrieb Miguel Di Ciurcio Filho:
 Both bdrv_can_snapshot() and bdrv_has_snapshot() does not work as advertized.
 
 First issue: Their names implies different porpouses, but they do the same 
 thing
 and have exactly the same code. Maybe copied and pasted and forgotten?
 bdrv_has_snapshot() is called in various places for actually checking if 
 there
 is snapshots or not.
 
 Second issue: the way bdrv_can_snapshot() verifies if a block driver 
 supports or
 not snapshots does not catch all cases. E.g.: a raw image.
 
 So when do_savevm() is called, first thing it does is to set a global
 BlockDriverState to save the VM memory state calling get_bs_snapshots().
 
 static BlockDriverState *get_bs_snapshots(void)
 {
 BlockDriverState *bs;
 DriveInfo *dinfo;
 
 if (bs_snapshots)
 return bs_snapshots;
 QTAILQ_FOREACH(dinfo, drives, next) {
 bs = dinfo-bdrv;
 if (bdrv_can_snapshot(bs))
 goto ok;
 }
 return NULL;
  ok:
 bs_snapshots = bs;
 return bs;
 }
 
 bdrv_can_snapshot() may return a BlockDriverState that does not support
 snapshots and do_savevm() goes on.
 
 Later on in do_savevm(), we find:
 
 QTAILQ_FOREACH(dinfo, drives, next) {
 bs1 = dinfo-bdrv;
 if (bdrv_has_snapshot(bs1)) {
 /* Write VM state size only to the image that contains the state 
 */
 sn-vm_state_size = (bs == bs1 ? vm_state_size : 0);
 ret = bdrv_snapshot_create(bs1, sn);
 if (ret  0) {
 monitor_printf(mon, Error while creating snapshot on 
 '%s'\n,
bdrv_get_device_name(bs1));
 }
 }
 }
 
 bdrv_has_snapshot(bs1) is not checking if the device does support or has
 snapshots as explained above. Only in bdrv_snapshot_create() the device is
 actually checked for snapshot support.
 
 So, in cases where the first device supports snapshots, and the second does 
 not,
 the snapshot on the first will happen anyways. I believe this is not a good
 behavior. It should be an all or nothing process.
 
 This patch addresses these issues by making bdrv_can_snapshot() and
 bdrv_has_snapshot() actually do what they must do and enforces better tests 
 to
 avoid errors in the middle of do_savevm().
 
 The functions were moved from savevm.c to block.c. It makes more sense to me.
 
 The bdrv_has_snapshot() is not beaultiful, but it does the job. I think 
 having
 this function avaible in the BlockDriver would be the best option.
 
 The loadvm_state() function was updated too to enforce that when loading a 
 VM at
 least all writable devices must support snapshots too.
 
 Signed-off-by: Miguel Di Ciurcio Filho miguel.fi...@gmail.com

 Markus, I think this implements mostly what we discussed the other day.
 Not sure if you already have a patch for doing this - if so, maybe could
 compare the patches and give it a review this way?

Glad I don't, because this one looks pretty good.

 I seem to remember that we came to the conclusion that
 bdrv_has_snapshot() isn't needed at all and should be dropped. Any user
 should be using bdrv_can_snapshot() instead as this is what they really
 want.

Our reasoning adapted to the patched code goes like this.  The remaining
uses of bdrv_has_snapshot() are of the form:

if (bdrv_has_snapshot(bs)
   some_snapshot_op()

where some_snapshot_op() fails when there are no snapshots, and the code
can recognize that failure as sorry, no snapshots (that's what it does
before your patch, when bdrv_has_snapshot() is really no more than a
necessary, but not sufficient condition for has snapshots).

Therefore, we don't have to check for actual existence of snapshots with
bdrv_has_snapshot().  bdrv_can_snapshot() suffices.

In general, I consider

if (predict_whether_foo_will_work)
if (foo()  0)
// foo failed
else
// foo would have failed (we think)

an anti-pattern.  Just make foo() safe to try, and check its status:

if (foo()  0)
// foo failed

Except when foo() is too expensive to try, or can't be made safe.

 ---
  block.c  |   47 ++-
  block.h  |2 ++
  savevm.c |   48 +---
  3 files changed, 69 insertions(+), 28 deletions(-)
 
 diff --git a/block.c b/block.c
 index cd70730..7eddc15 100644
 --- a/block.c
 +++ b/block.c
 @@ -1720,15 +1720,52 @@ void bdrv_debug_event(BlockDriverState *bs, 
 BlkDebugEvent event)
  /**/
  /* handling of snapshots */
  
 -int bdrv_snapshot_create(BlockDriverState *bs,
 - QEMUSnapshotInfo *sn_info)
 +int bdrv_can_snapshot(BlockDriverState *bs)
  {
  BlockDriver *drv = bs-drv;
 -if (!drv)
 +if (!drv) {
 +return -ENOMEDIUM;
 +}
 +
 +if (!drv-bdrv_snapshot_create || bdrv_is_removable(bs) ||
 +   

Re: [Qemu-devel] cg14

2010-05-29 Thread Bob Breuer
Artyom Tarasenko wrote:
 2010/5/28 Blue Swirl blauwir...@gmail.com:
   
 On Fri, May 28, 2010 at 7:54 AM, Bob Breuer breu...@mc.net wrote:
 
 Artyom Tarasenko wrote:
   
 2010/5/27 Bob Breuer breu...@mc.net:

 
 Artyom Tarasenko wrote:

   
 Was going to put some more empty slots into SS-10/20 (VSIMMs, SX)
 after we are done with SS-5 (due to technical limitations I can switch
 access from one real SS model to another one once a few days only).


 
 I have a partial implementation of the SS-20 VSIMM (cg14) that I've been
 working on.  With the Sun firmware, I have working text console, color
 boot logo, and programmable video resolutions up to 1600x1280.

   
 Great news! This would allow qemu booting NeXTStep! Are you planning
 to submit the patches any time soon?


 
 It's not in a state to be submitted yet, but I've attached a working
 patch if you want to give it a try.  I need to hook it up to qdev and
 fill in some more of the obviously incomplete switch cases before I'd
 sign off on it.
   
 Nice work. I have a few comments below.

 This probably needs support from OpenBIOS to be usable without OBP.
 

 Maybe it can be used as a second adapter without OpenBIOS support? At
 least under some OSes?

   
Probably won't be used without at least being in the firmware device
tree.  One area that OpenBIOS could enhance would be a larger memory
size option.  The real hardware was only available in 4M and 8M options,
but the memory map allows for 16M.  OBP will identify a 16M VSIMM but
won't do anything else with it, and with 16M of vram it would allow for
a potential 2560x1600 32bit resolution.
 Bob


 diff --git a/Makefile.target b/Makefile.target
 index fda5bf3..b17b3af 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -250,6 +250,7 @@ else
  obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o
  obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o
  obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o
 +obj-sparc-y += cg14.o
  endif

  obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o
 diff --git a/hw/sun4m.c b/hw/sun4m.c
 index 7ba0f76..8b23c9b 100644
 --- a/hw/sun4m.c
 +++ b/hw/sun4m.c
 @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef 
 *hwdef, ram_addr_t RAM_size,
 fprintf(stderr, qemu: Unsupported depth: %d\n, graphic_depth);
 exit (1);
 }
 +  if (hwdef-machine_id == 65) { /* SS-20 */
   
 hwdef structure should contain a field for cg14. If non-zero, install
 cg14. Was cg14 only available for SS-20? Was it always included? This
 is also interesting for OpenBIOS, we need to detect cg14 vs. TCX.
 
The cg14 was only an option for SS-20 and the rare SS-10SX, but not the
regular SS-10, though the SS-10 chipset may have been capable of
supporting it.  Each cg14 vsimm takes the place of a stick of memory
with 2 slots physically capable of holding a vsimm.

Is there a way to pass the framebuffer type and/or address to OpenBIOS? 
I would be inclined to have the SS-20 machine default to cg14 instead of
TCX, but it will blow a hole in the support of more than 2G of emulated
system ram.
 +/* cg14.c */
 +void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t 
 vram_base,
 +uint32_t vram_size);
   
 This should go to sun4m.h or cg14.h.

 
 +
 +cg14_init(0x09c00ULL, 0x0fc00ULL, 820);
 +  } else
   
 Please add braces and reindent.

 
 tcx_init(hwdef-tcx_base, 0x0010, graphic_width, graphic_height,
  graphic_depth);

 --- /dev/null   Fri May 28 02:08:36 2010
 +++ hw/cg14.c   Fri May 28 01:58:49 2010
 @@ -0,0 +1,785 @@
 +/*
 + * QEMU CG14 Frame buffer
 + *
 + * Copyright (c) 2010 Bob Breuer
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a 
 copy
 + * of this software and associated documentation files (the Software), 
 to deal
 + * in the Software without restriction, including without limitation the 
 rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
 sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included 
 in
 + * all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
 OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 FROM,
 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
 IN
 + * THE SOFTWARE.
 + */
 +
 +#include console.h
 +#include sysbus.h
 +
 +#ifdef DEBUG
   

 DEBUG_CG14 ?

   

[Qemu-devel] Re: [SeaBIOS] SMBIOS strings

2010-05-29 Thread Kevin O'Connor
On Fri, May 28, 2010 at 05:24:47PM +0200, Jes Sorensen wrote:
 We were looking at the dmidecode output from qemu-kvm pre-seabios and
 current qemu-kvm and noticed some of the strings have changed.
[...]
 I wanted to check with the lists if there are any strong feelings about
 this, and whether some of these changes were made for specific reasons?
 
 For example:
[...]
 -   Vendor: QEMU
 -   Version: QEMU
 +   Vendor: Bochs
 +   Version: Bochs
[...]
 -   Manufacturer: QEMU
 -   ID: 63 06 00 00 FD FB 8B 07
 +   Manufacturer: Bochs
 +   ID: 23 06 00 00 FD FB 8B 07

I made these changes in SeaBIOS when I was cleaning up the ifdefs in
the code.  Instead of ifdef QEMU spread around the code, I
abstracted out the various names into CONFIG_APPNAME.  I defaulted the
names to the Bochs based ones because they were the default in the
original Bochs bios code.  So, the only reason for these string
differences is that the default name hasn't been changed.

 Otherwise, if there are no objections, I'll look at adding some patches
 to make it more backwards compatible.

I wonder if making the default names be SeaBIOS based instead of
bochs/qemu based would make sense.  That way there isn't an impulse to
change the settings with every emulator seabios runs on.  (Ideally,
the same binary would be run on multiple emulators to simplify
testing.)

-Kevin



Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Gleb Natapov
On Sat, May 29, 2010 at 04:13:22PM +, Blue Swirl wrote:
 On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov g...@redhat.com wrote:
  On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote:
   I still don't see how the alternative is supposed to simplify our life
   or improve the efficiency of the de-coalescing workaround. It's rather
   problematic like Gleb pointed out: The de-coalescing logic needs to be
   informed about periodicity changes that can only be delivered along
   IRQs. So what to do with the backlog when the timer is stopped?
 
  What happens with the current design? Gleb only mentioned the
  frequency change, I thought that was not so big problem. But I don't
  think this case should be allowed happen at all, it can't exist on
  real HW.
 
  Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP
  inside QEMU, connect with gdb to QEMU process and check what frequency
  RTC configured with (hint: it will be 64Hz), now run video inside the
  guest and check frequency again (hint: it will be 1Khz).
 
 You missed the key word 'stopped'. If the timer is really stopped, no
 IRQs should ever come out afterwards, just like on real HW. For the
 emulation, this means loss of ticks which should have been delivered
 before the change.
 
I haven't missed it. I describe to you reality of the situation. You want
to change reality to be more close to what you want it to be by adding
words to my description. Please just go write code, experiment, debug
and _then_ come here with design.

 But what if the guest changed the frequency very often, and between
 changes used zero value, like 64Hz - 0Hz - 128Hz - 0Hz - 64Hz?
Too bad, the world is not perfect.

--
Gleb.



Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr

2010-05-29 Thread Lars Munch
On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno aurel...@aurel32.net wrote:
 On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote:
 Booting an arm kernel has been broken a while when booting from non zero 
 start
 address. This is due to the order of events: board init loads the kernel and
 sets register 15 to the start address and then qemu_system_reset reset the 
 cpu
 making register 15 zero again.

 This patch fixes the usage of the register 15 start address trick in
 combination with arm_load_kernel.

 Signed-off-by: Lars Munch l...@segv.dk
 ---
  hw/arm_boot.c       |    1 +
  hw/gumstix.c        |    4 
  hw/mainstone.c      |    3 ---
  hw/nseries.c        |    7 ---
  hw/omap_sx1.c       |    5 -
  hw/palm.c           |    4 
  hw/spitz.c          |    3 ---
  hw/tosa.c           |    3 ---
  target-arm/helper.c |    1 -
  9 files changed, 1 insertions(+), 30 deletions(-)

 diff --git a/hw/arm_boot.c b/hw/arm_boot.c
 index df031a5..620550b 100644
 --- a/hw/arm_boot.c
 +++ b/hw/arm_boot.c
 @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque)
              env-regs[15] = info-entry  0xfffe;
              env-thumb = info-entry  1;
          } else {
 +            env-regs[15] = info-loader_start;
              if (old_param) {
                  set_kernel_args_old(info, info-initrd_size,
                                      info-loader_start);

 This parts looks fine.

 diff --git a/hw/gumstix.c b/hw/gumstix.c
 index 3fd31f4..b64e04e 100644
 --- a/hw/gumstix.c
 +++ b/hw/gumstix.c
 @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size,
          exit(1);
      }

 -    cpu-env-regs[15] = 0x;
 -
      /* Interrupt line of NIC is connected to GPIO line 36 */
      smc91c111_init(nd_table[0], 0x04000300,
                      pxa2xx_gpio_in_get(cpu-gpio)[36]);
 @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size,
          exit(1);
      }

 -    cpu-env-regs[15] = 0x;
 -
      /* Interrupt line of NIC is connected to GPIO line 99 */
      smc91c111_init(nd_table[0], 0x04000300,
                      pxa2xx_gpio_in_get(cpu-gpio)[99]);
 diff --git a/hw/mainstone.c b/hw/mainstone.c
 index a4379e3..54bacfb 100644
 --- a/hw/mainstone.c
 +++ b/hw/mainstone.c
 @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size,
      cpu_register_physical_memory(0, MAINSTONE_ROM,
                      qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM);

 -    /* Setup initial (reset) machine state */
 -    cpu-env-regs[15] = mainstone_binfo.loader_start;
 -
  #ifdef TARGET_WORDS_BIGENDIAN
      be = 1;
  #else
 diff --git a/hw/nseries.c b/hw/nseries.c
 index 0273eee..04a028d 100644
 --- a/hw/nseries.c
 +++ b/hw/nseries.c
 @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque)
      n800_dss_init(s-blizzard);

      /* CPU setup */
 -    s-cpu-env-regs[15] = s-cpu-env-boot_info-loader_start;
      s-cpu-env-GE = 0x5;

      /* If the machine has a slided keyboard, open it */
 @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const char 
 *boot_device,
      if (usb_enabled)
          n8x0_usb_setup(s);

 -    /* Setup initial (reset) machine state */
 -
 -    /* Start at the OneNAND bootloader.  */
 -    s-cpu-env-regs[15] = 0;
 -
      if (kernel_filename) {
          /* Or at the linux loader.  */
          binfo-kernel_filename = kernel_filename;
 @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const char 
 *boot_device,
          arm_load_kernel(s-cpu-env, binfo);

          qemu_register_reset(n8x0_boot_init, s);
 -        n8x0_boot_init(s);
      }

      if (option_rom[0]  (boot_device[0] == 'n' || !kernel_filename)) {
 diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c
 index ca0a7d1..2e9879f 100644
 --- a/hw/omap_sx1.c
 +++ b/hw/omap_sx1.c
 @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size,

      /* Load the kernel.  */
      if (kernel_filename) {
 -        /* Start at bootloader.  */
 -        cpu-env-regs[15] = sx1_binfo.loader_start;
 -
          sx1_binfo.kernel_filename = kernel_filename;
          sx1_binfo.kernel_cmdline = kernel_cmdline;
          sx1_binfo.initrd_filename = initrd_filename;
          arm_load_kernel(cpu-env, sx1_binfo);
 -    } else {
 -        cpu-env-regs[15] = 0x;
      }

      /* TODO: fix next line */
 diff --git a/hw/palm.c b/hw/palm.c
 index ba7c398..8db133d 100644
 --- a/hw/palm.c
 +++ b/hw/palm.c
 @@ -243,7 +243,6 @@ static void palmte_init(ram_addr_t ram_size,
              rom_size = load_image_targphys(option_rom[0], OMAP_CS0_BASE,
                                             flash_size);
              rom_loaded = 1;
 -            cpu-env-regs[15] = 0x;
          }
          if (rom_size  0) {
              fprintf(stderr, %s: error loading '%s'\n,
 @@ -258,9 +257,6 @@ static void palmte_init(ram_addr_t ram_size,

      /* Load the kernel.  */
      if (kernel_filename) {
 -        /* Start at bootloader.  */
 -        

Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr

2010-05-29 Thread Aurelien Jarno
On Sat, May 29, 2010 at 08:42:52PM +0200, Lars Munch wrote:
 On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno aurel...@aurel32.net wrote:
  On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote:
  Booting an arm kernel has been broken a while when booting from non zero 
  start
  address. This is due to the order of events: board init loads the kernel 
  and
  sets register 15 to the start address and then qemu_system_reset reset the 
  cpu
  making register 15 zero again.
 
  This patch fixes the usage of the register 15 start address trick in
  combination with arm_load_kernel.
 
  Signed-off-by: Lars Munch l...@segv.dk
  ---
   hw/arm_boot.c       |    1 +
   hw/gumstix.c        |    4 
   hw/mainstone.c      |    3 ---
   hw/nseries.c        |    7 ---
   hw/omap_sx1.c       |    5 -
   hw/palm.c           |    4 
   hw/spitz.c          |    3 ---
   hw/tosa.c           |    3 ---
   target-arm/helper.c |    1 -
   9 files changed, 1 insertions(+), 30 deletions(-)
 
  diff --git a/hw/arm_boot.c b/hw/arm_boot.c
  index df031a5..620550b 100644
  --- a/hw/arm_boot.c
  +++ b/hw/arm_boot.c
  @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque)
               env-regs[15] = info-entry  0xfffe;
               env-thumb = info-entry  1;
           } else {
  +            env-regs[15] = info-loader_start;
               if (old_param) {
                   set_kernel_args_old(info, info-initrd_size,
                                       info-loader_start);
 
  This parts looks fine.
 
  diff --git a/hw/gumstix.c b/hw/gumstix.c
  index 3fd31f4..b64e04e 100644
  --- a/hw/gumstix.c
  +++ b/hw/gumstix.c
  @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size,
           exit(1);
       }
 
  -    cpu-env-regs[15] = 0x;
  -
       /* Interrupt line of NIC is connected to GPIO line 36 */
       smc91c111_init(nd_table[0], 0x04000300,
                       pxa2xx_gpio_in_get(cpu-gpio)[36]);
  @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size,
           exit(1);
       }
 
  -    cpu-env-regs[15] = 0x;
  -
       /* Interrupt line of NIC is connected to GPIO line 99 */
       smc91c111_init(nd_table[0], 0x04000300,
                       pxa2xx_gpio_in_get(cpu-gpio)[99]);
  diff --git a/hw/mainstone.c b/hw/mainstone.c
  index a4379e3..54bacfb 100644
  --- a/hw/mainstone.c
  +++ b/hw/mainstone.c
  @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size,
       cpu_register_physical_memory(0, MAINSTONE_ROM,
                       qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM);
 
  -    /* Setup initial (reset) machine state */
  -    cpu-env-regs[15] = mainstone_binfo.loader_start;
  -
   #ifdef TARGET_WORDS_BIGENDIAN
       be = 1;
   #else
  diff --git a/hw/nseries.c b/hw/nseries.c
  index 0273eee..04a028d 100644
  --- a/hw/nseries.c
  +++ b/hw/nseries.c
  @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque)
       n800_dss_init(s-blizzard);
 
       /* CPU setup */
  -    s-cpu-env-regs[15] = s-cpu-env-boot_info-loader_start;
       s-cpu-env-GE = 0x5;
 
       /* If the machine has a slided keyboard, open it */
  @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const 
  char *boot_device,
       if (usb_enabled)
           n8x0_usb_setup(s);
 
  -    /* Setup initial (reset) machine state */
  -
  -    /* Start at the OneNAND bootloader.  */
  -    s-cpu-env-regs[15] = 0;
  -
       if (kernel_filename) {
           /* Or at the linux loader.  */
           binfo-kernel_filename = kernel_filename;
  @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const 
  char *boot_device,
           arm_load_kernel(s-cpu-env, binfo);
 
           qemu_register_reset(n8x0_boot_init, s);
  -        n8x0_boot_init(s);
       }
 
       if (option_rom[0]  (boot_device[0] == 'n' || !kernel_filename)) {
  diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c
  index ca0a7d1..2e9879f 100644
  --- a/hw/omap_sx1.c
  +++ b/hw/omap_sx1.c
  @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size,
 
       /* Load the kernel.  */
       if (kernel_filename) {
  -        /* Start at bootloader.  */
  -        cpu-env-regs[15] = sx1_binfo.loader_start;
  -
           sx1_binfo.kernel_filename = kernel_filename;
           sx1_binfo.kernel_cmdline = kernel_cmdline;
           sx1_binfo.initrd_filename = initrd_filename;
           arm_load_kernel(cpu-env, sx1_binfo);
  -    } else {
  -        cpu-env-regs[15] = 0x;
       }
 
       /* TODO: fix next line */
  diff --git a/hw/palm.c b/hw/palm.c
  index ba7c398..8db133d 100644
  --- a/hw/palm.c
  +++ b/hw/palm.c
  @@ -243,7 +243,6 @@ static void palmte_init(ram_addr_t ram_size,
               rom_size = load_image_targphys(option_rom[0], OMAP_CS0_BASE,
                                              flash_size);
               rom_loaded = 1;
  -            cpu-env-regs[15] = 0x;
           }
           if (rom_size  0) {
               fprintf(stderr, %s: 

[Qemu-devel] Re: [PATCH] savevm: Really verify if a drive supports snapshots

2010-05-29 Thread Miguel Di Ciurcio Filho
On Sat, May 29, 2010 at 3:06 AM, Markus Armbruster arm...@redhat.com wrote:

 I seem to remember that we came to the conclusion that
 bdrv_has_snapshot() isn't needed at all and should be dropped. Any user
 should be using bdrv_can_snapshot() instead as this is what they really
 want.

 Our reasoning adapted to the patched code goes like this.  The remaining
 uses of bdrv_has_snapshot() are of the form:

    if (bdrv_has_snapshot(bs)
       some_snapshot_op()

 where some_snapshot_op() fails when there are no snapshots, and the code
 can recognize that failure as sorry, no snapshots (that's what it does
 before your patch, when bdrv_has_snapshot() is really no more than a
 necessary, but not sufficient condition for has snapshots).

 Therefore, we don't have to check for actual existence of snapshots with
 bdrv_has_snapshot().  bdrv_can_snapshot() suffices.

I had this feeling too, but I was conservative and kept
bdrv_has_snapshot(). I will remove it and replace by
bdrv_can_snapshot() where appropriate.

 +int bdrv_can_snapshot(BlockDriverState *bs)
  {
      BlockDriver *drv = bs-drv;
 -    if (!drv)
 +    if (!drv) {
 +        return -ENOMEDIUM;
 +    }
 +
 +    if (!drv-bdrv_snapshot_create || bdrv_is_removable(bs) ||
 +        bdrv_is_read_only(bs)) {
 +        return -ENOTSUP;
 +    }
 +
 +    return 1;
 +}

 Returning either 1 or -errno is a strange interface. I'm not sure which

 Agree.

 of 1/0 or 0/-errno is better in this case, but I'd suggest to take one
 of these.

 Does any existing caller care for the error codes?  If not, just go with
 1/0.

When bdrv_can_snapshot() is called the specific reason for not
supporting snapshots does not matter. So, 1/0 is better. I will fix it
and resend.

Regards,

Miguel



Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr

2010-05-29 Thread Lars Munch
On Sat, May 29, 2010 at 8:51 PM, Aurelien Jarno aurel...@aurel32.net wrote:
 On Sat, May 29, 2010 at 08:42:52PM +0200, Lars Munch wrote:
 On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno aurel...@aurel32.net wrote:
  On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote:
  Booting an arm kernel has been broken a while when booting from non zero 
  start
  address. This is due to the order of events: board init loads the kernel 
  and
  sets register 15 to the start address and then qemu_system_reset reset 
  the cpu
  making register 15 zero again.
 
  This patch fixes the usage of the register 15 start address trick in
  combination with arm_load_kernel.
 
  Signed-off-by: Lars Munch l...@segv.dk
  ---
   hw/arm_boot.c       |    1 +
   hw/gumstix.c        |    4 
   hw/mainstone.c      |    3 ---
   hw/nseries.c        |    7 ---
   hw/omap_sx1.c       |    5 -
   hw/palm.c           |    4 
   hw/spitz.c          |    3 ---
   hw/tosa.c           |    3 ---
   target-arm/helper.c |    1 -
   9 files changed, 1 insertions(+), 30 deletions(-)
 
  diff --git a/hw/arm_boot.c b/hw/arm_boot.c
  index df031a5..620550b 100644
  --- a/hw/arm_boot.c
  +++ b/hw/arm_boot.c
  @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque)
               env-regs[15] = info-entry  0xfffe;
               env-thumb = info-entry  1;
           } else {
  +            env-regs[15] = info-loader_start;
               if (old_param) {
                   set_kernel_args_old(info, info-initrd_size,
                                       info-loader_start);
 
  This parts looks fine.
 
  diff --git a/hw/gumstix.c b/hw/gumstix.c
  index 3fd31f4..b64e04e 100644
  --- a/hw/gumstix.c
  +++ b/hw/gumstix.c
  @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size,
           exit(1);
       }
 
  -    cpu-env-regs[15] = 0x;
  -
       /* Interrupt line of NIC is connected to GPIO line 36 */
       smc91c111_init(nd_table[0], 0x04000300,
                       pxa2xx_gpio_in_get(cpu-gpio)[36]);
  @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size,
           exit(1);
       }
 
  -    cpu-env-regs[15] = 0x;
  -
       /* Interrupt line of NIC is connected to GPIO line 99 */
       smc91c111_init(nd_table[0], 0x04000300,
                       pxa2xx_gpio_in_get(cpu-gpio)[99]);
  diff --git a/hw/mainstone.c b/hw/mainstone.c
  index a4379e3..54bacfb 100644
  --- a/hw/mainstone.c
  +++ b/hw/mainstone.c
  @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size,
       cpu_register_physical_memory(0, MAINSTONE_ROM,
                       qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM);
 
  -    /* Setup initial (reset) machine state */
  -    cpu-env-regs[15] = mainstone_binfo.loader_start;
  -
   #ifdef TARGET_WORDS_BIGENDIAN
       be = 1;
   #else
  diff --git a/hw/nseries.c b/hw/nseries.c
  index 0273eee..04a028d 100644
  --- a/hw/nseries.c
  +++ b/hw/nseries.c
  @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque)
       n800_dss_init(s-blizzard);
 
       /* CPU setup */
  -    s-cpu-env-regs[15] = s-cpu-env-boot_info-loader_start;
       s-cpu-env-GE = 0x5;
 
       /* If the machine has a slided keyboard, open it */
  @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const 
  char *boot_device,
       if (usb_enabled)
           n8x0_usb_setup(s);
 
  -    /* Setup initial (reset) machine state */
  -
  -    /* Start at the OneNAND bootloader.  */
  -    s-cpu-env-regs[15] = 0;
  -
       if (kernel_filename) {
           /* Or at the linux loader.  */
           binfo-kernel_filename = kernel_filename;
  @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const 
  char *boot_device,
           arm_load_kernel(s-cpu-env, binfo);
 
           qemu_register_reset(n8x0_boot_init, s);
  -        n8x0_boot_init(s);
       }
 
       if (option_rom[0]  (boot_device[0] == 'n' || !kernel_filename)) {
  diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c
  index ca0a7d1..2e9879f 100644
  --- a/hw/omap_sx1.c
  +++ b/hw/omap_sx1.c
  @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size,
 
       /* Load the kernel.  */
       if (kernel_filename) {
  -        /* Start at bootloader.  */
  -        cpu-env-regs[15] = sx1_binfo.loader_start;
  -
           sx1_binfo.kernel_filename = kernel_filename;
           sx1_binfo.kernel_cmdline = kernel_cmdline;
           sx1_binfo.initrd_filename = initrd_filename;
           arm_load_kernel(cpu-env, sx1_binfo);
  -    } else {
  -        cpu-env-regs[15] = 0x;
       }
 
       /* TODO: fix next line */
  diff --git a/hw/palm.c b/hw/palm.c
  index ba7c398..8db133d 100644
  --- a/hw/palm.c
  +++ b/hw/palm.c
  @@ -243,7 +243,6 @@ static void palmte_init(ram_addr_t ram_size,
               rom_size = load_image_targphys(option_rom[0], OMAP_CS0_BASE,
                                              flash_size);
               rom_loaded = 1;
  -            cpu-env-regs[15] = 0x;
     

[Qemu-devel] [PATCH v2] savevm: Really verify if a drive supports snapshots

2010-05-29 Thread Miguel Di Ciurcio Filho
Both bdrv_can_snapshot() and bdrv_has_snapshot() does not work as advertized.

First issue: Their names implies different porpouses, but they do the same thing
and have exactly the same code. Maybe copied and pasted and forgotten?
bdrv_has_snapshot() is called in various places for actually checking if there
is snapshots or not.

Second issue: the way bdrv_can_snapshot() verifies if a block driver supports or
not snapshots does not catch all cases. E.g.: a raw image.

So when do_savevm() is called, first thing it does is to set a global
BlockDriverState to save the VM memory state calling get_bs_snapshots().

static BlockDriverState *get_bs_snapshots(void)
{
BlockDriverState *bs;
DriveInfo *dinfo;

if (bs_snapshots)
return bs_snapshots;
QTAILQ_FOREACH(dinfo, drives, next) {
bs = dinfo-bdrv;
if (bdrv_can_snapshot(bs))
goto ok;
}
return NULL;
 ok:
bs_snapshots = bs;
return bs;
}

bdrv_can_snapshot() may return a BlockDriverState that does not support
snapshots and do_savevm() goes on.

Later on in do_savevm(), we find:

QTAILQ_FOREACH(dinfo, drives, next) {
bs1 = dinfo-bdrv;
if (bdrv_has_snapshot(bs1)) {
/* Write VM state size only to the image that contains the state */
sn-vm_state_size = (bs == bs1 ? vm_state_size : 0);
ret = bdrv_snapshot_create(bs1, sn);
if (ret  0) {
monitor_printf(mon, Error while creating snapshot on '%s'\n,
   bdrv_get_device_name(bs1));
}
}
}

bdrv_has_snapshot(bs1) is not checking if the device does support or has
snapshots as explained above. Only in bdrv_snapshot_create() the device is
actually checked for snapshot support.

So, in cases where the first device supports snapshots, and the second does not,
the snapshot on the first will happen anyways. I believe this is not a good
behavior. It should be an all or nothing process.

This patch addresses these issues by making bdrv_can_snapshot() actually do
what it must do and enforces better tests to avoid errors in the middle of
do_savevm(). bdrv_has_snapshot() is removed and replaced by bdrv_can_snapshot()
where appropriate.

bdrv_can_snapshot() was moved from savevm.c to block.c. It makes more sense to 
me.

The loadvm_state() function was updated too to enforce that when loading a VM at
least all writable devices must support snapshots too.

Signed-off-by: Miguel Di Ciurcio Filho miguel.fi...@gmail.com
---
 block.c   |   21 -
 block.h   |1 +
 monitor.c |5 -
 savevm.c  |   58 --
 4 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/block.c b/block.c
index cd70730..1115a60 100644
--- a/block.c
+++ b/block.c
@@ -1720,15 +1720,26 @@ void bdrv_debug_event(BlockDriverState *bs, 
BlkDebugEvent event)
 /**/
 /* handling of snapshots */
 
+int bdrv_can_snapshot(BlockDriverState *bs)
+{
+BlockDriver *drv = bs-drv;
+if (!drv || !drv-bdrv_snapshot_create || bdrv_is_removable(bs) ||
+bdrv_is_read_only(bs)) {
+return 0;
+}
+
+return 1;
+}
+
 int bdrv_snapshot_create(BlockDriverState *bs,
  QEMUSnapshotInfo *sn_info)
 {
 BlockDriver *drv = bs-drv;
-if (!drv)
-return -ENOMEDIUM;
-if (!drv-bdrv_snapshot_create)
-return -ENOTSUP;
-return drv-bdrv_snapshot_create(bs, sn_info);
+if (bdrv_can_snapshot(bs)) {
+return drv-bdrv_snapshot_create(bs, sn_info);
+}
+
+return -1;
 }
 
 int bdrv_snapshot_goto(BlockDriverState *bs,
diff --git a/block.h b/block.h
index 24efeb6..fbcd8af 100644
--- a/block.h
+++ b/block.h
@@ -173,6 +173,7 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo 
*bdi);
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
char *filename, int filename_size);
+int bdrv_can_snapshot(BlockDriverState *bs);
 int bdrv_snapshot_create(BlockDriverState *bs,
  QEMUSnapshotInfo *sn_info);
 int bdrv_snapshot_goto(BlockDriverState *bs,
diff --git a/monitor.c b/monitor.c
index ad50f12..8b4a1d7 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2468,8 +2468,11 @@ static void do_loadvm(Monitor *mon, const QDict *qdict)
 
 vm_stop(0);
 
-if (load_vmstate(name) = 0  saved_vm_running)
+if (load_vmstate(name) = 0  saved_vm_running) {
 vm_start();
+} else {
+monitor_printf(mon, Failed to load VM state. VM stopped.\n);
+}
 }
 
 int monitor_get_fd(Monitor *mon, const char *fdname)
diff --git a/savevm.c b/savevm.c
index dc20390..6549ca7 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1574,22 +1574,6 @@ out:
 return ret;
 }
 
-/* device can contain snapshots */
-static int bdrv_can_snapshot(BlockDriverState *bs)
-{
-return (bs 
-

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Blue Swirl
On Sat, May 29, 2010 at 4:32 PM, Gleb Natapov g...@redhat.com wrote:
 On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote:
 2010/5/29 Gleb Natapov g...@redhat.com:
  On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote:
   There is no code, because we're still at architecture design stage.
  
   Try to write test code to understand the problem better.
 
  I will.
 
  Please do ASAP. This discussion shows that you don't understand what is the
  problem that we are dialing with.

 Which part of the problem you think I don't understand?

 It seams to me you don't understand how Windows uses RTC for time
 keeping and how the QEMU solves the problem today.

RTC causes periodic interrupts and Windows interrupt handler
increments jiffies, like Linux?

 guests could also be assisted with special handling (like win2k
 install hack), for example guest instructions could be counted
 (approximately, for example using TB size or TSC) and only inject
 after at least N instructions have passed.
 Guest instructions cannot be easily counted in KVM (it can be 
 done more
 or less reliably using perf counters, may be).
   
Aren't there any debug registers or perf counters, which can 
generate
an interrupt after some number of instructions have been executed?
Don't think debug registers have something like that and they are
available for guest use anyway. Perf counters differs greatly from 
CPU
to CPU (even between two CPUs of the same manufacturer), and we want 
to
keep using them for profiling guests. And I don't see what problem it
will solve anyway that can be solved by simple delay between irq
reinjection.
  
   This would allow counting the executed instructions and limit it. Thus
   we could emulate a 500MHz CPU on a 2GHz CPU more accurately.
  
   Why would you want to limit number of instruction executed by guest if
   CPU has nothing else to do anyway? The problem occurs not when we have
   spare cycles so give to a guest, but in opposite case.
 
  I think one problem is that the guest has executed too much compared
  to what would happen with real HW with a lesser CPU. That explains the
  RTC frequency reprogramming case.
  You think wrong. The problem is exactly opposite: the guest haven't
  had enough execution time between two time interrupts. I don't know what
  RTC frequency reprogramming case you are talking about here.

 The case you told me where N pending tick IRQs exist but the guest
 wants to change the RTC frequency from 64Hz to 1024Hz.

 Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so
 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change
 the frequency to 1000Hz.

 The problem for emulation is that for the same 3 ticks, there has been
 so little execution power that the ticks have been coalesced. But
 isn't the guest cycle count then much lower than 30Mcyc?

 Isn't it so that the guest must be above 30Mcyc to be able to want the
 change? But if we reach that point,  the problem must have not been
 too little execution time, but too much.

 Sorry I tried hard to understand what have you said above but failed.
 What do you mean to be able to want the change? Guest sometimes wants
 to get 64 timer interrupts per second and sometimes it wants to get 1024
 timer interrupt per second. It wants it not as a result of time drift or
 anything. It's just how guest behaves. You seams to be to fixated on
 guest frequency change. It's just something you have to take into
 account when you reinject interrupts.

I meant that in the scenario, the guest won't change the RTC before
30Mcyc because of some built in determinism in the guest. At that
point, because of some reason, the change would happen.


 
 
  
   

  And even if the rate did not matter, the APIC woult still have 
  to now
  about the fact that an IRQ is really periodic and does not 
  only appear
  as such for a certain interval. This really does not sound like
  simplifying things or even make them cleaner.

 It would, the voodoo would be contained only in APIC, RTC would 
 be
 just like any other device. With the bidirectional irqs, this 
 voodoo
 would probably eventually spread to many other devices. The 
 logical
 conclusion of that would be a system where all devices would be
 careful not to disturb the guest at wrong moment because that 
 would
 trigger a bug.

 This voodoo will be so complex and unreliable that it will make 
 RTC hack
 pale in comparison (and I still don't see how you are going to 
 make it
 actually work).
   
Implement everything inside APIC: only coalescing and reinjection.
APIC has zero info needed to implement reinjection correctly as was
shown to you several time in this thread and you simply keep ignoring
it.
  
   On the contrary, APIC is actually the only source of the IRQ ack
   information. 

[Qemu-devel] [Bug 587344] [NEW] gfxmenu from GRUB or GRUB4DOS hang qemu.

2010-05-29 Thread ABATAPA
Public bug reported:

When you run the graphical menu GRUB (gfxmenu, any theme) execution stops.
Test image (iso, as an attachment): 
http://www.multiupload.com/AAI7MGMOLE

Image make options: 
mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso 
image_root/

With options and GFX menu:
qemu -cdrom bootable.iso
execution stops after rendering background.

With options and GFX menu::
qemu -cdrom bootable.iso --no-kvm
the menu is drawn, and the execution stops.

** Affects: qemu
 Importance: Undecided
 Status: New


** Tags: gfx gfxmenu grub grub4dos

-- 
gfxmenu from GRUB or GRUB4DOS hang qemu.
https://bugs.launchpad.net/bugs/587344
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
When you run the graphical menu GRUB (gfxmenu, any theme) execution stops.
Test image (iso, as an attachment): 
http://www.multiupload.com/AAI7MGMOLE

Image make options: 
mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso 
image_root/

With options and GFX menu:
qemu -cdrom bootable.iso
execution stops after rendering background.

With options and GFX menu::
qemu -cdrom bootable.iso --no-kvm
the menu is drawn, and the execution stops.





[Qemu-devel] [Bug 587344] Re: gfxmenu from GRUB or GRUB4DOS hang qemu.

2010-05-29 Thread ABATAPA

** Attachment added: Test image (iso, gzipped)
   http://launchpadlibrarian.net/49329042/bootable.iso.gz

-- 
gfxmenu from GRUB or GRUB4DOS hang qemu.
https://bugs.launchpad.net/bugs/587344
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
When you run the graphical menu GRUB (gfxmenu, any theme) execution stops.
Test image (iso, as an attachment): 
http://www.multiupload.com/AAI7MGMOLE

Image make options: 
mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso 
image_root/

With options and GFX menu:
qemu -cdrom bootable.iso
execution stops after rendering background.

With options and GFX menu::
qemu -cdrom bootable.iso --no-kvm
the menu is drawn, and the execution stops.





[Qemu-devel] Re: [PATCH] sparc32 SuperSPARC MMU Breakpoint Action register (SS-20 OBP fix)

2010-05-29 Thread Blue Swirl
Thanks, applied.

On Sat, May 29, 2010 at 8:48 PM, Artyom Tarasenko
atar4q...@googlemail.com wrote:
 SuperSPARC MMU Breakpoint Action register is used by OBP at boot

 The patch allows booting Solaris and some other OS with
 SPARCStation-20 OBP.

 Signed-off-by: Artyom Tarasenko atar4q...@gmail.com
 ---
  target-sparc/op_helper.c |    1 +
  1 files changed, 1 insertions(+), 0 deletions(-)

 diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
 index aaacfc4..ef3504f 100644
 --- a/target-sparc/op_helper.c
 +++ b/target-sparc/op_helper.c
 @@ -1745,6 +1745,7 @@ uint64_t helper_ld_asi(target_ulong addr, int asi, int 
 size, int sign)
     case 0x31: // Turbosparc RAM snoop
     case 0x32: // Turbosparc page table descriptor diagnostic
     case 0x39: /* data cache diagnostic register */
 +    case 0x4c: /* SuperSPARC MMU Breakpoint Action register */
         ret = 0;
         break;
     case 0x38: /* SuperSPARC MMU Breakpoint Control Registers */
 --
 1.6.2.5





[Qemu-devel] sparc mmu

2010-05-29 Thread Artyom Tarasenko
2010/5/29 Blue Swirl blauwir...@gmail.com:
 Robert Reif did some improvements to SuperSparc emulation, but the
 work was not finished. That should be a good starting point.

Do you mean the last patch he sent to us or are there some earlier
unapplied patches?

The last patch _seems_ to be mainly refactoring and disabling features
which are not
presented in certain models. I might have missed something of course:
the patch is large.

For me is also interesting what do we miss in the microSPARC implementation.
If I switch off POST (which crashes due to the known FPU problems)
LX/CX/X OBPs hang. Looks like it's expecting some interrupt (the SS-5
OBP escapes a similar endless loop on a timer irq), but not getting
it.
Do you know anything obvious qemu is missing in LX machine?

-- 
Regards,
Artyom Tarasenko

solaris/sparc under qemu blog: http://tyom.blogspot.com/



Re: [Qemu-devel] [PATCH] Compile dma only once

2010-05-29 Thread Paul Brook
 On Fri, May 28, 2010 at 7:34 PM, Paul Brook p...@codesourcery.com wrote:
  Use a qemu_irq to request CPU exit.
  
  Needing to request a CPU exit at all is just wrong. See previous
  discussions about how any use of qemu_bh_schedule_idle is fundamentally
  broken.
 
 I agree for the device case. Is the attached patch then OK?

No. You can't remove code without understanding why it was there in the first 
place.  I'm pretty sure this will break FDC emulation, or at least make it 
unfeasibly slow.

The underlying problem is that devices (in particular the FDC) don't 
communicate properly with the DMA engine.  Instead they rely on the DMA device 
polling state at poorly defined intervals.  Removing DMA_schedule without 
removing qemu_bh_schedule_idle is almost certainly wrong.

My main objection to you original patch is that it introduces a new API for 
something that is just plain wrong. At minimum it needs a comment documenting 
that this function should never be used for anything - It only exists because 
we're too lazy to fix legacy code.

 But what about other uses (with the patch applied):

I was just referring to device emulation.

qemu_notify_event is also pretty bogus.  It is a horrible hack to workaround 
deficiencies in the network API.

Paul



Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-05-29 Thread Gleb Natapov
On Sat, May 29, 2010 at 08:52:34PM +, Blue Swirl wrote:
 On Sat, May 29, 2010 at 4:32 PM, Gleb Natapov g...@redhat.com wrote:
  On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote:
  2010/5/29 Gleb Natapov g...@redhat.com:
   On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote:
There is no code, because we're still at architecture design stage.
   
Try to write test code to understand the problem better.
  
   I will.
  
   Please do ASAP. This discussion shows that you don't understand what is 
   the
   problem that we are dialing with.
 
  Which part of the problem you think I don't understand?
 
  It seams to me you don't understand how Windows uses RTC for time
  keeping and how the QEMU solves the problem today.
 
 RTC causes periodic interrupts and Windows interrupt handler
 increments jiffies, like Linux?
 
Linux does much more complicated things than that to keep time, so the
only way to fix time drift in Linux was to introduce pvclock. For Window
it is not so accurate too, since Windows can change clock frequency any
time it can't calculate time from jiffies, it needs to update clock at
each time tick.

  guests could also be assisted with special handling (like win2k
  install hack), for example guest instructions could be counted
  (approximately, for example using TB size or TSC) and only 
  inject
  after at least N instructions have passed.
  Guest instructions cannot be easily counted in KVM (it can be 
  done more
  or less reliably using perf counters, may be).

 Aren't there any debug registers or perf counters, which can 
 generate
 an interrupt after some number of instructions have been executed?
 Don't think debug registers have something like that and they are
 available for guest use anyway. Perf counters differs greatly from 
 CPU
 to CPU (even between two CPUs of the same manufacturer), and we 
 want to
 keep using them for profiling guests. And I don't see what problem 
 it
 will solve anyway that can be solved by simple delay between irq
 reinjection.
   
This would allow counting the executed instructions and limit it. 
Thus
we could emulate a 500MHz CPU on a 2GHz CPU more accurately.
   
Why would you want to limit number of instruction executed by guest if
CPU has nothing else to do anyway? The problem occurs not when we have
spare cycles so give to a guest, but in opposite case.
  
   I think one problem is that the guest has executed too much compared
   to what would happen with real HW with a lesser CPU. That explains the
   RTC frequency reprogramming case.
   You think wrong. The problem is exactly opposite: the guest haven't
   had enough execution time between two time interrupts. I don't know what
   RTC frequency reprogramming case you are talking about here.
 
  The case you told me where N pending tick IRQs exist but the guest
  wants to change the RTC frequency from 64Hz to 1024Hz.
 
  Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so
  10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change
  the frequency to 1000Hz.
 
  The problem for emulation is that for the same 3 ticks, there has been
  so little execution power that the ticks have been coalesced. But
  isn't the guest cycle count then much lower than 30Mcyc?
 
  Isn't it so that the guest must be above 30Mcyc to be able to want the
  change? But if we reach that point,  the problem must have not been
  too little execution time, but too much.
 
  Sorry I tried hard to understand what have you said above but failed.
  What do you mean to be able to want the change? Guest sometimes wants
  to get 64 timer interrupts per second and sometimes it wants to get 1024
  timer interrupt per second. It wants it not as a result of time drift or
  anything. It's just how guest behaves. You seams to be to fixated on
  guest frequency change. It's just something you have to take into
  account when you reinject interrupts.
 
 I meant that in the scenario, the guest won't change the RTC before
 30Mcyc because of some built in determinism in the guest. At that
 point, because of some reason, the change would happen.
 
I still don't understand what are you trying to say here. Guest changes
frequency because of some even in the guest. It is totally independent
of what happens in QEMUs RTC emulation.

 
  
  
   

 
   And even if the rate did not matter, the APIC woult still 
   have to now
   about the fact that an IRQ is really periodic and does not 
   only appear
   as such for a certain interval. This really does not sound 
   like
   simplifying things or even make them cleaner.
 
  It would, the voodoo would be contained only in APIC, RTC 
  would be
  just like any other device. With the bidirectional irqs, this 
  voodoo
  would probably eventually spread to many other devices. The