[Qemu-devel] [PATCH 11/12] hw/omap1.c : separate uart module
Signed-off-by: cmchao cmc...@gmail.com --- Makefile.target |3 +- hw/omap1.c | 170 hw/omap_uart.c | 194 +++ 3 files changed, 196 insertions(+), 171 deletions(-) create mode 100644 hw/omap_uart.c diff --git a/Makefile.target b/Makefile.target index 20bcb8a..a01daa4 100644 --- a/Makefile.target +++ b/Makefile.target @@ -263,7 +263,8 @@ obj-arm-y += pxa2xx.o pxa2xx_pic.o pxa2xx_gpio.o pxa2xx_timer.o pxa2xx_dma.o obj-arm-y += pxa2xx_lcd.o pxa2xx_mmci.o pxa2xx_pcmcia.o pxa2xx_keypad.o obj-arm-y += gumstix.o obj-arm-y += zaurus.o ide/microdrive.o spitz.o tosa.o tc6393xb.o -obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o omap_gpio.o omap_intc.o +obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o \ + omap_gpio.o omap_intc.o omap_uart.o obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o \ omap_gpmc.o omap_sdrc.o omap_spi.o omap_tap.o omap_l4.o obj-arm-y += omap_sx1.o palm.o tsc210x.o diff --git a/hw/omap1.c b/hw/omap1.c index 21c53fe..301eec5 100644 --- a/hw/omap1.c +++ b/hw/omap1.c @@ -1378,176 +1378,6 @@ static void omap_dpll_init(struct dpll_ctl_s *s, target_phys_addr_t base, cpu_register_physical_memory(base, 0x100, iomemtype); } -/* UARTs */ -struct omap_uart_s { -target_phys_addr_t base; -SerialState *serial; /* TODO */ -struct omap_target_agent_s *ta; -omap_clk fclk; -qemu_irq irq; - -uint8_t eblr; -uint8_t syscontrol; -uint8_t wkup; -uint8_t cfps; -uint8_t mdr[2]; -uint8_t scr; -uint8_t clksel; -}; - -void omap_uart_reset(struct omap_uart_s *s) -{ -s-eblr = 0x00; -s-syscontrol = 0; -s-wkup = 0x3f; -s-cfps = 0x69; -s-clksel = 0; -} - -struct omap_uart_s *omap_uart_init(target_phys_addr_t base, -qemu_irq irq, omap_clk fclk, omap_clk iclk, -qemu_irq txdma, qemu_irq rxdma, CharDriverState *chr) -{ -struct omap_uart_s *s = (struct omap_uart_s *) -qemu_mallocz(sizeof(struct omap_uart_s)); - -s-base = base; -s-fclk = fclk; -s-irq = irq; -#ifdef TARGET_WORDS_BIGENDIAN -s-serial = serial_mm_init(base, 2, irq, omap_clk_getrate(fclk)/16, - chr ?: qemu_chr_open(null, null, NULL), 1, - 1); -#else -s-serial = serial_mm_init(base, 2, irq, omap_clk_getrate(fclk)/16, - chr ?: qemu_chr_open(null, null, NULL), 1, - 0); -#endif -return s; -} - -static uint32_t omap_uart_read(void *opaque, target_phys_addr_t addr) -{ -struct omap_uart_s *s = (struct omap_uart_s *) opaque; - -addr = 0xff; -switch (addr) { -case 0x20: /* MDR1 */ -return s-mdr[0]; -case 0x24: /* MDR2 */ -return s-mdr[1]; -case 0x40: /* SCR */ -return s-scr; -case 0x44: /* SSR */ -return 0x0; -case 0x48: /* EBLR (OMAP2) */ -return s-eblr; -case 0x4C: /* OSC_12M_SEL (OMAP1) */ -return s-clksel; -case 0x50: /* MVR */ -return 0x30; -case 0x54: /* SYSC (OMAP2) */ -return s-syscontrol; -case 0x58: /* SYSS (OMAP2) */ -return 1; -case 0x5c: /* WER (OMAP2) */ -return s-wkup; -case 0x60: /* CFPS (OMAP2) */ -return s-cfps; -} - -OMAP_BAD_REG(addr); -return 0; -} - -static void omap_uart_write(void *opaque, target_phys_addr_t addr, -uint32_t value) -{ -struct omap_uart_s *s = (struct omap_uart_s *) opaque; - -addr = 0xff; -switch (addr) { -case 0x20: /* MDR1 */ -s-mdr[0] = value 0x7f; -break; -case 0x24: /* MDR2 */ -s-mdr[1] = value 0xff; -break; -case 0x40: /* SCR */ -s-scr = value 0xff; -break; -case 0x48: /* EBLR (OMAP2) */ -s-eblr = value 0xff; -break; -case 0x4C: /* OSC_12M_SEL (OMAP1) */ -s-clksel = value 1; -break; -case 0x44: /* SSR */ -case 0x50: /* MVR */ -case 0x58: /* SYSS (OMAP2) */ -OMAP_RO_REG(addr); -break; -case 0x54: /* SYSC (OMAP2) */ -s-syscontrol = value 0x1d; -if (value 2) -omap_uart_reset(s); -break; -case 0x5c: /* WER (OMAP2) */ -s-wkup = value 0x7f; -break; -case 0x60: /* CFPS (OMAP2) */ -s-cfps = value 0xff; -break; -default: -OMAP_BAD_REG(addr); -} -} - -static CPUReadMemoryFunc * const omap_uart_readfn[] = { -omap_uart_read, -omap_uart_read, -omap_badwidth_read8, -}; - -static CPUWriteMemoryFunc * const omap_uart_writefn[] = { -omap_uart_write, -omap_uart_write, -omap_badwidth_write8, -}; - -struct omap_uart_s *omap2_uart_init(struct omap_target_agent_s *ta, -qemu_irq irq, omap_clk fclk, omap_clk iclk, -
[Qemu-devel] [PATCH 08/12] hw/omap2.c : separate spi module
Signed-off-by: cmchao cmc...@gmail.com --- Makefile.target |2 +- hw/omap.h |2 + hw/omap2.c | 323 --- hw/omap_spi.c | 346 +++ 4 files changed, 349 insertions(+), 324 deletions(-) create mode 100644 hw/omap_spi.c diff --git a/Makefile.target b/Makefile.target index 9a309e2..1edec6f 100644 --- a/Makefile.target +++ b/Makefile.target @@ -264,7 +264,7 @@ obj-arm-y += pxa2xx_lcd.o pxa2xx_mmci.o pxa2xx_pcmcia.o pxa2xx_keypad.o obj-arm-y += gumstix.o obj-arm-y += zaurus.o ide/microdrive.o spitz.o tosa.o tc6393xb.o obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o omap_gpio.o omap_intc.o -obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o omap_gpmc.o omap_sdrc.o +obj-arm-y += omap2.o omap_dss.o soc_dma.o omap_gptimer.o omap_synctimer.o omap_gpmc.o omap_sdrc.o omap_spi.o obj-arm-y += omap_sx1.o palm.o tsc210x.o obj-arm-y += nseries.o blizzard.o onenand.o vga.o cbus.o tusb6010.o usb-musb.o obj-arm-y += mst_fpga.o mainstone.o diff --git a/hw/omap.h b/hw/omap.h index ea23ec9..fef495a 100644 --- a/hw/omap.h +++ b/hw/omap.h @@ -706,12 +706,14 @@ struct omap_uwire_s *omap_uwire_init(target_phys_addr_t base, void omap_uwire_attach(struct omap_uwire_s *s, uWireSlave *slave, int chipselect); +/* OMAP2 spi */ struct omap_mcspi_s; struct omap_mcspi_s *omap_mcspi_init(struct omap_target_agent_s *ta, int chnum, qemu_irq irq, qemu_irq *drq, omap_clk fclk, omap_clk iclk); void omap_mcspi_attach(struct omap_mcspi_s *s, uint32_t (*txrx)(void *opaque, uint32_t, int), void *opaque, int chipselect); +void omap_mcspi_reset(struct omap_mcspi_s *s); struct omap_rtc_s; struct omap_rtc_s *omap_rtc_init(target_phys_addr_t base, diff --git a/hw/omap2.c b/hw/omap2.c index e6d1b52..ae6394e 100644 --- a/hw/omap2.c +++ b/hw/omap2.c @@ -27,329 +27,6 @@ #include soc_dma.h #include audio/audio.h -/* Multichannel SPI */ -struct omap_mcspi_s { -qemu_irq irq; -int chnum; - -uint32_t sysconfig; -uint32_t systest; -uint32_t irqst; -uint32_t irqen; -uint32_t wken; -uint32_t control; - -struct omap_mcspi_ch_s { -qemu_irq txdrq; -qemu_irq rxdrq; -uint32_t (*txrx)(void *opaque, uint32_t, int); -void *opaque; - -uint32_t tx; -uint32_t rx; - -uint32_t config; -uint32_t status; -uint32_t control; -} ch[4]; -}; - -static inline void omap_mcspi_interrupt_update(struct omap_mcspi_s *s) -{ -qemu_set_irq(s-irq, s-irqst s-irqen); -} - -static inline void omap_mcspi_dmarequest_update(struct omap_mcspi_ch_s *ch) -{ -qemu_set_irq(ch-txdrq, -(ch-control 1)/* EN */ -(ch-config (1 14)) /* DMAW */ -(ch-status (1 1)) /* TXS */ -((ch-config 12) 3) != 1);/* TRM */ -qemu_set_irq(ch-rxdrq, -(ch-control 1)/* EN */ -(ch-config (1 15)) /* DMAW */ -(ch-status (1 0)) /* RXS */ -((ch-config 12) 3) != 2);/* TRM */ -} - -static void omap_mcspi_transfer_run(struct omap_mcspi_s *s, int chnum) -{ -struct omap_mcspi_ch_s *ch = s-ch + chnum; - -if (!(ch-control 1))/* EN */ -return; -if ((ch-status (1 0)) /* RXS */ -((ch-config 12) 3) != 2/* TRM */ -!(ch-config (1 19))) /* TURBO */ -goto intr_update; -if ((ch-status (1 1)) /* TXS */ -((ch-config 12) 3) != 1) /* TRM */ -goto intr_update; - -if (!(s-control 1) || /* SINGLE */ -(ch-config (1 20))) {/* FORCE */ -if (ch-txrx) -ch-rx = ch-txrx(ch-opaque, ch-tx, /* WL */ -1 + (0x1f (ch-config 7))); -} - -ch-tx = 0; -ch-status |= 1 2; /* EOT */ -ch-status |= 1 1; /* TXS */ -if (((ch-config 12) 3) != 2) /* TRM */ -ch-status |= 1 0; /* RXS */ - -intr_update: -if ((ch-status (1 0)) /* RXS */ -((ch-config 12) 3) != 2/* TRM */ -!(ch-config (1 19))) /* TURBO */ -s-irqst |= 1 (2 + 4 * chnum); /* RX_FULL */ -if ((ch-status (1 1)) /* TXS */ -((ch-config 12) 3) != 1) /* TRM */ -s-irqst |= 1 (0 + 4 * chnum); /* TX_EMPTY */ -omap_mcspi_interrupt_update(s); -
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. I will. Please do ASAP. This discussion shows that you don't understand what is the problem that we are dialing with. guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. I think one problem is that the guest has executed too much compared to what would happen with real HW with a lesser CPU. That explains the RTC frequency reprogramming case. You think wrong. The problem is exactly opposite: the guest haven't had enough execution time between two time interrupts. I don't know what RTC frequency reprogramming case you are talking about here. And even if the rate did not matter, the APIC woult still have to now about the fact that an IRQ is really periodic and does not only appear as such for a certain interval. This really does not sound like simplifying things or even make them cleaner. It would, the voodoo would be contained only in APIC, RTC would be just like any other device. With the bidirectional irqs, this voodoo would probably eventually spread to many other devices. The logical conclusion of that would be a system where all devices would be careful not to disturb the guest at wrong moment because that would trigger a bug. This voodoo will be so complex and unreliable that it will make RTC hack pale in comparison (and I still don't see how you are going to make it actually work). Implement everything inside APIC: only coalescing and reinjection. APIC has zero info needed to implement reinjection correctly as was shown to you several time in this thread and you simply keep ignoring it. On the contrary, APIC is actually the only source of the IRQ ack information. RTC hack would not work without APIC (or the bidirectional IRQ) passing this info to RTC. What APIC doesn't have now is the timer frequency or period info. This is known by RTC and also higher levels managing the clocks. So APIC has one bit of information and RTC everything else. The information known by RTC (timer period) is also known by higher levels. What do you mean by higher level here? vl.c or APIC. The current approach (and proposed patch) brings this one bit of information to RTC, you are arguing that RTC should be able to communicate all its info to APIC. Sorry I don't see that your way has any advantage. Just more complex interface and it is much easier to get it wrong for other time sources. I don't think anymore that APIC should be handling this but the generic stuff, like vl.c or exec.c. Then there would be only information passing from APIC to higher levels. Handling reinjection by general timer code makes infinitely more sense then handling it in APIC. One thing (from the top of my head) that can't be implemented at that level is injection of interrupt back to back (i.e injecting next interrupt immediately after guest acknowledge previous one to RTC). I keep ignoring the idea that the current model, where both RTC and APIC must somehow work together to make coalescing work, is the only possible just because it is committed and it happens to work in some cases. It would be much better to concentrate this to one place, APIC or preferably higher level where it may benefit other timers too. Provided of course that the other models can be made to work. So write the code and show us. You haven't show any evidence that RTC is the wrong place. RTC knows when interrupt was acknowledge to RTC, it know when clock frequency changes, it know when device reset happened. APIC knows only that
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: I still don't see how the alternative is supposed to simplify our life or improve the efficiency of the de-coalescing workaround. It's rather problematic like Gleb pointed out: The de-coalescing logic needs to be informed about periodicity changes that can only be delivered along IRQs. So what to do with the backlog when the timer is stopped? What happens with the current design? Gleb only mentioned the frequency change, I thought that was not so big problem. But I don't think this case should be allowed happen at all, it can't exist on real HW. Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP inside QEMU, connect with gdb to QEMU process and check what frequency RTC configured with (hint: it will be 64Hz), now run video inside the guest and check frequency again (hint: it will be 1Khz). -- Gleb.
Re: [Qemu-devel] [PATCH] virtio-blk: Avoid zeroing every request structure
Alexander Graf wrote: Anthony Liguori wrote: I'd prefer to stick to bug fixes for stable releases. Performance improvements are a good motivation for people to upgrade to 0.13 :-) In general I agree, but this one looks like a really simple one. Besides, there are too many reported guest regressions at the moment to upgrade if using any of them. -- Jamie
[Qemu-devel] Re: [SeaBIOS] SMBIOS strings
Jes Sorensen wrote: Hi, We were looking at the dmidecode output from qemu-kvm pre-seabios and current qemu-kvm and noticed some of the strings have changed. The main problem with this is that certain OSes are quite sensitive to system changes and avoiding to change things unnecessarily would probably be a good thing. Which OSes do care? Windows only? I wanted to check with the lists if there are any strong feelings about this, and whether some of these changes were made for specific reasons? For example: Handle 0x, DMI type 0, 24 bytes BIOS Information - Vendor: QEMU - Version: QEMU + Vendor: Bochs + Version: Bochs Release Date: 01/01/2007 Address: 0xE8000 Runtime Size: 96 kB You can configure this with CONFIG_APPNAME. and this: Handle 0x0401, DMI type 4, 32 bytes Processor Information - Socket Designation: CPU 1 + Socket Designation: CPU01 Type: Central Processor Family: Other - Manufacturer: QEMU - ID: 63 06 00 00 FD FB 8B 07 + Manufacturer: Bochs + ID: 23 06 00 00 FD FB 8B 07 Version: Not Specified Voltage: Unknown External Clock: Unknown I guess the Socket Designation in particular might have been done for a reason? Otherwise, if there are no objections, I'll look at adding some patches to make it more backwards compatible. Cheers, Jes Is the different ID displayed on the same VM configuration (esp. -cpu option) ? The value is gained by calling CPUID so it should not be different. Which pre-seabios qemu-kvm bios are you comparing to? Sebastian
Re: [Qemu-devel] [PATCH] target-ppc: remove useless line
Am 28.05.2010 um 21:00 schrieb Thomas Monjalon: From: Thomas Monjalon tho...@monjalon.net This line was a bit clear. The next lines set or reset this bit (LE) depending of another bit (ILE). So the first line is useless. Signed-off-by: Thomas Monjalon tho...@monjalon.net --- target-ppc/helper.c |1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/target-ppc/helper.c b/target-ppc/helper.c index 3d843b5..dabf1fd 100644 --- a/target-ppc/helper.c +++ b/target-ppc/helper.c @@ -2591,7 +2591,6 @@ static inline void powerpc_excp(CPUState *env, int excp_model, int excp) #if 0 /* Fix this: not on all targets */ new_msr = ~((target_ulong)1 MSR_PMM); #endif -new_msr = ~((target_ulong)1 MSR_LE); if (msr_ile) new_msr |= (target_ulong)1 MSR_LE; else Following line is new_msr = ~((target_ulong)1 MSR_LE); so this should be fine. Reviewed-by: Andreas Faerber andreas.faer...@web.de Andreas
Re: [Qemu-devel] Inquiry about qemu for Motorola 68360
qemu-system-m68k -cpu ? Sorry . Where to find executable 'qemu-system-m68k.exe ? Thank you
[Qemu-devel] Re: [PATCH 1/2] Pad iommu with an empty slot (necessary for SunOS 4.1.4)
On Fri, May 28, 2010 at 9:53 PM, Artyom Tarasenko atar4q...@googlemail.com wrote: 32m: 0x12fff394 64m: 0x14fff394 192m:0x1cfff394 256m:0x20fff394 Memory probing? It would be strange that OS would do it itself. The OS could just ask OBP how much does it have. Here is the listing where it happens: _swift_vac_rgnflush: rd %psr, %g2 _swift_vac_rgnflush+4: andn %g2, 0x20, %g5 _swift_vac_rgnflush+8: mov %g5, %psr _swift_vac_rgnflush+0xc: nop _swift_vac_rgnflush+0x10: nop _swift_vac_rgnflush+0x14: mov 0x100, %g5 _swift_vac_rgnflush+0x18: lda [%g5] 0x4, %g5 _swift_vac_rgnflush+0x1c: sll %o2, 0x2, %g1 _swift_vac_rgnflush+0x20: sll %g5, 0x4, %g5 _swift_vac_rgnflush+0x24: add %g5, %g1, %g5 _swift_vac_rgnflush+0x28: lda [%g5] 0x20, %g5 _swift_vac_rgnflush+0x28: is the fatal one. kadb $c _swift_vac_rgnflush(?) _vac_rgnflush() + 4 _hat_setup_kas(0xc00,0xf0447000,0x43a000,0x400,0xf043a000,0x3c0) + 70 _startup(0xfe00,0x1000,0xfa00,0xf00e2bfc,0x10,0xdbc00) + 1414 _main(0xf00e0fb4,0xf0007810,0x293ff49f,0xa805209c,0x200,0xf00d1d18) + 14 Unfortunately (but not surprisingly) kadb doesn't allow debugging cache-flush code, so I can't check what is in [%g5] (aka sfar) on the real machine when this happens. I was telling fairy tales here and no one stopped me. [%g5] is not sfar, it's the context pointer, so the code makes much more sense! And I guess, SunOS 4.1.4 is buggy. I've managed to reproduce the complete case on the real machine. The trick is to set the breakpoint before the interrupts are switched off: kadb _swift_vac_rgnflush:b kadb :c breakpoint _swift_vac_rgnflush: rd %psr, %g2 kadb o2=X 44000e5 kadb $q Type 'go' to resume Type help for more information ok 100 4 spacel@ . 3fff00 So at _swift_vac_rgnflush+0x28 it would access (44000e52) + (3fff00 4) = 14fff394. Which is outside of IOMMU. ok 14fff394 20 spacel@ . 3fe000 This seems to be an alias to ok 1404 20 spacel@ . 3fe000 So, it seems to be safe to pad iommu with an empty slot. I guess we are not missing anything more serious. Alternatively we can use your aliasing patch. What do you say? Thanks, applied. P.S. What is also interesting about SunOS 4.1.4 is that only the single-cpu kernel (which is used during the installation) calls _swift_vac_rgnflush on initialization. The smp kernel just doesn't have this call in _hat_setup_kas. Maybe they have noticed the bug and corrected it? -- Regards, Artyom Tarasenko solaris/sparc under qemu blog: http://tyom.blogspot.com/
Re: [Qemu-devel] Re: [PATCH v2 12/15] monitor: Add basic device state visualization
Avi Kivity a...@redhat.com writes: On 05/23/2010 10:57 AM, Jan Kiszka wrote: Avi Kivity wrote: On 05/22/2010 11:18 AM, Jan Kiszka wrote: From: Jan Kiszkajan.kis...@siemens.com This introduces device_show, a monitor command that saves the vmstate of a qdev device and visualizes it. QMP is also supported. Buffers are cut after 16 byte by default, but the full content can be requested via '-f'. To pretty-print sub-arrays, vmstate is extended to store the start index name. A new qerror is introduced to signal a missing vmstate. And it comes with documentation. + +Dump a snapshot of the device state. Buffers are cut after 16 bytes unless +a full dump is requested. + +Arguments: + +- path: the device's qtree path or unique ID (json-string) This may be ambiguous. Can your elaborate what precisely is ambiguous? Can't the user choose the unique ID so that it aliases an unrelated qtree path? I prefer having mutually exclusive 'path' and 'ref' arguments. Don't think that's necessary. If the string starts with '/', it's an absolute path. Else, it's a relative path rooted at the node with the ID equal to the first component. Currently breaks down when IDs contain '/', but permitting that is a bug. There may be more problems; the path lookup code is way too clever.
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 10:16 AM, Jan Kiszka jan.kis...@web.de wrote: Blue Swirl wrote: This is - according to my current understanding - the proposed alternative architecture: .---. | de-coalescing | | logic | '---' ^ | period,| |IRQ coalesced| |(or tuned VM clock) (yes/no)| v .---. .. .---. | RTC |-IRQ-| router |-IRQ| APIC | '---' '' '---' | ^ | ^ | | | | '---period---' '--period---' The period information is already known by the higher level clock management, The timer device models program the next event of some qemu-timer. There is no tag attached explaining the timer subsystem or anyone else the reason behind this programming. Yes, but why would we care? All timers in the system besides RTC should be affected likewise, this would in fact be the benefit from handling the problem at higher level. And how does your equation for calculating the clock slow-down look like? How about icount_adjust()?
Re: [Qemu-devel] cg14
2010/5/29 Bob Breuer breu...@mc.net: Artyom Tarasenko wrote: 2010/5/28 Blue Swirl blauwir...@gmail.com: On Fri, May 28, 2010 at 7:54 AM, Bob Breuer breu...@mc.net wrote: Artyom Tarasenko wrote: 2010/5/27 Bob Breuer breu...@mc.net: Artyom Tarasenko wrote: Was going to put some more empty slots into SS-10/20 (VSIMMs, SX) after we are done with SS-5 (due to technical limitations I can switch access from one real SS model to another one once a few days only). I have a partial implementation of the SS-20 VSIMM (cg14) that I've been working on. With the Sun firmware, I have working text console, color boot logo, and programmable video resolutions up to 1600x1280. Great news! This would allow qemu booting NeXTStep! Are you planning to submit the patches any time soon? It's not in a state to be submitted yet, but I've attached a working patch if you want to give it a try. I need to hook it up to qdev and fill in some more of the obviously incomplete switch cases before I'd sign off on it. Nice work. I have a few comments below. This probably needs support from OpenBIOS to be usable without OBP. Maybe it can be used as a second adapter without OpenBIOS support? At least under some OSes? Probably won't be used without at least being in the firmware device tree. One area that OpenBIOS could enhance would be a larger memory size option. The real hardware was only available in 4M and 8M options, but the memory map allows for 16M. OBP will identify a 16M VSIMM but won't do anything else with it, and with 16M of vram it would allow for a potential 2560x1600 32bit resolution. Bob diff --git a/Makefile.target b/Makefile.target index fda5bf3..b17b3af 100644 --- a/Makefile.target +++ b/Makefile.target @@ -250,6 +250,7 @@ else obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o +obj-sparc-y += cg14.o endif obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o diff --git a/hw/sun4m.c b/hw/sun4m.c index 7ba0f76..8b23c9b 100644 --- a/hw/sun4m.c +++ b/hw/sun4m.c @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size, fprintf(stderr, qemu: Unsupported depth: %d\n, graphic_depth); exit (1); } + if (hwdef-machine_id == 65) { /* SS-20 */ hwdef structure should contain a field for cg14. If non-zero, install cg14. Was cg14 only available for SS-20? Was it always included? This is also interesting for OpenBIOS, we need to detect cg14 vs. TCX. The cg14 was only an option for SS-20 and the rare SS-10SX, but not the regular SS-10, though the SS-10 chipset may have been capable of supporting it. Each cg14 vsimm takes the place of a stick of memory with 2 slots physically capable of holding a vsimm. The few SS-10 OBP versions I have probe for VSIMMs. So we need to put either empty_slot (when the -nographics option is used) or VSIMMs there. Is there a way to pass the framebuffer type and/or address to OpenBIOS? I would be inclined to have the SS-20 machine default to cg14 instead of TCX, but it will blow a hole in the support of more than 2G of emulated system ram. + /* cg14.c */ + void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t vram_base, + uint32_t vram_size); This should go to sun4m.h or cg14.h. + + cg14_init(0x09c00ULL, 0x0fc00ULL, 820); + } else Please add braces and reindent. tcx_init(hwdef-tcx_base, 0x0010, graphic_width, graphic_height, graphic_depth); --- /dev/null Fri May 28 02:08:36 2010 +++ hw/cg14.c Fri May 28 01:58:49 2010 @@ -0,0 +1,785 @@ +/* + * QEMU CG14 Frame buffer + * + * Copyright (c) 2010 Bob Breuer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +
Re: [Qemu-devel] [PATCH] Compile dma only once
On Fri, May 28, 2010 at 7:34 PM, Paul Brook p...@codesourcery.com wrote: Use a qemu_irq to request CPU exit. Needing to request a CPU exit at all is just wrong. See previous discussions about how any use of qemu_bh_schedule_idle is fundamentally broken. I agree for the device case. Is the attached patch then OK? But what about other uses (with the patch applied): User emulator signal delivery: /src/qemu/darwin-user/signal.c:216:cpu_exit(global_env); /src/qemu/linux-user/signal.c:507:cpu_exit(thread_env); qemu_notify_event(): /src/qemu/cpus.c:286:cpu_exit(env); /src/qemu/cpus.c:289:cpu_exit(next_cpu); Is that broken too and should be removed? cpu_signal(): /src/qemu/cpus.c:531:cpu_exit(cpu_single_env); vm_stop(): /src/qemu/cpus.c:733:cpu_exit(cpu_single_env); KVM IO window exit: /src/qemu/kvm-all.c:859:cpu_exit(env); Some exclusive ARM operation: /src/qemu/linux-user/main.c:152:cpu_exit(other); ARM/m68k semihosting: /src/qemu/gdbstub.c:2296:cpu_exit(s-c_cpu); From 12940e4bf57af4801ffc209095b6adcc0693320f Mon Sep 17 00:00:00 2001 From: Blue Swirl blauwirbel@gmail.com Date: Sat, 29 May 2010 07:59:40 + Subject: [PATCH] dma: remove DMA_schedule and related cpu_request_exit irq It's wrong for devices to request cpu_exit. Remove DMA_schedule and cpu_request_exit irq, thus partially reverting 4556bd8b2514a55d48c15b1adb17537f49657744. Signed-off-by: Blue Swirl blauwirbel@gmail.com --- hw/dma.c| 19 --- hw/fdc.c|1 - hw/isa.h|3 +-- hw/mips_jazz.c | 13 + hw/mips_malta.c | 13 + hw/pc.c | 13 + hw/ppc_prep.c | 13 + hw/sun4m.c |1 - hw/sun4u.c |1 - 9 files changed, 9 insertions(+), 68 deletions(-) diff --git a/hw/dma.c b/hw/dma.c index 5b21521..1015b41 100644 --- a/hw/dma.c +++ b/hw/dma.c @@ -57,7 +57,6 @@ static struct dma_cont { uint8_t flip_flop; int dshift; struct dma_regs regs[4]; -qemu_irq *cpu_request_exit; } dma_controllers[2]; enum { @@ -442,14 +441,6 @@ int DMA_write_memory (int nchan, void *buf, int pos, int len) return len; } -/* request the emulator to transfer a new DMA memory block ASAP */ -void DMA_schedule(int nchan) -{ -struct dma_cont *d = dma_controllers[nchan 3]; - -qemu_irq_pulse(*d-cpu_request_exit); -} - static void dma_reset(void *opaque) { struct dma_cont *d = opaque; @@ -465,14 +456,12 @@ static int dma_phony_handler (void *opaque, int nchan, int dma_pos, int dma_len) /* dshift = 0: 8 bit DMA, 1 = 16 bit DMA */ static void dma_init2(struct dma_cont *d, int base, int dshift, - int page_base, int pageh_base, - qemu_irq *cpu_request_exit) + int page_base, int pageh_base) { static const int page_port_list[] = { 0x1, 0x2, 0x3, 0x7 }; int i; d-dshift = dshift; -d-cpu_request_exit = cpu_request_exit; for (i = 0; i 8; i++) { register_ioport_write (base + (i dshift), 1, 1, write_chan, d); register_ioport_read (base + (i dshift), 1, 1, read_chan, d); @@ -542,12 +531,12 @@ static const VMStateDescription vmstate_dma = { } }; -void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit) +void DMA_init(int high_page_enable) { dma_init2(dma_controllers[0], 0x00, 0, 0x80, - high_page_enable ? 0x480 : -1, cpu_request_exit); + high_page_enable ? 0x480 : -1); dma_init2(dma_controllers[1], 0xc0, 1, 0x88, - high_page_enable ? 0x488 : -1, cpu_request_exit); + high_page_enable ? 0x488 : -1); vmstate_register (0, vmstate_dma, dma_controllers[0]); vmstate_register (1, vmstate_dma, dma_controllers[1]); diff --git a/hw/fdc.c b/hw/fdc.c index 6306496..d4505b4 100644 --- a/hw/fdc.c +++ b/hw/fdc.c @@ -1174,7 +1174,6 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) * recall us... */ DMA_hold_DREQ(fdctrl-dma_chann); -DMA_schedule(fdctrl-dma_chann); return; } else { FLOPPY_ERROR(dma_mode=%d direction=%d\n, dma_mode, direction); diff --git a/hw/isa.h b/hw/isa.h index aaf0272..9681de1 100644 --- a/hw/isa.h +++ b/hw/isa.h @@ -40,8 +40,7 @@ int DMA_read_memory (int nchan, void *buf, int pos, int size); int DMA_write_memory (int nchan, void *buf, int pos, int size); void DMA_hold_DREQ (int nchan); void DMA_release_DREQ (int nchan); -void DMA_schedule(int nchan); -void DMA_init(int high_page_enable, qemu_irq *cpu_request_exit); +void DMA_init(int high_page_enable); void DMA_register_channel (int nchan, DMA_transfer_handler transfer_handler, void *opaque); diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c index ead3a00..6e0ec8f 100644 --- a/hw/mips_jazz.c +++ b/hw/mips_jazz.c @@ -114,15
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: This is - according to my current understanding - the proposed alternative architecture: .---. | de-coalescing | | logic | '---' ^ | period,| |IRQ coalesced| |(or tuned VM clock) (yes/no)| v .---. .. .---. | RTC |-IRQ-| router |-IRQ| APIC | '---' '' '---' |^| ^ ||| | '---period---''--period---' The period information is already known by the higher level clock management, The timer device models program the next event of some qemu-timer. There is no tag attached explaining the timer subsystem or anyone else the reason behind this programming. Yes, but why would we care? All timers in the system besides RTC should be affected likewise, this would in fact be the benefit from handling the problem at higher level. And how does your equation for calculating the clock slow-down look like? Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Gleb Natapov wrote: On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: 2010/5/28 Gleb Natapov g...@redhat.com: On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: 2010/5/27 Gleb Natapov g...@redhat.com: On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote: Blue Swirl wrote: On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de wrote: Anthony Liguori wrote: On 05/25/2010 02:09 PM, Blue Swirl wrote: On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de wrote: From: Jan Kiszkajan.kis...@siemens.com This allows to communicate potential IRQ coalescing during delivery from the sink back to the source. Targets that support IRQ coalescing workarounds need to register handlers that return the appropriate QEMU_IRQ_* code, and they have to propergate the code across all IRQ redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, it can apply its workaround. If multiple sinks exist, the source may only consider an IRQ coalesced if all other sinks either report QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. No real devices are interested whether any of their output lines are even connected. This would introduce a new signal type, bidirectional multi-level, which is not correct. I don't think it's really an issue of correct, but I wouldn't disagree to a suggestion that we ought to introduce a new signal type for this type of bidirectional feedback. Maybe it's qemu_coalesced_irq and has a similar interface as qemu_irq. A separate type would complicate the delivery of the feedback value across GPIO pins (as Paul requested for the RTC-HPET routing). I think the real solution to coalescing is put the logic inside one device, in this case APIC because it has the information about irq delivery. APIC could monitor incoming RTC irqs for frequency information and whether they get delivered or not. If not, an internal timer is installed which injects the lost irqs. That won't fly as the IRQs will already arrive at the APIC with a sufficiently high jitter. At the bare minimum, you need to tell the interrupt controller about the fact that a particular IRQ should be delivered at a specific regular rate. For this, you also need a generic interface - nothing really won. OK, let's simplify: just reinject at next possible chance. No need to monitor or tell anything. There are guests that won't like this (I know of one in-house, but others may even have more examples), specifically if you end up firing multiple IRQs in a row due to a longer backlog. For that reason, the RTC spreads the reinjection according to the current rate. Then reinject with a constant delay, or next CPU exit. Such buggy If guest's time frequency is the same as host time frequency you can't reinject with constant delay. That is why current code mixes two approaches: reinject M interrupts in a raw then delay. This approach can be also used by APIC-only version. I don't know what APIC-only version you are talking about. I haven't seen the code and I don't understand hand waving, sorry. There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. And even if the rate did not matter, the APIC woult still have to now about the fact that an IRQ is really periodic and does not only appear as such for a certain interval. This really does not sound like simplifying things or even make them cleaner. It would, the voodoo would be contained only in APIC, RTC would be just like any other device. With the bidirectional irqs, this voodoo would probably eventually spread to many other devices. The logical conclusion of that would be a
[Qemu-devel] [PATCH 0/3] [RFC] Threaded vnc server
Hi, This series add a threaded VNC server and should be applied on top on my previous patch set (adding tight encoding). The first two patchs add some functions to qemu-thread. The last is the threaded VNC server and the changelog explains how it works. refs: http://xf.iksaif.net/blog/index.php?post/2010/05/28/QEMU%3A-Threaded-VNC-Server-results Thanks Corentin Chary (3): qemu-thread: add qemu_mutex/cond_destroy and qemu_mutex_exit qemu-thread: add cleanup_push() and cleanup_pop() vnc: threaded VNC server Makefile|4 + Makefile.objs |7 +- configure | 13 ++ qemu-thread.c | 22 qemu-thread.h |8 ++ vnc-jobs-sync.c | 70 vnc-jobs.c | 328 +++ vnc.c | 161 +++ vnc.h | 73 9 files changed, 663 insertions(+), 23 deletions(-) create mode 100644 vnc-jobs-sync.c create mode 100644 vnc-jobs.c
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: 2010/5/29 Jan Kiszka jan.kis...@web.de: Gleb Natapov wrote: On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: 2010/5/28 Gleb Natapov g...@redhat.com: On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: 2010/5/27 Gleb Natapov g...@redhat.com: On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote: Blue Swirl wrote: On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de wrote: Anthony Liguori wrote: On 05/25/2010 02:09 PM, Blue Swirl wrote: On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de wrote: From: Jan Kiszkajan.kis...@siemens.com This allows to communicate potential IRQ coalescing during delivery from the sink back to the source. Targets that support IRQ coalescing workarounds need to register handlers that return the appropriate QEMU_IRQ_* code, and they have to propergate the code across all IRQ redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, it can apply its workaround. If multiple sinks exist, the source may only consider an IRQ coalesced if all other sinks either report QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. No real devices are interested whether any of their output lines are even connected. This would introduce a new signal type, bidirectional multi-level, which is not correct. I don't think it's really an issue of correct, but I wouldn't disagree to a suggestion that we ought to introduce a new signal type for this type of bidirectional feedback. Maybe it's qemu_coalesced_irq and has a similar interface as qemu_irq. A separate type would complicate the delivery of the feedback value across GPIO pins (as Paul requested for the RTC-HPET routing). I think the real solution to coalescing is put the logic inside one device, in this case APIC because it has the information about irq delivery. APIC could monitor incoming RTC irqs for frequency information and whether they get delivered or not. If not, an internal timer is installed which injects the lost irqs. That won't fly as the IRQs will already arrive at the APIC with a sufficiently high jitter. At the bare minimum, you need to tell the interrupt controller about the fact that a particular IRQ should be delivered at a specific regular rate. For this, you also need a generic interface - nothing really won. OK, let's simplify: just reinject at next possible chance. No need to monitor or tell anything. There are guests that won't like this (I know of one in-house, but others may even have more examples), specifically if you end up firing multiple IRQs in a row due to a longer backlog. For that reason, the RTC spreads the reinjection according to the current rate. Then reinject with a constant delay, or next CPU exit. Such buggy If guest's time frequency is the same as host time frequency you can't reinject with constant delay. That is why current code mixes two approaches: reinject M interrupts in a raw then delay. This approach can be also used by APIC-only version. I don't know what APIC-only version you are talking about. I haven't seen the code and I don't understand hand waving, sorry. There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. And even if the rate did not matter, the APIC woult still have to now about the fact that an IRQ is really periodic and does not only appear as such for a certain interval. This really does not sound like simplifying things or even make them cleaner. It would, the voodoo would be contained only in APIC, RTC would be just like any other device. With the bidirectional irqs, this voodoo would probably eventually spread
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 9:45 AM, Jan Kiszka jan.kis...@web.de wrote: Blue Swirl wrote: 2010/5/29 Jan Kiszka jan.kis...@web.de: Gleb Natapov wrote: On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: 2010/5/28 Gleb Natapov g...@redhat.com: On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: 2010/5/27 Gleb Natapov g...@redhat.com: On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote: Blue Swirl wrote: On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de wrote: Anthony Liguori wrote: On 05/25/2010 02:09 PM, Blue Swirl wrote: On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de wrote: From: Jan Kiszkajan.kis...@siemens.com This allows to communicate potential IRQ coalescing during delivery from the sink back to the source. Targets that support IRQ coalescing workarounds need to register handlers that return the appropriate QEMU_IRQ_* code, and they have to propergate the code across all IRQ redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, it can apply its workaround. If multiple sinks exist, the source may only consider an IRQ coalesced if all other sinks either report QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. No real devices are interested whether any of their output lines are even connected. This would introduce a new signal type, bidirectional multi-level, which is not correct. I don't think it's really an issue of correct, but I wouldn't disagree to a suggestion that we ought to introduce a new signal type for this type of bidirectional feedback. Maybe it's qemu_coalesced_irq and has a similar interface as qemu_irq. A separate type would complicate the delivery of the feedback value across GPIO pins (as Paul requested for the RTC-HPET routing). I think the real solution to coalescing is put the logic inside one device, in this case APIC because it has the information about irq delivery. APIC could monitor incoming RTC irqs for frequency information and whether they get delivered or not. If not, an internal timer is installed which injects the lost irqs. That won't fly as the IRQs will already arrive at the APIC with a sufficiently high jitter. At the bare minimum, you need to tell the interrupt controller about the fact that a particular IRQ should be delivered at a specific regular rate. For this, you also need a generic interface - nothing really won. OK, let's simplify: just reinject at next possible chance. No need to monitor or tell anything. There are guests that won't like this (I know of one in-house, but others may even have more examples), specifically if you end up firing multiple IRQs in a row due to a longer backlog. For that reason, the RTC spreads the reinjection according to the current rate. Then reinject with a constant delay, or next CPU exit. Such buggy If guest's time frequency is the same as host time frequency you can't reinject with constant delay. That is why current code mixes two approaches: reinject M interrupts in a raw then delay. This approach can be also used by APIC-only version. I don't know what APIC-only version you are talking about. I haven't seen the code and I don't understand hand waving, sorry. There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. And even if the rate did not matter, the APIC woult still have to now about the fact that an IRQ is really periodic and does not only appear as such for a certain interval. This really does not sound like simplifying things or even make them cleaner. It would, the voodoo would be contained only in APIC, RTC would be just like any other
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: On the contrary, APIC is actually the only source of the IRQ ack information. RTC hack would not work without APIC (or the bidirectional IRQ) passing this info to RTC. What APIC doesn't have now is the timer frequency or period info. This is known by RTC and also higher levels managing the clocks. So APIC has one bit of information and RTC everything else. The information known by RTC (timer period) is also known by higher levels. Curious to see where you'll find this. The current approach (and proposed patch) brings this one bit of information to RTC, you are arguing that RTC should be able to communicate all its info to APIC. Sorry I don't see that your way has any advantage. Just more complex interface and it is much easier to get it wrong for other time sources. I don't think anymore that APIC should be handling this but the generic stuff, like vl.c or exec.c. Then there would be only information passing from APIC to higher levels. You neglect the the information required to associate a periodic source (e.g. RTC) with an IRQ sink (e.g. APIC). Without that, you will have a hard time figuring out if a reported IRQ coalescing requires any activities or should simply be welcomed (for I/O IRQs). I keep ignoring the idea that the current model, where both RTC and APIC must somehow work together to make coalescing work, is the only possible just because it is committed and it happens to work in some cases. It would be much better to concentrate this to one place, APIC or preferably higher level where it may benefit other timers too. Provided of course that the other models can be made to work. So write the code and show us. You haven't show any evidence that RTC is the wrong place. RTC knows when interrupt was acknowledge to RTC, it know when clock frequency changes, it know when device reset happened. APIC knows only that interrupt was coalesced. It doesn't even know that it may be masked by a guest in IOAPIC (interrupts delivered while they are masked not considered coalesced). Oh, I thought interrupt masking was the reason for coalescing! What exactly is the reason then? Missing acks, ie. the IRQ is still pending when the next one arrives. You want to filter out masked/suppressed IRQs to avoid running the de-coalescing logic on sources that are actually cut off (like the RTC IRQ when the HPET took over). Time source knows only when frequency changes and may be when device reset happens if timer is stopped by device on reset. So RTC is actually a sweet spot if you want to minimize amount of info you need to pass between various layers. Maybe that version would not bend backwards as much as the current to cater for buggy hosts. You mean buggy guests? Yes, sorry. What guests are not buggy in your opinion? Linux tries hard to be smart and as a result the only way to have stable clock with it is to go paravirt. I'm not an OS designer, but I think an OS should never crash, even if a burst of IRQs is received. Reprogramming the timer should consider the pending IRQ situation (0 or 1 with real HW). Those bugs are one cause of the problem. OS should never crash in the absence of HW bugs? I doubt you can design an OS that can run in a face of any HW failure. Anyway here we are trying to solve guests time keeping problem not crashes. Do you think you can design OS that can keep time accurately no matter how crazy all HW clock behaves? I think my OS design skills are not relevant in this discussion, but IIRC there are fault tolerant operating systems for extreme conditions so it can be done. No one can influence the design of released OS versions anymore. The fact is that timer device is not just like any other device in virtual world. Any other device is easy: you just implement spec as close as possible and everything works. For time source device this is not enough. You can implement RTC+HPET to the letter and your guest will drift like crazy. It's doable: a cycle accurate emulator will not cause any drift, without any voodoo. The interrupts would come after executing the same instruction as the real HW. For emulating any sufficiently buggy guests in any sufficiently desperate low resource conditions, this may be the only option that will always work. Yes, but qemu and kvm are not cycle accurate emulators and don't strive to be one. On the contrary KVM runs at native host CPU speed most of the time, so any emulation done between two instruction is theoretically noticeable for a guest. TSC is bypassed directly to a guest too, so keeping all time source in perfect sync is also impossible. That is actually another cause of the problem. KVM gives the guest an illusion that the VCPU speed is equal to host speed. When they don't match, especially in critical code, there can be problems. It would be better to tell the guest a lower speed, which also can be guaranteed. Not possible. It's that simple.
Re: [Qemu-devel] [PATCH] Name the default PCI bus pci.0 on all architectures
Paul Brook p...@codesourcery.com writes: The system emulators for each arch are using inconsistent naming for the default PCI bus pci vs pci.0. Since it is conceivable we'll have multiple PCI buses in the future standardize on pci.0 for all architectures. This ensures mgmt apps can rely on a name when assigning PCI devices an address on the bus using eg '-device e1000,bus=pci.0,addr=3' No. Bus names are local to the parent device. None of the host bridges support multiple bridges, so the .0 suffix makes no sense. The parent device has no idea whether it owns the default pci bus or not. If you have multiple PCI busses then you can identify them by the device path. From qbus_create_inplace(): if (name) { /* use supplied name */ bus-name = qemu_strdup(name); } else if (parent parent-id) { /* parent device has id - use it for bus name */ len = strlen(parent-id) + 16; buf = qemu_malloc(len); snprintf(buf, len, %s.%d, parent-id, parent-num_child_bus); bus-name = buf; } else { /* no id - use lowercase bus type for bus name */ len = strlen(info-name) + 16; buf = qemu_malloc(len); len = snprintf(buf, len, %s.%d, info-name, parent ? parent-num_child_bus : 0); for (i = 0; i len; i++) buf[i] = qemu_tolower(buf[i]); bus-name = buf; } If appending .0 really makes no sense when the device has just one bus, then we shouldn't append it in cases 2 3. But I'd simply append it always. One bus is just as countable as many.
Re: [Qemu-devel] [PATCH] qemu-io: Fix error messages
On Fri, May 28, 2010 at 08:15:04PM +0200, Kevin Wolf wrote: The truncate and getlength commands passed a negative error number to strerror. They also happen to be the two functions that are lacking a newline at the end of their error message. Signed-off-by: Kevin Wolf kw...@redhat.com Ok, Reviewed-by: Christoph Hellwig h...@lst.de
Re: [Qemu-devel] cg14
On Sat, May 29, 2010 at 5:15 AM, Bob Breuer breu...@mc.net wrote: Artyom Tarasenko wrote: 2010/5/28 Blue Swirl blauwir...@gmail.com: On Fri, May 28, 2010 at 7:54 AM, Bob Breuer breu...@mc.net wrote: Artyom Tarasenko wrote: 2010/5/27 Bob Breuer breu...@mc.net: Artyom Tarasenko wrote: Was going to put some more empty slots into SS-10/20 (VSIMMs, SX) after we are done with SS-5 (due to technical limitations I can switch access from one real SS model to another one once a few days only). I have a partial implementation of the SS-20 VSIMM (cg14) that I've been working on. With the Sun firmware, I have working text console, color boot logo, and programmable video resolutions up to 1600x1280. Great news! This would allow qemu booting NeXTStep! Are you planning to submit the patches any time soon? It's not in a state to be submitted yet, but I've attached a working patch if you want to give it a try. I need to hook it up to qdev and fill in some more of the obviously incomplete switch cases before I'd sign off on it. Nice work. I have a few comments below. This probably needs support from OpenBIOS to be usable without OBP. Maybe it can be used as a second adapter without OpenBIOS support? At least under some OSes? Probably won't be used without at least being in the firmware device tree. One area that OpenBIOS could enhance would be a larger memory size option. The real hardware was only available in 4M and 8M options, but the memory map allows for 16M. OBP will identify a 16M VSIMM but won't do anything else with it, and with 16M of vram it would allow for a potential 2560x1600 32bit resolution. Awesome, PC with Cirrus VGA can't compete. Bob diff --git a/Makefile.target b/Makefile.target index fda5bf3..b17b3af 100644 --- a/Makefile.target +++ b/Makefile.target @@ -250,6 +250,7 @@ else obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o +obj-sparc-y += cg14.o endif obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o diff --git a/hw/sun4m.c b/hw/sun4m.c index 7ba0f76..8b23c9b 100644 --- a/hw/sun4m.c +++ b/hw/sun4m.c @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size, fprintf(stderr, qemu: Unsupported depth: %d\n, graphic_depth); exit (1); } + if (hwdef-machine_id == 65) { /* SS-20 */ hwdef structure should contain a field for cg14. If non-zero, install cg14. Was cg14 only available for SS-20? Was it always included? This is also interesting for OpenBIOS, we need to detect cg14 vs. TCX. The cg14 was only an option for SS-20 and the rare SS-10SX, but not the regular SS-10, though the SS-10 chipset may have been capable of supporting it. Each cg14 vsimm takes the place of a stick of memory with 2 slots physically capable of holding a vsimm. Is there a way to pass the framebuffer type and/or address to OpenBIOS? Currently there's fw_cfg and we can also set NVRAM variables. I would be inclined to have the SS-20 machine default to cg14 instead of TCX, but it will blow a hole in the support of more than 2G of emulated system ram. I'm not so worried about the RAM hole, the real machines didn't have so much RAM and we can also always add more RAM at some other location. But the machine default is a bit of a problem because we would not be compatible to old versions. Adding a new machine for each new configuration does not sound viable in longer term either. In this case it makes a lot of sense for SS-10SX and we could deprecate SS-20 with TCX. A generic solution for major device changes could be to pass a device tree (FDT), like some KVM PPC machines do. Another solution, which would give most realistic emulation, would be to add FCode ROMs which is used to probe and add the devices by OBP or OpenBIOS. + /* cg14.c */ + void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t vram_base, + uint32_t vram_size); This should go to sun4m.h or cg14.h. + + cg14_init(0x09c00ULL, 0x0fc00ULL, 820); + } else Please add braces and reindent. tcx_init(hwdef-tcx_base, 0x0010, graphic_width, graphic_height, graphic_depth); --- /dev/null Fri May 28 02:08:36 2010 +++ hw/cg14.c Fri May 28 01:58:49 2010 @@ -0,0 +1,785 @@ +/* + * QEMU CG14 Frame buffer + * + * Copyright (c) 2010 Bob Breuer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following
[Qemu-devel] Re: [PATCH v3 0/3]: QMP: Commands doc
Luiz Capitulino wrote: This new version moves the documentation to qemu-monitor.hx and now QMP/qmp-commands.txt is generated from there (thanks Jan!). I hope I've addressed all review comments in this version and now it should describe reality. Next step is to fix glitches (after this series is merged, of course). This is a fragile series, breaking quickly when something changes in qemu-monitor.hx (as just happened). Can we get this rebased and merged ASAP? Thanks, Jan changelog - v2 - v3 - Rebased - Addressed review comments - Move contents to qemu-monitor.hx (Jan) v1 - v2 - Rebased - Addressed Markus's comments - Changed indentation style - Minor fixes and clarifications signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
2010/5/28 Gleb Natapov g...@redhat.com: On Fri, May 28, 2010 at 08:06:45PM +, Blue Swirl wrote: 2010/5/28 Gleb Natapov g...@redhat.com: On Thu, May 27, 2010 at 06:37:10PM +, Blue Swirl wrote: 2010/5/27 Gleb Natapov g...@redhat.com: On Wed, May 26, 2010 at 08:35:00PM +, Blue Swirl wrote: On Wed, May 26, 2010 at 8:09 PM, Jan Kiszka jan.kis...@web.de wrote: Blue Swirl wrote: On Tue, May 25, 2010 at 9:44 PM, Jan Kiszka jan.kis...@web.de wrote: Anthony Liguori wrote: On 05/25/2010 02:09 PM, Blue Swirl wrote: On Mon, May 24, 2010 at 8:13 PM, Jan Kiszkajan.kis...@web.de wrote: From: Jan Kiszkajan.kis...@siemens.com This allows to communicate potential IRQ coalescing during delivery from the sink back to the source. Targets that support IRQ coalescing workarounds need to register handlers that return the appropriate QEMU_IRQ_* code, and they have to propergate the code across all IRQ redirections. If the IRQ source receives a QEMU_IRQ_COALESCED, it can apply its workaround. If multiple sinks exist, the source may only consider an IRQ coalesced if all other sinks either report QEMU_IRQ_COALESCED as well or QEMU_IRQ_MASKED. No real devices are interested whether any of their output lines are even connected. This would introduce a new signal type, bidirectional multi-level, which is not correct. I don't think it's really an issue of correct, but I wouldn't disagree to a suggestion that we ought to introduce a new signal type for this type of bidirectional feedback. Maybe it's qemu_coalesced_irq and has a similar interface as qemu_irq. A separate type would complicate the delivery of the feedback value across GPIO pins (as Paul requested for the RTC-HPET routing). I think the real solution to coalescing is put the logic inside one device, in this case APIC because it has the information about irq delivery. APIC could monitor incoming RTC irqs for frequency information and whether they get delivered or not. If not, an internal timer is installed which injects the lost irqs. That won't fly as the IRQs will already arrive at the APIC with a sufficiently high jitter. At the bare minimum, you need to tell the interrupt controller about the fact that a particular IRQ should be delivered at a specific regular rate. For this, you also need a generic interface - nothing really won. OK, let's simplify: just reinject at next possible chance. No need to monitor or tell anything. There are guests that won't like this (I know of one in-house, but others may even have more examples), specifically if you end up firing multiple IRQs in a row due to a longer backlog. For that reason, the RTC spreads the reinjection according to the current rate. Then reinject with a constant delay, or next CPU exit. Such buggy If guest's time frequency is the same as host time frequency you can't reinject with constant delay. That is why current code mixes two approaches: reinject M interrupts in a raw then delay. This approach can be also used by APIC-only version. I don't know what APIC-only version you are talking about. I haven't seen the code and I don't understand hand waving, sorry. There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. I will. guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. I think one problem is that the guest has executed too much compared to what would happen with real HW with a lesser CPU. That explains the RTC frequency reprogramming case. And even if the rate
Re: [Qemu-devel] Re: [PATCH v2 12/15] monitor: Add basic device state visualization
Markus Armbruster wrote: Avi Kivity a...@redhat.com writes: On 05/23/2010 10:57 AM, Jan Kiszka wrote: Avi Kivity wrote: On 05/22/2010 11:18 AM, Jan Kiszka wrote: From: Jan Kiszkajan.kis...@siemens.com This introduces device_show, a monitor command that saves the vmstate of a qdev device and visualizes it. QMP is also supported. Buffers are cut after 16 byte by default, but the full content can be requested via '-f'. To pretty-print sub-arrays, vmstate is extended to store the start index name. A new qerror is introduced to signal a missing vmstate. And it comes with documentation. + +Dump a snapshot of the device state. Buffers are cut after 16 bytes unless +a full dump is requested. + +Arguments: + +- path: the device's qtree path or unique ID (json-string) This may be ambiguous. Can your elaborate what precisely is ambiguous? Can't the user choose the unique ID so that it aliases an unrelated qtree path? I prefer having mutually exclusive 'path' and 'ref' arguments. Don't think that's necessary. If the string starts with '/', it's an absolute path. Else, it's a relative path rooted at the node with the ID equal to the first component. 'pci.0' can be both a (badly chosen) ID as well as an abbreviated qtree path. Currently breaks down when IDs contain '/', but permitting that is a bug. There may be more problems; the path lookup code is way too clever. Indeed. Less can sometimes be more. My impression is that some of the cleverness was motivated by lacking qtree completion for the HMP. Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH v2 03/15] qdev: Allow device addressing via 'driver.instance'
Markus Armbruster wrote: Jan Kiszka jan.kis...@web.de writes: From: Jan Kiszka jan.kis...@siemens.com Extend qbus_find_dev to allow addressing of devices without an unique id via an optional per-bus instance number. The new formats are 'driver.instance' and 'alias.instance'. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- docs/qdev-device-use.txt | 12 +++- hw/qdev.c| 23 ++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt index 9ac1fa1..5939481 100644 --- a/docs/qdev-device-use.txt +++ b/docs/qdev-device-use.txt @@ -1,6 +1,6 @@ = How to convert to -device friends = -=== Specifying Bus and Address on Bus === +=== Specifying Bus, Address on Bus, and Devices === In qdev, each device has a parent bus. Some devices provide one or more buses for children. You can specify a device's parent bus with @@ -24,6 +24,16 @@ Furthermore, if a device only hosts a single bus, the bus name can be omitted in the path. Example: /i440FX-pcihost/PIIX3 abbreviates /i440FX-pcihost/pci.0/PIIX3/isa.0 as none of the buses has siblings. +Existing devices can be addressed either via a unique ID if it was +assigned during creation or via the device tree path: + +/full_bus_address/driver_name[.instance_number] +or +abbreviated_bus_address/driver_name[.instance_number] + +Example: /i440FX-pcihost/pci.0/e1000.2 addresses the second e1000 +adapter on the bus 'pci.0'. + Note: the USB device address can't be controlled at this time. instance number isn't defined in this document. True, only implicitly via the example. I understand the problem you're trying to solve; I've had it myself many times. But is inventing an instance number the right solution? The two e1000 devices already have a perfectly fine unique identifier on their bus: their bus address. What about recognizing bus addresses in paths? Requires a suitable restriction on device names and IDs to avoid ambiguity, say start with letter. qdev currently doesn't abstract bus addresses, but that's fixable. You would also have to specify unique addressing scheme to those buses that do not have it yet. E.g. what should be the address of a ISA bus device? The base of its first ioport range? But if it does not have any as it only injects ISA IRQs? Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [PATCH v3 13/17] monitor: Allow to exclude commands from QMP
Markus Armbruster wrote: Luiz Capitulino lcapitul...@redhat.com writes: On Sun, 23 May 2010 12:59:26 +0200 Jan Kiszka jan.kis...@web.de wrote: From: Jan Kiszka jan.kis...@siemens.com Ported commands that are marked 'user_only' will not be considered for QMP monitor sessions. This allows to implement new commands that do not (yet) provide a sufficiently stable interface for QMP use (e.g. device_show). This is fine for me, but two things I've been wondering: 1. Isn't a 'flags' struct member better? So that we can do (in the qemu-monitor.hx entry): .flags = MONITOR_USER_ONLY | MONITOR_HANDLER_ASYNC, I'm not suggesting this is an async handler, just exemplifying multiple flags. We also have at least one command that makes only sense in QMP: qmp_capabilities. Maybe we could use separate flags controlling command availability in human monitor and QMP. Sounds reasonable. Then I think I will lay the ground for this by introducing flags already in this patch. A v4 run is required anyway. Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] [PATCH v3 12/17] monitor: return length of printed string via monitor_[v]printf
Jan Kiszka jan.kis...@web.de writes: From: Jan Kiszka jan.kis...@siemens.com This simply forwards the result of the internal vsnprintf to the callers of monitor_printf and monitor_vprintf. When invoked over a QMP session or in absence of an active monitor, -1 is returned. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Appreciated! Opens the door to addressing the TODOs in qemu-error.c.
Re: [Qemu-devel] Re: [PATCH v3 06/17] qdev: Allow device specification by qtree path for device_del
Markus Armbruster wrote: Jan Kiszka jan.kis...@siemens.com writes: Markus Armbruster wrote: Jan Kiszka jan.kis...@web.de writes: Luiz Capitulino wrote: On Sun, 23 May 2010 12:59:19 +0200 Jan Kiszka jan.kis...@web.de wrote: From: Jan Kiszka jan.kis...@siemens.com Allow to specify the device to be removed via device_del not only by ID but also by its full or abbreviated qtree path. For this purpose, qdev_find is introduced which combines walking the qtree with searching for device IDs if required. [...] Arguments: -- id: the device's ID (json-string) +- path: the device's qtree path or unique ID (json-string) Example: -- { execute: device_del, arguments: { id: net1 } } +- { execute: device_del, arguments: { path: net1 } } Doesn't seem like a good change to me, besides being incompatible[1] we shouldn't overload arguments this way in QMP as overloading leads to interface degradation (harder to use, understand, maintain). It's not overloaded, think of an ID as a (weak) symbolic link in the qtree filesystem. The advantage of basing everything on top of full or abbreviated qtree paths is that IDs are not always assigned, paths are. As long as your patch doesn't change the interpretation of IDs, we can keep the old name. The recent review of QMP documentation may lead to a clean up bad names flag day. One more wouldn't make it worse, I guess. Maybe we could have both arguments as optional, but one must be passed. This would at least require some way to keep the proposed unified path specification for the human monitor (having separate arguments there is really unhandy). Correct. It would be nice to have device_del support paths in addition to IDs. I'd expect management tools to slap IDs on everything, so they won't care, but human users do. As far as I know, we have two places where we let the user name a node in the qtree: device_add bus=X and device_del X. The former names a bus, the latter a device. But both are nodes in the same tree, so consistency is in order. Only devices have user-specified IDs. Buses have names assigned by the system. Unique names, hopefully. ...but not necessarily. The bus name device_add accepts can also be a full, thus unambiguous path. If the user doesn't specify a device ID, the driver name is used instead. If you put multiple instances of the same device on the same bus, they have the *same* path. For instance, here's a snippet of info qtree after adding two usb-mouse: dev: piix3-usb-uhci, id bus-prop: addr = 01.2 bus-prop: romfile = null bus-prop: rombar = 1 class USB controller, addr 00:01.2, pci id 8086:7020 (sub 1af4:1100) bar 4: i/o at 0x [0x1e] bus: usb.0 type USB dev: usb-hub, id addr 0.0, speed 12, name QEMU USB Hub, attached dev: usb-mouse, id no2 addr 0.0, speed 12, name QEMU USB Mouse, attached dev: usb-mouse, id addr 0.0, speed 12, name QEMU USB Mouse, attached Both devices have the same full path /i440FX-pcihost/pci.0/piix3-usb-uhci/usb.0/usb-mouse Which one does your code pick? Shouldn't it refuse to pick? Patch 3 of this series resolves this as follows: usb-mouse[.0] - first listed instance usb-mouse.1 - second instance ... We should probably include this numbering in the qtree dump, I guess. By the way, you *can* put '/' in IDs. I call that a bug. Even if we prevent this, IDs can still collide with abbreviated device or bus paths. Therefore I give paths precedence over IDs in patch 4. You're fixing problems in the overly complex and subtle path name lookup by making it more complex and subtle. Let's make it simple and straightforward instead. I have no problem with ripping out all of those abbreviations, requiring the user to either specify a '/'-less unique ID or a full qtree path that must start with a '/'. If paths get long, we now have interactive completions. Jan signature.asc Description: OpenPGP digital signature
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote: 2010/5/29 Gleb Natapov g...@redhat.com: On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. I will. Please do ASAP. This discussion shows that you don't understand what is the problem that we are dialing with. Which part of the problem you think I don't understand? It seams to me you don't understand how Windows uses RTC for time keeping and how the QEMU solves the problem today. guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. I think one problem is that the guest has executed too much compared to what would happen with real HW with a lesser CPU. That explains the RTC frequency reprogramming case. You think wrong. The problem is exactly opposite: the guest haven't had enough execution time between two time interrupts. I don't know what RTC frequency reprogramming case you are talking about here. The case you told me where N pending tick IRQs exist but the guest wants to change the RTC frequency from 64Hz to 1024Hz. Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change the frequency to 1000Hz. The problem for emulation is that for the same 3 ticks, there has been so little execution power that the ticks have been coalesced. But isn't the guest cycle count then much lower than 30Mcyc? Isn't it so that the guest must be above 30Mcyc to be able to want the change? But if we reach that point, the problem must have not been too little execution time, but too much. Sorry I tried hard to understand what have you said above but failed. What do you mean to be able to want the change? Guest sometimes wants to get 64 timer interrupts per second and sometimes it wants to get 1024 timer interrupt per second. It wants it not as a result of time drift or anything. It's just how guest behaves. You seams to be to fixated on guest frequency change. It's just something you have to take into account when you reinject interrupts. And even if the rate did not matter, the APIC woult still have to now about the fact that an IRQ is really periodic and does not only appear as such for a certain interval. This really does not sound like simplifying things or even make them cleaner. It would, the voodoo would be contained only in APIC, RTC would be just like any other device. With the bidirectional irqs, this voodoo would probably eventually spread to many other devices. The logical conclusion of that would be a system where all devices would be careful not to disturb the guest at wrong moment because that would trigger a bug. This voodoo will be so complex and unreliable that it will make RTC hack pale in comparison (and I still don't see how you are going to make it actually work). Implement everything inside APIC: only coalescing and reinjection. APIC has zero info needed to implement reinjection correctly as was shown to you several time in this thread and you simply keep ignoring it. On the contrary, APIC is actually the only source of the IRQ ack information. RTC hack would not work without APIC (or the bidirectional IRQ) passing this info to RTC. What APIC doesn't have now is the timer frequency or period info. This is known by RTC and also higher levels managing the clocks. So APIC has one bit of information and RTC everything else. The information known by RTC (timer period) is also known by higher
Re: [Qemu-devel] Re: [PATCH v3 06/17] qdev: Allow device specification by qtree path for device_del
Jan Kiszka jan.kis...@siemens.com writes: Markus Armbruster wrote: Jan Kiszka jan.kis...@web.de writes: Luiz Capitulino wrote: On Sun, 23 May 2010 12:59:19 +0200 Jan Kiszka jan.kis...@web.de wrote: From: Jan Kiszka jan.kis...@siemens.com Allow to specify the device to be removed via device_del not only by ID but also by its full or abbreviated qtree path. For this purpose, qdev_find is introduced which combines walking the qtree with searching for device IDs if required. [...] Arguments: -- id: the device's ID (json-string) +- path: the device's qtree path or unique ID (json-string) Example: -- { execute: device_del, arguments: { id: net1 } } +- { execute: device_del, arguments: { path: net1 } } Doesn't seem like a good change to me, besides being incompatible[1] we shouldn't overload arguments this way in QMP as overloading leads to interface degradation (harder to use, understand, maintain). It's not overloaded, think of an ID as a (weak) symbolic link in the qtree filesystem. The advantage of basing everything on top of full or abbreviated qtree paths is that IDs are not always assigned, paths are. As long as your patch doesn't change the interpretation of IDs, we can keep the old name. The recent review of QMP documentation may lead to a clean up bad names flag day. One more wouldn't make it worse, I guess. Maybe we could have both arguments as optional, but one must be passed. This would at least require some way to keep the proposed unified path specification for the human monitor (having separate arguments there is really unhandy). Correct. It would be nice to have device_del support paths in addition to IDs. I'd expect management tools to slap IDs on everything, so they won't care, but human users do. As far as I know, we have two places where we let the user name a node in the qtree: device_add bus=X and device_del X. The former names a bus, the latter a device. But both are nodes in the same tree, so consistency is in order. Only devices have user-specified IDs. Buses have names assigned by the system. Unique names, hopefully. ...but not necessarily. The bus name device_add accepts can also be a full, thus unambiguous path. If the user doesn't specify a device ID, the driver name is used instead. If you put multiple instances of the same device on the same bus, they have the *same* path. For instance, here's a snippet of info qtree after adding two usb-mouse: dev: piix3-usb-uhci, id bus-prop: addr = 01.2 bus-prop: romfile = null bus-prop: rombar = 1 class USB controller, addr 00:01.2, pci id 8086:7020 (sub 1af4:1100) bar 4: i/o at 0x [0x1e] bus: usb.0 type USB dev: usb-hub, id addr 0.0, speed 12, name QEMU USB Hub, attached dev: usb-mouse, id no2 addr 0.0, speed 12, name QEMU USB Mouse, attached dev: usb-mouse, id addr 0.0, speed 12, name QEMU USB Mouse, attached Both devices have the same full path /i440FX-pcihost/pci.0/piix3-usb-uhci/usb.0/usb-mouse Which one does your code pick? Shouldn't it refuse to pick? Patch 3 of this series resolves this as follows: usb-mouse[.0] - first listed instance usb-mouse.1 - second instance ... We should probably include this numbering in the qtree dump, I guess. By the way, you *can* put '/' in IDs. I call that a bug. Even if we prevent this, IDs can still collide with abbreviated device or bus paths. Therefore I give paths precedence over IDs in patch 4. You're fixing problems in the overly complex and subtle path name lookup by making it more complex and subtle. Let's make it simple and straightforward instead.
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov g...@redhat.com wrote: On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: I still don't see how the alternative is supposed to simplify our life or improve the efficiency of the de-coalescing workaround. It's rather problematic like Gleb pointed out: The de-coalescing logic needs to be informed about periodicity changes that can only be delivered along IRQs. So what to do with the backlog when the timer is stopped? What happens with the current design? Gleb only mentioned the frequency change, I thought that was not so big problem. But I don't think this case should be allowed happen at all, it can't exist on real HW. Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP inside QEMU, connect with gdb to QEMU process and check what frequency RTC configured with (hint: it will be 64Hz), now run video inside the guest and check frequency again (hint: it will be 1Khz). You missed the key word 'stopped'. If the timer is really stopped, no IRQs should ever come out afterwards, just like on real HW. For the emulation, this means loss of ticks which should have been delivered before the change. But what if the guest changed the frequency very often, and between changes used zero value, like 64Hz - 0Hz - 128Hz - 0Hz - 64Hz?
[Qemu-devel] [PATCH 1/3] qemu-thread: add qemu_mutex/cond_destroy and qemu_mutex_exit
Add some missing functions in qemu-thread. Currently qemu-thread is only used for io-thread but it will used by the vnc server soon and we need those functions instead of calling pthread directly. Signed-off-by: Corentin Chary corenti...@iksaif.net --- qemu-thread.c | 22 ++ qemu-thread.h |4 2 files changed, 26 insertions(+), 0 deletions(-) diff --git a/qemu-thread.c b/qemu-thread.c index 3923db7..afc9933 100644 --- a/qemu-thread.c +++ b/qemu-thread.c @@ -34,6 +34,15 @@ void qemu_mutex_init(QemuMutex *mutex) error_exit(err, __func__); } +void qemu_mutex_destroy(QemuMutex *mutex) +{ +int err; + +err = pthread_mutex_destroy(mutex-lock); +if (err) +error_exit(err, __func__); +} + void qemu_mutex_lock(QemuMutex *mutex) { int err; @@ -90,6 +99,15 @@ void qemu_cond_init(QemuCond *cond) error_exit(err, __func__); } +void qemu_cond_destroy(QemuCond *cond) +{ +int err; + +err = pthread_cond_destroy(cond-cond); +if (err) +error_exit(err, __func__); +} + void qemu_cond_signal(QemuCond *cond) { int err; @@ -161,3 +179,7 @@ int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2) return pthread_equal(thread1-thread, thread2-thread); } +void qemu_thread_exit(void *retval) +{ +pthread_exit(retval); +} diff --git a/qemu-thread.h b/qemu-thread.h index 5ef4a3a..19bb30c 100644 --- a/qemu-thread.h +++ b/qemu-thread.h @@ -20,12 +20,14 @@ typedef struct QemuCond QemuCond; typedef struct QemuThread QemuThread; void qemu_mutex_init(QemuMutex *mutex); +void qemu_mutex_destroy(QemuMutex *mutex); void qemu_mutex_lock(QemuMutex *mutex); int qemu_mutex_trylock(QemuMutex *mutex); int qemu_mutex_timedlock(QemuMutex *mutex, uint64_t msecs); void qemu_mutex_unlock(QemuMutex *mutex); void qemu_cond_init(QemuCond *cond); +void qemu_cond_destroy(QemuCond *cond); void qemu_cond_signal(QemuCond *cond); void qemu_cond_broadcast(QemuCond *cond); void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex); @@ -37,4 +39,6 @@ void qemu_thread_create(QemuThread *thread, void qemu_thread_signal(QemuThread *thread, int sig); void qemu_thread_self(QemuThread *thread); int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2); +void qemu_thread_exit(void *retval); + #endif -- 1.7.1
Re: [Qemu-devel] [PATCH v3 04/17] qdev: Give qtree names precedence over user-assigned IDs
Jan Kiszka jan.kis...@web.de writes: From: Jan Kiszka jan.kis...@siemens.com As the user may specify ambiguous device IDs, let's search for their official names first before considering the user-supplied identifiers. Signed-off-by: Jan Kiszka jan.kis...@siemens.com The problem is letting the user specify ambiguous device IDs in the first place! That way is madness...
Re: [Qemu-devel] [PATCH v2 03/15] qdev: Allow device addressing via 'driver.instance'
Jan Kiszka jan.kis...@web.de writes: From: Jan Kiszka jan.kis...@siemens.com Extend qbus_find_dev to allow addressing of devices without an unique id via an optional per-bus instance number. The new formats are 'driver.instance' and 'alias.instance'. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- docs/qdev-device-use.txt | 12 +++- hw/qdev.c| 23 ++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt index 9ac1fa1..5939481 100644 --- a/docs/qdev-device-use.txt +++ b/docs/qdev-device-use.txt @@ -1,6 +1,6 @@ = How to convert to -device friends = -=== Specifying Bus and Address on Bus === +=== Specifying Bus, Address on Bus, and Devices === In qdev, each device has a parent bus. Some devices provide one or more buses for children. You can specify a device's parent bus with @@ -24,6 +24,16 @@ Furthermore, if a device only hosts a single bus, the bus name can be omitted in the path. Example: /i440FX-pcihost/PIIX3 abbreviates /i440FX-pcihost/pci.0/PIIX3/isa.0 as none of the buses has siblings. +Existing devices can be addressed either via a unique ID if it was +assigned during creation or via the device tree path: + +/full_bus_address/driver_name[.instance_number] +or +abbreviated_bus_address/driver_name[.instance_number] + +Example: /i440FX-pcihost/pci.0/e1000.2 addresses the second e1000 +adapter on the bus 'pci.0'. + Note: the USB device address can't be controlled at this time. instance number isn't defined in this document. I understand the problem you're trying to solve; I've had it myself many times. But is inventing an instance number the right solution? The two e1000 devices already have a perfectly fine unique identifier on their bus: their bus address. What about recognizing bus addresses in paths? Requires a suitable restriction on device names and IDs to avoid ambiguity, say start with letter. qdev currently doesn't abstract bus addresses, but that's fixable. [...]
Re: [Qemu-devel] [PATCH] sparc64: fix 128-bit atomic load from nucleus context v1
Thanks, applied. On Fri, May 28, 2010 at 9:05 PM, Igor V. Kovalenko igor.v.kovale...@gmail.com wrote: From: Igor V. Kovalenko igor.v.kovale...@gmail.com - change 128-bit atomic loads to reference nucleus context v0-v1: dropped disassembler change Signed-off-by: Igor V. Kovalenko igor.v.kovale...@gmail.com --- target-sparc/op_helper.c | 10 +- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c index edeeb44..f468e7b 100644 --- a/target-sparc/op_helper.c +++ b/target-sparc/op_helper.c @@ -3060,19 +3060,19 @@ void helper_ldda_asi(target_ulong addr, int asi, int rd) case 0x2c: // Nucleus quad LDD 128 bit atomic LE helper_check_align(addr, 0xf); if (rd == 0) { - env-gregs[1] = ldq_kernel(addr + 8); + env-gregs[1] = ldq_nucleus(addr + 8); if (asi == 0x2c) bswap64s(env-gregs[1]); } else if (rd 8) { - env-gregs[rd] = ldq_kernel(addr); - env-gregs[rd + 1] = ldq_kernel(addr + 8); + env-gregs[rd] = ldq_nucleus(addr); + env-gregs[rd + 1] = ldq_nucleus(addr + 8); if (asi == 0x2c) { bswap64s(env-gregs[rd]); bswap64s(env-gregs[rd + 1]); } } else { - env-regwptr[rd] = ldq_kernel(addr); - env-regwptr[rd + 1] = ldq_kernel(addr + 8); + env-regwptr[rd] = ldq_nucleus(addr); + env-regwptr[rd + 1] = ldq_nucleus(addr + 8); if (asi == 0x2c) { bswap64s(env-regwptr[rd]); bswap64s(env-regwptr[rd + 1]);
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
Blue Swirl wrote: On Sat, May 29, 2010 at 10:16 AM, Jan Kiszka jan.kis...@web.de wrote: Blue Swirl wrote: This is - according to my current understanding - the proposed alternative architecture: .---. | de-coalescing | | logic | '---' ^ | period,| |IRQ coalesced| |(or tuned VM clock) (yes/no)| v .---. .. .---. | RTC |-IRQ-| router |-IRQ| APIC | '---' '' '---' |^| ^ ||| | '---period---''--period---' The period information is already known by the higher level clock management, The timer device models program the next event of some qemu-timer. There is no tag attached explaining the timer subsystem or anyone else the reason behind this programming. Yes, but why would we care? All timers in the system besides RTC should be affected likewise, this would in fact be the benefit from handling the problem at higher level. And how does your equation for calculating the clock slow-down look like? How about icount_adjust()? I would suggest that you implement your ideas now. Please keep us informed about the progress as this series (and more) depends on a decision. Jan signature.asc Description: OpenPGP digital signature
[Qemu-devel] [PATCH 2/3] qemu-thread: add cleanup_push() and cleanup_pop()
From pthread man: These functions manipulate the calling thread's stack of thread-cancellation clean-up handlers. A clean-up handler is a function that is automatically executed when a thread is canceled [...] it might, for example, unlock a mutex so that it becomes available to other threads in the process. These two functions are implemented using macros because there is no other way to do that (pthread man, again): On Linux, the pthread_cleanup_push() and pthread_cleanup_pop() functions are implemented as macros that expand to text containing '{' and '}', respectively. This means that variables declared within the scope of paired calls to these functions will only be visible within that scope. Signed-off-by: Corentin Chary corenti...@iksaif.net --- qemu-thread.h |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/qemu-thread.h b/qemu-thread.h index 19bb30c..e5006bb 100644 --- a/qemu-thread.h +++ b/qemu-thread.h @@ -41,4 +41,8 @@ void qemu_thread_self(QemuThread *thread); int qemu_thread_equal(QemuThread *thread1, QemuThread *thread2); void qemu_thread_exit(void *retval); +#define qemu_thread_cleanup_pop(execute) pthread_cleanup_pop(execute) +#define qemu_thread_cleanup_push(routine, arg) \ +pthread_cleanup_push(routine, arg) + #endif -- 1.7.1
Re: [Qemu-devel] [PATCH v2 02/15] qdev: Fix scanning across single-bus devices
[cc: kraxel] Jan Kiszka jan.kis...@web.de writes: From: Jan Kiszka jan.kis...@siemens.com As long as we allow /dev.1 as shortcut for /dev1/bus1, we also have to make sure that /dev1/dev2 works for /dev1/bus1/dev2/bus2 - as long as there is only one child bus per device. We auto-root a path not starting with '/' via convention first component is ID then (I like that). We auto-complete a path ending with a device when we want a bus (a bit too clever). Auto-inserting bus names in the middle is too clever by half! Such cleverness can be convenient in the human monitor, but all it adds to QMP is complexity. I'm worried about possibly ambigous interpretation of paths. Would be bad if a path could name different node after we add something to the tree. I *think* your fine print makes that impossible, but it's not exactly obvious. Heck, I could be wrong. Wouldn't it be better to put the convenience into the interactive completion function? That way we keep it out of QMP. Subject is misleading, it's a feature, not a bug fix.
[Qemu-devel] Re: [PATCH] savevm: Really verify if a drive supports snapshots
Kevin Wolf kw...@redhat.com writes: Am 28.05.2010 20:18, schrieb Miguel Di Ciurcio Filho: Both bdrv_can_snapshot() and bdrv_has_snapshot() does not work as advertized. First issue: Their names implies different porpouses, but they do the same thing and have exactly the same code. Maybe copied and pasted and forgotten? bdrv_has_snapshot() is called in various places for actually checking if there is snapshots or not. Second issue: the way bdrv_can_snapshot() verifies if a block driver supports or not snapshots does not catch all cases. E.g.: a raw image. So when do_savevm() is called, first thing it does is to set a global BlockDriverState to save the VM memory state calling get_bs_snapshots(). static BlockDriverState *get_bs_snapshots(void) { BlockDriverState *bs; DriveInfo *dinfo; if (bs_snapshots) return bs_snapshots; QTAILQ_FOREACH(dinfo, drives, next) { bs = dinfo-bdrv; if (bdrv_can_snapshot(bs)) goto ok; } return NULL; ok: bs_snapshots = bs; return bs; } bdrv_can_snapshot() may return a BlockDriverState that does not support snapshots and do_savevm() goes on. Later on in do_savevm(), we find: QTAILQ_FOREACH(dinfo, drives, next) { bs1 = dinfo-bdrv; if (bdrv_has_snapshot(bs1)) { /* Write VM state size only to the image that contains the state */ sn-vm_state_size = (bs == bs1 ? vm_state_size : 0); ret = bdrv_snapshot_create(bs1, sn); if (ret 0) { monitor_printf(mon, Error while creating snapshot on '%s'\n, bdrv_get_device_name(bs1)); } } } bdrv_has_snapshot(bs1) is not checking if the device does support or has snapshots as explained above. Only in bdrv_snapshot_create() the device is actually checked for snapshot support. So, in cases where the first device supports snapshots, and the second does not, the snapshot on the first will happen anyways. I believe this is not a good behavior. It should be an all or nothing process. This patch addresses these issues by making bdrv_can_snapshot() and bdrv_has_snapshot() actually do what they must do and enforces better tests to avoid errors in the middle of do_savevm(). The functions were moved from savevm.c to block.c. It makes more sense to me. The bdrv_has_snapshot() is not beaultiful, but it does the job. I think having this function avaible in the BlockDriver would be the best option. The loadvm_state() function was updated too to enforce that when loading a VM at least all writable devices must support snapshots too. Signed-off-by: Miguel Di Ciurcio Filho miguel.fi...@gmail.com Markus, I think this implements mostly what we discussed the other day. Not sure if you already have a patch for doing this - if so, maybe could compare the patches and give it a review this way? Glad I don't, because this one looks pretty good. I seem to remember that we came to the conclusion that bdrv_has_snapshot() isn't needed at all and should be dropped. Any user should be using bdrv_can_snapshot() instead as this is what they really want. Our reasoning adapted to the patched code goes like this. The remaining uses of bdrv_has_snapshot() are of the form: if (bdrv_has_snapshot(bs) some_snapshot_op() where some_snapshot_op() fails when there are no snapshots, and the code can recognize that failure as sorry, no snapshots (that's what it does before your patch, when bdrv_has_snapshot() is really no more than a necessary, but not sufficient condition for has snapshots). Therefore, we don't have to check for actual existence of snapshots with bdrv_has_snapshot(). bdrv_can_snapshot() suffices. In general, I consider if (predict_whether_foo_will_work) if (foo() 0) // foo failed else // foo would have failed (we think) an anti-pattern. Just make foo() safe to try, and check its status: if (foo() 0) // foo failed Except when foo() is too expensive to try, or can't be made safe. --- block.c | 47 ++- block.h |2 ++ savevm.c | 48 +--- 3 files changed, 69 insertions(+), 28 deletions(-) diff --git a/block.c b/block.c index cd70730..7eddc15 100644 --- a/block.c +++ b/block.c @@ -1720,15 +1720,52 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) /**/ /* handling of snapshots */ -int bdrv_snapshot_create(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info) +int bdrv_can_snapshot(BlockDriverState *bs) { BlockDriver *drv = bs-drv; -if (!drv) +if (!drv) { +return -ENOMEDIUM; +} + +if (!drv-bdrv_snapshot_create || bdrv_is_removable(bs) || +
Re: [Qemu-devel] cg14
Artyom Tarasenko wrote: 2010/5/28 Blue Swirl blauwir...@gmail.com: On Fri, May 28, 2010 at 7:54 AM, Bob Breuer breu...@mc.net wrote: Artyom Tarasenko wrote: 2010/5/27 Bob Breuer breu...@mc.net: Artyom Tarasenko wrote: Was going to put some more empty slots into SS-10/20 (VSIMMs, SX) after we are done with SS-5 (due to technical limitations I can switch access from one real SS model to another one once a few days only). I have a partial implementation of the SS-20 VSIMM (cg14) that I've been working on. With the Sun firmware, I have working text console, color boot logo, and programmable video resolutions up to 1600x1280. Great news! This would allow qemu booting NeXTStep! Are you planning to submit the patches any time soon? It's not in a state to be submitted yet, but I've attached a working patch if you want to give it a try. I need to hook it up to qdev and fill in some more of the obviously incomplete switch cases before I'd sign off on it. Nice work. I have a few comments below. This probably needs support from OpenBIOS to be usable without OBP. Maybe it can be used as a second adapter without OpenBIOS support? At least under some OSes? Probably won't be used without at least being in the firmware device tree. One area that OpenBIOS could enhance would be a larger memory size option. The real hardware was only available in 4M and 8M options, but the memory map allows for 16M. OBP will identify a 16M VSIMM but won't do anything else with it, and with 16M of vram it would allow for a potential 2560x1600 32bit resolution. Bob diff --git a/Makefile.target b/Makefile.target index fda5bf3..b17b3af 100644 --- a/Makefile.target +++ b/Makefile.target @@ -250,6 +250,7 @@ else obj-sparc-y = sun4m.o lance.o tcx.o sun4m_iommu.o slavio_intctl.o obj-sparc-y += slavio_timer.o slavio_misc.o sparc32_dma.o obj-sparc-y += cs4231.o eccmemctl.o sbi.o sun4c_intctl.o +obj-sparc-y += cg14.o endif obj-arm-y = integratorcp.o versatilepb.o arm_pic.o arm_timer.o diff --git a/hw/sun4m.c b/hw/sun4m.c index 7ba0f76..8b23c9b 100644 --- a/hw/sun4m.c +++ b/hw/sun4m.c @@ -864,6 +864,13 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size, fprintf(stderr, qemu: Unsupported depth: %d\n, graphic_depth); exit (1); } + if (hwdef-machine_id == 65) { /* SS-20 */ hwdef structure should contain a field for cg14. If non-zero, install cg14. Was cg14 only available for SS-20? Was it always included? This is also interesting for OpenBIOS, we need to detect cg14 vs. TCX. The cg14 was only an option for SS-20 and the rare SS-10SX, but not the regular SS-10, though the SS-10 chipset may have been capable of supporting it. Each cg14 vsimm takes the place of a stick of memory with 2 slots physically capable of holding a vsimm. Is there a way to pass the framebuffer type and/or address to OpenBIOS? I would be inclined to have the SS-20 machine default to cg14 instead of TCX, but it will blow a hole in the support of more than 2G of emulated system ram. +/* cg14.c */ +void cg14_init(target_phys_addr_t ctrl_base, target_phys_addr_t vram_base, +uint32_t vram_size); This should go to sun4m.h or cg14.h. + +cg14_init(0x09c00ULL, 0x0fc00ULL, 820); + } else Please add braces and reindent. tcx_init(hwdef-tcx_base, 0x0010, graphic_width, graphic_height, graphic_depth); --- /dev/null Fri May 28 02:08:36 2010 +++ hw/cg14.c Fri May 28 01:58:49 2010 @@ -0,0 +1,785 @@ +/* + * QEMU CG14 Frame buffer + * + * Copyright (c) 2010 Bob Breuer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include console.h +#include sysbus.h + +#ifdef DEBUG DEBUG_CG14 ?
[Qemu-devel] Re: [SeaBIOS] SMBIOS strings
On Fri, May 28, 2010 at 05:24:47PM +0200, Jes Sorensen wrote: We were looking at the dmidecode output from qemu-kvm pre-seabios and current qemu-kvm and noticed some of the strings have changed. [...] I wanted to check with the lists if there are any strong feelings about this, and whether some of these changes were made for specific reasons? For example: [...] - Vendor: QEMU - Version: QEMU + Vendor: Bochs + Version: Bochs [...] - Manufacturer: QEMU - ID: 63 06 00 00 FD FB 8B 07 + Manufacturer: Bochs + ID: 23 06 00 00 FD FB 8B 07 I made these changes in SeaBIOS when I was cleaning up the ifdefs in the code. Instead of ifdef QEMU spread around the code, I abstracted out the various names into CONFIG_APPNAME. I defaulted the names to the Bochs based ones because they were the default in the original Bochs bios code. So, the only reason for these string differences is that the default name hasn't been changed. Otherwise, if there are no objections, I'll look at adding some patches to make it more backwards compatible. I wonder if making the default names be SeaBIOS based instead of bochs/qemu based would make sense. That way there isn't an impulse to change the settings with every emulator seabios runs on. (Ideally, the same binary would be run on multiple emulators to simplify testing.) -Kevin
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 04:13:22PM +, Blue Swirl wrote: On Sat, May 29, 2010 at 2:46 PM, Gleb Natapov g...@redhat.com wrote: On Sat, May 29, 2010 at 09:35:30AM +, Blue Swirl wrote: I still don't see how the alternative is supposed to simplify our life or improve the efficiency of the de-coalescing workaround. It's rather problematic like Gleb pointed out: The de-coalescing logic needs to be informed about periodicity changes that can only be delivered along IRQs. So what to do with the backlog when the timer is stopped? What happens with the current design? Gleb only mentioned the frequency change, I thought that was not so big problem. But I don't think this case should be allowed happen at all, it can't exist on real HW. Hm, why it can't exist on real HW? Do simple exercise. Run WindowsXP inside QEMU, connect with gdb to QEMU process and check what frequency RTC configured with (hint: it will be 64Hz), now run video inside the guest and check frequency again (hint: it will be 1Khz). You missed the key word 'stopped'. If the timer is really stopped, no IRQs should ever come out afterwards, just like on real HW. For the emulation, this means loss of ticks which should have been delivered before the change. I haven't missed it. I describe to you reality of the situation. You want to change reality to be more close to what you want it to be by adding words to my description. Please just go write code, experiment, debug and _then_ come here with design. But what if the guest changed the frequency very often, and between changes used zero value, like 64Hz - 0Hz - 128Hz - 0Hz - 64Hz? Too bad, the world is not perfect. -- Gleb.
Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr
On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno aurel...@aurel32.net wrote: On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote: Booting an arm kernel has been broken a while when booting from non zero start address. This is due to the order of events: board init loads the kernel and sets register 15 to the start address and then qemu_system_reset reset the cpu making register 15 zero again. This patch fixes the usage of the register 15 start address trick in combination with arm_load_kernel. Signed-off-by: Lars Munch l...@segv.dk --- hw/arm_boot.c | 1 + hw/gumstix.c | 4 hw/mainstone.c | 3 --- hw/nseries.c | 7 --- hw/omap_sx1.c | 5 - hw/palm.c | 4 hw/spitz.c | 3 --- hw/tosa.c | 3 --- target-arm/helper.c | 1 - 9 files changed, 1 insertions(+), 30 deletions(-) diff --git a/hw/arm_boot.c b/hw/arm_boot.c index df031a5..620550b 100644 --- a/hw/arm_boot.c +++ b/hw/arm_boot.c @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque) env-regs[15] = info-entry 0xfffe; env-thumb = info-entry 1; } else { + env-regs[15] = info-loader_start; if (old_param) { set_kernel_args_old(info, info-initrd_size, info-loader_start); This parts looks fine. diff --git a/hw/gumstix.c b/hw/gumstix.c index 3fd31f4..b64e04e 100644 --- a/hw/gumstix.c +++ b/hw/gumstix.c @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size, exit(1); } - cpu-env-regs[15] = 0x; - /* Interrupt line of NIC is connected to GPIO line 36 */ smc91c111_init(nd_table[0], 0x04000300, pxa2xx_gpio_in_get(cpu-gpio)[36]); @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size, exit(1); } - cpu-env-regs[15] = 0x; - /* Interrupt line of NIC is connected to GPIO line 99 */ smc91c111_init(nd_table[0], 0x04000300, pxa2xx_gpio_in_get(cpu-gpio)[99]); diff --git a/hw/mainstone.c b/hw/mainstone.c index a4379e3..54bacfb 100644 --- a/hw/mainstone.c +++ b/hw/mainstone.c @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size, cpu_register_physical_memory(0, MAINSTONE_ROM, qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM); - /* Setup initial (reset) machine state */ - cpu-env-regs[15] = mainstone_binfo.loader_start; - #ifdef TARGET_WORDS_BIGENDIAN be = 1; #else diff --git a/hw/nseries.c b/hw/nseries.c index 0273eee..04a028d 100644 --- a/hw/nseries.c +++ b/hw/nseries.c @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque) n800_dss_init(s-blizzard); /* CPU setup */ - s-cpu-env-regs[15] = s-cpu-env-boot_info-loader_start; s-cpu-env-GE = 0x5; /* If the machine has a slided keyboard, open it */ @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device, if (usb_enabled) n8x0_usb_setup(s); - /* Setup initial (reset) machine state */ - - /* Start at the OneNAND bootloader. */ - s-cpu-env-regs[15] = 0; - if (kernel_filename) { /* Or at the linux loader. */ binfo-kernel_filename = kernel_filename; @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device, arm_load_kernel(s-cpu-env, binfo); qemu_register_reset(n8x0_boot_init, s); - n8x0_boot_init(s); } if (option_rom[0] (boot_device[0] == 'n' || !kernel_filename)) { diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c index ca0a7d1..2e9879f 100644 --- a/hw/omap_sx1.c +++ b/hw/omap_sx1.c @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size, /* Load the kernel. */ if (kernel_filename) { - /* Start at bootloader. */ - cpu-env-regs[15] = sx1_binfo.loader_start; - sx1_binfo.kernel_filename = kernel_filename; sx1_binfo.kernel_cmdline = kernel_cmdline; sx1_binfo.initrd_filename = initrd_filename; arm_load_kernel(cpu-env, sx1_binfo); - } else { - cpu-env-regs[15] = 0x; } /* TODO: fix next line */ diff --git a/hw/palm.c b/hw/palm.c index ba7c398..8db133d 100644 --- a/hw/palm.c +++ b/hw/palm.c @@ -243,7 +243,6 @@ static void palmte_init(ram_addr_t ram_size, rom_size = load_image_targphys(option_rom[0], OMAP_CS0_BASE, flash_size); rom_loaded = 1; - cpu-env-regs[15] = 0x; } if (rom_size 0) { fprintf(stderr, %s: error loading '%s'\n, @@ -258,9 +257,6 @@ static void palmte_init(ram_addr_t ram_size, /* Load the kernel. */ if (kernel_filename) { - /* Start at bootloader. */ -
Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr
On Sat, May 29, 2010 at 08:42:52PM +0200, Lars Munch wrote: On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno aurel...@aurel32.net wrote: On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote: Booting an arm kernel has been broken a while when booting from non zero start address. This is due to the order of events: board init loads the kernel and sets register 15 to the start address and then qemu_system_reset reset the cpu making register 15 zero again. This patch fixes the usage of the register 15 start address trick in combination with arm_load_kernel. Signed-off-by: Lars Munch l...@segv.dk --- hw/arm_boot.c | 1 + hw/gumstix.c | 4 hw/mainstone.c | 3 --- hw/nseries.c | 7 --- hw/omap_sx1.c | 5 - hw/palm.c | 4 hw/spitz.c | 3 --- hw/tosa.c | 3 --- target-arm/helper.c | 1 - 9 files changed, 1 insertions(+), 30 deletions(-) diff --git a/hw/arm_boot.c b/hw/arm_boot.c index df031a5..620550b 100644 --- a/hw/arm_boot.c +++ b/hw/arm_boot.c @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque) env-regs[15] = info-entry 0xfffe; env-thumb = info-entry 1; } else { + env-regs[15] = info-loader_start; if (old_param) { set_kernel_args_old(info, info-initrd_size, info-loader_start); This parts looks fine. diff --git a/hw/gumstix.c b/hw/gumstix.c index 3fd31f4..b64e04e 100644 --- a/hw/gumstix.c +++ b/hw/gumstix.c @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size, exit(1); } - cpu-env-regs[15] = 0x; - /* Interrupt line of NIC is connected to GPIO line 36 */ smc91c111_init(nd_table[0], 0x04000300, pxa2xx_gpio_in_get(cpu-gpio)[36]); @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size, exit(1); } - cpu-env-regs[15] = 0x; - /* Interrupt line of NIC is connected to GPIO line 99 */ smc91c111_init(nd_table[0], 0x04000300, pxa2xx_gpio_in_get(cpu-gpio)[99]); diff --git a/hw/mainstone.c b/hw/mainstone.c index a4379e3..54bacfb 100644 --- a/hw/mainstone.c +++ b/hw/mainstone.c @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size, cpu_register_physical_memory(0, MAINSTONE_ROM, qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM); - /* Setup initial (reset) machine state */ - cpu-env-regs[15] = mainstone_binfo.loader_start; - #ifdef TARGET_WORDS_BIGENDIAN be = 1; #else diff --git a/hw/nseries.c b/hw/nseries.c index 0273eee..04a028d 100644 --- a/hw/nseries.c +++ b/hw/nseries.c @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque) n800_dss_init(s-blizzard); /* CPU setup */ - s-cpu-env-regs[15] = s-cpu-env-boot_info-loader_start; s-cpu-env-GE = 0x5; /* If the machine has a slided keyboard, open it */ @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device, if (usb_enabled) n8x0_usb_setup(s); - /* Setup initial (reset) machine state */ - - /* Start at the OneNAND bootloader. */ - s-cpu-env-regs[15] = 0; - if (kernel_filename) { /* Or at the linux loader. */ binfo-kernel_filename = kernel_filename; @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device, arm_load_kernel(s-cpu-env, binfo); qemu_register_reset(n8x0_boot_init, s); - n8x0_boot_init(s); } if (option_rom[0] (boot_device[0] == 'n' || !kernel_filename)) { diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c index ca0a7d1..2e9879f 100644 --- a/hw/omap_sx1.c +++ b/hw/omap_sx1.c @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size, /* Load the kernel. */ if (kernel_filename) { - /* Start at bootloader. */ - cpu-env-regs[15] = sx1_binfo.loader_start; - sx1_binfo.kernel_filename = kernel_filename; sx1_binfo.kernel_cmdline = kernel_cmdline; sx1_binfo.initrd_filename = initrd_filename; arm_load_kernel(cpu-env, sx1_binfo); - } else { - cpu-env-regs[15] = 0x; } /* TODO: fix next line */ diff --git a/hw/palm.c b/hw/palm.c index ba7c398..8db133d 100644 --- a/hw/palm.c +++ b/hw/palm.c @@ -243,7 +243,6 @@ static void palmte_init(ram_addr_t ram_size, rom_size = load_image_targphys(option_rom[0], OMAP_CS0_BASE, flash_size); rom_loaded = 1; - cpu-env-regs[15] = 0x; } if (rom_size 0) { fprintf(stderr, %s:
[Qemu-devel] Re: [PATCH] savevm: Really verify if a drive supports snapshots
On Sat, May 29, 2010 at 3:06 AM, Markus Armbruster arm...@redhat.com wrote: I seem to remember that we came to the conclusion that bdrv_has_snapshot() isn't needed at all and should be dropped. Any user should be using bdrv_can_snapshot() instead as this is what they really want. Our reasoning adapted to the patched code goes like this. The remaining uses of bdrv_has_snapshot() are of the form: if (bdrv_has_snapshot(bs) some_snapshot_op() where some_snapshot_op() fails when there are no snapshots, and the code can recognize that failure as sorry, no snapshots (that's what it does before your patch, when bdrv_has_snapshot() is really no more than a necessary, but not sufficient condition for has snapshots). Therefore, we don't have to check for actual existence of snapshots with bdrv_has_snapshot(). bdrv_can_snapshot() suffices. I had this feeling too, but I was conservative and kept bdrv_has_snapshot(). I will remove it and replace by bdrv_can_snapshot() where appropriate. +int bdrv_can_snapshot(BlockDriverState *bs) { BlockDriver *drv = bs-drv; - if (!drv) + if (!drv) { + return -ENOMEDIUM; + } + + if (!drv-bdrv_snapshot_create || bdrv_is_removable(bs) || + bdrv_is_read_only(bs)) { + return -ENOTSUP; + } + + return 1; +} Returning either 1 or -errno is a strange interface. I'm not sure which Agree. of 1/0 or 0/-errno is better in this case, but I'd suggest to take one of these. Does any existing caller care for the error codes? If not, just go with 1/0. When bdrv_can_snapshot() is called the specific reason for not supporting snapshots does not matter. So, 1/0 is better. I will fix it and resend. Regards, Miguel
Re: [Qemu-devel] [PATCH] arm: fix arm kernel boot for non zero start addr
On Sat, May 29, 2010 at 8:51 PM, Aurelien Jarno aurel...@aurel32.net wrote: On Sat, May 29, 2010 at 08:42:52PM +0200, Lars Munch wrote: On Fri, May 28, 2010 at 9:24 PM, Aurelien Jarno aurel...@aurel32.net wrote: On Sat, May 08, 2010 at 10:43:35PM +0200, Lars Munch wrote: Booting an arm kernel has been broken a while when booting from non zero start address. This is due to the order of events: board init loads the kernel and sets register 15 to the start address and then qemu_system_reset reset the cpu making register 15 zero again. This patch fixes the usage of the register 15 start address trick in combination with arm_load_kernel. Signed-off-by: Lars Munch l...@segv.dk --- hw/arm_boot.c | 1 + hw/gumstix.c | 4 hw/mainstone.c | 3 --- hw/nseries.c | 7 --- hw/omap_sx1.c | 5 - hw/palm.c | 4 hw/spitz.c | 3 --- hw/tosa.c | 3 --- target-arm/helper.c | 1 - 9 files changed, 1 insertions(+), 30 deletions(-) diff --git a/hw/arm_boot.c b/hw/arm_boot.c index df031a5..620550b 100644 --- a/hw/arm_boot.c +++ b/hw/arm_boot.c @@ -187,6 +187,7 @@ static void main_cpu_reset(void *opaque) env-regs[15] = info-entry 0xfffe; env-thumb = info-entry 1; } else { + env-regs[15] = info-loader_start; if (old_param) { set_kernel_args_old(info, info-initrd_size, info-loader_start); This parts looks fine. diff --git a/hw/gumstix.c b/hw/gumstix.c index 3fd31f4..b64e04e 100644 --- a/hw/gumstix.c +++ b/hw/gumstix.c @@ -74,8 +74,6 @@ static void connex_init(ram_addr_t ram_size, exit(1); } - cpu-env-regs[15] = 0x; - /* Interrupt line of NIC is connected to GPIO line 36 */ smc91c111_init(nd_table[0], 0x04000300, pxa2xx_gpio_in_get(cpu-gpio)[36]); @@ -114,8 +112,6 @@ static void verdex_init(ram_addr_t ram_size, exit(1); } - cpu-env-regs[15] = 0x; - /* Interrupt line of NIC is connected to GPIO line 99 */ smc91c111_init(nd_table[0], 0x04000300, pxa2xx_gpio_in_get(cpu-gpio)[99]); diff --git a/hw/mainstone.c b/hw/mainstone.c index a4379e3..54bacfb 100644 --- a/hw/mainstone.c +++ b/hw/mainstone.c @@ -89,9 +89,6 @@ static void mainstone_common_init(ram_addr_t ram_size, cpu_register_physical_memory(0, MAINSTONE_ROM, qemu_ram_alloc(MAINSTONE_ROM) | IO_MEM_ROM); - /* Setup initial (reset) machine state */ - cpu-env-regs[15] = mainstone_binfo.loader_start; - #ifdef TARGET_WORDS_BIGENDIAN be = 1; #else diff --git a/hw/nseries.c b/hw/nseries.c index 0273eee..04a028d 100644 --- a/hw/nseries.c +++ b/hw/nseries.c @@ -1016,7 +1016,6 @@ static void n8x0_boot_init(void *opaque) n800_dss_init(s-blizzard); /* CPU setup */ - s-cpu-env-regs[15] = s-cpu-env-boot_info-loader_start; s-cpu-env-GE = 0x5; /* If the machine has a slided keyboard, open it */ @@ -1317,11 +1316,6 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device, if (usb_enabled) n8x0_usb_setup(s); - /* Setup initial (reset) machine state */ - - /* Start at the OneNAND bootloader. */ - s-cpu-env-regs[15] = 0; - if (kernel_filename) { /* Or at the linux loader. */ binfo-kernel_filename = kernel_filename; @@ -1330,7 +1324,6 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device, arm_load_kernel(s-cpu-env, binfo); qemu_register_reset(n8x0_boot_init, s); - n8x0_boot_init(s); } if (option_rom[0] (boot_device[0] == 'n' || !kernel_filename)) { diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c index ca0a7d1..2e9879f 100644 --- a/hw/omap_sx1.c +++ b/hw/omap_sx1.c @@ -195,15 +195,10 @@ static void sx1_init(ram_addr_t ram_size, /* Load the kernel. */ if (kernel_filename) { - /* Start at bootloader. */ - cpu-env-regs[15] = sx1_binfo.loader_start; - sx1_binfo.kernel_filename = kernel_filename; sx1_binfo.kernel_cmdline = kernel_cmdline; sx1_binfo.initrd_filename = initrd_filename; arm_load_kernel(cpu-env, sx1_binfo); - } else { - cpu-env-regs[15] = 0x; } /* TODO: fix next line */ diff --git a/hw/palm.c b/hw/palm.c index ba7c398..8db133d 100644 --- a/hw/palm.c +++ b/hw/palm.c @@ -243,7 +243,6 @@ static void palmte_init(ram_addr_t ram_size, rom_size = load_image_targphys(option_rom[0], OMAP_CS0_BASE, flash_size); rom_loaded = 1; - cpu-env-regs[15] = 0x;
[Qemu-devel] [PATCH v2] savevm: Really verify if a drive supports snapshots
Both bdrv_can_snapshot() and bdrv_has_snapshot() does not work as advertized. First issue: Their names implies different porpouses, but they do the same thing and have exactly the same code. Maybe copied and pasted and forgotten? bdrv_has_snapshot() is called in various places for actually checking if there is snapshots or not. Second issue: the way bdrv_can_snapshot() verifies if a block driver supports or not snapshots does not catch all cases. E.g.: a raw image. So when do_savevm() is called, first thing it does is to set a global BlockDriverState to save the VM memory state calling get_bs_snapshots(). static BlockDriverState *get_bs_snapshots(void) { BlockDriverState *bs; DriveInfo *dinfo; if (bs_snapshots) return bs_snapshots; QTAILQ_FOREACH(dinfo, drives, next) { bs = dinfo-bdrv; if (bdrv_can_snapshot(bs)) goto ok; } return NULL; ok: bs_snapshots = bs; return bs; } bdrv_can_snapshot() may return a BlockDriverState that does not support snapshots and do_savevm() goes on. Later on in do_savevm(), we find: QTAILQ_FOREACH(dinfo, drives, next) { bs1 = dinfo-bdrv; if (bdrv_has_snapshot(bs1)) { /* Write VM state size only to the image that contains the state */ sn-vm_state_size = (bs == bs1 ? vm_state_size : 0); ret = bdrv_snapshot_create(bs1, sn); if (ret 0) { monitor_printf(mon, Error while creating snapshot on '%s'\n, bdrv_get_device_name(bs1)); } } } bdrv_has_snapshot(bs1) is not checking if the device does support or has snapshots as explained above. Only in bdrv_snapshot_create() the device is actually checked for snapshot support. So, in cases where the first device supports snapshots, and the second does not, the snapshot on the first will happen anyways. I believe this is not a good behavior. It should be an all or nothing process. This patch addresses these issues by making bdrv_can_snapshot() actually do what it must do and enforces better tests to avoid errors in the middle of do_savevm(). bdrv_has_snapshot() is removed and replaced by bdrv_can_snapshot() where appropriate. bdrv_can_snapshot() was moved from savevm.c to block.c. It makes more sense to me. The loadvm_state() function was updated too to enforce that when loading a VM at least all writable devices must support snapshots too. Signed-off-by: Miguel Di Ciurcio Filho miguel.fi...@gmail.com --- block.c | 21 - block.h |1 + monitor.c |5 - savevm.c | 58 -- 4 files changed, 57 insertions(+), 28 deletions(-) diff --git a/block.c b/block.c index cd70730..1115a60 100644 --- a/block.c +++ b/block.c @@ -1720,15 +1720,26 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) /**/ /* handling of snapshots */ +int bdrv_can_snapshot(BlockDriverState *bs) +{ +BlockDriver *drv = bs-drv; +if (!drv || !drv-bdrv_snapshot_create || bdrv_is_removable(bs) || +bdrv_is_read_only(bs)) { +return 0; +} + +return 1; +} + int bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) { BlockDriver *drv = bs-drv; -if (!drv) -return -ENOMEDIUM; -if (!drv-bdrv_snapshot_create) -return -ENOTSUP; -return drv-bdrv_snapshot_create(bs, sn_info); +if (bdrv_can_snapshot(bs)) { +return drv-bdrv_snapshot_create(bs, sn_info); +} + +return -1; } int bdrv_snapshot_goto(BlockDriverState *bs, diff --git a/block.h b/block.h index 24efeb6..fbcd8af 100644 --- a/block.h +++ b/block.h @@ -173,6 +173,7 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); +int bdrv_can_snapshot(BlockDriverState *bs); int bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); int bdrv_snapshot_goto(BlockDriverState *bs, diff --git a/monitor.c b/monitor.c index ad50f12..8b4a1d7 100644 --- a/monitor.c +++ b/monitor.c @@ -2468,8 +2468,11 @@ static void do_loadvm(Monitor *mon, const QDict *qdict) vm_stop(0); -if (load_vmstate(name) = 0 saved_vm_running) +if (load_vmstate(name) = 0 saved_vm_running) { vm_start(); +} else { +monitor_printf(mon, Failed to load VM state. VM stopped.\n); +} } int monitor_get_fd(Monitor *mon, const char *fdname) diff --git a/savevm.c b/savevm.c index dc20390..6549ca7 100644 --- a/savevm.c +++ b/savevm.c @@ -1574,22 +1574,6 @@ out: return ret; } -/* device can contain snapshots */ -static int bdrv_can_snapshot(BlockDriverState *bs) -{ -return (bs -
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 4:32 PM, Gleb Natapov g...@redhat.com wrote: On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote: 2010/5/29 Gleb Natapov g...@redhat.com: On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. I will. Please do ASAP. This discussion shows that you don't understand what is the problem that we are dialing with. Which part of the problem you think I don't understand? It seams to me you don't understand how Windows uses RTC for time keeping and how the QEMU solves the problem today. RTC causes periodic interrupts and Windows interrupt handler increments jiffies, like Linux? guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. I think one problem is that the guest has executed too much compared to what would happen with real HW with a lesser CPU. That explains the RTC frequency reprogramming case. You think wrong. The problem is exactly opposite: the guest haven't had enough execution time between two time interrupts. I don't know what RTC frequency reprogramming case you are talking about here. The case you told me where N pending tick IRQs exist but the guest wants to change the RTC frequency from 64Hz to 1024Hz. Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change the frequency to 1000Hz. The problem for emulation is that for the same 3 ticks, there has been so little execution power that the ticks have been coalesced. But isn't the guest cycle count then much lower than 30Mcyc? Isn't it so that the guest must be above 30Mcyc to be able to want the change? But if we reach that point, the problem must have not been too little execution time, but too much. Sorry I tried hard to understand what have you said above but failed. What do you mean to be able to want the change? Guest sometimes wants to get 64 timer interrupts per second and sometimes it wants to get 1024 timer interrupt per second. It wants it not as a result of time drift or anything. It's just how guest behaves. You seams to be to fixated on guest frequency change. It's just something you have to take into account when you reinject interrupts. I meant that in the scenario, the guest won't change the RTC before 30Mcyc because of some built in determinism in the guest. At that point, because of some reason, the change would happen. And even if the rate did not matter, the APIC woult still have to now about the fact that an IRQ is really periodic and does not only appear as such for a certain interval. This really does not sound like simplifying things or even make them cleaner. It would, the voodoo would be contained only in APIC, RTC would be just like any other device. With the bidirectional irqs, this voodoo would probably eventually spread to many other devices. The logical conclusion of that would be a system where all devices would be careful not to disturb the guest at wrong moment because that would trigger a bug. This voodoo will be so complex and unreliable that it will make RTC hack pale in comparison (and I still don't see how you are going to make it actually work). Implement everything inside APIC: only coalescing and reinjection. APIC has zero info needed to implement reinjection correctly as was shown to you several time in this thread and you simply keep ignoring it. On the contrary, APIC is actually the only source of the IRQ ack information.
[Qemu-devel] [Bug 587344] [NEW] gfxmenu from GRUB or GRUB4DOS hang qemu.
Public bug reported: When you run the graphical menu GRUB (gfxmenu, any theme) execution stops. Test image (iso, as an attachment): http://www.multiupload.com/AAI7MGMOLE Image make options: mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso image_root/ With options and GFX menu: qemu -cdrom bootable.iso execution stops after rendering background. With options and GFX menu:: qemu -cdrom bootable.iso --no-kvm the menu is drawn, and the execution stops. ** Affects: qemu Importance: Undecided Status: New ** Tags: gfx gfxmenu grub grub4dos -- gfxmenu from GRUB or GRUB4DOS hang qemu. https://bugs.launchpad.net/bugs/587344 You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. Status in QEMU: New Bug description: When you run the graphical menu GRUB (gfxmenu, any theme) execution stops. Test image (iso, as an attachment): http://www.multiupload.com/AAI7MGMOLE Image make options: mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso image_root/ With options and GFX menu: qemu -cdrom bootable.iso execution stops after rendering background. With options and GFX menu:: qemu -cdrom bootable.iso --no-kvm the menu is drawn, and the execution stops.
[Qemu-devel] [Bug 587344] Re: gfxmenu from GRUB or GRUB4DOS hang qemu.
** Attachment added: Test image (iso, gzipped) http://launchpadlibrarian.net/49329042/bootable.iso.gz -- gfxmenu from GRUB or GRUB4DOS hang qemu. https://bugs.launchpad.net/bugs/587344 You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. Status in QEMU: New Bug description: When you run the graphical menu GRUB (gfxmenu, any theme) execution stops. Test image (iso, as an attachment): http://www.multiupload.com/AAI7MGMOLE Image make options: mkisofs -R -b grldr -no-emul-boot -boot-load-seg 0x1000 -o bootable.iso image_root/ With options and GFX menu: qemu -cdrom bootable.iso execution stops after rendering background. With options and GFX menu:: qemu -cdrom bootable.iso --no-kvm the menu is drawn, and the execution stops.
[Qemu-devel] Re: [PATCH] sparc32 SuperSPARC MMU Breakpoint Action register (SS-20 OBP fix)
Thanks, applied. On Sat, May 29, 2010 at 8:48 PM, Artyom Tarasenko atar4q...@googlemail.com wrote: SuperSPARC MMU Breakpoint Action register is used by OBP at boot The patch allows booting Solaris and some other OS with SPARCStation-20 OBP. Signed-off-by: Artyom Tarasenko atar4q...@gmail.com --- target-sparc/op_helper.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c index aaacfc4..ef3504f 100644 --- a/target-sparc/op_helper.c +++ b/target-sparc/op_helper.c @@ -1745,6 +1745,7 @@ uint64_t helper_ld_asi(target_ulong addr, int asi, int size, int sign) case 0x31: // Turbosparc RAM snoop case 0x32: // Turbosparc page table descriptor diagnostic case 0x39: /* data cache diagnostic register */ + case 0x4c: /* SuperSPARC MMU Breakpoint Action register */ ret = 0; break; case 0x38: /* SuperSPARC MMU Breakpoint Control Registers */ -- 1.6.2.5
[Qemu-devel] sparc mmu
2010/5/29 Blue Swirl blauwir...@gmail.com: Robert Reif did some improvements to SuperSparc emulation, but the work was not finished. That should be a good starting point. Do you mean the last patch he sent to us or are there some earlier unapplied patches? The last patch _seems_ to be mainly refactoring and disabling features which are not presented in certain models. I might have missed something of course: the patch is large. For me is also interesting what do we miss in the microSPARC implementation. If I switch off POST (which crashes due to the known FPU problems) LX/CX/X OBPs hang. Looks like it's expecting some interrupt (the SS-5 OBP escapes a similar endless loop on a timer irq), but not getting it. Do you know anything obvious qemu is missing in LX machine? -- Regards, Artyom Tarasenko solaris/sparc under qemu blog: http://tyom.blogspot.com/
Re: [Qemu-devel] [PATCH] Compile dma only once
On Fri, May 28, 2010 at 7:34 PM, Paul Brook p...@codesourcery.com wrote: Use a qemu_irq to request CPU exit. Needing to request a CPU exit at all is just wrong. See previous discussions about how any use of qemu_bh_schedule_idle is fundamentally broken. I agree for the device case. Is the attached patch then OK? No. You can't remove code without understanding why it was there in the first place. I'm pretty sure this will break FDC emulation, or at least make it unfeasibly slow. The underlying problem is that devices (in particular the FDC) don't communicate properly with the DMA engine. Instead they rely on the DMA device polling state at poorly defined intervals. Removing DMA_schedule without removing qemu_bh_schedule_idle is almost certainly wrong. My main objection to you original patch is that it introduces a new API for something that is just plain wrong. At minimum it needs a comment documenting that this function should never be used for anything - It only exists because we're too lazy to fix legacy code. But what about other uses (with the patch applied): I was just referring to device emulation. qemu_notify_event is also pretty bogus. It is a horrible hack to workaround deficiencies in the network API. Paul
Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback
On Sat, May 29, 2010 at 08:52:34PM +, Blue Swirl wrote: On Sat, May 29, 2010 at 4:32 PM, Gleb Natapov g...@redhat.com wrote: On Sat, May 29, 2010 at 04:03:22PM +, Blue Swirl wrote: 2010/5/29 Gleb Natapov g...@redhat.com: On Sat, May 29, 2010 at 09:15:11AM +, Blue Swirl wrote: There is no code, because we're still at architecture design stage. Try to write test code to understand the problem better. I will. Please do ASAP. This discussion shows that you don't understand what is the problem that we are dialing with. Which part of the problem you think I don't understand? It seams to me you don't understand how Windows uses RTC for time keeping and how the QEMU solves the problem today. RTC causes periodic interrupts and Windows interrupt handler increments jiffies, like Linux? Linux does much more complicated things than that to keep time, so the only way to fix time drift in Linux was to introduce pvclock. For Window it is not so accurate too, since Windows can change clock frequency any time it can't calculate time from jiffies, it needs to update clock at each time tick. guests could also be assisted with special handling (like win2k install hack), for example guest instructions could be counted (approximately, for example using TB size or TSC) and only inject after at least N instructions have passed. Guest instructions cannot be easily counted in KVM (it can be done more or less reliably using perf counters, may be). Aren't there any debug registers or perf counters, which can generate an interrupt after some number of instructions have been executed? Don't think debug registers have something like that and they are available for guest use anyway. Perf counters differs greatly from CPU to CPU (even between two CPUs of the same manufacturer), and we want to keep using them for profiling guests. And I don't see what problem it will solve anyway that can be solved by simple delay between irq reinjection. This would allow counting the executed instructions and limit it. Thus we could emulate a 500MHz CPU on a 2GHz CPU more accurately. Why would you want to limit number of instruction executed by guest if CPU has nothing else to do anyway? The problem occurs not when we have spare cycles so give to a guest, but in opposite case. I think one problem is that the guest has executed too much compared to what would happen with real HW with a lesser CPU. That explains the RTC frequency reprogramming case. You think wrong. The problem is exactly opposite: the guest haven't had enough execution time between two time interrupts. I don't know what RTC frequency reprogramming case you are talking about here. The case you told me where N pending tick IRQs exist but the guest wants to change the RTC frequency from 64Hz to 1024Hz. Let's make this more concrete. 1 GHz CPU, initially 100Hz RTC, so 10Mcycles/tick or 10ms/tick. At T = 30Mcycles, guest wants to change the frequency to 1000Hz. The problem for emulation is that for the same 3 ticks, there has been so little execution power that the ticks have been coalesced. But isn't the guest cycle count then much lower than 30Mcyc? Isn't it so that the guest must be above 30Mcyc to be able to want the change? But if we reach that point, the problem must have not been too little execution time, but too much. Sorry I tried hard to understand what have you said above but failed. What do you mean to be able to want the change? Guest sometimes wants to get 64 timer interrupts per second and sometimes it wants to get 1024 timer interrupt per second. It wants it not as a result of time drift or anything. It's just how guest behaves. You seams to be to fixated on guest frequency change. It's just something you have to take into account when you reinject interrupts. I meant that in the scenario, the guest won't change the RTC before 30Mcyc because of some built in determinism in the guest. At that point, because of some reason, the change would happen. I still don't understand what are you trying to say here. Guest changes frequency because of some even in the guest. It is totally independent of what happens in QEMUs RTC emulation. And even if the rate did not matter, the APIC woult still have to now about the fact that an IRQ is really periodic and does not only appear as such for a certain interval. This really does not sound like simplifying things or even make them cleaner. It would, the voodoo would be contained only in APIC, RTC would be just like any other device. With the bidirectional irqs, this voodoo would probably eventually spread to many other devices. The