[Qemu-devel] Doubts on SMP, VCPU and CONFIG_IOTHREAD
Sorry for being so confused, I am sure that there is some manual which I haven't read, but I am not able to find it :-\ I saw some things[1] about multiple vcpu, smp and things like that. It seemed to me that --enable-io-thread enables it. But, it only works for KVM, doesn't it? I assume that there is NOT one thread per vcpu in TCG mode. And is --enable-io-thread now the default? Is this option always active? Now I was wondering if something parallel is done in TCG (maybe through coroutines?). Not truly parallel, I know, but... logically parallel, if you know what I mean. I have been unable to find it in the code. Also, the problem with multithreading the execution of qemu-system is the translation, right? TCG is not thread safe and cannot be run in a parallel mode. Right? [1] http://blog.vmsplice.net/2011/03/qemu-internals-overall-architecture-and.html
Re: [Qemu-devel] [PATCH v6 2/4] exynos4210: Added SD host controller model
On 09/18/2012 06:41 AM, Peter Crosthwaite wrote: Ping! Igor, are you able to provide a diff of this patch so I can send the next revision? Sure, but I still don't understand what to do with QEMU-lockup issue, I believe the same topic was discussed here http://thread.gmane.org/gmane.comp.emulators.qemu/169524, and the decision was to use runtime loop detection? Regards, Peter On Mon, 2012-08-06 at 16:29 +0400, Igor Mitsyanko wrote: On 08/06/2012 02:56 PM, Peter Maydell wrote: On 6 August 2012 04:25, Peter A. G. Crosthwaite peter.crosthwa...@petalogix.com wrote: +static uint64_t +exynos4210_sdhci_readfn(void *opaque, target_phys_addr_t offset, unsigned size) +{ +Exynos4SDHCIState *s = (Exynos4SDHCIState *)opaque; +uint32_t ret; + +switch (offset ~0x3) { +case SDHC_BDATA: +/* Buffer data port read can be disabled by CONTROL2 register */ +if (s-control2 EXYNOS4_SDHC_DISBUFRD) { +ret = 0; +} else { +ret = SDHCI_GET_CLASS(s)-mem_read(SDHCI(s), offset, size); +} +break; +case SDHC_ADMAERR: +ret = (s-admaerr 8 * (offset - SDHC_ADMAERR)) +((1 8 * size) - 1); If size == 4 you've just shifted right by 32, which is undefined behaviour when ints are 32 bits. Try ret = extract32(s-admaerr, (offset 3) 3, size * 8); and similarly below. Ok +static void exynos4210_sdhci_writefn(void *opaque, target_phys_addr_t offset, +uint64_t val, unsigned size) +{ +Exynos4SDHCIState *s = (Exynos4SDHCIState *)opaque; +SDHCIState *sdhci = SDHCI(s); +unsigned shift; + +DPRINT_L2(write %ub: addr[0x%04x] - %u(0x%x)\n, size, (uint32_t)offset, +(uint32_t)val, (uint32_t)val); + +switch (offset) { +case SDHC_CLKCON: +if ((val SDHC_CLOCK_SDCLK_EN) +(sdhci-prnsts SDHC_CARD_PRESENT)) { +val |= EXYNOS4_SDHC_SDCLK_STBL; +} else { +val = ~EXYNOS4_SDHC_SDCLK_STBL; +} +/* Break out to superclass write to handle the rest of this register */ +break; +case EXYNOS4_SDHC_CONTROL2 ... EXYNOS4_SDHC_CONTROL2 + 3: Why do we switch (offset 3) in the readfn but switch (offset) and use case FOO ... FOO + 3 in the writefn? 
Consistency would be nice. I think I'll change readfn() switch to match writefn then, to avoid complicating SDHC_CLKON case. +shift = (offset - EXYNOS4_SDHC_CONTROL2) * 8; +s-control2 = (s-control2 ~(((1 8 * size) - 1) shift)) | +(val shift); s-control2 = deposit32(s-control2, (offset 3) 3, size * 8, val); and similarly below. +case SDHC_ADMAERR ... SDHC_ADMAERR + 3: +if (size == 4 || (size == 2 offset == SDHC_ADMAERR) || +(size == 1 offset == (SDHC_ADMAERR + 1))) { +uint32_t mask = 0; + +if (size == 2) { +mask = 0x; +} else if (size == 1) { +mask = 0x00FF; +val = 8; +} + +s-admaerr = (s-admaerr (mask | EXYNOS4_SDHC_FINAL_BLOCK | + EXYNOS4_SDHC_IRQ_STAT)) | (val ~(EXYNOS4_SDHC_FINAL_BLOCK | + EXYNOS4_SDHC_IRQ_STAT | EXYNOS4_SDHC_CONTINUE_REQ)); +s-admaerr = ~(val EXYNOS4_SDHC_IRQ_STAT); +if ((s-stopped_adma) (val EXYNOS4_SDHC_CONTINUE_REQ) +(SDHC_DMA_TYPE(sdhci-hostctl) == SDHC_CTRL_ADMA2_32)) { +s-stopped_adma = false; +SDHCI_GET_CLASS(sdhci)-do_adma(sdhci); +} +} else { +uint32_t mask = (1 (size * 8)) - 1; +shift = 8 * (offset 0x3); +val = shift; +mask = ~(mask shift); +s-admaerr = (s-admaerr mask) | val; +} +return; This case just looks odd. I think it would be clearer to first calculate the updated value of admaerr (using deposit32) and then act on the changes (xor of old and new value is handy to identify which bits are changed). ok -- PMM
[Qemu-devel] [PATCH 7/9] fbdev: move to pixman
Stop reinventing the wheel. Use the pixman library for raster ops. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- configure | 12 ui/fbdev.c | 172 +++ 2 files changed, 114 insertions(+), 70 deletions(-) diff --git a/configure b/configure index c4ba338..d10ff78 100755 --- a/configure +++ b/configure @@ -148,6 +148,7 @@ docs= fdt= nptl= sdl= +pixman= fbdev=no virtfs= vnc=yes @@ -2153,6 +2154,17 @@ else exit 1 fi +if $pkg_config pixman-1 /dev/null 21 +then +pixman=yes +pixman_cflags=`$pkg_config --cflags pixman-1 2/dev/null` +pixman_libs=`$pkg_config --libs pixman-1 2/dev/null` +QEMU_CFLAGS=$QEMU_CFLAGS $pixman_cflags +libs_softmmu=$libs_softmmu $pixman_libs +else +fbdev=no +fi + ## # libcap probe diff --git a/ui/fbdev.c b/ui/fbdev.c index 40fc7d4..4cb4d1d 100644 --- a/ui/fbdev.c +++ b/ui/fbdev.c @@ -23,11 +23,12 @@ #include linux/vt.h #include linux/fb.h +#include pixman.h + #include qemu-common.h #include console.h #include keymaps.h #include sysemu.h -#include pflib.h /* * must be last so we get the linux input layer @@ -70,19 +71,82 @@ static bool key_down[KEY_CNT]; #define FB_ACQ_REQ 3 static int fb_switch_state; -/* qdev windup */ +/* qemu windup */ static DisplayChangeListener *dcl; -static QemuPfConv *conv; -static PixelFormatfbpf; static intresize_screen; static intredraw_screen; static intcx, cy, cw, ch; static Notifier exit_notifier; +static pixman_image_t *surface; +static pixman_image_t *framebuffer; +static pixman_transform_t transform; +static pixman_region16_t dirty; /* fwd decls */ static int fbdev_activate_vt(int tty, int vtno, bool wait); /* */ +/* pixman helpers */ + +static int pixman_shifts_to_type(int rshift, int gshift, int bshift) +{ +int type = PIXMAN_TYPE_OTHER; + +if (rshift gshift gshift bshift) { +if (bshift == 0) { +type = PIXMAN_TYPE_ARGB; +} else { +#if PIXMAN_VERSION = PIXMAN_VERSION_ENCODE(0, 21, 8) +type = PIXMAN_TYPE_RGBA; +#endif +} +} else if (rshift gshift gshift bshift) { +if (rshift == 0) { +type = PIXMAN_TYPE_ABGR; +} else 
{ +type = PIXMAN_TYPE_BGRA; +} +} +return type; +} + +static pixman_image_t *pixman_from_displaystate(DisplayState *ds) +{ +PixelFormat *pf = ds-surface-pf; +pixman_format_code_t format; +pixman_image_t *image; +int type; + +type = pixman_shifts_to_type(pf-rshift, pf-gshift, pf-bshift); +format = PIXMAN_FORMAT(pf-bits_per_pixel, type, + pf-abits, pf-rbits, pf-gbits, pf-bbits); +image = pixman_image_create_bits(format, ds_get_width(ds), + ds_get_height(ds), + (void *)ds_get_data(ds), + ds_get_linesize(ds)); +return image; +} + +static pixman_image_t *pixman_from_framebuffer(void) +{ +pixman_format_code_t format; +pixman_image_t *image; +int type; + +type = pixman_shifts_to_type(fb_var.red.offset, + fb_var.green.offset, + fb_var.blue.offset); +format = PIXMAN_FORMAT(fb_var.bits_per_pixel, type, + fb_var.transp.length, + fb_var.red.length, + fb_var.green.length, + fb_var.blue.length); +image = pixman_image_create_bits(format, fb_var.xres, fb_var.yres, + (void *)fb_mem, fb_fix.line_length); +return image; +} + +/* */ /* mouse*/ static void read_mouse(void *opaque) @@ -529,6 +593,17 @@ static void fbdev_cleanup(void) { trace_fbdev_cleanup(); +/* release pixman stuff */ +pixman_region_fini(dirty); +if (framebuffer) { +pixman_image_unref(framebuffer); +framebuffer = NULL; +} +if (surface) { +pixman_image_unref(surface); +surface = NULL; +} + /* restore console */ if (fb_mem != NULL) { munmap(fb_mem, fb_fix.smem_len+fb_mem_offset); @@ -681,36 +756,8 @@ static int fbdev_init(const char *device) start_mediumraw(tty); qemu_set_fd_handler(tty, read_mediumraw, NULL, NULL); -/* create PixelFormat from fbdev structs */ -fbpf.bits_per_pixel = fb_var.bits_per_pixel; -fbpf.bytes_per_pixel = (fb_var.bits_per_pixel+7)/8; -
[Qemu-devel] [PATCH 9/9] fbdev: add display scaling support
Add support for scaling the guest display. Ctrl-Alt-S hotkey toggles scaling. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- ui/fbdev.c | 61 ++- 1 files changed, 51 insertions(+), 10 deletions(-) diff --git a/ui/fbdev.c b/ui/fbdev.c index 6835fef..55793ab 100644 --- a/ui/fbdev.c +++ b/ui/fbdev.c @@ -81,6 +81,7 @@ static pixman_image_t *surface; static pixman_image_t *framebuffer; static pixman_transform_t transform; static pixman_region16_t dirty; +static double scale; static QEMUCursor *ptr_cursor; static pixman_image_t *ptr_image; @@ -88,6 +89,10 @@ static intptr_refresh; static intpx, py, pw, ph; static intmx, my, mon; +/* options */ +static intuse_scale = 1; +static pixman_filter_tpfilter = PIXMAN_FILTER_GOOD; + /* fwd decls */ static int fbdev_activate_vt(int tty, int vtno, bool wait); @@ -182,13 +187,14 @@ static void read_mouse(void *opaque) if (ay 0) { ay = 0; } -if (ax = cw) { -ax = cw-1; +if (ax = cw*scale) { +ax = cw*scale-1; } -if (ay = ch) { -ay = ch-1; +if (ay = ch*scale) { +ay = ch*scale-1; } -kbd_mouse_event(ax * 0x7FFF / cw, ay * 0x7FFF / ch, 0, b); +kbd_mouse_event(ax * 0x7FFF / (cw*scale), +ay * 0x7FFF / (ch*scale), 0, b); } else { kbd_mouse_event(x, y, 0, b); } @@ -543,6 +549,12 @@ static void read_mediumraw(void *opaque) (ctrl-alt-esc) ===\n); exit(1); } +if (keycode == KEY_S) { +use_scale = !use_scale; +resize_screen++; +redraw_screen++; +continue; +} if (keycode = KEY_F1 keycode = KEY_F10) { fbdev_activate_vt(tty, keycode+1-KEY_F1, false); key_down[keycode] = false; @@ -912,6 +924,11 @@ static void fbdev_render_ptr(DisplayState *ds) pixman_transform_translate(transform, NULL, pixman_int_to_fixed(-cx), pixman_int_to_fixed(-cy)); +if (use_scale) { +pixman_transform_scale(transform, NULL, + pixman_double_to_fixed(1/scale), + pixman_double_to_fixed(1/scale)); +} pixman_transform_translate(transform, NULL, pixman_int_to_fixed(-px), pixman_int_to_fixed(-py)); @@ -937,16 +954,32 @@ static void fbdev_update(DisplayState *ds, int x, int y, int w, 
int h) } if (resize_screen) { +double xs, ys; + trace_fbdev_dpy_resize(ds_get_width(ds), ds_get_height(ds)); resize_screen = 0; cx = 0; cy = 0; cw = ds_get_width(ds); ch = ds_get_height(ds); -if (ds_get_width(ds) fb_var.xres) { -cx = (fb_var.xres - ds_get_width(ds)) / 2; -} -if (ds_get_height(ds) fb_var.yres) { -cy = (fb_var.yres - ds_get_height(ds)) / 2; + +if (use_scale) { +xs = (double)fb_var.xres / cw; +ys = (double)fb_var.yres / ch; +if (xs ys) { +scale = ys; +cx = (fb_var.xres - ds_get_width(ds)*scale) / 2; +} else { +scale = xs; +cy = (fb_var.yres - ds_get_height(ds)*scale) / 2; +} +} else { +scale = 1; +if (ds_get_width(ds) fb_var.xres) { +cx = (fb_var.xres - ds_get_width(ds)) / 2; +} +if (ds_get_height(ds) fb_var.yres) { +cy = (fb_var.yres - ds_get_height(ds)) / 2; +} } if (surface) { pixman_image_unref(surface); @@ -957,7 +990,14 @@ static void fbdev_update(DisplayState *ds, int x, int y, int w, int h) pixman_transform_translate(transform, NULL, pixman_int_to_fixed(-cx), pixman_int_to_fixed(-cy)); +if (use_scale) { +pixman_transform_scale(transform, NULL, + pixman_double_to_fixed(1/scale), + pixman_double_to_fixed(1/scale)); +} pixman_image_set_transform(surface, transform); + +pixman_image_set_filter(surface, pfilter, NULL, 0); } if (redraw_screen) { @@ -1049,6 +1089,7 @@ static void fbdev_cursor_define(DisplayState *ds, QEMUCursor *cursor) cursor-width, cursor-height,
[Qemu-devel] [PATCH 4/9] fbdev: add linux framebuffer display driver.
Display works, requires truecolor framebuffer with 16 or 32 bpp on the host. 32bpp is recommended. The framebuffer is used as-is, qemu doesn't try to switch modes. With LCD displays mode switching is pretty pointless IMHO, also it wouldn't work anyway with the most common fbdev drivers (vesafb, KMS). Guest display is centered on the host screen. Mouse works, uses /dev/input/mice. Keyboard works. Guest screen has whatever keymap you load inside the guest. Text windows (monitor, serial, ...) have a simple en-us keymap. Good enough to type monitor commands. Not good enough to work seriously on a serial terminal. But the qemu terminal emulation isn't good enough for that anyway ;) Hot keys: Ctrl-Alt-F<nr> - host console switching. Ctrl-Alt-<nr> - qemu console switching. Ctrl-Alt-ESC - exit qemu. Special feature: Sane console switching. Switching away stops screen updates. Switching back redraws the screen. When started from the linux console qemu uses the vt you've started it from (requires just read/write access to /dev/fb0). When starting from somewhere else qemu tries to open an unused virtual terminal and switch to it (usually requires root privileges to open /dev/tty<nr>). 
Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.h |4 + qemu-options.hx |8 + sysemu.h|1 + trace-events| 15 + ui/Makefile.objs|1 + ui/fbdev.c | 974 +++ ui/linux-keynames.h | 388 vl.c| 12 + 8 files changed, 1403 insertions(+), 0 deletions(-) create mode 100644 ui/fbdev.c create mode 100644 ui/linux-keynames.h diff --git a/console.h b/console.h index bef2d2d..0a3bae2 100644 --- a/console.h +++ b/console.h @@ -417,6 +417,10 @@ void qemu_console_copy(DisplayState *ds, int src_x, int src_y, /* sdl.c */ void sdl_display_init(DisplayState *ds, int full_screen, int no_frame); +/* fbdev.c */ +int fbdev_display_init(DisplayState *ds, const char *device); +void fbdev_display_uninit(DisplayState *ds); + /* cocoa.m */ void cocoa_display_init(DisplayState *ds, int full_screen); diff --git a/qemu-options.hx b/qemu-options.hx index 09c86c4..3445655 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -947,6 +947,14 @@ Enable/disable spice seamless migration. Default is off. @end table ETEXI +DEF(fbdev, 0, QEMU_OPTION_fbdev, +-fbdev enable fbdev\n, QEMU_ARCH_ALL) +STEXI +@item -fbdev +@findex -fbdev +Enable fbdev (linux framebuffer). 
+ETEXI + DEF(portrait, 0, QEMU_OPTION_portrait, -portrait rotate graphical output 90 deg left (only PXA LCD)\n, QEMU_ARCH_ALL) diff --git a/sysemu.h b/sysemu.h index 65552ac..34e6bfa 100644 --- a/sysemu.h +++ b/sysemu.h @@ -93,6 +93,7 @@ typedef enum DisplayType DT_DEFAULT, DT_CURSES, DT_SDL, +DT_FBDEV, DT_NOGRAPHIC, DT_NONE, } DisplayType; diff --git a/trace-events b/trace-events index b48fe2d..0d0b7fa 100644 --- a/trace-events +++ b/trace-events @@ -994,3 +994,18 @@ spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) func %u, requested % spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) queries for #%u, IRQ%u spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) @%PRIx64=%PRIx64 IRQ %u spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) %s PIN%d IRQ %u + +# ui/fbdev.c +fbdev_enabled(void) +fbdev_cleanup(void) +fbdev_vt_activate(int vtno, int wait) vtno %d, wait %d +fbdev_vt_activated(void) +fbdev_vt_release_request(void) +fbdev_vt_released(void) +fbdev_vt_aquire_request(void) +fbdev_vt_aquired(void) +fbdev_kbd_raw(int enable) enable %d +fbdev_kbd_event(int keycode, const char *kname, int up) keycode 0x%x [%s], down %d +fbdev_dpy_resize(int w, int h) %dx%d +fbdev_dpy_redraw(void) + diff --git a/ui/Makefile.objs b/ui/Makefile.objs index adc07be..55ddcf2 100644 --- a/ui/Makefile.objs +++ b/ui/Makefile.objs @@ -12,3 +12,4 @@ common-obj-$(CONFIG_SDL) += sdl.o sdl_zoom.o x_keymap.o common-obj-$(CONFIG_COCOA) += cocoa.o common-obj-$(CONFIG_CURSES) += curses.o common-obj-$(CONFIG_VNC) += $(vnc-obj-y) +common-obj-$(CONFIG_LINUX) += fbdev.o diff --git a/ui/fbdev.c b/ui/fbdev.c new file mode 100644 index 000..40fc7d4 --- /dev/null +++ b/ui/fbdev.c @@ -0,0 +1,974 @@ +/* + * linux fbdev output driver. + * + * Author: Gerd Hoffmann kra...@redhat.com + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ +#include stdio.h +#include stdlib.h +#include stdbool.h +#include string.h +#include unistd.h +#include fcntl.h +#include signal.h +#include termios.h + +#include sys/ioctl.h +#include sys/mman.h + +#include linux/kd.h +#include linux/vt.h +#include linux/fb.h + +#include qemu-common.h +#include console.h +#include keymaps.h +#include sysemu.h +#include pflib.h + +/* + * must be last so we get the
[Qemu-devel] [PATCH 2/9] add unregister_displaychangelistener
Also change the way the gui_timer is initialized: each time a displaychangelistener is registered or unregistered we'll check whenever we need a timer (due to dpy_refresh callback being present) and if so setup a timer, otherwise zap it. This way the gui timer works correctly with displaychangelisteners coming and going. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.h | 10 ++ vl.c | 31 +++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/console.h b/console.h index 646ad4b..48fef22 100644 --- a/console.h +++ b/console.h @@ -229,9 +229,19 @@ static inline int is_buffer_shared(DisplaySurface *surface) !(surface-flags QEMU_REALPIXELS_FLAG)); } +void gui_setup_refresh(DisplayState *ds); + static inline void register_displaychangelistener(DisplayState *ds, DisplayChangeListener *dcl) { QLIST_INSERT_HEAD(ds-listeners, dcl, next); +gui_setup_refresh(ds); +} + +static inline void unregister_displaychangelistener(DisplayState *ds, +DisplayChangeListener *dcl) +{ +QLIST_REMOVE(dcl, next); +gui_setup_refresh(ds); } static inline void dpy_update(DisplayState *s, int x, int y, int w, int h) diff --git a/vl.c b/vl.c index 2a7c92a..fbb77fe 100644 --- a/vl.c +++ b/vl.c @@ -1288,6 +1288,29 @@ static void gui_update(void *opaque) qemu_mod_timer(ds-gui_timer, interval + qemu_get_clock_ms(rt_clock)); } +void gui_setup_refresh(DisplayState *ds) +{ +DisplayChangeListener *dcl; +bool need_timer = false; + +QLIST_FOREACH(dcl, ds-listeners, next) { +if (dcl-dpy_refresh != NULL) { +need_timer = true; +break; +} +} + +if (need_timer ds-gui_timer == NULL) { +ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds); +qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock)); +} +if (!need_timer ds-gui_timer != NULL) { +qemu_del_timer(ds-gui_timer); +qemu_free_timer(ds-gui_timer); +ds-gui_timer = NULL; +} +} + struct vm_change_state_entry { VMChangeStateHandler *cb; void *opaque; @@ -2350,7 +2373,6 @@ int main(int argc, char **argv, char **envp) const char 
*kernel_filename, *kernel_cmdline; char boot_devices[33] = cad; /* default to HD-floppy-CD-ROM */ DisplayState *ds; -DisplayChangeListener *dcl; int cyls, heads, secs, translation; QemuOpts *hda_opts = NULL, *opts, *machine_opts; QemuOptsList *olist; @@ -3698,13 +3720,6 @@ int main(int argc, char **argv, char **envp) /* display setup */ dpy_resize(ds); -QLIST_FOREACH(dcl, ds-listeners, next) { -if (dcl-dpy_refresh != NULL) { -ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds); -qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock)); -break; -} -} text_consoles_set_display(ds); if (foreach_device_config(DEV_GDB, gdbserver_start) 0) { -- 1.7.1
[Qemu-devel] [PATCH 3/9] move set_mouse + cursor_define callbacks
When adding DisplayChangeListeners the set_mouse and cursor_define callbacks have been left in DisplayState for some reason. Fix it. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.c |2 +- console.h | 39 +++ hw/jazz_led.c |2 +- hw/qxl-render.c|2 +- hw/vga.c | 10 +- hw/vmware_vga.c| 11 ++- ui/sdl.c |8 ui/spice-display.c |4 ++-- ui/vnc.c |8 9 files changed, 59 insertions(+), 27 deletions(-) diff --git a/console.c b/console.c index a8bcc42..cc0479b 100644 --- a/console.c +++ b/console.c @@ -1239,7 +1239,7 @@ static void text_console_update(void *opaque, console_ch_t *chardata) s-text_y[1] = 0; } if (s-cursor_invalidate) { -dpy_cursor(s-ds, s-x, s-y); +dpy_text_cursor(s-ds, s-x, s-y); s-cursor_invalidate = 0; } } diff --git a/console.h b/console.h index 48fef22..bef2d2d 100644 --- a/console.h +++ b/console.h @@ -164,6 +164,9 @@ struct DisplayChangeListener { int w, int h, uint32_t c); void (*dpy_text_cursor)(struct DisplayState *s, int x, int y); +void (*dpy_mouse_set)(struct DisplayState *s, int x, int y, int on); +void (*dpy_cursor_define)(struct DisplayState *s, QEMUCursor *cursor); + QLIST_ENTRY(DisplayChangeListener) next; }; @@ -181,9 +184,6 @@ struct DisplayState { struct DisplayAllocator* allocator; QLIST_HEAD(, DisplayChangeListener) listeners; -void (*mouse_set)(int x, int y, int on); -void (*cursor_define)(QEMUCursor *cursor); - struct DisplayState *next; }; @@ -304,7 +304,7 @@ static inline void dpy_fill(struct DisplayState *s, int x, int y, } } -static inline void dpy_cursor(struct DisplayState *s, int x, int y) +static inline void dpy_text_cursor(struct DisplayState *s, int x, int y) { struct DisplayChangeListener *dcl; QLIST_FOREACH(dcl, s-listeners, next) { @@ -314,6 +314,37 @@ static inline void dpy_cursor(struct DisplayState *s, int x, int y) } } +static inline void dpy_mouse_set(struct DisplayState *s, int x, int y, int on) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_mouse_set) { 
+dcl-dpy_mouse_set(s, x, y, on); +} +} +} + +static inline void dpy_cursor_define(struct DisplayState *s, QEMUCursor *cursor) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_cursor_define) { +dcl-dpy_cursor_define(s, cursor); +} +} +} + +static inline bool dpy_cursor_define_supported(struct DisplayState *s) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_cursor_define) { +return true; +} +} +return false; +} + static inline int ds_get_linesize(DisplayState *ds) { return ds-surface-linesize; diff --git a/hw/jazz_led.c b/hw/jazz_led.c index 6486523..c4d54e2 100644 --- a/hw/jazz_led.c +++ b/hw/jazz_led.c @@ -210,7 +210,7 @@ static void jazz_led_text_update(void *opaque, console_ch_t *chardata) LedState *s = opaque; char buf[2]; -dpy_cursor(s-ds, -1, -1); +dpy_text_cursor(s-ds, -1, -1); qemu_console_resize(s-ds, 2, 1); /* TODO: draw the segments */ diff --git a/hw/qxl-render.c b/hw/qxl-render.c index e2e3fe2..085a090 100644 --- a/hw/qxl-render.c +++ b/hw/qxl-render.c @@ -238,7 +238,7 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext) return 1; } -if (!qxl-ssd.ds-mouse_set || !qxl-ssd.ds-cursor_define) { +if (!dpy_cursor_define_supported(qxl-ssd.ds)) { return 0; } diff --git a/hw/vga.c b/hw/vga.c index afaef0d..ec4f0c5 100644 --- a/hw/vga.c +++ b/hw/vga.c @@ -2081,11 +2081,11 @@ static void vga_update_text(void *opaque, console_ch_t *chardata) s-cr[VGA_CRTC_CURSOR_END] != s-cursor_end || full_update) { cursor_visible = !(s-cr[VGA_CRTC_CURSOR_START] 0x20); if (cursor_visible cursor_offset size cursor_offset = 0) -dpy_cursor(s-ds, - TEXTMODE_X(cursor_offset), - TEXTMODE_Y(cursor_offset)); +dpy_text_cursor(s-ds, +TEXTMODE_X(cursor_offset), +TEXTMODE_Y(cursor_offset)); else -dpy_cursor(s-ds, -1, -1); +dpy_text_cursor(s-ds, -1, -1); s-cursor_offset = cursor_offset; s-cursor_start = s-cr[VGA_CRTC_CURSOR_START]; s-cursor_end = s-cr[VGA_CRTC_CURSOR_END]; @@ -2146,7 +2146,7 @@ 
static void vga_update_text(void *opaque, console_ch_t *chardata) /* Display a message */ s-last_width = 60; s-last_height = height = 3; -dpy_cursor(s-ds, -1, -1); +
[Qemu-devel] [PATCH 6/9] fbdev: make configurable at compile time.
Add CONFIG_FBDEV, add --enable-fbdev and --disable-fbdev configure switches so fbdev can be enabled/disabled at compile time. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- configure| 12 qmp.c|2 +- ui/Makefile.objs |2 +- vl.c |4 ++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 8564142..c4ba338 100755 --- a/configure +++ b/configure @@ -148,6 +148,7 @@ docs= fdt= nptl= sdl= +fbdev=no virtfs= vnc=yes sparse=no @@ -527,6 +528,7 @@ Haiku) usb=linux kvm=yes vhost_net=yes + fbdev=yes if [ $cpu = i386 -o $cpu = x86_64 ] ; then audio_possible_drivers=$audio_possible_drivers fmod fi @@ -658,6 +660,10 @@ for opt do ;; --enable-sdl) sdl=yes ;; + --disable-fbdev) fbdev=no + ;; + --enable-fbdev) fbdev=yes + ;; --disable-virtfs) virtfs=no ;; --enable-virtfs) virtfs=yes @@ -1070,6 +1076,8 @@ echo --disable-strip disable stripping binaries echo --disable-werror disable compilation abort on warning echo --disable-sdldisable SDL echo --enable-sdl enable SDL +echo --disable-fbdev disable linux framebuffer +echo --enable-fbdev enable linux framebuffer echo --disable-virtfs disable VirtFS echo --enable-virtfs enable VirtFS echo --disable-vncdisable VNC @@ -3159,6 +3167,7 @@ if test $darwin = yes ; then echo Cocoa support $cocoa fi echo SDL support $sdl +echo fbdev support $fbdev echo curses support$curses echo curl support $curl echo mingw32 support $mingw32 @@ -3367,6 +3376,9 @@ if test $sdl = yes ; then echo CONFIG_SDL=y $config_host_mak echo SDL_CFLAGS=$sdl_cflags $config_host_mak fi +if test $fbdev = yes ; then + echo CONFIG_FBDEV=y $config_host_mak +fi if test $cocoa = yes ; then echo CONFIG_COCOA=y $config_host_mak fi diff --git a/qmp.c b/qmp.c index 7f6cc0b..060d804 100644 --- a/qmp.c +++ b/qmp.c @@ -393,7 +393,7 @@ void qmp_change(const char *device, const char *target, void qmp_fbdev(bool enable, Error **errp) { -#if defined(CONFIG_LINUX) +#if defined(CONFIG_FBDEV) DisplayState *ds = get_displaystate(); if (enable) { diff 
--git a/ui/Makefile.objs b/ui/Makefile.objs index 55ddcf2..479cd01 100644 --- a/ui/Makefile.objs +++ b/ui/Makefile.objs @@ -12,4 +12,4 @@ common-obj-$(CONFIG_SDL) += sdl.o sdl_zoom.o x_keymap.o common-obj-$(CONFIG_COCOA) += cocoa.o common-obj-$(CONFIG_CURSES) += curses.o common-obj-$(CONFIG_VNC) += $(vnc-obj-y) -common-obj-$(CONFIG_LINUX) += fbdev.o +common-obj-$(CONFIG_FBDEV) += fbdev.o diff --git a/vl.c b/vl.c index 18982b2..d39352c 100644 --- a/vl.c +++ b/vl.c @@ -3041,7 +3041,7 @@ int main(int argc, char **argv, char **envp) fprintf(stderr, SDL support is disabled\n); exit(1); #endif -#ifdef CONFIG_LINUX +#ifdef CONFIG_FBDEV case QEMU_OPTION_fbdev: display_type = DT_FBDEV; break; @@ -3686,7 +3686,7 @@ int main(int argc, char **argv, char **envp) curses_display_init(ds, full_screen); break; #endif -#if defined(CONFIG_LINUX) +#if defined(CONFIG_FBDEV) case DT_FBDEV: if (fbdev_display_init(ds, NULL) != 0) { exit(1); -- 1.7.1
[Qemu-devel] [PATCH 5/9] fbdev: add monitor command to enable/disable
This patch adds a fbdev monitor command to enable/disable the fbdev display at runtime to both qmp and hmp. qmp: fbdev enable=on|off hmp: fbdev on|off Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hmp-commands.hx | 15 +++ hmp.c|9 + hmp.h|1 + qapi-schema.json | 14 ++ qmp-commands.hx |6 ++ qmp.c| 17 + 6 files changed, 62 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index ed67e99..366a92b 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1377,6 +1377,21 @@ passed since 1970, i.e. unix epoch. ETEXI { +.name = fbdev, +.args_type = enable:b, +.params = on|off, +.help = enable/disable fbdev, +.mhandler.cmd = hmp_fbdev, +}, + +STEXI +@item fbdev on | off +@findex fbdev + +enable/disable fbdev +ETEXI + +{ .name = info, .args_type = item:s?, .params = [subcommand], diff --git a/hmp.c b/hmp.c index ba6fbd3..a7feec5 100644 --- a/hmp.c +++ b/hmp.c @@ -1168,3 +1168,12 @@ void hmp_screen_dump(Monitor *mon, const QDict *qdict) qmp_screendump(filename, err); hmp_handle_error(mon, err); } + +void hmp_fbdev(Monitor *mon, const QDict *qdict) +{ +int enable = qdict_get_bool(qdict, enable); +Error *errp = NULL; + +qmp_fbdev(enable, errp); +hmp_handle_error(mon, errp); +} diff --git a/hmp.h b/hmp.h index 48b9c59..9c3d315 100644 --- a/hmp.h +++ b/hmp.h @@ -73,5 +73,6 @@ void hmp_getfd(Monitor *mon, const QDict *qdict); void hmp_closefd(Monitor *mon, const QDict *qdict); void hmp_send_key(Monitor *mon, const QDict *qdict); void hmp_screen_dump(Monitor *mon, const QDict *qdict); +void hmp_fbdev(Monitor *mon, const QDict *qdict); #endif diff --git a/qapi-schema.json b/qapi-schema.json index 14e4419..901c2e8 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2619,3 +2619,17 @@ # Since: 0.14.0 ## { 'command': 'screendump', 'data': {'filename': 'str'} } + +# @fbdev: +# +# Enable/disable fbdev. +# +# @enable: whenever fbdev should be enabled or disabled. +# +# Returns: Nothing on success +# GenericError on failure. 
+# +# Since: 1.3 +# +## +{ 'command': 'fbdev', 'data': {'enable': 'bool'} } diff --git a/qmp-commands.hx b/qmp-commands.hx index 6e21ddb..4b95fd0 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2539,3 +2539,9 @@ EQMP .args_type = , .mhandler.cmd_new = qmp_marshal_input_query_target, }, + +{ +.name = fbdev, +.args_type = enable:b, +.mhandler.cmd_new = qmp_marshal_input_fbdev, +}, diff --git a/qmp.c b/qmp.c index 8463922..7f6cc0b 100644 --- a/qmp.c +++ b/qmp.c @@ -391,6 +391,23 @@ void qmp_change(const char *device, const char *target, } } +void qmp_fbdev(bool enable, Error **errp) +{ +#if defined(CONFIG_LINUX) +DisplayState *ds = get_displaystate(); + +if (enable) { +if (fbdev_display_init(ds, NULL) != 0) { +error_setg(errp, fbdev initialization failed); +} +} else { +fbdev_display_uninit(ds); +} +#else +error_set(errp, QERR_FEATURE_DISABLED, fbdev); +#endif +} + static void qom_list_types_tramp(ObjectClass *klass, void *data) { ObjectTypeInfoList *e, **pret = data; -- 1.7.1
[Qemu-devel] [PATCH 1/9] QLIST-ify display change listeners.
Signed-off-by: Gerd Hoffmann kra...@redhat.com --- console.h | 72 +++ hw/xenfb.c |2 +- vl.c |9 ++- 3 files changed, 42 insertions(+), 41 deletions(-) diff --git a/console.h b/console.h index f990684..646ad4b 100644 --- a/console.h +++ b/console.h @@ -164,7 +164,7 @@ struct DisplayChangeListener { int w, int h, uint32_t c); void (*dpy_text_cursor)(struct DisplayState *s, int x, int y); -struct DisplayChangeListener *next; +QLIST_ENTRY(DisplayChangeListener) next; }; struct DisplayAllocator { @@ -179,7 +179,7 @@ struct DisplayState { struct QEMUTimer *gui_timer; struct DisplayAllocator* allocator; -struct DisplayChangeListener* listeners; +QLIST_HEAD(, DisplayChangeListener) listeners; void (*mouse_set)(int x, int y, int on); void (*cursor_define)(QEMUCursor *cursor); @@ -231,72 +231,76 @@ static inline int is_buffer_shared(DisplaySurface *surface) static inline void register_displaychangelistener(DisplayState *ds, DisplayChangeListener *dcl) { -dcl-next = ds-listeners; -ds-listeners = dcl; +QLIST_INSERT_HEAD(ds-listeners, dcl, next); } static inline void dpy_update(DisplayState *s, int x, int y, int w, int h) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { dcl-dpy_update(s, x, y, w, h); -dcl = dcl-next; } } static inline void dpy_resize(DisplayState *s) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { dcl-dpy_resize(s); -dcl = dcl-next; } } static inline void dpy_setdata(DisplayState *s) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_setdata) dcl-dpy_setdata(s); -dcl = dcl-next; +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_setdata) { +dcl-dpy_setdata(s); +} } } static inline void dpy_refresh(DisplayState *s) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != 
NULL) { -if (dcl-dpy_refresh) dcl-dpy_refresh(s); -dcl = dcl-next; +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_refresh) { +dcl-dpy_refresh(s); +} } } static inline void dpy_copy(struct DisplayState *s, int src_x, int src_y, - int dst_x, int dst_y, int w, int h) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_copy) + int dst_x, int dst_y, int w, int h) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_copy) { dcl-dpy_copy(s, src_x, src_y, dst_x, dst_y, w, h); -else /* TODO */ +} else { /* TODO */ dcl-dpy_update(s, dst_x, dst_y, w, h); -dcl = dcl-next; +} } } static inline void dpy_fill(struct DisplayState *s, int x, int y, - int w, int h, uint32_t c) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_fill) dcl-dpy_fill(s, x, y, w, h, c); -dcl = dcl-next; + int w, int h, uint32_t c) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_fill) { +dcl-dpy_fill(s, x, y, w, h, c); +} } } -static inline void dpy_cursor(struct DisplayState *s, int x, int y) { -struct DisplayChangeListener *dcl = s-listeners; -while (dcl != NULL) { -if (dcl-dpy_text_cursor) dcl-dpy_text_cursor(s, x, y); -dcl = dcl-next; +static inline void dpy_cursor(struct DisplayState *s, int x, int y) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_text_cursor) { +dcl-dpy_text_cursor(s, x, y); +} } } diff --git a/hw/xenfb.c b/hw/xenfb.c index 338800a..ef24c33 100644 --- a/hw/xenfb.c +++ b/hw/xenfb.c @@ -717,7 +717,7 @@ static void xenfb_update(void *opaque) if (xenfb_queue_full(xenfb)) return; -for (l = xenfb-c.ds-listeners; l != NULL; l = l-next) { +QLIST_FOREACH(l, xenfb-c.ds-listeners, next) { if (l-idle) continue; idle = 0; diff --git a/vl.c b/vl.c index 7c577fa..2a7c92a 100644 --- a/vl.c +++ b/vl.c @@ -1276,15 +1276,14 @@ static void gui_update(void 
*opaque) { uint64_t interval = GUI_REFRESH_INTERVAL; DisplayState *ds = opaque; -DisplayChangeListener *dcl = ds-listeners; +DisplayChangeListener *dcl;
[Qemu-devel] [PATCH 8/9] fbdev: add mouse pointer support
Add mouse_set and cursor_define DisplayChangeListener callbacks and mouse pointer rendering support. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- ui/fbdev.c | 95 1 files changed, 95 insertions(+), 0 deletions(-) diff --git a/ui/fbdev.c b/ui/fbdev.c index 4cb4d1d..6835fef 100644 --- a/ui/fbdev.c +++ b/ui/fbdev.c @@ -82,6 +82,12 @@ static pixman_image_t *framebuffer; static pixman_transform_t transform; static pixman_region16_t dirty; +static QEMUCursor *ptr_cursor; +static pixman_image_t *ptr_image; +static intptr_refresh; +static intpx, py, pw, ph; +static intmx, my, mon; + /* fwd decls */ static int fbdev_activate_vt(int tty, int vtno, bool wait); @@ -876,6 +882,51 @@ static void fbdev_render(DisplayState *ds) pixman_region_init(dirty); } +static void fbdev_unrender_ptr(DisplayState *ds) +{ +if (!pw !ph) { +return; +} +pixman_region_union_rect(dirty, dirty, px, py, pw, ph); +ph = pw = 0; +} + +static void fbdev_render_ptr(DisplayState *ds) +{ +pixman_region16_t region; +pixman_transform_t transform; + +if (!mon || !ptr_image) { +return; +} +if (mx 0 || mx = cw || my 0 || my = ch) { +return; +} + +px = mx - ptr_cursor-hot_x; +py = my - ptr_cursor-hot_y; +pw = ptr_cursor-width; +ph = ptr_cursor-height; + +pixman_transform_init_identity(transform); +pixman_transform_translate(transform, NULL, + pixman_int_to_fixed(-cx), + pixman_int_to_fixed(-cy)); +pixman_transform_translate(transform, NULL, + pixman_int_to_fixed(-px), + pixman_int_to_fixed(-py)); +pixman_image_set_transform(ptr_image, transform); + +pixman_region_init_rect(region, 0, 0, pw, ph); +pixman_image_set_clip_region(ptr_image, region); + +pixman_image_composite(PIXMAN_OP_OVER, ptr_image, NULL, framebuffer, + 0, 0, 0, 0, 0, 0, fb_var.xres, fb_var.yres); + +pixman_region_fini(region); +ptr_refresh = 0; +} + /* */ /* qemu interfaces */ @@ -917,6 +968,9 @@ static void fbdev_update(DisplayState *ds, int x, int y, int w, int h) } pixman_region_union_rect(dirty, dirty, x, y, w, h); +if (ptr_image mon pw ph) 
{ +ptr_refresh++; +} } static void fbdev_resize(DisplayState *ds) @@ -953,9 +1007,48 @@ static void fbdev_refresh(DisplayState *ds) fbdev_update(ds, 0, 0, 0, 0); } +if (ptr_refresh) { +fbdev_unrender_ptr(ds); +} if (pixman_region_not_empty(dirty)) { fbdev_render(ds); } +if (ptr_refresh) { +fbdev_render_ptr(ds); +} +} + +static void fbdev_mouse_set(DisplayState *ds, int x, int y, int on) +{ +ptr_refresh++; +mx = x; +my = y; +mon = on; +} + +static void fbdev_cursor_define(DisplayState *ds, QEMUCursor *cursor) +{ +ptr_refresh++; + +if (ptr_cursor) { +cursor_put(ptr_cursor); +ptr_cursor = NULL; +} +if (ptr_image) { +pixman_image_unref(ptr_image); +ptr_image = NULL; +} + +if (!cursor) { +return; +} + +ptr_cursor = cursor; +cursor_get(ptr_cursor); +ptr_image = pixman_image_create_bits(PIXMAN_a8r8g8b8, + cursor-width, cursor-height, + cursor-data, + cursor-width * 4); } static void fbdev_exit_notifier(Notifier *notifier, void *data) @@ -984,6 +1077,8 @@ int fbdev_display_init(DisplayState *ds, const char *device) dcl-dpy_resize = fbdev_resize; dcl-dpy_setdata = fbdev_setdata; dcl-dpy_refresh = fbdev_refresh; +dcl-dpy_mouse_set = fbdev_mouse_set; +dcl-dpy_cursor_define = fbdev_cursor_define; register_displaychangelistener(ds, dcl); trace_fbdev_enabled(); -- 1.7.1
[Qemu-devel] [PULL 0/9] linux framebuffer display driver
Hi, Third round of the framebuffer display driver patches, including git tree for pull as I think it's ready now. Changes: Addressed review comments from Markus. Catch a few more fatal signals, especially SIGABRT, so fbdev restores your console when qemu runs into an assert(). Misc little tweaks, no major changes. cheers, Gerd The following changes since commit 6b80f7db8a7f84d21e46d01e30c8497733bb23a0: Merge remote-tracking branch 'kiszka/queues/slirp' into staging (2012-09-17 10:23:20 -0500) are available in the git repository at: git://git.kraxel.org/qemu fbdev.1 Gerd Hoffmann (9): QLIST-ify display change listeners. add unregister_displaychangelistener move set_mouse + cursor_define callbacks fbdev: add linux framebuffer display driver. fbdev: add monitor command to enable/disable fbdev: make configurable at compile time. fbdev: move to pixman fbdev: add mouse pointer support fbdev: add display scaling support configure | 24 ++ console.c |2 +- console.h | 123 -- hmp-commands.hx | 15 + hmp.c |9 + hmp.h |1 + hw/jazz_led.c |2 +- hw/qxl-render.c |2 +- hw/vga.c| 10 +- hw/vmware_vga.c | 11 +- hw/xenfb.c |2 +- qapi-schema.json| 14 + qemu-options.hx |8 + qmp-commands.hx |6 + qmp.c | 17 + sysemu.h|1 + trace-events| 15 + ui/Makefile.objs|1 + ui/fbdev.c | 1142 +++ ui/linux-keynames.h | 388 + ui/sdl.c|8 +- ui/spice-display.c |4 +- ui/vnc.c|8 +- vl.c| 50 ++- 24 files changed, 1789 insertions(+), 74 deletions(-) create mode 100644 ui/fbdev.c create mode 100644 ui/linux-keynames.h
Re: [Qemu-devel] Doubts on SMP, VCPU and CONFIG_IOTHREAD
Il 18/09/2012 08:27, Alex Barcelo ha scritto: I saw some things[1] about multiple vcpu, smp and things like that. It seemed to me that --enable-io-thread enables it. iothread means that the QEMU main thread only services an event loop (I/O, bottom halves, timers, etc.). Running CPUs is offloaded to extra threads. The iothread most of the time runs without the big QEMU lock (because most of the time it is waiting on a select system call). But, it only works for KVM, doesn't it? I assume that there is NOT one thread per vcpu in TCG mode. Yes, KVM has a thread per VCPU. This is possible because with KVM the VCPU thread is _also_ running most of the time without the big QEMU lock (it is in the KVM_RUN ioctl). However, TCG needs to run with the big QEMU lock. For this reason TCG has a single thread that runs in lockstep with the io-thread. Whenever the iothread gets out of the select system call and needs the lock, it asks the TCG thread to exit and the TCG thread obeys. This is done using a condition variable qemu_io_proceeded_cond, controlled by a boolean variable iothread_requesting_mutex. Whenever the iothread goes back to sleep, it signals the condition variable and the TCG thread starts running again. And this --enable-io-thread now is the default? This option is always active? Now I was wondering if something parallel is done in TCG (maybe through coroutines?). The lockstep behavior obtained with the condition variable is what you are looking for. Paolo
Re: [Qemu-devel] ping Re: [RFC PATCH 00/13] Embedded NBD server
Am 17.09.2012 18:43, schrieb Paolo Bonzini: Il 07/09/2012 18:11, Kevin Wolf ha scritto: I was planning to review it in more detail next week, but I just had a quick look. I'm not sure if automatically shutting down the NBD server when the guest stops using it is always right (for removable media it could even be an eject from the guest), Yes, the removable media case could be a bit too eager. Note however that a guest-triggered eject doesn't do bdrv_close, only a user-triggered eject does, and that's blocked by bdrv_in_use. Luckily removable media are usually not too interesting, so a slightly suboptimal behavior is okay as long as it does not break the important use cases---mostly migration without shared storage, where also uninteresting images have to be mirrored or exposed via NBD. Those should be covered by bdrv_in_use. It sounds like it could be acceptable, yes. But what's even the motivation to close the server on bdrv_close? The commit message is a bit... well, not just terse, but even empty. The standard case for closing images is that qemu exits. In this case, the NBD server would automatically exit as well. An interesting case for the NBD server would be when the migration has completed; but do we even get a bdrv_close there? but introducing a notifier list doesn't look too bad. We can probably use it for other things that are currently hardcoded in bdrv_close() with some if statements, like disabling I/O throttling, cancelling a block job, etc. Yes, though a lot of these could be moved to filters and use whatever filter-specific method is there (e.g. a filter bdrv_close). This circles back to the question of whether bdrv_close kills filters or only the base image... Note that after completing the refactoring, we'll only have one combined bdrv_close/delete function and so there won't be BlockDriverStates that are closed. In this case, I think it's quite obvious that not closing the filters wouldn't make any sense. Kevin
[Qemu-devel] [PATCH V3 1/5] libqblock build system
Libqblock was placed in new directory ./libqblock, libtool will build dynamic library there, source files of block layer remains in ./block. So block related source code will generate 3 sets of binary, first is old ones used in qemu, second and third are non PIC and PIC ones in ./libqblock. GCC compiler flag visibility=hidden was used with special macro, to export only symbols that was marked as PUBLIC. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- Makefile| 13 - Makefile.objs |6 libqblock/Makefile | 64 +++ 3 files changed, 82 insertions(+), 1 deletions(-) create mode 100644 libqblock/Makefile create mode 100644 libqblock/libqblock-error.c create mode 100644 libqblock/libqblock.c diff --git a/Makefile b/Makefile index 971e92f..b0b9b8d 100644 --- a/Makefile +++ b/Makefile @@ -164,6 +164,17 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o +## +# Support building shared library libqblock +ifeq ($(LIBTOOL),) +$(libqblock-lib-la): + @echo libtool is missing, please install and rerun configure; exit 1 +else +$(libqblock-lib-la): + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C libqblock V=$(V) TARGET_DIR=$*/ $(libqblock-lib-la),) +endif +### + vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS), LINK $@) @@ -227,7 +238,7 @@ clean: rm -rf qapi-generated rm -rf qga/qapi-generated $(MAKE) -C tests/tcg clean - for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard; do \ + for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ rm -f $$d/qemu-options.def; \ done diff --git a/Makefile.objs b/Makefile.objs index 4412757..8a4c9fc 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -248,3 +248,9 @@ nested-vars += \ common-obj-y \ extra-obj-y dummy := $(call unnest-vars) + +# +# libqblock + 
+libqblock-lib-la = libqblock.la +libqblock-lib-path = libqblock diff --git a/libqblock/Makefile b/libqblock/Makefile new file mode 100644 index 000..bf7abcc --- /dev/null +++ b/libqblock/Makefile @@ -0,0 +1,64 @@ +### +# libqblock Makefile +# Todo: +#1 trace related files is generated in this directory, move +# them to the root directory. +## +-include ../config-host.mak +-include $(SRC_PATH)/Makefile.objs +-include $(SRC_PATH)/rules.mak + +# +# Library settings +# +$(call set-vpath, $(SRC_PATH)) + +#expand the foldered vars,especially ./block +dummy := $(call unnest-vars-1) + +#library objects +tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ + qemu-timer-common.o main-loop.o notify.o \ + iohandler.o cutils.o iov.o async.o +tools-obj-$(CONFIG_POSIX) += compatfd.o + +libqblock-y=libqblock.o libqblock-error.o +libqblock-lib-y=$(patsubst %.o,%.lo,$(libqblock-y)) + +QEMU_OBJS=$(tools-obj-y) $(block-obj-y) +QEMU_OBJS_FILTERED=$(filter %.o,$(QEMU_OBJS)) +QEMU_OBJS_LIB=$(patsubst %.o, %.lo,$(QEMU_OBJS_FILTERED)) + +QEMU_CFLAGS+= -I../ -I../include +#adding magic macro define for symbol hiding and exposing +QEMU_CFLAGS+= -fvisibility=hidden -D LIBQB_BUILD + +#dependency libraries +LIBS+=-lz $(LIBS_TOOLS) + +# +# Runtime rules +# +clean: + rm -f *.lo *.o *.d *.la libqblock-test trace.c trace.c-timestamp + rm -rf .libs block trace + +all: libqblock-test + @true + +help: + @echo type make libqblock-test at root dirtory, libtool is required + +#make dir block at runtime which would hold the output of block/*.c +block: + @mkdir block + +ifeq ($(LIBTOOL),) +$(libqblock-lib-la): + @echo libtool is missing, please install and rerun configure; exit 1 +else +$(libqblock-lib-la): $(libqblock-lib-y) $(QEMU_OBJS_LIB) + $(call quiet-command,$(LIBTOOL) --mode=link --quiet --tag=CC $(CC) -rpath $(libdir) -o $@ $^ $(LIBS), lt LINK $@) +endif + +.PHONY: libqblock.la diff --git a/libqblock/libqblock-error.c b/libqblock/libqblock-error.c new file mode 100644 index 
000..e69de29 diff --git a/libqblock/libqblock.c b/libqblock/libqblock.c new file
[Qemu-devel] [PATCH V3 0/5] libqblock qemu block layer library
This patch series introduces the libqblock API; make libqblock.la and make check-libqblock build this library. Functionalities: 1 create a new image. 2 sync access of an image. 3 basic image information retrieving such as backing file. 4 detect if a sector is allocated in an image. Supported Formats: ALL using file protocols. v2: Insert reserved bytes into union. Use uint64_t instead of size_t, offset. Use const char * in filename pointer. Initialization function removed and it is automatically executed when the library is loaded. Added compile flag visibility=hidden, to avoid name space pollution. Structure naming style changed. Using byte unit instead of sector for every API. Added a member in image static information structure, to report logical sector size, which is always 512 now. Read and write API can take requests not aligned to 512 now. They are meant to return the number of bytes that succeeded in the operation, but for now either a negative value or the number requested is returned, because the qemu block sync I/O API does not report such a number. Typo fix due to comments and improved documents. v3: Removed the code about OOM error, introduced GError. Used a table to map from string to enum types about format. Use typedef for every structure. Improved the gcc compiler macro to warn if gcc was not used. Global variable name changed with prefix libqb_. The struct QBlockStaticInfo was changed to hold full format related information inside, and a new member with pointers pointing to the mostly used members, such as backing file, virt size, was added. This would allow the user to get full information about how it is created in the future. Each patch in the series can work with qemu now. Typo fixes. 
Wenchao Xia (5): libqblock build system libqblock type defines libqblock API libqblock test build system libqblock test example code .gitignore |1 + Makefile | 14 +- Makefile.objs|6 + block.c |2 +- block.h |1 + libqblock/Makefile | 64 +++ libqblock/libqblock-error.c | 57 ++ libqblock/libqblock-error.h | 49 ++ libqblock/libqblock-internal.h | 56 ++ libqblock/libqblock-types.h | 268 + libqblock/libqblock.c| 1140 ++ libqblock/libqblock.h| 297 ++ tests/Makefile |3 + tests/libqblock/Makefile | 32 ++ tests/libqblock/libqblock-test.c | 237 15 files changed, 2225 insertions(+), 2 deletions(-) create mode 100644 libqblock/Makefile create mode 100644 libqblock/libqblock-error.c create mode 100644 libqblock/libqblock-error.h create mode 100644 libqblock/libqblock-internal.h create mode 100644 libqblock/libqblock-types.h create mode 100644 libqblock/libqblock.c create mode 100644 libqblock/libqblock.h create mode 100644 tests/libqblock/Makefile create mode 100644 tests/libqblock/libqblock-test.c
Re: [Qemu-devel] ping Re: [RFC PATCH 00/13] Embedded NBD server
Luckily removable media are usually not too interesting, so a slightly suboptimal behavior is okay as long as it does not break the important use cases---mostly migration without shared storage, where also uninteresting images have to be mirrored or exposed via NBD. Those should be covered by bdrv_in_use. It sounds like it could be acceptable, yes. But what's even the motivation to close the server on bdrv_close? The commit message is a bit... well, not just terse, but even empty. The motivation is two-fold: 1) for device hot-unplug, not closing the server would impede removal of the blockdev until after all clients have closed their connections. 2) for the removable media case, clients risk reading data from two different images and merging it somehow. In either case (hot-unplug and eject) after bdrv_close I/O requests would return ENOMEDIUM, so there is not much benefit in leaving the connection open. Clients can reconnect with the understanding that the medium has changed (medium change is not part of the NBD specification, but we can retrofit it this way). The standard case for closing images is that qemu exits. In this case, the NBD server would automatically exit as well. An interesting case for the NBD server would be when the migration has completed; but do we even get a bdrv_close there? No, you don't. Yes, though a lot of these could be moved to filters and use whatever filter-specific method is there (e.g. a filter bdrv_close). This circles back to the question of whether bdrv_close kills filters or only the base image... Note that after completing the refactoring, we'll only have one combined bdrv_close/delete function and so there won't be BlockDriverStates that are closed. In this case, I think it's quite obvious that not closing the filters wouldn't make any sense. Does that mean that any I/O throttling must be applied again on every medium change? That would be a behavioral change. Paolo
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
On 2012-09-18 03:52, Wen Congyang wrote: At 09/18/2012 01:56 AM, Luiz Capitulino Wrote: Hi Wen, We've re-reviewed the dump-guest-memory command and found some possible issues with the -p option. The main issue is that it seems possible for a malicious guest to set page tables in a way that we allocate a MemoryMapping structure for each possible PTE. If IA-32e paging is used, this could lead to the allocation of dozens of gigabytes by qemu. Of course this is not expected for the regular case, where a MemoryMapping allocation can be skipped for several reasons (I/O memory, page not present, contiguous/in same range addresses etc), but the point is what a malicious guest can do. Another problem is that the -p option seems to be broken for SMP guests. The problem is in qemu_get_guest_memory_mapping(): first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); if (first_paging_enabled_cpu) { for (env = first_paging_enabled_cpu; env != NULL; env = env->next_cpu) { ret = cpu_get_memory_mapping(list, env); if (ret < 0) { return -1; } } return 0; } This looks for the first vCPU with paging enabled, and then assumes that all the following vCPUs also have paging enabled. How does this hold? cpu_get_memory_mapping re-validates that paging is on. In fact, cpu_get_memory_mapping should handle both cases so that the generic code need not worry about paging on/off. Assuming that this last issue is fixable (ie. we can make the -p option work well with SMP guests), we should at least document that -p can make QEMU allocate lots of memory and end up being killed by the OS. However, I also think that we should consider if having the -p feature is really worth it. It's a complex feature and has a number of limitations*. If libvirt doesn't use this, dropping it shouldn't be a big deal (we can return an error when -p is used). libvirt should surely not be the only reference for debugging features. 
* The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. I'm sure the memory allocation can be avoided by writing out any found virt-phys mapping directly to the vmcore file. We know where physical RAM will be, we only need the corresponding virtual addresses - IIUC. So first prepare the section according to the guest's RAM size and then, once we identified a page while walking the tables carefully, seek to that file position and write to it. Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
[Qemu-devel] [PATCH V3 2/5] libqblock type defines
This patch contains type and defines used in APIs, one file for public usage by user, one for libqblock internal usage. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- libqblock/libqblock-internal.h | 56 + libqblock/libqblock-types.h| 268 2 files changed, 324 insertions(+), 0 deletions(-) create mode 100644 libqblock/libqblock-internal.h create mode 100644 libqblock/libqblock-types.h diff --git a/libqblock/libqblock-internal.h b/libqblock/libqblock-internal.h new file mode 100644 index 000..87f32be --- /dev/null +++ b/libqblock/libqblock-internal.h @@ -0,0 +1,56 @@ +/* + * QEMU block layer library + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef LIBQBLOCK_INTERNAL +#define LIBQBLOCK_INTERNAL + +#include glib.h + +#include block.h +#include block_int.h +#include libqblock-types.h + +/* this file contains defines and types used inside the library. */ + +#define FUNC_FREE(p) g_free((p)) +#define FUNC_MALLOC(size) g_malloc((size)) +#define FUNC_CALLOC(nmemb, size) g_malloc0((nmemb)*(size)) + +#define CLEAN_FREE(p) { \ +FUNC_FREE(p); \ +(p) = NULL; \ +} + +/* details should be hidden to user */ +struct QBlockState { +BlockDriverState *bdrvs; +/* internal used file name now, if it is not NULL, it means + image was opened. 
+*/ +char *filename; +} ; + +struct QBroker { +/* last error */ +GError *g_error; +int err_ret; /* 1st level of error, the libqblock error number */ +int err_no; /* 2nd level of error, errno what below reports */ +}; + +#define G_LIBQBLOCK_ERROR g_libqbock_error_quark() + +static inline GQuark g_libqbock_error_quark(void) +{ +return g_quark_from_static_string(g-libqblock-error-quark); +} +#endif diff --git a/libqblock/libqblock-types.h b/libqblock/libqblock-types.h new file mode 100644 index 000..3c548b8 --- /dev/null +++ b/libqblock/libqblock-types.h @@ -0,0 +1,268 @@ +/* + * QEMU block layer library + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef LIBQBLOCK_TYPES_H +#define LIBQBLOCK_TYPES_H + +#include sys/types.h +#include stdint.h +#include stdbool.h + +#if defined(__GNUC__) __GNUC__ = 4 +#ifdef LIBQB_BUILD +#define DLL_PUBLIC __attribute__((visibility(default))) +#else +#define DLL_PUBLIC +#endif +#else +#warning : gcc compiler version 4, symbols can not be hidden. +#endif + +/* this library is designed around this core struct. */ +typedef struct QBlockState QBlockState; + +/* every thread should have a broker. 
*/ +typedef struct QBroker QBroker; + +/* flag used in open and create */ +#define LIBQBLOCK_O_RDWR0x0002 +/* do not use the host page cache */ +#define LIBQBLOCK_O_NOCACHE 0x0020 +/* use write-back caching */ +#define LIBQBLOCK_O_CACHE_WB0x0040 +/* don't open the backing file */ +#define LIBQBLOCK_O_NO_BACKING 0x0100 +/* disable flushing on this disk */ +#define LIBQBLOCK_O_NO_FLUSH0x0200 + +#define LIBQBLOCK_O_CACHE_MASK \ + (LIBQBLOCK_O_NOCACHE | LIBQBLOCK_O_CACHE_WB | LIBQBLOCK_O_NO_FLUSH) + +#define LIBQBLOCK_O_VALID_MASK \ + (LIBQBLOCK_O_RDWR | LIBQBLOCK_O_NOCACHE | LIBQBLOCK_O_CACHE_WB | \ +LIBQBLOCK_O_NO_BACKING | LIBQBLOCK_O_NO_FLUSH) + +typedef enum QBlockProtType { +QB_PROT_NONE = 0, +QB_PROT_FILE, +QB_PROT_MAX +} QBlockProtType; + +typedef struct QBlockProtOptionFile { +const char *filename; +} QBlockProtOptionFile; + +#define QBLOCK_PROT_OPTIONS_UNION_SIZE (512) +typedef union QBlockProtOptionsUnion { +QBlockProtOptionFile o_file; +uint8_t reserved[QBLOCK_PROT_OPTIONS_UNION_SIZE]; +} QBlockProtOptionsUnion; + +/** + * struct QBlockProtInfo: contains information about how to find the image + * + * @prot_type: protocol type, now only support FILE. + * @prot_op: protocol related options. + */ +typedef struct QBlockProtInfo { +QBlockProtType prot_type; +QBlockProtOptionsUnion prot_op; +} QBlockProtInfo; + + +/* format related options */ +typedef enum QBlockFmtType { +QB_FMT_NONE = 0, +QB_FMT_COW, +QB_FMT_QED, +QB_FMT_QCOW, +QB_FMT_QCOW2, +QB_FMT_RAW, +QB_FMT_RBD, +QB_FMT_SHEEPDOG, +QB_FMT_VDI, +QB_FMT_VMDK, +QB_FMT_VPC, +QB_FMT_MAX +} QBlockFmtType; + +typedef struct QBlockFmtOptionCow { +uint64_t virt_size; +QBlockProtInfo backing_loc; +} QBlockFmtOptionCow; + +typedef struct QBlockFmtOptionQed { +uint64_t virt_size; +QBlockProtInfo backing_loc; +QBlockFmtType backing_fmt; +uint64_t cluster_size; /* unit is bytes */ +uint64_t table_size; /* unit is clusters
Re: [Qemu-devel] [PATCH] usb-redir: Allow to attach USB 2.0 devices to 1.1 host controller
Hi, On 09/17/2012 06:22 PM, Jan Kiszka wrote: If that does not work, add the debug parameter to the usb-redir device, set it to 4, collect logs of trying to redirect the device and send me the logs please, ie: -device usb-redir,chardev=usbredirchardev1,id=usbredirdev1,debug=4 Also be aware that usb-redir relies on chardev flowcontrol working, which it does not upstream! See for example here for the chardev flow control patch set which RHEL / Fedora carry: http://cgit.freedesktop.org/~jwrdegoede/qemu/log/?h=qemu-kvm-1.2-usbredirofs=50 And then the first 13 patches after: Merge tag 'v1.2.0' Oh, and also, if you're running qemu git master, make sure you've: http://cgit.freedesktop.org/~jwrdegoede/qemu/commit/?id=81e34f5973d8d6a1ef998a50c4a4bf66abb3b56b I used qemu-kvm-1.2-usbredir^ (the last commit is apparently broken - copypaste bug?). Yeah, that has been fixed now. I'm getting this right after typing cat /dev/ACM0 in the guest. It's an endless stream, and so is the output in the guest although there should be nothing to dump (that's the proper behaviour on the host). Hmm, can you try commenting out line 1608 of hw/usb/redirect.c: usb_ep-pipeline = true; And see if that helps. If it does not help, please bump the debug level to 5 (this will also make it log packet contents), and then generate another log, and then it is time to dive into the ACM protocol to see what is happening... Regards, Hans
[Qemu-devel] [PATCH V3 4/5] libqblock test build system
Created a new directory in tests, make chekc-libqblock will build an executable binrary, make clean will delete it. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- .gitignore |1 + Makefile |1 + tests/Makefile |3 +++ tests/libqblock/Makefile | 32 tests/libqblock/libqblock-test.c |4 5 files changed, 41 insertions(+), 0 deletions(-) create mode 100644 tests/libqblock/Makefile create mode 100644 tests/libqblock/libqblock-test.c diff --git a/.gitignore b/.gitignore index 824c0d2..eccb637 100644 --- a/.gitignore +++ b/.gitignore @@ -95,3 +95,4 @@ cscope.* tags TAGS *~ +tests/libqblock/*.bin diff --git a/Makefile b/Makefile index b0b9b8d..de8ea17 100644 --- a/Makefile +++ b/Makefile @@ -238,6 +238,7 @@ clean: rm -rf qapi-generated rm -rf qga/qapi-generated $(MAKE) -C tests/tcg clean + $(MAKE) -C tests/libqblock clean for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ rm -f $$d/qemu-options.def; \ diff --git a/tests/Makefile b/tests/Makefile index 26a67ce..69af1e2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -148,4 +148,7 @@ check-unit: $(patsubst %,check-%, $(check-unit-y)) check-block: $(patsubst %,check-%, $(check-block-y)) check: check-unit check-qtest +check-libqblock: + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C tests/libqblock V=$(V) TARGET_DIR=$*/ check-libqblock,) + -include $(wildcard tests/*.d) diff --git a/tests/libqblock/Makefile b/tests/libqblock/Makefile new file mode 100644 index 000..eb6947b --- /dev/null +++ b/tests/libqblock/Makefile @@ -0,0 +1,32 @@ +-include ../../config-host.mak +-include $(SRC_PATH)/Makefile.objs +-include $(SRC_PATH)/rules.mak + +$(call set-vpath, $(SRC_PATH)) + +#library test case objects +libqblock-test-objs=libqblock-test.lo + +QEMU_CFLAGS+=-I $(SRC_PATH)/$(libqblock-lib-path) +libqblock-la-path = $(SRC_PATH)/$(libqblock-lib-path)/$(libqblock-lib-la) + +## +#runtime rules: +ifeq ($(LIBTOOL),) +libqblock-test.bin: + @echo libtool is 
missing, please install and rerun configure; exit 1 +else +libqblock-test.bin: $(libqblock-test-objs) $(libqblock-la-path) + $(call quiet-command,$(LIBTOOL) --mode=link --quiet --tag=CC $(CC) -shared -rpath $(libdir) -o $@ $^, lt LINK $@) +endif + +check-libqblock: + @echo Building libqblock.la... + $(call quiet-command,$(MAKE) -C $(SRC_PATH) $(libqblock-lib-la),) + @make libqblock-test.bin + @echo Executing test binary... + ./libqblock-test.bin + +clean: + rm -f *.lo *.o *.d *.la *.bin + rm -rf .libs diff --git a/tests/libqblock/libqblock-test.c b/tests/libqblock/libqblock-test.c new file mode 100644 index 000..c05c0c4 --- /dev/null +++ b/tests/libqblock/libqblock-test.c @@ -0,0 +1,4 @@ +int main(int argc, char **argv) +{ +return 0; +} -- 1.7.1
[Qemu-devel] [PATCH V3 3/5] libqblock API
This patch contains the major APIs in the library. Important APIs: 1 QBroker. These structure was used to retrieve errors, every thread must create one first, later maybe thread related staff could be added into it. 2 QBlockState. It stands for an block image object. 3 QBlockStaticInfo. It contains static information such as location, backing file, size. 4 ABI was kept with reserved members. 5 Sync I/O. It is similar to C file open, read, write and close operations. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- block.c |2 +- block.h |1 + libqblock/libqblock-error.c | 57 +++ libqblock/libqblock-error.h | 49 ++ libqblock/libqblock.c | 1140 +++ libqblock/libqblock.h | 297 +++ 6 files changed, 1545 insertions(+), 1 deletions(-) create mode 100644 libqblock/libqblock-error.h create mode 100644 libqblock/libqblock.h diff --git a/block.c b/block.c index e78039b..d5321d3 100644 --- a/block.c +++ b/block.c @@ -196,7 +196,7 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs, } /* check if the path starts with protocol: */ -static int path_has_protocol(const char *path) +int path_has_protocol(const char *path) { const char *p; diff --git a/block.h b/block.h index 2e2be11..e7da711 100644 --- a/block.h +++ b/block.h @@ -405,4 +405,5 @@ typedef enum { #define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt) void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event); +int path_has_protocol(const char *path); #endif diff --git a/libqblock/libqblock-error.c b/libqblock/libqblock-error.c index e69de29..9000e3e 100644 --- a/libqblock/libqblock-error.c +++ b/libqblock/libqblock-error.c @@ -0,0 +1,57 @@ +/* + * QEMU block layer library + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ + +#include libqblock-error.h +#include libqblock-internal.h + +void qb_error_get_human_str(QBroker *broker, +char *buf, size_t buf_size) +{ +const char *err_ret_str; +switch (broker-err_ret) { +case QB_ERR_INTERNAL_ERR: +err_ret_str = Internal error.; +break; +case QB_ERR_INVALID_PARAM: +err_ret_str = Invalid param.; +break; +case QB_ERR_BLOCK_OUT_OF_RANGE: +err_ret_str = request is out of image's range.; +break; +default: +err_ret_str = Unknown error.; +break; +} +if (broker == NULL) { +snprintf(buf, buf_size, %s, err_ret_str); +return; +} + +if (broker-err_ret == QB_ERR_INTERNAL_ERR) { +snprintf(buf, buf_size, %s %s errno [%d]. strerror [%s]., + err_ret_str, broker-g_error-message, + broker-err_no, strerror(-broker-err_no)); +} else { +snprintf(buf, buf_size, %s %s, + err_ret_str, broker-g_error-message); +} +return; +} + +int qb_error_get_errno(QBroker *broker) +{ +if (broker-err_ret == QB_ERR_INTERNAL_ERR) { +return broker-err_no; +} +return 0; +} diff --git a/libqblock/libqblock-error.h b/libqblock/libqblock-error.h new file mode 100644 index 000..83d6d98 --- /dev/null +++ b/libqblock/libqblock-error.h @@ -0,0 +1,49 @@ +/* + * QEMU block layer library + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef LIBQBLOCK_ERROR +#define LIBQBLOCK_ERROR + +#include libqblock-types.h + +#define QB_ERR_INTERNAL_ERR (-1) +#define QB_ERR_INVALID_PARAM (-100) +#define QB_ERR_BLOCK_OUT_OF_RANGE (-101) + +/* error handling */ +/** + * qb_error_get_human_str: get human readable error string. + * + * return a human readable string, it would be truncated if buf is not big + * enough. + * + * @broker: operation broker, must be valid. + * @buf: buf to receive the string. + * @buf_size: the size of the string buf. 
+ */ +DLL_PUBLIC +void qb_error_get_human_str(QBroker *broker, +char *buf, size_t buf_size); + +/** + * qb_error_get_errno: get error number, only valid when err_ret is + * QB_ERR_INTERNAL_ERR. + * + * return negative errno or 0 if last error is not QB_ERR_INTERNAL_ERR. + * + * @broker: operation broker. + */ +DLL_PUBLIC +int qb_error_get_errno(QBroker *broker); + +#endif diff --git a/libqblock/libqblock.c b/libqblock/libqblock.c index e69de29..2d9b351 100644 --- a/libqblock/libqblock.c +++ b/libqblock/libqblock.c @@ -0,0 +1,1140 @@ +/* + * QEMU block layer library + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under
[Qemu-devel] [PATCH V3 5/5] libqblock test example code
In this example, user first create two qcow2 images, and then get the backing file relationship information of them. Then does write and read sync IO on them. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- tests/libqblock/libqblock-test.c | 233 ++ 1 files changed, 233 insertions(+), 0 deletions(-) diff --git a/tests/libqblock/libqblock-test.c b/tests/libqblock/libqblock-test.c index c05c0c4..c0b7963 100644 --- a/tests/libqblock/libqblock-test.c +++ b/tests/libqblock/libqblock-test.c @@ -1,4 +1,237 @@ +/* + * QEMU block layer library test + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include stdarg.h +#include stdio.h +#include unistd.h +#include inttypes.h +#include string.h +#include stdlib.h +#include libqblock.h + +#define TEST_BUF_SIZE 1024 +static unsigned char buf_r[TEST_BUF_SIZE]; +static unsigned char buf_w[TEST_BUF_SIZE] = {0, 0, 0, 0}; + +typedef struct VerifyData { +unsigned char *buf_r; +unsigned char *buf_w; +int len; +} VerifyData; + +static void print_loc(QBlockProtInfo *loc) +{ +if (loc == NULL) { +printf(backing file is NULL.); +return; +} +switch (loc-prot_type) { +case QB_PROT_NONE: +printf(protocol type [none].); +break; +case QB_PROT_FILE: +printf(protocol type [file], filename [%s]., + loc-prot_op.o_file.filename); +break; +default: +printf(protocol type not supported.); +break; +} +} + +static void print_info_image_static(QBlockStaticInfo *info) +{ +printf(===image location:\n); +print_loc(info-loc); +printf(\nvirtual_size % PRId64 , format type %d [%s], + *(info-member_addr-virt_size), + info-fmt.fmt_type, qb_fmttype2str(info-fmt.fmt_type)); +printf(\nbacking image location:\n); +print_loc(info-member_addr-backing_loc); +printf(\n); +} + +static void test_check(VerifyData *vdata) +{ +int cmp; +cmp = memcmp(vdata-buf_r, vdata-buf_w, 
vdata-len); +if (cmp == 0) { +printf(compare succeed, %d.\n, vdata-buf_r[24]); +} else { +printf(!!! compare fail, %d.\n, vdata-buf_r[24]); +exit(1); +} +} + int main(int argc, char **argv) { +const char *filename1, *filename2; +QBroker *broker = NULL; +QBlockState *qbs = NULL; +QBlockProtInfo *ol = NULL; +QBlockFmtInfo *of = NULL; +QBlockStaticInfo *info_st = NULL; +int ret, flag; +int test_offset = 510; +int test_len = 520; +VerifyData vdata; +char err_str[1024]; + +vdata.buf_r = buf_r; +vdata.buf_w = buf_w; +vdata.len = test_len; + +filename1 = ./qemu_image1; +filename2 = ./qemu_image2; +printf(qemu test, filename1 is %s, filename2 is %s.\n, + filename1, filename2); + +ret = qb_broker_new(broker); +if (ret 0) { +goto free; +} + +ret = qb_state_new(broker, qbs); +if (ret 0) { +goto free; +} + +ret = qb_prot_info_new(broker, ol); +if (ret 0) { +goto free; +} + +ret = qb_fmt_info_new(broker, of); +if (ret 0) { +goto free; +} + +/* create a new image */ + +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename2; +of-fmt_type = QB_FMT_QCOW2; +of-fmt_op.o_qcow2.virt_size = 100 * 1024; +flag = 0; + +ret = qb_create(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(create fail 1. %s.\n, err_str); +goto unlink; +} + +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename1; +of-fmt_type = QB_FMT_QCOW2; +of-fmt_op.o_qcow2.backing_loc.prot_type = QB_PROT_FILE; +of-fmt_op.o_qcow2.backing_loc.prot_op.o_file.filename = filename2; +flag = 0; +ret = qb_create(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(create fail 2. 
%s.\n, err_str); +goto unlink; +} + +/* get informations */ +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename1; +of-fmt_type = QB_FMT_NONE; +flag = LIBQBLOCK_O_NO_BACKING; +ret = qb_open(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(info getting, open failed. %s.\n, err_str); +goto free; +} + +while (1) { +ret = qb_info_image_static_get(broker, qbs, info_st); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(info get error. %s.\n, err_str); +goto close; +} +print_info_image_static(info_st); +
Re: [Qemu-devel] ping Re: [RFC PATCH 00/13] Embedded NBD server
Am 18.09.2012 11:09, schrieb Paolo Bonzini: Luckily removable media are usually not too interesting, so a slightly suboptimal behavior is okay as long as it does not break the important use cases---mostly migration without shared storage, where also uninteresting images have to be mirrored or exposed via NBD. Those should be covered by bdrv_in_use. It sounds like it could be acceptable, yes. But what's even the motivation to close the server on bdrv_close? The commit message is a bit... well, not just terse, but even empty. The motivation is two-fold: 1) for device hot-unplug, not closing the server would impede removal of the blockdev until after all clients have closed their connections. 2) for the removable media case, clients risk reading data from two different images and merging it somehow. In either case (hot-unplug and eject) after bdrv_close I/O requests would return ENOMEDIUM, so there is not much benefit in leaving the connection open. Clients can reconnect with the understanding that the medium has changed (medium change is not part of the NBD specification, but we can retrofit it this way). I think I can buy this, but please add it to the commit message. Yes, though a lot of these could be moved to filters and use whatever filter-specific method is there (e.g. a filter bdrv_close). This circles back to the question of whether bdrv_close kills filters or only the base image... Note that after completing the refactoring, we'll only have one combined bdrv_close/delete function and so there won't be BlockDriverStates that are closed. In this case, I think it's quite obvious that not closing the filters wouldn't make any sense. Does that mean that any I/O throttling must be applied again on every medium change? That would be a behavioral change. Hm, I guess so, at least on the lowest level. The only thing I know for certain is that maintaining compatibility for the old commands will be fun, but if possible at all we shouldn't let that compromise our design. 
Kevin
Re: [Qemu-devel] ping Re: [RFC PATCH 00/13] Embedded NBD server
Il 18/09/2012 11:40, Kevin Wolf ha scritto: Note that after completing the refactoring, we'll only have one combined bdrv_close/delete function and so there won't be BlockDriverStates that are closed. In this case, I think it's quite obvious that not closing the filters wouldn't make any sense. Does that mean that any I/O throttling must be applied again on every medium change? That would be a behavioral change. Hm, I guess so, at least on the lowest level. The only thing I know for certain is that maintaining compatibility for the old commands will be fun, but if possible at all we shouldn't let that compromise our design. Yeah, originally we had the idea of a proxy driver where you could stack all your persistent filters. The proxy driver would be needed of course for removable media, but it could also subsume things like bdrv_swap and bdrv_append. Paolo
[Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga
This patch adds a mmio bar to the qemu standard vga which allows to access the standard vga registers and bochs dispi interface registers via mmio. Cc: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/vga-pci.c | 97 ++ hw/vga.c |6 ++-- hw/vga_int.h |6 +++ 3 files changed, 106 insertions(+), 3 deletions(-) diff --git a/hw/vga-pci.c b/hw/vga-pci.c index 9abbada..e05e2ef 100644 --- a/hw/vga-pci.c +++ b/hw/vga-pci.c @@ -30,9 +30,36 @@ #include qemu-timer.h #include loader.h +/* + * QEMU Standard VGA -- MMIO area spec. + * + * Using PCI bar #2, keeping #1 free, which leaves the + * door open to upgrade bar #0 to 64bit. + * + * mmio area layout: + * 0x - 0x03ff reserved, for possible virtio extension. + * 0x0400 - 0x041f vga ioports (0x3c0 - 0x3df), remapped 1:1 + * 0x0500 - 0x0515 bochs dispi interface registers, mapped flat without + * index/data ports. Use (index 1) as offset for + * (16bit) register access. + */ +#define PCI_VGA_IOPORT_OFFSET 0x400 +#define PCI_VGA_IOPORT_SIZE (0x3e0 - 0x3c0) +#define PCI_VGA_BOCHS_OFFSET 0x500 +#define PCI_VGA_BOCHS_SIZE(0x0b * 2) +#define PCI_VGA_MMIO_SIZE 0x1000 + +enum vga_pci_flags { +PCI_VGA_FLAG_ENABLE_MMIO = 1, +}; + typedef struct PCIVGAState { PCIDevice dev; VGACommonState vga; +uint32_t flags; +MemoryRegion mmio; +MemoryRegion ioport; +MemoryRegion bochs; } PCIVGAState; static const VMStateDescription vmstate_vga_pci = { @@ -47,6 +74,60 @@ static const VMStateDescription vmstate_vga_pci = { } }; +static uint64_t pci_vga_ioport_read(void *ptr, target_phys_addr_t addr, +unsigned size) +{ +PCIVGAState *d = ptr; +return vga_ioport_read(d-vga, addr); +} + +static void pci_vga_ioport_write(void *ptr, target_phys_addr_t addr, + uint64_t val, unsigned size) +{ +PCIVGAState *d = ptr; +vga_ioport_write(d-vga, addr, val); +} + +static const MemoryRegionOps pci_vga_ioport_ops = { +.read = pci_vga_ioport_read, +.write = pci_vga_ioport_write, +.valid.min_access_size = 1, 
+.valid.max_access_size = 4, +.impl.min_access_size = 1, +.impl.max_access_size = 1, +.endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t pci_vga_bochs_read(void *ptr, target_phys_addr_t addr, + unsigned size) +{ +PCIVGAState *d = ptr; +int index = addr 1; + +vbe_ioport_write_index(d-vga, 0, index); +return vbe_ioport_read_data(d-vga, 0); +} + +static void pci_vga_bochs_write(void *ptr, target_phys_addr_t addr, +uint64_t val, unsigned size) +{ +PCIVGAState *d = ptr; +int index = addr 1; + +vbe_ioport_write_index(d-vga, 0, index); +vbe_ioport_write_data(d-vga, 0, val); +} + +static const MemoryRegionOps pci_vga_bochs_ops = { +.read = pci_vga_bochs_read, +.write = pci_vga_bochs_write, +.valid.min_access_size = 1, +.valid.max_access_size = 4, +.impl.min_access_size = 2, +.impl.max_access_size = 2, +.endianness = DEVICE_LITTLE_ENDIAN, +}; + static int pci_vga_initfn(PCIDevice *dev) { PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev); @@ -62,6 +143,21 @@ static int pci_vga_initfn(PCIDevice *dev) /* XXX: VGA_RAM_SIZE must be a power of two */ pci_register_bar(d-dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, s-vram); + /* mmio bar for vga register access */ + if (d-flags (1 PCI_VGA_FLAG_ENABLE_MMIO)) { + memory_region_init(d-mmio, vga.mmio, 4096); + memory_region_init_io(d-ioport, pci_vga_ioport_ops, d, + vga ioports remapped, PCI_VGA_IOPORT_SIZE); + memory_region_init_io(d-bochs, pci_vga_bochs_ops, d, + bochs dispi interface, PCI_VGA_BOCHS_SIZE); + + memory_region_add_subregion(d-mmio, PCI_VGA_IOPORT_OFFSET, + d-ioport); + memory_region_add_subregion(d-mmio, PCI_VGA_BOCHS_OFFSET, + d-bochs); + pci_register_bar(d-dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, d-mmio); + } + if (!dev-rom_bar) { /* compatibility with pc-0.13 and older */ vga_init_vbe(s, pci_address_space(dev)); @@ -77,6 +173,7 @@ DeviceState *pci_vga_init(PCIBus *bus) static Property vga_pci_properties[] = { DEFINE_PROP_UINT32(vgamem_mb, PCIVGAState, vga.vram_size_mb, 16), +DEFINE_PROP_BIT(mmio, PCIVGAState, flags, 
PCI_VGA_FLAG_ENABLE_MMIO, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/vga.c b/hw/vga.c index ec4f0c5..053f89d 100644 --- a/hw/vga.c +++ b/hw/vga.c @@ -591,7 +591,7 @@ static uint32_t vbe_ioport_read_index(void *opaque, uint32_t addr) return val; } -static uint32_t vbe_ioport_read_data(void *opaque,
Re: [Qemu-devel] ping Re: [RFC PATCH 00/13] Embedded NBD server
Am 18.09.2012 11:48, schrieb Paolo Bonzini: Il 18/09/2012 11:40, Kevin Wolf ha scritto: Note that after completing the refactoring, we'll only have one combined bdrv_close/delete function and so there won't be BlockDriverStates that are closed. In this case, I think it's quite obvious that not closing the filters wouldn't make any sense. Does that mean that any I/O throttling must be applied again on every medium change? That would be a behavioral change. Hm, I guess so, at least on the lowest level. The only thing I know for certain is that maintaining compatibility for the old commands will be fun, but if possible at all we shouldn't let that compromise our design. Yeah, originally we had the idea of a proxy driver where you could stack all your persistent filters. The proxy driver would be needed of course for removable media, but it could also subsume things like bdrv_swap and bdrv_append. That would be filters directly for a BlockBackend, which we need in order to support filters that stay on top of snapshots. However, they only help in this specific case if we leave the same BlockBackend around across media change, which wasn't part of the plan, I think. But maybe we'll have to. Kevin
Re: [Qemu-devel] [PATCH V3 2/5] libqblock type defines
Il 18/09/2012 11:01, Wenchao Xia ha scritto: This patch contains type and defines used in APIs, one file for public usage by user, one for libqblock internal usage. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- libqblock/libqblock-internal.h | 56 + libqblock/libqblock-types.h| 268 2 files changed, 324 insertions(+), 0 deletions(-) create mode 100644 libqblock/libqblock-internal.h create mode 100644 libqblock/libqblock-types.h diff --git a/libqblock/libqblock-internal.h b/libqblock/libqblock-internal.h new file mode 100644 index 000..87f32be --- /dev/null +++ b/libqblock/libqblock-internal.h @@ -0,0 +1,56 @@ +/* + * QEMU block layer library + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef LIBQBLOCK_INTERNAL +#define LIBQBLOCK_INTERNAL + +#include glib.h + +#include block.h +#include block_int.h +#include libqblock-types.h + +/* this file contains defines and types used inside the library. */ + +#define FUNC_FREE(p) g_free((p)) +#define FUNC_MALLOC(size) g_malloc((size)) +#define FUNC_CALLOC(nmemb, size) g_malloc0((nmemb)*(size)) + +#define CLEAN_FREE(p) { \ +FUNC_FREE(p); \ +(p) = NULL; \ +} + +/* details should be hidden to user */ +struct QBlockState { +BlockDriverState *bdrvs; +/* internal used file name now, if it is not NULL, it means + image was opened. +*/ +char *filename; +} ; + +struct QBroker { +/* last error */ +GError *g_error; +int err_ret; /* 1st level of error, the libqblock error number */ +int err_no; /* 2nd level of error, errno what below reports */ +}; Sorry for keeping on bikeshedding---a better name for this is QBlockContext. Context is a well-known name for this kind of global object. Otherwise looks good---thanks for putting up with us! 
:) Paolo +#define G_LIBQBLOCK_ERROR g_libqbock_error_quark() + +static inline GQuark g_libqbock_error_quark(void) +{ +return g_quark_from_static_string(g-libqblock-error-quark); +} +#endif diff --git a/libqblock/libqblock-types.h b/libqblock/libqblock-types.h new file mode 100644 index 000..3c548b8 --- /dev/null +++ b/libqblock/libqblock-types.h @@ -0,0 +1,268 @@ +/* + * QEMU block layer library + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef LIBQBLOCK_TYPES_H +#define LIBQBLOCK_TYPES_H + +#include sys/types.h +#include stdint.h +#include stdbool.h + +#if defined(__GNUC__) __GNUC__ = 4 +#ifdef LIBQB_BUILD +#define DLL_PUBLIC __attribute__((visibility(default))) +#else +#define DLL_PUBLIC +#endif +#else +#warning : gcc compiler version 4, symbols can not be hidden. +#endif + +/* this library is designed around this core struct. */ +typedef struct QBlockState QBlockState; + +/* every thread should have a broker. 
*/ +typedef struct QBroker QBroker; + +/* flag used in open and create */ +#define LIBQBLOCK_O_RDWR0x0002 +/* do not use the host page cache */ +#define LIBQBLOCK_O_NOCACHE 0x0020 +/* use write-back caching */ +#define LIBQBLOCK_O_CACHE_WB0x0040 +/* don't open the backing file */ +#define LIBQBLOCK_O_NO_BACKING 0x0100 +/* disable flushing on this disk */ +#define LIBQBLOCK_O_NO_FLUSH0x0200 + +#define LIBQBLOCK_O_CACHE_MASK \ + (LIBQBLOCK_O_NOCACHE | LIBQBLOCK_O_CACHE_WB | LIBQBLOCK_O_NO_FLUSH) + +#define LIBQBLOCK_O_VALID_MASK \ + (LIBQBLOCK_O_RDWR | LIBQBLOCK_O_NOCACHE | LIBQBLOCK_O_CACHE_WB | \ +LIBQBLOCK_O_NO_BACKING | LIBQBLOCK_O_NO_FLUSH) + +typedef enum QBlockProtType { +QB_PROT_NONE = 0, +QB_PROT_FILE, +QB_PROT_MAX +} QBlockProtType; + +typedef struct QBlockProtOptionFile { +const char *filename; +} QBlockProtOptionFile; + +#define QBLOCK_PROT_OPTIONS_UNION_SIZE (512) +typedef union QBlockProtOptionsUnion { +QBlockProtOptionFile o_file; +uint8_t reserved[QBLOCK_PROT_OPTIONS_UNION_SIZE]; +} QBlockProtOptionsUnion; + +/** + * struct QBlockProtInfo: contains information about how to find the image + * + * @prot_type: protocol type, now only support FILE. + * @prot_op: protocol related options. + */ +typedef struct QBlockProtInfo { +QBlockProtType prot_type; +QBlockProtOptionsUnion prot_op; +} QBlockProtInfo; + + +/* format related options */ +typedef enum QBlockFmtType { +QB_FMT_NONE = 0, +QB_FMT_COW, +QB_FMT_QED, +QB_FMT_QCOW, +QB_FMT_QCOW2, +QB_FMT_RAW, +QB_FMT_RBD, +QB_FMT_SHEEPDOG, +
Re: [Qemu-devel] [PATCH V3 1/5] libqblock build system
Il 18/09/2012 11:01, Wenchao Xia ha scritto: Libqblock was placed in new directory ./libqblock, libtool will build dynamic library there, source files of block layer remains in ./block. So block related source code will generate 3 sets of binary, first is old ones used in qemu, second and third are non PIC and PIC ones in ./libqblock. GCC compiler flag visibility=hidden was used with special macro, to export only symbols that was marked as PUBLIC. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- Makefile| 13 - Makefile.objs |6 libqblock/Makefile | 64 +++ 3 files changed, 82 insertions(+), 1 deletions(-) create mode 100644 libqblock/Makefile create mode 100644 libqblock/libqblock-error.c create mode 100644 libqblock/libqblock.c diff --git a/Makefile b/Makefile index 971e92f..b0b9b8d 100644 --- a/Makefile +++ b/Makefile @@ -164,6 +164,17 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o +## +# Support building shared library libqblock +ifeq ($(LIBTOOL),) +$(libqblock-lib-la): + @echo libtool is missing, please install and rerun configure; exit 1 +else +$(libqblock-lib-la): + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C libqblock V=$(V) TARGET_DIR=$*/ $(libqblock-lib-la),) +endif +### + vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS), LINK $@) @@ -227,7 +238,7 @@ clean: rm -rf qapi-generated rm -rf qga/qapi-generated $(MAKE) -C tests/tcg clean - for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard; do \ + for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ rm -f $$d/qemu-options.def; \ done diff --git a/Makefile.objs b/Makefile.objs index 4412757..8a4c9fc 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -248,3 +248,9 @@ nested-vars += \ common-obj-y \ extra-obj-y dummy := $(call 
unnest-vars) + +# +# libqblock + +libqblock-lib-la = libqblock.la +libqblock-lib-path = libqblock diff --git a/libqblock/Makefile b/libqblock/Makefile new file mode 100644 index 000..bf7abcc --- /dev/null +++ b/libqblock/Makefile @@ -0,0 +1,64 @@ +### +# libqblock Makefile +# Todo: +#1 trace related files is generated in this directory, move +# them to the root directory. +## +-include ../config-host.mak +-include $(SRC_PATH)/Makefile.objs +-include $(SRC_PATH)/rules.mak + +# +# Library settings +# +$(call set-vpath, $(SRC_PATH)) + +#expand the foldered vars,especially ./block +dummy := $(call unnest-vars-1) + +#library objects +tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ + qemu-timer-common.o main-loop.o notify.o \ + iohandler.o cutils.o iov.o async.o +tools-obj-$(CONFIG_POSIX) += compatfd.o Do you really need all of these? (BTW, I posted recently a patch to move tools-obj-y to Makefile.objs. It doesn't apply anymore, I'll repost---but the conflicts are trivial). +libqblock-y=libqblock.o libqblock-error.o +libqblock-lib-y=$(patsubst %.o,%.lo,$(libqblock-y)) + +QEMU_OBJS=$(tools-obj-y) $(block-obj-y) +QEMU_OBJS_FILTERED=$(filter %.o,$(QEMU_OBJS)) What does this filter out? Paolo +QEMU_OBJS_LIB=$(patsubst %.o, %.lo,$(QEMU_OBJS_FILTERED)) + +QEMU_CFLAGS+= -I../ -I../include +#adding magic macro define for symbol hiding and exposing +QEMU_CFLAGS+= -fvisibility=hidden -D LIBQB_BUILD + +#dependency libraries +LIBS+=-lz $(LIBS_TOOLS) + +# +# Runtime rules +# +clean: + rm -f *.lo *.o *.d *.la libqblock-test trace.c trace.c-timestamp + rm -rf .libs block trace + +all: libqblock-test + @true + +help: + @echo type make libqblock-test at root dirtory, libtool is required + +#make dir block at runtime which would hold the output of block/*.c +block: + @mkdir block + +ifeq ($(LIBTOOL),) +$(libqblock-lib-la): + @echo libtool is missing, please install and rerun configure; exit 1 +else +$(libqblock-lib-la): $(libqblock-lib-y)
Re: [Qemu-devel] [PATCH V3 4/5] libqblock test build system
Il 18/09/2012 11:01, Wenchao Xia ha scritto: Created a new directory in tests, make check-libqblock will build an executable binary, make clean will delete it. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- .gitignore |1 + Makefile |1 + tests/Makefile |3 +++ tests/libqblock/Makefile | 32 tests/libqblock/libqblock-test.c |4 5 files changed, 41 insertions(+), 0 deletions(-) create mode 100644 tests/libqblock/Makefile create mode 100644 tests/libqblock/libqblock-test.c diff --git a/.gitignore b/.gitignore index 824c0d2..eccb637 100644 --- a/.gitignore +++ b/.gitignore @@ -95,3 +95,4 @@ cscope.* tags TAGS *~ +tests/libqblock/*.bin diff --git a/Makefile b/Makefile index b0b9b8d..de8ea17 100644 --- a/Makefile +++ b/Makefile @@ -238,6 +238,7 @@ clean: rm -rf qapi-generated rm -rf qga/qapi-generated $(MAKE) -C tests/tcg clean + $(MAKE) -C tests/libqblock clean for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ rm -f $$d/qemu-options.def; \ diff --git a/tests/Makefile b/tests/Makefile index 26a67ce..69af1e2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -148,4 +148,7 @@ check-unit: $(patsubst %,check-%, $(check-unit-y)) check-block: $(patsubst %,check-%, $(check-block-y)) check: check-unit check-qtest +check-libqblock: + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C tests/libqblock V=$(V) TARGET_DIR=$*/ check-libqblock,) Please just put everything in tests/Makefile. make check should run it if LIBTOOL is available. +libqblock-test.bin: $(libqblock-test-objs) $(libqblock-la-path) + $(call quiet-command,$(LIBTOOL) --mode=link --quiet --tag=CC $(CC) -shared -rpath $(libdir) -o $@ $^, lt LINK $@) .bin looks so MS-DOS. :) Paolo
Re: [Qemu-devel] [PATCH V3 5/5] libqblock test example code
Il 18/09/2012 11:01, Wenchao Xia ha scritto: In this example, user first create two qcow2 images, and then get the backing file relationship information of them. Then does write and read sync IO on them. Please use gtest so that this can be easily extensible. Paolo Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- tests/libqblock/libqblock-test.c | 233 ++ 1 files changed, 233 insertions(+), 0 deletions(-) diff --git a/tests/libqblock/libqblock-test.c b/tests/libqblock/libqblock-test.c index c05c0c4..c0b7963 100644 --- a/tests/libqblock/libqblock-test.c +++ b/tests/libqblock/libqblock-test.c @@ -1,4 +1,237 @@ +/* + * QEMU block layer library test + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Wenchao Xia xiaw...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include stdarg.h +#include stdio.h +#include unistd.h +#include inttypes.h +#include string.h +#include stdlib.h +#include libqblock.h + +#define TEST_BUF_SIZE 1024 +static unsigned char buf_r[TEST_BUF_SIZE]; +static unsigned char buf_w[TEST_BUF_SIZE] = {0, 0, 0, 0}; + +typedef struct VerifyData { +unsigned char *buf_r; +unsigned char *buf_w; +int len; +} VerifyData; + +static void print_loc(QBlockProtInfo *loc) +{ +if (loc == NULL) { +printf(backing file is NULL.); +return; +} +switch (loc-prot_type) { +case QB_PROT_NONE: +printf(protocol type [none].); +break; +case QB_PROT_FILE: +printf(protocol type [file], filename [%s]., + loc-prot_op.o_file.filename); +break; +default: +printf(protocol type not supported.); +break; +} +} + +static void print_info_image_static(QBlockStaticInfo *info) +{ +printf(===image location:\n); +print_loc(info-loc); +printf(\nvirtual_size % PRId64 , format type %d [%s], + *(info-member_addr-virt_size), + info-fmt.fmt_type, qb_fmttype2str(info-fmt.fmt_type)); +printf(\nbacking image location:\n); +print_loc(info-member_addr-backing_loc); 
+printf(\n); +} + +static void test_check(VerifyData *vdata) +{ +int cmp; +cmp = memcmp(vdata-buf_r, vdata-buf_w, vdata-len); +if (cmp == 0) { +printf(compare succeed, %d.\n, vdata-buf_r[24]); +} else { +printf(!!! compare fail, %d.\n, vdata-buf_r[24]); +exit(1); +} +} + int main(int argc, char **argv) { +const char *filename1, *filename2; +QBroker *broker = NULL; +QBlockState *qbs = NULL; +QBlockProtInfo *ol = NULL; +QBlockFmtInfo *of = NULL; +QBlockStaticInfo *info_st = NULL; +int ret, flag; +int test_offset = 510; +int test_len = 520; +VerifyData vdata; +char err_str[1024]; + +vdata.buf_r = buf_r; +vdata.buf_w = buf_w; +vdata.len = test_len; + +filename1 = ./qemu_image1; +filename2 = ./qemu_image2; +printf(qemu test, filename1 is %s, filename2 is %s.\n, + filename1, filename2); + +ret = qb_broker_new(broker); +if (ret 0) { +goto free; +} + +ret = qb_state_new(broker, qbs); +if (ret 0) { +goto free; +} + +ret = qb_prot_info_new(broker, ol); +if (ret 0) { +goto free; +} + +ret = qb_fmt_info_new(broker, of); +if (ret 0) { +goto free; +} + +/* create a new image */ + +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename2; +of-fmt_type = QB_FMT_QCOW2; +of-fmt_op.o_qcow2.virt_size = 100 * 1024; +flag = 0; + +ret = qb_create(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(create fail 1. %s.\n, err_str); +goto unlink; +} + +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename1; +of-fmt_type = QB_FMT_QCOW2; +of-fmt_op.o_qcow2.backing_loc.prot_type = QB_PROT_FILE; +of-fmt_op.o_qcow2.backing_loc.prot_op.o_file.filename = filename2; +flag = 0; +ret = qb_create(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(create fail 2. 
%s.\n, err_str); +goto unlink; +} + +/* get informations */ +ol-prot_type = QB_PROT_FILE; +ol-prot_op.o_file.filename = filename1; +of-fmt_type = QB_FMT_NONE; +flag = LIBQBLOCK_O_NO_BACKING; +ret = qb_open(broker, qbs, ol, of, flag); +if (ret 0) { +qb_error_get_human_str(broker, err_str, sizeof(err_str)); +printf(info getting, open failed. %s.\n, err_str); +goto free; +} + +while (1) { +ret =
Re: [Qemu-devel] [PATCH 0/2] QEMU/xen: simplify cpu_ioreq_pio and cpu_ioreq_move
On Tue, 18 Sep 2012, Xu, Dongxiao wrote: Hi Stefano, Are these patches merged with Xen 4.2? I didn't see them in the upstream. The uint/int fix is critical to fix the nested guest boot issue. They are not. Ian decided that he wanted to merge a different version of them.
Re: [Qemu-devel] Block Migration Assertion in qemu-kvm 1.2.0
On 09/17/12 22:12, Peter Lieven wrote: On 09/17/12 10:41, Kevin Wolf wrote: Am 16.09.2012 12:13, schrieb Peter Lieven: Hi, when trying to block migrate a VM from one node to another, the source VM crashed with the following assertion: block.c:3829: bdrv_set_in_use: Assertion `bs->in_use != in_use' failed. Is this something already addressed/known? Not that I'm aware of, at least. Block migration doesn't seem to check whether the device is already in use, maybe this is the problem. Not sure why it would be in use, though, and in my quick test it didn't crash. So we need some more information: What's your command line, did you do anything specific in the monitor with block devices, what does the stacktrace look like, etc.? kevin, it seems that i can very easily force a crash if I cancel a running block migration. if I understand correctly what happens there are aio callbacks coming in after blk_mig_cleanup() has been called. what is the proper way to detect this in blk_mig_read_cb()? Thanks, Peter
Re: [Qemu-devel] Block Migration Assertion in qemu-kvm 1.2.0
Am 18.09.2012 12:28, schrieb Peter Lieven: On 09/17/12 22:12, Peter Lieven wrote: On 09/17/12 10:41, Kevin Wolf wrote: Am 16.09.2012 12:13, schrieb Peter Lieven: Hi, when trying to block migrate a VM from one node to another, the source VM crashed with the following assertion: block.c:3829: bdrv_set_in_use: Assertion `bs->in_use != in_use' failed. Is this something already addressed/known? Not that I'm aware of, at least. Block migration doesn't seem to check whether the device is already in use, maybe this is the problem. Not sure why it would be in use, though, and in my quick test it didn't crash. So we need some more information: What's your command line, did you do anything specific in the monitor with block devices, what does the stacktrace look like, etc.? kevin, it seems that i can very easily force a crash if I cancel a running block migration. if I understand correctly what happens there are aio callbacks coming in after blk_mig_cleanup() has been called. what is the proper way to detect this in blk_mig_read_cb()? You could try this, it doesn't detect the situation in blk_mig_read_cb(), but ensures that all callbacks happen before we do the actual cleanup (completely untested): diff --git a/block-migration.c b/block-migration.c index 7def8ab..ed93301 100644 --- a/block-migration.c +++ b/block-migration.c @@ -519,6 +519,8 @@ static void blk_mig_cleanup(void) BlkMigDevState *bmds; BlkMigBlock *blk; +bdrv_drain_all(); + set_dirty_tracking(0); while ((bmds = QSIMPLEQ_FIRST(block_mig_state.bmds_list)) != NULL) {
Re: [Qemu-devel] [PATCH V3 1/5] libqblock build system
Il 18/09/2012 11:01, Wenchao Xia ha scritto: Libqblock was placed in new directory ./libqblock, libtool will build dynamic library there, source files of block layer remains in ./block. So block related source code will generate 3 sets of binary, first is old ones used in qemu, second and third are non PIC and PIC ones in ./libqblock. GCC compiler flag visibility=hidden was used with special macro, to export only symbols that was marked as PUBLIC. Signed-off-by: Wenchao Xia xiaw...@linux.vnet.ibm.com --- Makefile| 13 - Makefile.objs |6 libqblock/Makefile | 64 +++ 3 files changed, 82 insertions(+), 1 deletions(-) create mode 100644 libqblock/Makefile create mode 100644 libqblock/libqblock-error.c create mode 100644 libqblock/libqblock.c diff --git a/Makefile b/Makefile index 971e92f..b0b9b8d 100644 --- a/Makefile +++ b/Makefile @@ -164,6 +164,17 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o +## +# Support building shared library libqblock +ifeq ($(LIBTOOL),) +$(libqblock-lib-la): + @echo libtool is missing, please install and rerun configure; exit 1 +else +$(libqblock-lib-la): + $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C libqblock V=$(V) TARGET_DIR=$*/ $(libqblock-lib-la),) +endif +### + vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS), LINK $@) @@ -227,7 +238,7 @@ clean: rm -rf qapi-generated rm -rf qga/qapi-generated $(MAKE) -C tests/tcg clean - for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard; do \ + for d in $(ALL_SUBDIRS) $(QEMULIBS) libcacard libqblock; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ rm -f $$d/qemu-options.def; \ done diff --git a/Makefile.objs b/Makefile.objs index 4412757..8a4c9fc 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -248,3 +248,9 @@ nested-vars += \ common-obj-y \ extra-obj-y dummy := $(call 
unnest-vars) + +# +# libqblock + +libqblock-lib-la = libqblock.la +libqblock-lib-path = libqblock diff --git a/libqblock/Makefile b/libqblock/Makefile new file mode 100644 index 000..bf7abcc --- /dev/null +++ b/libqblock/Makefile @@ -0,0 +1,64 @@ +### +# libqblock Makefile +# Todo: +#1 trace related files is generated in this directory, move +# them to the root directory. +## +-include ../config-host.mak +-include $(SRC_PATH)/Makefile.objs +-include $(SRC_PATH)/rules.mak + +# +# Library settings +# +$(call set-vpath, $(SRC_PATH)) + +#expand the foldered vars,especially ./block +dummy := $(call unnest-vars-1) + +#library objects +tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ + qemu-timer-common.o main-loop.o notify.o \ + iohandler.o cutils.o iov.o async.o +tools-obj-$(CONFIG_POSIX) += compatfd.o Do you really need all of these? (BTW, I posted recently a patch to move tools-obj-y to Makefile.objs. It doesn't apply anymore, I'll repost---but the conflicts are trivial). +libqblock-y=libqblock.o libqblock-error.o +libqblock-lib-y=$(patsubst %.o,%.lo,$(libqblock-y)) + +QEMU_OBJS=$(tools-obj-y) $(block-obj-y) +QEMU_OBJS_FILTERED=$(filter %.o,$(QEMU_OBJS)) What does this filter out? +QEMU_OBJS_LIB=$(patsubst %.o, %.lo,$(QEMU_OBJS_FILTERED)) + +QEMU_CFLAGS+= -I../ -I../include +#adding magic macro define for symbol hiding and exposing +QEMU_CFLAGS+= -fvisibility=hidden -D LIBQB_BUILD + +#dependency libraries +LIBS+=-lz $(LIBS_TOOLS) + +# +# Runtime rules +# +clean: + rm -f *.lo *.o *.d *.la libqblock-test trace.c trace.c-timestamp + rm -rf .libs block trace + +all: libqblock-test Do not put yet the libqblock-test rules in here, please. Paolo + @true + +help: + @echo type make libqblock-test at root dirtory, libtool is required + +#make dir block at runtime which would hold the output of block/*.c +block: + @mkdir block + +ifeq ($(LIBTOOL),) +$(libqblock-lib-la): + @echo libtool is missing, please install and rerun configure;
Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga
On Tue, 2012-09-18 at 11:51 +0200, Gerd Hoffmann wrote: This patch adds a mmio bar to the qemu standard vga which allows to access the standard vga registers and bochs dispi interface registers via mmio. I had a patch like that somewhere (or is that it ? :-) I dropped it in favor of a more interesting approach doing a virtio-vga, which Anthony and I have been hacking on a bit, but due to time constraints haven't really finished at this point. In any case, I'm fine with this patch but does it help anybody ? Cheers, Ben. Cc: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/vga-pci.c | 97 ++ hw/vga.c |6 ++-- hw/vga_int.h |6 +++ 3 files changed, 106 insertions(+), 3 deletions(-) diff --git a/hw/vga-pci.c b/hw/vga-pci.c index 9abbada..e05e2ef 100644 --- a/hw/vga-pci.c +++ b/hw/vga-pci.c @@ -30,9 +30,36 @@ #include qemu-timer.h #include loader.h +/* + * QEMU Standard VGA -- MMIO area spec. + * + * Using PCI bar #2, keeping #1 free, which leaves the + * door open to upgrade bar #0 to 64bit. + * + * mmio area layout: + * 0x - 0x03ff reserved, for possible virtio extension. + * 0x0400 - 0x041f vga ioports (0x3c0 - 0x3df), remapped 1:1 + * 0x0500 - 0x0515 bochs dispi interface registers, mapped flat without + * index/data ports. Use (index 1) as offset for + * (16bit) register access. 
+ */ +#define PCI_VGA_IOPORT_OFFSET 0x400 +#define PCI_VGA_IOPORT_SIZE (0x3e0 - 0x3c0) +#define PCI_VGA_BOCHS_OFFSET 0x500 +#define PCI_VGA_BOCHS_SIZE(0x0b * 2) +#define PCI_VGA_MMIO_SIZE 0x1000 + +enum vga_pci_flags { +PCI_VGA_FLAG_ENABLE_MMIO = 1, +}; + typedef struct PCIVGAState { PCIDevice dev; VGACommonState vga; +uint32_t flags; +MemoryRegion mmio; +MemoryRegion ioport; +MemoryRegion bochs; } PCIVGAState; static const VMStateDescription vmstate_vga_pci = { @@ -47,6 +74,60 @@ static const VMStateDescription vmstate_vga_pci = { } }; +static uint64_t pci_vga_ioport_read(void *ptr, target_phys_addr_t addr, +unsigned size) +{ +PCIVGAState *d = ptr; +return vga_ioport_read(d-vga, addr); +} + +static void pci_vga_ioport_write(void *ptr, target_phys_addr_t addr, + uint64_t val, unsigned size) +{ +PCIVGAState *d = ptr; +vga_ioport_write(d-vga, addr, val); +} + +static const MemoryRegionOps pci_vga_ioport_ops = { +.read = pci_vga_ioport_read, +.write = pci_vga_ioport_write, +.valid.min_access_size = 1, +.valid.max_access_size = 4, +.impl.min_access_size = 1, +.impl.max_access_size = 1, +.endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t pci_vga_bochs_read(void *ptr, target_phys_addr_t addr, + unsigned size) +{ +PCIVGAState *d = ptr; +int index = addr 1; + +vbe_ioport_write_index(d-vga, 0, index); +return vbe_ioport_read_data(d-vga, 0); +} + +static void pci_vga_bochs_write(void *ptr, target_phys_addr_t addr, +uint64_t val, unsigned size) +{ +PCIVGAState *d = ptr; +int index = addr 1; + +vbe_ioport_write_index(d-vga, 0, index); +vbe_ioport_write_data(d-vga, 0, val); +} + +static const MemoryRegionOps pci_vga_bochs_ops = { +.read = pci_vga_bochs_read, +.write = pci_vga_bochs_write, +.valid.min_access_size = 1, +.valid.max_access_size = 4, +.impl.min_access_size = 2, +.impl.max_access_size = 2, +.endianness = DEVICE_LITTLE_ENDIAN, +}; + static int pci_vga_initfn(PCIDevice *dev) { PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev); @@ -62,6 +143,21 @@ static int 
pci_vga_initfn(PCIDevice *dev) /* XXX: VGA_RAM_SIZE must be a power of two */ pci_register_bar(d-dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, s-vram); + /* mmio bar for vga register access */ + if (d-flags (1 PCI_VGA_FLAG_ENABLE_MMIO)) { + memory_region_init(d-mmio, vga.mmio, 4096); + memory_region_init_io(d-ioport, pci_vga_ioport_ops, d, + vga ioports remapped, PCI_VGA_IOPORT_SIZE); + memory_region_init_io(d-bochs, pci_vga_bochs_ops, d, + bochs dispi interface, PCI_VGA_BOCHS_SIZE); + + memory_region_add_subregion(d-mmio, PCI_VGA_IOPORT_OFFSET, + d-ioport); + memory_region_add_subregion(d-mmio, PCI_VGA_BOCHS_OFFSET, + d-bochs); + pci_register_bar(d-dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, d-mmio); + } + if (!dev-rom_bar) { /* compatibility with pc-0.13 and older */ vga_init_vbe(s, pci_address_space(dev)); @@ -77,6 +173,7 @@
Re: [Qemu-devel] [PATCH 2/9] add unregister_displaychangelistener
On Tue, 18 Sep 2012, Gerd Hoffmann wrote: Also change the way the gui_timer is initialized: each time a displaychangelistener is registered or unregistered we'll check whenever we need a timer (due to dpy_refresh callback being present) and if so setup a timer, otherwise zap it. This way the gui timer works correctly with displaychangelisteners coming and going. Signed-off-by: Gerd Hoffmann kra...@redhat.com Acked-by: Stefano Stabellini stefano.stabell...@eu.citrix.com console.h | 10 ++ vl.c | 31 +++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/console.h b/console.h index 646ad4b..48fef22 100644 --- a/console.h +++ b/console.h @@ -229,9 +229,19 @@ static inline int is_buffer_shared(DisplaySurface *surface) !(surface-flags QEMU_REALPIXELS_FLAG)); } +void gui_setup_refresh(DisplayState *ds); + static inline void register_displaychangelistener(DisplayState *ds, DisplayChangeListener *dcl) { QLIST_INSERT_HEAD(ds-listeners, dcl, next); +gui_setup_refresh(ds); +} + +static inline void unregister_displaychangelistener(DisplayState *ds, +DisplayChangeListener *dcl) +{ +QLIST_REMOVE(dcl, next); +gui_setup_refresh(ds); } static inline void dpy_update(DisplayState *s, int x, int y, int w, int h) diff --git a/vl.c b/vl.c index 2a7c92a..fbb77fe 100644 --- a/vl.c +++ b/vl.c @@ -1288,6 +1288,29 @@ static void gui_update(void *opaque) qemu_mod_timer(ds-gui_timer, interval + qemu_get_clock_ms(rt_clock)); } +void gui_setup_refresh(DisplayState *ds) +{ +DisplayChangeListener *dcl; +bool need_timer = false; + +QLIST_FOREACH(dcl, ds-listeners, next) { +if (dcl-dpy_refresh != NULL) { +need_timer = true; +break; +} +} + +if (need_timer ds-gui_timer == NULL) { +ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds); +qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock)); +} +if (!need_timer ds-gui_timer != NULL) { +qemu_del_timer(ds-gui_timer); +qemu_free_timer(ds-gui_timer); +ds-gui_timer = NULL; +} +} + struct vm_change_state_entry { VMChangeStateHandler *cb; 
void *opaque; @@ -2350,7 +2373,6 @@ int main(int argc, char **argv, char **envp) const char *kernel_filename, *kernel_cmdline; char boot_devices[33] = cad; /* default to HD-floppy-CD-ROM */ DisplayState *ds; -DisplayChangeListener *dcl; int cyls, heads, secs, translation; QemuOpts *hda_opts = NULL, *opts, *machine_opts; QemuOptsList *olist; @@ -3698,13 +3720,6 @@ int main(int argc, char **argv, char **envp) /* display setup */ dpy_resize(ds); -QLIST_FOREACH(dcl, ds-listeners, next) { -if (dcl-dpy_refresh != NULL) { -ds-gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds); -qemu_mod_timer(ds-gui_timer, qemu_get_clock_ms(rt_clock)); -break; -} -} text_consoles_set_display(ds); if (foreach_device_config(DEV_GDB, gdbserver_start) 0) { -- 1.7.1
Re: [Qemu-devel] [PATCH -v2 2/2] make the compaction skip ahead logic robust
On Tue, Sep 18, 2012 at 09:14:55AM +0100, Richard Davies wrote: Hi Mel, Thanks for your latest patch, I attach a perf report below with this on top of all previous patches. There is still lock contention, though in a different place. 59.97% qemu-kvm [kernel.kallsyms] [k] _raw_spin_lock_irqsave | --- _raw_spin_lock_irqsave | |--99.30%-- compact_checklock_irqsave | | | |--99.98%-- compaction_alloc Ok, this just means the focus has moved to the zone-lock instead of the zone-lru_lock. This was expected to some extent. This is an additional patch that defers acquisition of the zone-lock for as long as possible. Incidentally, I checked the efficiency of compaction - i.e. how many pages scanned versus how many pages isolated and the efficiency completely sucks. It must be addressed but addressing the lock contention should happen first. ---8--- mm: compaction: Acquire the zone-lock as late as possible The zone lock is required when isolating pages to allocate and for checking PageBuddy. It is a coarse-grained lock but the current implementation acquires the lock when examining each pageblock before it is known if there are free pages to isolate. This patch defers acquiring the zone lock for as long as possible. In the event there are no free pages in the pageblock then the lock will not be acquired at all. Signed-off-by: Mel Gorman mgor...@suse.de --- mm/compaction.c | 80 --- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index a5d698f..57ff9ef 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -89,19 +89,14 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags, return true; } -static inline bool compact_trylock_irqsave(spinlock_t *lock, - unsigned long *flags, struct compact_control *cc) -{ - return compact_checklock_irqsave(lock, flags, false, cc); -} - /* * Isolate free pages onto a private freelist. Caller must hold zone-lock. 
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free * pages inside of the pageblock (even though it may still end up isolating * some pages). */ -static unsigned long isolate_freepages_block(unsigned long start_pfn, +static unsigned long isolate_freepages_block(struct compact_control *cc, + unsigned long start_pfn, unsigned long end_pfn, struct list_head *freelist, bool strict) @@ -109,6 +104,8 @@ static unsigned long isolate_freepages_block(unsigned long start_pfn, int nr_scanned = 0, total_isolated = 0; unsigned long blockpfn = start_pfn; struct page *cursor; + unsigned long flags; + bool locked = false; cursor = pfn_to_page(blockpfn); @@ -117,18 +114,29 @@ static unsigned long isolate_freepages_block(unsigned long start_pfn, int isolated, i; struct page *page = cursor; - if (!pfn_valid_within(blockpfn)) { - if (strict) - return 0; - continue; - } + if (!pfn_valid_within(blockpfn)) + goto strict_check; nr_scanned++; - if (!PageBuddy(page)) { - if (strict) - return 0; - continue; - } + if (!PageBuddy(page)) + goto strict_check; + + /* +* The zone lock must be held to isolate freepages. This +* unfortunately this is a very coarse lock and can be +* heavily contended if there are parallel allocations +* or parallel compactions. For async compaction do not +* spin on the lock and we acquire the lock as late as +* possible. +*/ + locked = compact_checklock_irqsave(cc-zone-lock, flags, + locked, cc); + if (!locked) + break; + + /* Recheck this is a buddy page under lock */ + if (!PageBuddy(page)) + goto strict_check; /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); @@ -145,10 +153,24 @@ static unsigned long isolate_freepages_block(unsigned long start_pfn, blockpfn += isolated - 1; cursor += isolated - 1; } + + continue; + +strict_check: +
Re: [Qemu-devel] [PATCH 1/2] qemu-char: BUGFIX, don't call FD_ISSET with negative fd
Am 18.09.2012 02:08, schrieb David Gibson: On Mon, Sep 17, 2012 at 01:24:51PM -0500, Anthony Liguori wrote: David Gibson da...@gibson.dropbear.id.au writes: tcp_chr_connect(), unlike for example udp_chr_update_read_handler() does not check if the fd it is using is valid (>= 0) before passing it to qemu_set_fd_handler2(). If using e.g. a TCP serial port, which is not initially connected, this can result in -1 being passed to FD_ISSET, which has undefined behaviour. On x86 it seems to harmlessly return 0, but on PowerPC, it causes a fortify buffer overflow error to be thrown. This patch fixes this by putting an extra test in tcp_chr_connect(), and also adds an assert to qemu_set_fd_handler2() to catch other such errors on all platforms, rather than just some. Signed-off-by: David Gibson da...@gibson.dropbear.id.au Applied. Thanks. Excellent. Fwiw, I think this one should go into the stable branch, too. ...which you indicate by cc'ing qemu-stable since that is not handled by Anthony himself. Queued for stable-0.15. Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Re: [Qemu-devel] Breakage
On Tue, 18 Sep 2012, Max Filippov wrote: On Tue, Sep 18, 2012 at 12:15 AM, Eduardo Habkost ehabk...@redhat.com wrote: On Mon, Sep 17, 2012 at 11:54:42PM +0400, malc wrote: On Mon, 17 Sep 2012, Anthony Liguori wrote: malc av1...@comtv.ru writes: Some(thing|one) broke compilation with pcspk enabled. Symptoms being: ../libhw32/hw/pcspk.o: In function `pcspk_io_write': /home/malc/x/rcs/git/qemu/hw/pcspk.c:145: undefined reference to `pit_set_gate' ../libhw32/hw/pcspk.o: In function `pcspk_io_read': /home/malc/x/rcs/git/qemu/hw/pcspk.c:130: undefined reference to `pit_get_channel_info' ../libhw32/hw/pcspk.o: In function `pcspk_callback': /home/malc/x/rcs/git/qemu/hw/pcspk.c:81: undefined reference to `pit_get_channel_info' collect2: ld returned 1 exit status Try cleaning your build directory. It builds fine for me. --target-list=xtensa-softmmu --audio-card-list=pcspk It looks like this particular configuration never worked, since xtensa-softmmu was introduced (commit cfa550c6acc6718c3f932e858366e3e1e81266d6). Other embedded platforms (I've tried cris-softmmu, lm32-softmmu, or32-softmmu, sh4-softmmu) give the same result. Does it even make sense to have a PC-speaker on a Xtensa machine? Should the machine have a i8254 PIT? I doubt that it's meaningful for any of the mentioned _architectures_, definitely not for xtensa. Condensed version of long story: I have a script that invokes QEMU's configure with tons of command line arguments (i.e. turn on most of the audio drivers, most of the audio cards, disable stuff i don't have or need; enable ccache and so forth). This script was used to configure xtensa-softmmu; configure never complained, but the make later bombed in the way mentioned above. To avoid this surprising result we can (should?) filter out the cards requested by the user based on contents of audio_possible_card_list (in a fashion similar to the one employed for driver list filtering). -- mailto:av1...@comtv.ru
Re: [Qemu-devel] [PATCH v2 1/2] Versatile Express: Fix NOR flash 0 address and remove flash alias
On 17 September 2012 21:07, Francesco Lavra francescolavra...@gmail.com wrote: In the A series memory map (implemented in the Cortex A15 CoreTile), the first NOR flash bank (flash 0) is mapped to address 0x08000000, while address 0x00000000 can be configured as alias to either the first or the second flash bank. This patch fixes the definition of flash 0 address, and for simplicity removes the alias definition. Signed-off-by: Francesco Lavra francescolavra...@gmail.com Reviewed-by: Peter Maydell peter.mayd...@linaro.org -- PMM
[Qemu-devel] [RFC PATCH 05/16] qcow2: Allocate l2meta only for cluster allocations
Even for writes to already allocated clusters, an l2meta is allocated, though it stays effectively unused. After this patch, only allocating requests still have one. Each l2meta now describes an in-flight request that writes to clusters that are not yet hooked up in the L2 table. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 23 +-- block/qcow2.c | 32 +--- block/qcow2.h |7 +-- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c4752ee..c2b59e7 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -652,9 +652,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) uint64_t cluster_offset = m-alloc_offset; trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m-nb_clusters); - -if (m-nb_clusters == 0) -return 0; +assert(m-nb_clusters 0); old_cluster = g_malloc(m-nb_clusters * sizeof(uint64_t)); @@ -856,7 +854,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, -int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m) +int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) { BDRVQcowState *s = bs-opaque; int l2_index, ret, sectors; @@ -928,11 +926,6 @@ again: } /* If there is something left to allocate, do that now */ -*m = (QCowL2Meta) { -.nb_clusters= 0, -}; -qemu_co_queue_init(m-dependent_requests); - if (nb_clusters 0) { uint64_t alloc_offset; uint64_t alloc_cluster_offset; @@ -980,7 +973,9 @@ again: cluster_offset = alloc_cluster_offset; } -*m = (QCowL2Meta) { +*m = g_malloc0(sizeof(**m)); + +**m = (QCowL2Meta) { .alloc_offset = alloc_cluster_offset, .offset = alloc_offset ~(s-cluster_size - 1), .nb_clusters= nb_clusters, @@ -995,8 +990,8 @@ again: .nb_sectors = avail_sectors - nb_sectors, }, }; -qemu_co_queue_init(m-dependent_requests); 
-QLIST_INSERT_HEAD(s-cluster_allocs, m, next_in_flight); +qemu_co_queue_init((*m)-dependent_requests); +QLIST_INSERT_HEAD(s-cluster_allocs, *m, next_in_flight); } } @@ -1013,8 +1008,8 @@ again: return 0; fail: -if (m-nb_clusters 0) { -QLIST_REMOVE(m, next_in_flight); +if (*m (*m)-nb_clusters 0) { +QLIST_REMOVE(*m, next_in_flight); } return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index a98e899..c0a2822 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -778,8 +778,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { -l2meta = g_malloc0(sizeof(*l2meta)); -qemu_co_queue_init(l2meta-dependent_requests); +l2meta = NULL; trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num (s-cluster_sectors - 1); @@ -790,7 +789,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num 9, -index_in_cluster, n_end, cur_nr_sectors, cluster_offset, l2meta); +index_in_cluster, n_end, cur_nr_sectors, cluster_offset, l2meta); if (ret 0) { goto fail; } @@ -836,14 +835,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } -ret = qcow2_alloc_cluster_link_l2(bs, l2meta); -if (ret 0) { -goto fail; -} +if (l2meta != NULL) { +ret = qcow2_alloc_cluster_link_l2(bs, l2meta); +if (ret 0) { +goto fail; +} -run_dependent_requests(s, l2meta); -g_free(l2meta); -l2meta = NULL; +run_dependent_requests(s, l2meta); +g_free(l2meta); +l2meta = NULL; +} remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; @@ -1124,11 +1125,10 @@ static int preallocate(BlockDriverState *bs) uint64_t host_offset = 0; int num; int ret; -QCowL2Meta meta; +QCowL2Meta *meta; nb_sectors = bdrv_getlength(bs) 9; offset = 0; -qemu_co_queue_init(meta.dependent_requests); while (nb_sectors) { num = MIN(nb_sectors, INT_MAX 9); @@ -1138,15 +1138,17 @@ static int preallocate(BlockDriverState *bs) return ret; } -ret = qcow2_alloc_cluster_link_l2(bs, meta); +ret = 
qcow2_alloc_cluster_link_l2(bs, meta); if (ret 0) { -
[Qemu-devel] [RFC PATCH 11/16] qcow2: Add error handling to the l2meta coroutine
Not exactly bisectable, but one large patch isn't much better either :-( m-error is used to allow bdrv_drain() to stop with l2meta in error state rather than go into an endless loop. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2.c | 44 block/qcow2.h |3 +++ 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 2e220c7..e001436 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -771,11 +771,33 @@ static void coroutine_fn process_l2meta(void *opaque) m-sleeping = false; } +again: qemu_co_mutex_lock(s-lock); ret = qcow2_alloc_cluster_link_l2(bs, m); if (ret 0) { -/* FIXME */ +/* + * This is a nasty situation: We have already completed the allocation + * write request and returned success, so just failing it isn't + * possible. We need to make sure to return an error during the next + * flush. + * + * However, we still can't drop the l2meta because we want I/O errors + * to be recoverable e.g. after the block device has been grown or the + * network connection restored. Sleep until the next flush comes and + * then retry. + */ +s-flush_error = ret; + +qemu_co_mutex_unlock(s-lock); +qemu_co_rwlock_unlock(s-l2meta_flush); +m-sleeping = true; +m-error = true; +qemu_coroutine_yield(); +m-error = false; +m-sleeping = false; +qemu_co_rwlock_rdlock(s-l2meta_flush); +goto again; } run_dependent_requests(s, m); @@ -812,14 +834,27 @@ static bool qcow2_drain(BlockDriverState *bs) { BDRVQcowState *s = bs-opaque; QCowL2Meta *m; +bool busy = false; QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { -if (m-sleeping) { +if (m-sleeping !m-error) { qemu_coroutine_enter(m-co, NULL); } } -return !QLIST_EMPTY(s-cluster_allocs); +/* + * If there's still a sleeping l2meta, then an error must have occured. + * Don't consider l2metas in this state as busy, they only get active on + * flushes. 
+ */ +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { +if (!m-sleeping) { +busy = true; +break; +} +} + +return busy; } static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, @@ -1648,7 +1683,8 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) } } -ret = 0; +ret = s-flush_error; +s-flush_error = 0; fail: qemu_co_mutex_unlock(s-lock); resume_l2meta(s); diff --git a/block/qcow2.h b/block/qcow2.h index 8bf145c..1c4dc0e 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -171,6 +171,8 @@ typedef struct BDRVQcowState { CoRwlock l2meta_flush; bool in_l2meta_flush; +int flush_error; + uint32_t crypt_method; /* current crypt method, 0 if no key yet */ uint32_t crypt_method_header; AES_KEY aes_encrypt_key; @@ -250,6 +252,7 @@ typedef struct QCowL2Meta * be reentered in order to cancel the timer. */ bool sleeping; +bool error; /** Coroutine that handles delayed COW and updates L2 entry */ Coroutine *co; -- 1.7.6.5
[Qemu-devel] [RFC PATCH 12/16] qcow2: Handle dependencies earlier
Handling overlapping allocations aren't just a detail of cluster allocation. They are rather one of three ways to get the host cluster offset for a write request: 1. If a request overlaps an in-flight allocations, the cluster offset can be taken from there (this is what handle_dependencies will evolve into) or the request must just wait until the allocation has completed. Accessing the L2 is not valid in this case, it has outdated information. 2. Outside overlapping areas, check the clusters that can be written to as they are, with no COW involved. 3. If a COW is required, allocate new clusters Changing the code to reflect this doesn't change the behaviour because overlaps cannot exist for clusters that are kept in step 2. It does however make it easier for later patches to work on clusters that belong to an allocation that is still in flight. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 55 ++-- block/qcow2.h |5 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 0f50888..4d5c3da 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -889,16 +889,10 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, uint64_t *host_offset, unsigned int *nb_clusters) { BDRVQcowState *s = bs-opaque; -int ret; trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, *host_offset, *nb_clusters); -ret = handle_dependencies(bs, guest_offset, nb_clusters); -if (ret 0) { -return ret; -} - /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -910,7 +904,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, *host_offset = cluster_offset; return 0; } else { -ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); +int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); if (ret 0) { return ret; } @@ -950,20 +944,51 @@ int 
qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, n_start, n_end); -/* Find L2 entry for the first involved cluster */ again: -ret = get_cluster_table(bs, offset, l2_table, l2_index); -if (ret 0) { -return ret; -} - /* * Calculate the number of clusters to look for. We stop at L2 table * boundaries to keep things simple. */ +l2_index = offset_to_l2_index(s, offset); nb_clusters = MIN(size_to_clusters(s, n_end BDRV_SECTOR_BITS), s-l2_size - l2_index); +/* + * Now start gathering as many contiguous clusters as possible: + * + * 1. Check for overlaps with in-flight allocations + * + * a) Overlap not in the first cluster - shorten this request and let + * the caller handle the rest in its next loop iteration. + * + * b) Real overlaps of two requests. Yield and restart the search for + * contiguous clusters (the situation could have changed while we + * were sleeping) + * + * c) TODO: Request starts in the same cluster as the in-flight + * allocation ends. Shorten the COW of the in-fight allocation, set + * cluster_offset to write to the same cluster and set up the right + * synchronisation between the in-flight request and the new one. + * + * 2. Count contiguous COPIED clusters. + *TODO: Consider cluster_offset if set in step 1c. + * + * 3. If the request still hasn't completed, allocate new clusters, + *considering any cluster_offset of steps 1c or 2. 
+ */ +ret = handle_dependencies(bs, offset, nb_clusters); +if (ret == -EAGAIN) { +goto again; +} else if (ret 0) { +return ret; +} + +/* Find L2 entry for the first involved cluster */ +ret = get_cluster_table(bs, offset, l2_table, l2_index); +if (ret 0) { +return ret; +} + cluster_offset = be64_to_cpu(l2_table[l2_index]); /* @@ -1028,9 +1053,7 @@ again: /* Allocate, if necessary at a given offset in the image file */ ret = do_alloc_cluster_offset(bs, alloc_offset, alloc_cluster_offset, nb_clusters); -if (ret == -EAGAIN) { -goto again; -} else if (ret 0) { +if (ret 0) { goto fail; } diff --git a/block/qcow2.h b/block/qcow2.h index 1c4dc0e..eb94463 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -302,6 +302,11 @@ static inline int size_to_l1(BDRVQcowState *s, int64_t size) return (size + (1ULL shift) -
[Qemu-devel] [RFC PATCH 10/16] qcow2: Delay the COW
Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 29 + block/qcow2.c | 31 --- block/qcow2.h | 10 ++ 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index a89d68d..0f50888 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -791,6 +791,34 @@ out: return i; } +struct KickL2Meta { +QEMUBH *bh; +QCowL2Meta *m; +}; + +static void kick_l2meta_bh(void *opaque) +{ +struct KickL2Meta *k = opaque; +QCowL2Meta *m = k-m; + +qemu_bh_delete(k-bh); +free(k); + +if (m-sleeping) { +qemu_coroutine_enter(m-co, NULL); +} +} + +static void kick_l2meta(QCowL2Meta *m) +{ +struct KickL2Meta *k = g_malloc(sizeof(*k)); + +k-bh = qemu_bh_new(kick_l2meta_bh, k); +k-m = m; + +qemu_bh_schedule(k-bh); +} + /* * Check if there already is an AIO write request in flight which allocates * the same cluster. In this case we need to wait until the previous @@ -823,6 +851,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, /* Wait for the dependency to complete. We need to recheck * the free/allocated clusters when we continue. 
*/ qemu_co_mutex_unlock(s-lock); +kick_l2meta(old_alloc); qemu_co_queue_wait(old_alloc-dependent_requests); qemu_co_mutex_lock(s-lock); return -EAGAIN; diff --git a/block/qcow2.c b/block/qcow2.c index f9881d0..2e220c7 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -765,6 +765,12 @@ static void coroutine_fn process_l2meta(void *opaque) BDRVQcowState *s = bs-opaque; int ret; +if (!s-in_l2meta_flush) { +m-sleeping = true; +co_sleep_ns(rt_clock, 100); +m-sleeping = false; +} + qemu_co_mutex_lock(s-lock); ret = qcow2_alloc_cluster_link_l2(bs, m); @@ -781,17 +787,37 @@ static void coroutine_fn process_l2meta(void *opaque) static inline coroutine_fn void stop_l2meta(BDRVQcowState *s) { +QCowL2Meta *m; + +s-in_l2meta_flush = true; +again: +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { +if (m-sleeping) { +qemu_coroutine_enter(m-co, NULL); +/* next_in_flight link could have become invalid */ +goto again; +} +} + qemu_co_rwlock_wrlock(s-l2meta_flush); } static inline coroutine_fn void resume_l2meta(BDRVQcowState *s) { +s-in_l2meta_flush = false; qemu_co_rwlock_unlock(s-l2meta_flush); } static bool qcow2_drain(BlockDriverState *bs) { BDRVQcowState *s = bs-opaque; +QCowL2Meta *m; + +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { +if (m-sleeping) { +qemu_coroutine_enter(m-co, NULL); +} +} return !QLIST_EMPTY(s-cluster_allocs); } @@ -876,7 +902,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } if (l2meta != NULL) { -Coroutine *co; ProcessL2Meta p = { .bs = bs, .m = l2meta, @@ -886,8 +911,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, qemu_co_rwlock_rdlock(s-l2meta_flush); l2meta-is_written = true; -co = qemu_coroutine_create(process_l2meta); -qemu_coroutine_enter(co, p); +l2meta-co = qemu_coroutine_create(process_l2meta); +qemu_coroutine_enter(l2meta-co, p); l2meta = NULL; qemu_co_mutex_lock(s-lock); diff --git a/block/qcow2.h b/block/qcow2.h index 73dac17..8bf145c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -169,6 
+169,7 @@ typedef struct BDRVQcowState { * Writers: Anyone who requires l2meta to be flushed */ CoRwlock l2meta_flush; +bool in_l2meta_flush; uint32_t crypt_method; /* current crypt method, 0 if no key yet */ uint32_t crypt_method_header; @@ -245,6 +246,15 @@ typedef struct QCowL2Meta bool is_written; /** + * true if the request is sleeping in the COW delay and the coroutine may + * be reentered in order to cancel the timer. + */ +bool sleeping; + +/** Coroutine that handles delayed COW and updates L2 entry */ +Coroutine *co; + +/** * Requests that overlap with this allocation and wait to be restarted * when the allocating request has completed. */ -- 1.7.6.5
[Qemu-devel] [RFC PATCH 14/16] qcow2: Execute run_dependent_requests() without lock
There's no reason for run_dependent_requests() to hold s-lock, and a later patch will require that in fact the lock is not held. Also, before this patch, run_dependent_requests() not only does what its name suggests, but also removes the l2meta from the list of in-flight requests. Change this, while we're touching it. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2.c | 26 +++--- 1 files changed, 15 insertions(+), 11 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index e001436..88a2020 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -739,16 +739,9 @@ fail: static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) { -/* Take the request off the list of running requests */ -if (m-nb_clusters != 0) { -QLIST_REMOVE(m, next_in_flight); -} - /* Restart all dependent requests */ if (!qemu_co_queue_empty(m-dependent_requests)) { -qemu_co_mutex_unlock(s-lock); qemu_co_queue_restart_all(m-dependent_requests); -qemu_co_mutex_lock(s-lock); } } @@ -800,10 +793,18 @@ again: goto again; } +qemu_co_mutex_unlock(s-lock); + +/* Take the request off the list of running requests */ +if (m-nb_clusters != 0) { +QLIST_REMOVE(m, next_in_flight); +} + +/* Meanwhile some new dependencies could have accumulated */ run_dependent_requests(s, m); + g_free(m); -qemu_co_mutex_unlock(s-lock); qemu_co_rwlock_unlock(s-l2meta_flush); } @@ -961,13 +962,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, ret = 0; fail: +qemu_co_mutex_unlock(s-lock); + if (l2meta != NULL) { +if (l2meta-nb_clusters != 0) { +QLIST_REMOVE(l2meta, next_in_flight); +} run_dependent_requests(s, l2meta); g_free(l2meta); } -qemu_co_mutex_unlock(s-lock); - qemu_iovec_destroy(hd_qiov); qemu_vfree(cluster_data); trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); @@ -1259,7 +1263,7 @@ static int preallocate(BlockDriverState *bs) /* There are no dependent requests, but we need to remove our request * from the list of in-flight requests */ if (meta != NULL) { 
-run_dependent_requests(bs-opaque, meta); +QLIST_REMOVE(meta, next_in_flight); } /* TODO Preallocate data if requested */ -- 1.7.6.5
Re: [Qemu-devel] [RfC PATCH] vga: add mmio bar to standard vga
On 09/18/12 12:32, Benjamin Herrenschmidt wrote: On Tue, 2012-09-18 at 11:51 +0200, Gerd Hoffmann wrote: This patch adds an mmio bar to the qemu standard vga which allows accessing the standard vga registers and bochs dispi interface registers via mmio. I had a patch like that somewhere (or is that it? :-) I dropped it in favor of a more interesting approach doing a virtio-vga, which Anthony and I have been hacking on a bit, but due to time constraints haven't really finished at this point. Yeah, it has been quiet on this front for a while, that's why I looked into this. In any case, I'm fine with this patch but does it help anybody? Well, it gives you time to finish virtio-vga ;) I have a linux kernel driver too, although not kms/drm but fbdev. cheers, Gerd
[Qemu-devel] [RFC PATCH 07/16] qcow2: Factor out handle_dependencies()
Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 70 +--- 1 files changed, 42 insertions(+), 28 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 7a038ac..468ef1b 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -753,38 +753,16 @@ out: } /* - * Allocates new clusters for the given guest_offset. - * - * At most *nb_clusters are allocated, and on return *nb_clusters is updated to - * contain the number of clusters that have been allocated and are contiguous - * in the image file. - * - * If *host_offset is non-zero, it specifies the offset in the image file at - * which the new clusters must start. *nb_clusters can be 0 on return in this - * case if the cluster at host_offset is already in use. If *host_offset is - * zero, the clusters can be allocated anywhere in the image file. - * - * *host_offset is updated to contain the offset into the image file at which - * the first allocated cluster starts. - * - * Return 0 on success and -errno in error cases. -EAGAIN means that the - * function has been waiting for another request and the allocation must be - * restarted, but the whole request should not be failed. + * Check if there already is an AIO write request in flight which allocates + * the same cluster. In this case we need to wait until the previous + * request has completed and updated the L2 table accordingly. */ -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, -uint64_t *host_offset, unsigned int *nb_clusters) +static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, +unsigned int *nb_clusters) { BDRVQcowState *s = bs-opaque; QCowL2Meta *old_alloc; -trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, - *host_offset, *nb_clusters); - -/* - * Check if there already is an AIO write request in flight which allocates - * the same cluster. 
In this case we need to wait until the previous - * request has completed and updated the L2 table accordingly. - */ QLIST_FOREACH(old_alloc, s-cluster_allocs, next_in_flight) { uint64_t start = guest_offset s-cluster_bits; @@ -817,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, abort(); } +return 0; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. 
+ */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, +uint64_t *host_offset, unsigned int *nb_clusters) +{ +BDRVQcowState *s = bs-opaque; +int ret; + +trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); + +ret = handle_dependencies(bs, guest_offset, nb_clusters); +if (ret 0) { +return ret; +} + /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -828,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, *host_offset = cluster_offset; return 0; } else { -int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); +ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); if (ret 0) { return ret; } -- 1.7.6.5
[Qemu-devel] [RFC PATCH 02/16] qcow2: Introduce Qcow2COWRegion
This makes it easier to address the areas for which a COW must be performed. As a nice side effect, the COW code in qcow2_alloc_cluster_link_l2 becomes really trivial. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 85 +++-- block/qcow2.h | 29 +--- 2 files changed, 77 insertions(+), 37 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d17a37c..94b7f13 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -615,13 +615,41 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) +{ +BDRVQcowState *s = bs-opaque; +int ret; + +if (r-nb_sectors == 0) { +return 0; +} + +qemu_co_mutex_unlock(s-lock); +ret = copy_sectors(bs, m-offset / BDRV_SECTOR_SIZE, m-alloc_offset, + r-offset / BDRV_SECTOR_SIZE, + r-offset / BDRV_SECTOR_SIZE + r-nb_sectors); +qemu_co_mutex_lock(s-lock); + +if (ret 0) { +return ret; +} + +/* + * Before we update the L2 table to actually point to the new cluster, we + * need to be sure that the refcounts have been increased and COW was + * handled. 
+ */ +qcow2_cache_depends_on_flush(s-l2_table_cache); + +return 0; +} + int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcowState *s = bs-opaque; int i, j = 0, l2_index, ret; -uint64_t *old_cluster, start_sect, *l2_table; +uint64_t *old_cluster, *l2_table; uint64_t cluster_offset = m-alloc_offset; -bool cow = false; trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m-nb_clusters); @@ -631,36 +659,17 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) old_cluster = g_malloc(m-nb_clusters * sizeof(uint64_t)); /* copy content of unmodified sectors */ -start_sect = m-offset 9; -if (m-n_start) { -cow = true; -qemu_co_mutex_unlock(s-lock); -ret = copy_sectors(bs, start_sect, cluster_offset, 0, m-n_start); -qemu_co_mutex_lock(s-lock); -if (ret 0) -goto err; +ret = perform_cow(bs, m, m-cow_start); +if (ret 0) { +goto err; } -if (m-nb_available (s-cluster_sectors - 1)) { -cow = true; -qemu_co_mutex_unlock(s-lock); -ret = copy_sectors(bs, start_sect, cluster_offset, m-nb_available, - align_offset(m-nb_available, s-cluster_sectors)); -qemu_co_mutex_lock(s-lock); -if (ret 0) -goto err; +ret = perform_cow(bs, m, m-cow_end); +if (ret 0) { +goto err; } -/* - * Update L2 table. - * - * Before we update the L2 table to actually point to the new cluster, we - * need to be sure that the refcounts have been increased and COW was - * handled. - */ -if (cow) { -qcow2_cache_depends_on_flush(s-l2_table_cache); -} +/* Update L2 table. */ if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s-l2_table_cache, @@ -957,19 +966,33 @@ again: * * avail_sectors: Number of sectors from the start of the first * newly allocated to the end of the last newly allocated cluster. 
+ * + * nb_sectors: The number of sectors from the start of the first + * newly allocated cluster to the end of the aread that the write + * request actually writes to (excluding COW at the end) */ int requested_sectors = n_end - keep_clusters * s-cluster_sectors; int avail_sectors = nb_clusters (s-cluster_bits - BDRV_SECTOR_BITS); +int alloc_n_start = keep_clusters == 0 ? n_start : 0; +int nb_sectors = MIN(requested_sectors, avail_sectors); *m = (QCowL2Meta) { .cluster_offset = keep_clusters == 0 ? alloc_cluster_offset : cluster_offset, .alloc_offset = alloc_cluster_offset, .offset = alloc_offset ~(s-cluster_size - 1), -.n_start= keep_clusters == 0 ? n_start : 0, .nb_clusters= nb_clusters, -.nb_available = MIN(requested_sectors, avail_sectors), +.nb_available = nb_sectors, + +.cow_start = { +.offset = 0, +.nb_sectors = alloc_n_start, +}, +.cow_end = { +.offset = nb_sectors * BDRV_SECTOR_SIZE, +.nb_sectors = avail_sectors - nb_sectors, +}, }; qemu_co_queue_init(m-dependent_requests);
[Qemu-devel] [RFC PATCH 13/16] qcow2: Change handle_dependency to byte granularity
Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 26 -- block/qcow2.h | 11 +++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 4d5c3da..440fdbf 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -825,29 +825,29 @@ static void kick_l2meta(QCowL2Meta *m) * request has completed and updated the L2 table accordingly. */ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, -unsigned int *nb_clusters) +uint64_t bytes, unsigned int *nb_clusters) { BDRVQcowState *s = bs-opaque; QCowL2Meta *old_alloc; QLIST_FOREACH(old_alloc, s-cluster_allocs, next_in_flight) { -uint64_t start = guest_offset s-cluster_bits; -uint64_t end = start + *nb_clusters; -uint64_t old_start = old_alloc-offset s-cluster_bits; -uint64_t old_end = old_start + old_alloc-nb_clusters; +uint64_t start = guest_offset; +uint64_t end = start + bytes; +uint64_t old_start = l2meta_cow_start(old_alloc); +uint64_t old_end = l2meta_cow_end(old_alloc); -if (end old_start || start old_end) { +if (end = old_start || start = old_end) { /* No intersection */ } else { if (start old_start) { /* Stop at the start of a running allocation */ -*nb_clusters = old_start - start; +bytes = old_start - start; } else { -*nb_clusters = 0; +bytes = 0; } -if (*nb_clusters == 0) { +if (bytes == 0) { /* Wait for the dependency to complete. We need to recheck * the free/allocated clusters when we continue. 
*/ qemu_co_mutex_unlock(s-lock); @@ -859,6 +859,9 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, } } +*nb_clusters = size_to_clusters(s, guest_offset + bytes) + - (guest_offset s-cluster_bits); + if (!*nb_clusters) { abort(); } @@ -952,6 +955,7 @@ again: l2_index = offset_to_l2_index(s, offset); nb_clusters = MIN(size_to_clusters(s, n_end BDRV_SECTOR_BITS), s-l2_size - l2_index); +n_end = MIN(n_end, nb_clusters * s-cluster_sectors); /* * Now start gathering as many contiguous clusters as possible: @@ -976,7 +980,9 @@ again: * 3. If the request still hasn't completed, allocate new clusters, *considering any cluster_offset of steps 1c or 2. */ -ret = handle_dependencies(bs, offset, nb_clusters); +ret = handle_dependencies(bs, offset, + (n_end - n_start) * BDRV_SECTOR_SIZE, + nb_clusters); if (ret == -EAGAIN) { goto again; } else if (ret 0) { diff --git a/block/qcow2.h b/block/qcow2.h index eb94463..06ca195 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -343,6 +343,17 @@ static inline uint64_t l2meta_req_end(QCowL2Meta *m) return m-offset + (m-nb_available BDRV_SECTOR_BITS); } +static inline uint64_t l2meta_cow_start(QCowL2Meta *m) +{ +return m-offset + m-cow_start.offset; +} + +static inline uint64_t l2meta_cow_end(QCowL2Meta *m) +{ +return m-offset + m-cow_end.offset ++ (m-cow_end.nb_sectors BDRV_SECTOR_BITS); +} + // FIXME Need qcow2_ prefix to global functions /* qcow2.c functions */ -- 1.7.6.5
[Qemu-devel] [RFC PATCH 15/16] qcow2: Cancel COW when overwritten
This is the first part of an optimisation to improve the performance of sequential cluster allocations. Typically, write requests aren't aligned to cluster boundaries, so sequential allocation means that every other request has to wait for the COW of the previous request to complete. We can do better: Just cancel the COW instead of waiting for it and then overwriting the same area with the second write request. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 127 +++-- block/qcow2.c | 21 block/qcow2.h | 47 ++ 3 files changed, 180 insertions(+), 15 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 440fdbf..ff22992 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -659,6 +659,8 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) BDRVQcowState *s = bs-opaque; int ret; +r-final = true; + if (r-nb_sectors == 0) { return 0; } @@ -689,6 +691,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) int i, j = 0, l2_index, ret; uint64_t *old_cluster, *l2_table; uint64_t cluster_offset = m-alloc_offset; +bool has_wr_lock = false; trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m-nb_clusters); assert(m-nb_clusters 0); @@ -707,6 +710,16 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) } /* Update L2 table. */ +qemu_co_mutex_unlock(s-lock); +qemu_co_rwlock_wrlock(m-l2_writeback_lock); +has_wr_lock = true; +qemu_co_mutex_lock(s-lock); + +if (m-no_l2_update) { +ret = 0; +goto err; +} + if (s-compatible_features QCOW2_COMPAT_LAZY_REFCOUNTS) { qcow2_mark_dirty(bs); } @@ -753,6 +766,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) ret = 0; err: +if (has_wr_lock) { +qemu_co_rwlock_unlock(m-l2_writeback_lock); +} g_free(old_cluster); return ret; } @@ -825,7 +841,8 @@ static void kick_l2meta(QCowL2Meta *m) * request has completed and updated the L2 table accordingly. 
*/ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, -uint64_t bytes, unsigned int *nb_clusters) +uint64_t *host_offset, uint64_t bytes, unsigned int *nb_clusters, +QCowL2Meta **m) { BDRVQcowState *s = bs-opaque; QCowL2Meta *old_alloc; @@ -840,22 +857,96 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, if (end = old_start || start = old_end) { /* No intersection */ } else { +uint64_t new_bytes; +uint64_t old_cow_end; + + /* +* Shorten the request to stop at the start of a running +* allocation. +*/ if (start old_start) { -/* Stop at the start of a running allocation */ -bytes = old_start - start; +new_bytes = old_start - start; } else { -bytes = 0; +new_bytes = 0; +} + +if (new_bytes 0) { +bytes = new_bytes; +continue; +} + +/* + * Check if we're just overwriting some COW of the old allocation + * that is safe to be replaced by the data of this request. + */ +old_cow_end = old_alloc-offset + old_alloc-cow_end.offset; + +if ((old_end (s-cluster_size - 1)) == 0 + start = old_cow_end + !old_alloc-cow_end.final) +{ +uint64_t subcluster_offset; +int nb_sectors; + +*nb_clusters = 1; +subcluster_offset = offset_into_cluster(s, guest_offset); +nb_sectors = (subcluster_offset + bytes) BDRV_SECTOR_BITS; + +/* Move forward cluster by cluster when overwriting COW areas, + * or we'd have to deal with multiple overlapping requests and + * things would become complicated. */ +nb_sectors = MIN(s-cluster_sectors, nb_sectors); + +/* Shorten the COW area at the end of the old request */ +old_alloc-cow_end.nb_sectors = +(guest_offset - old_cow_end) BDRV_SECTOR_BITS; + +/* The new data region starts in the same cluster where the COW + * region at the end of the old request starts. 
*/ +*host_offset = start_of_cluster(s, +old_alloc-alloc_offset + old_alloc-cow_end.offset); + +/* Create new l2meta that doesn't actually allocate new L2 + * entries, but describes the new data area so that reads + * access the right cluster */ +*m = g_malloc0(sizeof(**m)); +
[Qemu-devel] [PATCH] Added LEON MMU ASI mappings and corrected LEON3 MMU masks.
This patch adds SPARC ASI mappings that are used by the LEON processor. It also corrects the MMU context register and context table pointer masks of the LEON3. Signed-off-by: Ronald Hecht ronald.he...@gmx.de --- target-sparc/cpu.c |4 ++-- target-sparc/ldst_helper.c |6 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c index f7c004c..0d5abb8 100644 --- a/target-sparc/cpu.c +++ b/target-sparc/cpu.c @@ -583,8 +583,8 @@ static const sparc_def_t sparc_defs[] = { .fpu_version = 4 17, /* FPU version 4 (Meiko) */ .mmu_version = 0xf300, .mmu_bm = 0x, -.mmu_ctpr_mask = 0x0070, -.mmu_cxr_mask = 0x003f, +.mmu_ctpr_mask = 0xfffc, +.mmu_cxr_mask = 0x00ff, .mmu_sfsr_mask = 0x, .mmu_trcr_mask = 0x, .nwindows = 8, diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index 9bec7a9..684b73b 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -511,6 +511,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size, #endif break; case 3: /* MMU probe */ +case 0x18: /* LEON3 MMU probe */ { int mmulev; @@ -525,6 +526,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size, } break; case 4: /* read MMU regs */ +case 0x19: /* LEON3 read MMU regs */ { int reg = (addr 8) 0x1f; @@ -600,6 +602,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size, case 0xf: /* D-cache data */ break; case 0x20: /* MMU passthrough */ +case 0x1c: /* LEON MMU passthrough */ switch (size) { case 1: ret = ldub_phys(addr); @@ -841,6 +844,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi, #endif break; case 3: /* MMU flush */ +case 0x18: /* LEON3 MMU flush */ { int mmulev; @@ -865,6 +869,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi, } break; case 4: /* write MMU regs */ +case 0x19: /* LEON3 write MMU regs */ { int reg = (addr 8) 0x1f; uint32_t oldreg; @@ -993,6 +998,7 @@ 
void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi, } break; case 0x20: /* MMU passthrough */ +case 0x1c: /* LEON MMU passthrough */ { switch (size) { case 1: -- 1.7.2.5
[Qemu-devel] [PATCH] Added LEON MMU ASI mappings and corrected LEON3 MMU masks.
This patch adds SPARC ASI mappings that are used by the LEON processor. It also corrects the MMU context register and context table pointer masks of the LEON3. Signed-off-by: Ronald Hecht ronald.he...@gmx.de --- target-sparc/cpu.c |4 ++-- target-sparc/ldst_helper.c |6 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c index f7c004c..0d5abb8 100644 --- a/target-sparc/cpu.c +++ b/target-sparc/cpu.c @@ -583,8 +583,8 @@ static const sparc_def_t sparc_defs[] = { .fpu_version = 4 17, /* FPU version 4 (Meiko) */ .mmu_version = 0xf300, .mmu_bm = 0x, -.mmu_ctpr_mask = 0x0070, -.mmu_cxr_mask = 0x003f, +.mmu_ctpr_mask = 0xfffc, +.mmu_cxr_mask = 0x00ff, .mmu_sfsr_mask = 0x, .mmu_trcr_mask = 0x, .nwindows = 8, diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index 9bec7a9..684b73b 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -511,6 +511,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size, #endif break; case 3: /* MMU probe */ +case 0x18: /* LEON3 MMU probe */ { int mmulev; @@ -525,6 +526,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size, } break; case 4: /* read MMU regs */ +case 0x19: /* LEON3 read MMU regs */ { int reg = (addr 8) 0x1f; @@ -600,6 +602,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int asi, int size, case 0xf: /* D-cache data */ break; case 0x20: /* MMU passthrough */ +case 0x1c: /* LEON MMU passthrough */ switch (size) { case 1: ret = ldub_phys(addr); @@ -841,6 +844,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi, #endif break; case 3: /* MMU flush */ +case 0x18: /* LEON3 MMU flush */ { int mmulev; @@ -865,6 +869,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi, } break; case 4: /* write MMU regs */ +case 0x19: /* LEON3 write MMU regs */ { int reg = (addr 8) 0x1f; uint32_t oldreg; @@ -993,6 +998,7 @@ 
void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, int asi, } break; case 0x20: /* MMU passthrough */ +case 0x1c: /* LEON MMU passthrough */ { switch (size) { case 1: -- 1.7.2.5
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
Jan Kiszka jan.kis...@siemens.com writes: On 2012-09-18 03:52, Wen Congyang wrote: At 09/18/2012 01:56 AM, Luiz Capitulino Wrote: Hi Wen, We've re-reviewed the dump-guest-memory command and found some possible issues with the -p option. The main issue is that it seems possible for a malicious guest to set page tables in a way that we allocate a MemoryMapping structure for each possible PTE. If IA-32e paging is used, this could lead to the allocation of dozens of gigabytes by qemu. Of course, this is not expected for the regular case, where a MemoryMapping allocation can be skipped for several reasons (I/O memory, page not present, contiguous/in same range addresses etc), but the point is what a malicious guest can do. Another problem is that the -p option seems to be broken for SMP guests. The problem is in qemu_get_guest_memory_mapping(): first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); if (first_paging_enabled_cpu) { for (env = first_paging_enabled_cpu; env != NULL; env = env->next_cpu) { ret = cpu_get_memory_mapping(list, env); if (ret < 0) { return -1; } } return 0; } This looks for the first vCPU with paging enabled, and then assumes that all the following vCPUs also have paging enabled. How does this hold? cpu_get_memory_mapping re-validates that paging is on. In fact, cpu_get_memory_mapping should handle both cases so that the generic code need not worry about paging on/off. The loop Luiz quoted is confusing. Actually, the whole function is confusing. Here's how I understand it: if there is a CPU that has paging enabled there is a proper prefix of env whose members don't have paging enabled; ignore them all [WTF#1] for all members of env not in that prefix (the suffix): get memory mapping for a CPU with paging enabled [WTF#2], and merge it into list else get memory mapping for ram_list, and merge it into list WTF#1: Why is it okay to ignore leading CPUs with paging disabled, but only if there's at least one CPU with paging enabled? 
WTF#2: What if a CPU in the suffix doesn't have paging enabled? Oh, the arch-specific function to get its memory map is expected to do nothing then. Bonus WTF#3: What if a guest enables/disables paging between find_paging_enabled_cpu() and the loop? What if it changes page tables while we walk them? WTF is this function supposed to do? Assuming that this last issue is fixable (i.e. we can make the -p option work well with SMP guests), we should at least document that -p can make QEMU allocate lots of memory and end up being killed by the OS. However, I also think that we should consider if having the -p feature is really worth it. It's a complex feature and has a number of limitations*. If libvirt doesn't use this, dropping it shouldn't be a big deal (we can return an error when -p is used). libvirt should surely not be the only reference for debugging features. No, it's just a user, albeit an important one. We don't break known users cavalierly. * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always uses gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. While I don't care for supporting alternate dump processing tools myself, I certainly don't mind supporting them, as long as the code satisfies basic safety and reliability requirements. This code doesn't, as far as I can tell. If that's correct, we should either rip it out until a satisfactory replacement is available, or at least document -p as an unsafe and unreliable debugging feature (master & stable). I'm sure the memory allocation can be avoided by writing out any found virt->phys mapping directly to the vmcore file. We know where physical RAM will be, we only need the corresponding virtual addresses - IIUC. 
So first prepare the section according to the guest's RAM size and then, once we identified a page while walking the tables carefully, seek to that file position and write to it. Sounds like a non-trivial change from the current code. Makes me lean towards ripping it out.
[Qemu-devel] [RFC PATCH 16/16] [BROKEN] qcow2: Overwrite COW and allocate new cluster at the same time
Cancelling COW when it's overwritten by a subsequent write request of the guest was a good start, however in practice we don't gain performance yet. The second write request is split in two, the first one containing the overwritten COW area, and the second one allocating another cluster. We can do both at the same time and then we actually do gain performance (iozone initial writer test up from 22 to 35 MB/s). Signed-off-by: Kevin Wolf kw...@redhat.com --- This patch is not correct at all and potentially corrupts images, it's ugly too, but it works good enough to try out what gains to expect, so I decided to include it here anyway. --- block/qcow2-cluster.c | 17 - block/qcow2.c | 29 ++--- block/qcow2.h |1 + 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index ff22992..39ef7b0 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -888,7 +888,6 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, uint64_t subcluster_offset; int nb_sectors; -*nb_clusters = 1; subcluster_offset = offset_into_cluster(s, guest_offset); nb_sectors = (subcluster_offset + bytes) BDRV_SECTOR_BITS; @@ -1032,7 +1031,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, BDRVQcowState *s = bs-opaque; int l2_index, ret, sectors; uint64_t *l2_table; -unsigned int nb_clusters, keep_clusters; +unsigned int nb_clusters, keep_clusters = 0; uint64_t cluster_offset = 0; trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, @@ -1079,17 +1078,21 @@ again: } else if (ret 0) { return ret; } else if (*m) { +/* FIXME There could be more dependencies */ keep_clusters = 1; -nb_clusters = 0; -goto done; +nb_clusters -= keep_clusters; } + /* Find L2 entry for the first involved cluster */ ret = get_cluster_table(bs, offset, l2_table, l2_index); if (ret 0) { return ret; } +if (cluster_offset != 0) { +goto do_alloc; +} cluster_offset = be64_to_cpu(l2_table[l2_index]); /* @@ -,6 
+1114,7 @@ again: cluster_offset = 0; } +do_alloc: if (nb_clusters 0) { /* For the moment, overwrite compressed clusters one by one */ uint64_t entry = be64_to_cpu(l2_table[l2_index + keep_clusters]); @@ -1177,6 +1181,7 @@ again: (s-cluster_bits - BDRV_SECTOR_BITS); int alloc_n_start = keep_clusters == 0 ? n_start : 0; int nb_sectors = MIN(requested_sectors, avail_sectors); +QCowL2Meta *old_m = *m; if (keep_clusters == 0) { cluster_offset = alloc_cluster_offset; @@ -1185,6 +1190,8 @@ again: *m = g_malloc0(sizeof(**m)); **m = (QCowL2Meta) { +.next = old_m, + .alloc_offset = alloc_cluster_offset, .offset = alloc_offset ~(s-cluster_size - 1), .nb_clusters= nb_clusters, @@ -1198,6 +1205,7 @@ again: .offset = nb_sectors * BDRV_SECTOR_SIZE, .nb_sectors = avail_sectors - nb_sectors, }, + }; qemu_co_queue_init((*m)-dependent_requests); qemu_co_rwlock_init((*m)-l2_writeback_lock); @@ -1206,7 +1214,6 @@ again: } /* Some cleanup work */ -done: sectors = (keep_clusters + nb_clusters) (s-cluster_bits - 9); if (sectors n_end) { sectors = n_end; diff --git a/block/qcow2.c b/block/qcow2.c index abc3de3..e6fa616 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -959,19 +959,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } if (l2meta != NULL) { -ProcessL2Meta p = { -.bs = bs, -.m = l2meta, -}; - qemu_co_mutex_unlock(s-lock); -qemu_co_rwlock_rdlock(s-l2meta_flush); -l2meta-is_written = true; -l2meta-co = qemu_coroutine_create(process_l2meta); -qemu_coroutine_enter(l2meta-co, p); +while (l2meta != NULL) { +ProcessL2Meta p = { +.bs = bs, +.m = l2meta, +}; + +qemu_co_rwlock_rdlock(s-l2meta_flush); +l2meta-is_written = true; +l2meta-co = qemu_coroutine_create(process_l2meta); +qemu_coroutine_enter(l2meta-co, p); +l2meta = l2meta-next; +} -l2meta = NULL; qemu_co_mutex_lock(s-lock); } @@ -985,12 +987,17 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, fail:
[Qemu-devel] [RFC PATCH 08/16] qcow2: Reading from areas not in L2 tables yet
In preparation for delayed COW (i.e. completing the guest write request before the associated COWs have completed), we must make sure that after the guest data has been written, the new data is read back, even if the COW hasn't completed and the new cluster isn't linked in the L2 table yet. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 39 +++ block/qcow2.c |2 ++ block/qcow2.h | 19 +++ 3 files changed, 60 insertions(+), 0 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 468ef1b..a89d68d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -372,6 +372,40 @@ out: return ret; } +static bool overlaps_allocation(BlockDriverState *bs, uint64_t start, +int *num, uint64_t *cluster_offset) +{ +BDRVQcowState *s = bs-opaque; +QCowL2Meta *m; +uint64_t end = start + (*num BDRV_SECTOR_BITS); + +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { + +uint64_t old_start = l2meta_req_start(m); +uint64_t old_end = l2meta_req_end(m); + +/* If the write hasn't completed yet and the allocating request can't + * have completed yet therefore, we're free to read the old data. */ +if (!m-is_written) { +continue; +} + +if (start = old_start start old_end) { +/* Start of the new request overlaps: Read from the newly allocated + * cluster even if it isn't in the L2 table yet. */ +*num = MIN(*num, (old_end - start) BDRV_SECTOR_BITS); +*cluster_offset = m-alloc_offset ++ ((start - old_start) ~(s-cluster_size - 1)); +return true; +} else if (start old_start end old_start) { +/* Overlap somewhere after the start. Shorten this request so that + * no overlap occurs. 
*/ +*num = MIN(*num, (old_start - start) BDRV_SECTOR_BITS); +} +} + +return false; +} /* * get_cluster_offset @@ -398,6 +432,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, uint64_t nb_available, nb_needed; int ret; +/* Check overlap with not yet completed allocations */ +if (overlaps_allocation(bs, offset, num, cluster_offset)) { +return QCOW2_CLUSTER_NORMAL; +} + index_in_cluster = (offset 9) (s-cluster_sectors - 1); nb_needed = *num + index_in_cluster; diff --git a/block/qcow2.c b/block/qcow2.c index 6515fdd..2e32136 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -831,6 +831,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } if (l2meta != NULL) { +l2meta-is_written = true; + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); if (ret 0) { goto fail; diff --git a/block/qcow2.h b/block/qcow2.h index a60fcb4..504dbad 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -229,6 +229,14 @@ typedef struct QCowL2Meta int nb_clusters; /** + * true if the guest data (but not necessarily the related COW) has been + * written to disk, so that read requests can (and after having completed + * this request actually _must_) read the new data instead of reading the + * old data that the L2 table still refers to. + */ +bool is_written; + +/** * Requests that overlap with this allocation and wait to be restarted * when the allocating request has completed. */ @@ -298,6 +306,17 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) return !(s-incompatible_features QCOW2_INCOMPAT_DIRTY); } +static inline uint64_t l2meta_req_start(QCowL2Meta *m) +{ +return (m-offset + m-cow_start.offset) ++ (m-cow_start.nb_sectors BDRV_SECTOR_BITS); +} + +static inline uint64_t l2meta_req_end(QCowL2Meta *m) +{ +return m-offset + (m-nb_available BDRV_SECTOR_BITS); +} + // FIXME Need qcow2_ prefix to global functions /* qcow2.c functions */ -- 1.7.6.5
Re: [Qemu-devel] [Qemu-stable] [PATCH] make_device_config.sh: Fix target path in generated dependency file
Has it been applied to anything? I don't think so. Is it still needed? Thanks, /mjt On 07.06.2012 20:23, Andreas Färber wrote: config-devices.mak.d is included from Makefile.target, i.e. from inside the *-softmmu/ directory. It included the directory path, so never applied to the actual config-devices.mak. Symptoms were spurious dependency issues with default-configs/pci.mak. Fix by using `basename` to strip the directory path. Reported-by: Gerhard Wiesinger li...@wiesinger.com Signed-off-by: Andreas Färber afaer...@suse.de --- Seems I forgot to send this out before 1.1... scripts/make_device_config.sh |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/scripts/make_device_config.sh b/scripts/make_device_config.sh index 5d14885..0778fe2 100644 --- a/scripts/make_device_config.sh +++ b/scripts/make_device_config.sh @@ -25,4 +25,4 @@ done process_includes $src $dest cat $src $all_includes | grep -v '^include' $dest -echo $1: $all_includes $dep +echo `basename $1`: $all_includes $dep
[Qemu-devel] [RFC PATCH 00/16] qcow2: Delayed COW
During the last few releases we have got rid of most of the overhead of metadata writes during cluster allocation. What's left is the COW for unaligned allocating write requests, and it's quite expensive. In the general case, this cost cannot be avoided. However, if we're lucky enough that before the next flush the data copied during COW would be overwritten, we can do without the COW. Sequential writes always overwrite the COW area at the end of the cluster immediately, so delaying the COW a bit and cancelling it if it's overwritten is a worthwhile optimisation. The really interesting part of this series should be close to final; however, you only see the improvements with the last patch applied, which isn't quite correct yet. Doing it right requires some additional refactoring, so I thought I'd get this out for a first round of review before fixing it. iozone results with and without this series show significant difference for allocating writes: random random KB reclen write rewritereadrereadread write base65536 817271945125461253924491836 patch 65536 819341949122631252124631796 base 1048576 256 22344 38437 105823 106135 37743 32167 patch 1048576 256 35989 38542 105231 105994 38301 33036 Kevin Wolf (16): qcow2: Round QCowL2Meta.offset down to cluster boundary qcow2: Introduce Qcow2COWRegion qcow2: Allocate l2meta dynamically qcow2: Drop l2meta.cluster_offset qcow2: Allocate l2meta only for cluster allocations qcow2: Enable dirty flag in qcow2_alloc_cluster_link_l2 qcow2: Factor out handle_dependencies() qcow2: Reading from areas not in L2 tables yet qcow2: Move COW and L2 update into own coroutine qcow2: Delay the COW qcow2: Add error handling to the l2meta coroutine qcow2: Handle dependencies earlier qcow2: Change handle_dependency to byte granularity qcow2: Execute run_dependent_requests() without lock qcow2: Cancel COW when overwritten [BROKEN] qcow2: Overwrite COW and allocate new cluster at the same time block.c |5 + block/qcow2-cluster.c | 432 ++--- 
block/qcow2.c | 239 +++- block/qcow2.h | 153 +- block_int.h |3 + 5 files changed, 692 insertions(+), 140 deletions(-) -- 1.7.6.5
[Qemu-devel] [RFC PATCH 09/16] qcow2: Move COW and L2 update into own coroutine
This creates a separate coroutine for processing the COW and the L2 table update of allocating requests. The request itself can then complete while the second part is still being processed. We need a qemu_aio_flush() hook in order to ensure that these coroutines for the second part aren't still running after bdrv_drain_all (e.g. when the VM is stopped). Signed-off-by: Kevin Wolf kw...@redhat.com --- block.c |5 +++ block/qcow2.c | 89 + block/qcow2.h |8 + block_int.h |3 ++ 4 files changed, 93 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index e78039b..b852f3e 100644 --- a/block.c +++ b/block.c @@ -948,7 +948,12 @@ void bdrv_drain_all(void) qemu_co_queue_restart_all(bs-throttled_reqs); busy = true; } + +if (bs-drv bs-drv-bdrv_drain) { +busy |= bs-drv-bdrv_drain(bs); +} } + } while (busy); /* If requests are still pending there is a bug somewhere */ diff --git a/block/qcow2.c b/block/qcow2.c index 2e32136..f9881d0 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -482,6 +482,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) /* Initialise locks */ qemu_co_mutex_init(s-lock); +qemu_co_rwlock_init(s-l2meta_flush); /* Repair image if dirty */ if (!(flags BDRV_O_CHECK) !bs-read_only @@ -751,6 +752,50 @@ static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) } } +typedef struct ProcessL2Meta { +BlockDriverState *bs; +QCowL2Meta *m; +} ProcessL2Meta; + +static void coroutine_fn process_l2meta(void *opaque) +{ +ProcessL2Meta *p = opaque; +QCowL2Meta *m = p-m; +BlockDriverState *bs = p-bs; +BDRVQcowState *s = bs-opaque; +int ret; + +qemu_co_mutex_lock(s-lock); + +ret = qcow2_alloc_cluster_link_l2(bs, m); +if (ret 0) { +/* FIXME */ +} + +run_dependent_requests(s, m); +g_free(m); + +qemu_co_mutex_unlock(s-lock); +qemu_co_rwlock_unlock(s-l2meta_flush); +} + +static inline coroutine_fn void stop_l2meta(BDRVQcowState *s) +{ +qemu_co_rwlock_wrlock(s-l2meta_flush); +} + +static inline coroutine_fn void resume_l2meta(BDRVQcowState *s) +{ 
+qemu_co_rwlock_unlock(s-l2meta_flush); +} + +static bool qcow2_drain(BlockDriverState *bs) +{ +BDRVQcowState *s = bs-opaque; + +return !QLIST_EMPTY(s-cluster_allocs); +} + static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, @@ -831,16 +876,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } if (l2meta != NULL) { -l2meta-is_written = true; +Coroutine *co; +ProcessL2Meta p = { +.bs = bs, +.m = l2meta, +}; -ret = qcow2_alloc_cluster_link_l2(bs, l2meta); -if (ret 0) { -goto fail; -} +qemu_co_mutex_unlock(s-lock); +qemu_co_rwlock_rdlock(s-l2meta_flush); + +l2meta-is_written = true; +co = qemu_coroutine_create(process_l2meta); +qemu_coroutine_enter(co, p); -run_dependent_requests(s, l2meta); -g_free(l2meta); l2meta = NULL; +qemu_co_mutex_lock(s-lock); } remaining_sectors -= cur_nr_sectors; @@ -868,6 +918,11 @@ fail: static void qcow2_close(BlockDriverState *bs) { BDRVQcowState *s = bs-opaque; + +while (qcow2_drain(bs)) { +qemu_aio_wait(); +} + g_free(s-l1_table); qcow2_cache_flush(bs, s-l2_table_cache); @@ -1405,10 +1460,12 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, } /* Whatever is left can use real zero clusters */ +stop_l2meta(s); qemu_co_mutex_lock(s-lock); ret = qcow2_zero_clusters(bs, sector_num BDRV_SECTOR_BITS, nb_sectors); qemu_co_mutex_unlock(s-lock); +resume_l2meta(s); return ret; } @@ -1419,10 +1476,13 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, int ret; BDRVQcowState *s = bs-opaque; +stop_l2meta(s); qemu_co_mutex_lock(s-lock); ret = qcow2_discard_clusters(bs, sector_num BDRV_SECTOR_BITS, nb_sectors); qemu_co_mutex_unlock(s-lock); +resume_l2meta(s); + return ret; } @@ -1548,23 +1608,27 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) BDRVQcowState *s = bs-opaque; int ret; +stop_l2meta(s); qemu_co_mutex_lock(s-lock); + ret = qcow2_cache_flush(bs, s-l2_table_cache); if (ret 0) { -qemu_co_mutex_unlock(s-lock); -return 
ret; +goto fail; } if (qcow2_need_accurate_refcounts(s)) { ret = qcow2_cache_flush(bs, s-refcount_block_cache);
[Qemu-devel] [RFC PATCH 01/16] qcow2: Round QCowL2Meta.offset down to cluster boundary
The offset within the cluster is already present as n_start and this is what the code uses. QCowL2Meta.offset is only needed at a cluster granularity. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c |4 ++-- block/qcow2.h | 22 ++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e179211..d17a37c 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -631,7 +631,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) old_cluster = g_malloc(m-nb_clusters * sizeof(uint64_t)); /* copy content of unmodified sectors */ -start_sect = (m-offset ~(s-cluster_size - 1)) 9; +start_sect = m-offset 9; if (m-n_start) { cow = true; qemu_co_mutex_unlock(s-lock); @@ -966,7 +966,7 @@ again: .cluster_offset = keep_clusters == 0 ? alloc_cluster_offset : cluster_offset, .alloc_offset = alloc_cluster_offset, -.offset = alloc_offset, +.offset = alloc_offset ~(s-cluster_size - 1), .n_start= keep_clusters == 0 ? n_start : 0, .nb_clusters= nb_clusters, .nb_available = MIN(requested_sectors, avail_sectors), diff --git a/block/qcow2.h b/block/qcow2.h index b4eb654..2a406a7 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -199,12 +199,34 @@ struct QCowAIOCB; /* XXX This could be private for qcow2-cluster.c */ typedef struct QCowL2Meta { +/** Guest offset of the first newly allocated cluster */ uint64_t offset; + +/** Host offset of the first cluster of the request */ uint64_t cluster_offset; + +/** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; + +/** + * Number of sectors between the start of the first allocated cluster and + * the area that the guest actually writes to. 
+ */ int n_start; + +/** + * Number of sectors from the start of the first allocated cluster to + * the end of the (possibly shortened) request + */ int nb_available; + +/** Number of newly allocated clusters */ int nb_clusters; + +/** + * Requests that overlap with this allocation and wait to be restarted + * when the allocating request has completed. + */ CoQueue dependent_requests; QLIST_ENTRY(QCowL2Meta) next_in_flight; -- 1.7.6.5
[Qemu-devel] [RFC PATCH 04/16] qcow2: Drop l2meta.cluster_offset
There's no real reason to have an l2meta for normal requests that don't allocate anything. Before we can get rid of it, we must return the host cluster offset in a different way. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c | 10 ++ block/qcow2.c | 14 +++--- block/qcow2.h |5 + 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 94b7f13..c4752ee 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -856,7 +856,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, -int n_start, int n_end, int *num, QCowL2Meta *m) +int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m) { BDRVQcowState *s = bs-opaque; int l2_index, ret, sectors; @@ -929,7 +929,6 @@ again: /* If there is something left to allocate, do that now */ *m = (QCowL2Meta) { -.cluster_offset = cluster_offset, .nb_clusters= 0, }; qemu_co_queue_init(m-dependent_requests); @@ -977,9 +976,11 @@ again: int alloc_n_start = keep_clusters == 0 ? n_start : 0; int nb_sectors = MIN(requested_sectors, avail_sectors); +if (keep_clusters == 0) { +cluster_offset = alloc_cluster_offset; +} + *m = (QCowL2Meta) { -.cluster_offset = keep_clusters == 0 ? 
- alloc_cluster_offset : cluster_offset, .alloc_offset = alloc_cluster_offset, .offset = alloc_offset ~(s-cluster_size - 1), .nb_clusters= nb_clusters, @@ -1007,6 +1008,7 @@ again: assert(sectors n_start); *num = sectors - n_start; +*host_offset = cluster_offset; return 0; diff --git a/block/qcow2.c b/block/qcow2.c index 9e4d440..a98e899 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -790,7 +790,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num 9, -index_in_cluster, n_end, cur_nr_sectors, l2meta); +index_in_cluster, n_end, cur_nr_sectors, cluster_offset, l2meta); if (ret 0) { goto fail; } @@ -800,7 +800,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, qcow2_mark_dirty(bs); } -cluster_offset = l2meta-cluster_offset; assert((cluster_offset 511) == 0); qemu_iovec_reset(hd_qiov); @@ -1122,6 +1121,7 @@ static int preallocate(BlockDriverState *bs) { uint64_t nb_sectors; uint64_t offset; +uint64_t host_offset = 0; int num; int ret; QCowL2Meta meta; @@ -1129,18 +1129,18 @@ static int preallocate(BlockDriverState *bs) nb_sectors = bdrv_getlength(bs) 9; offset = 0; qemu_co_queue_init(meta.dependent_requests); -meta.cluster_offset = 0; while (nb_sectors) { num = MIN(nb_sectors, INT_MAX 9); -ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, num, meta); +ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, num, + host_offset, meta); if (ret 0) { return ret; } ret = qcow2_alloc_cluster_link_l2(bs, meta); if (ret 0) { -qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters); +qcow2_free_any_clusters(bs, meta.alloc_offset, meta.nb_clusters); return ret; } @@ -1159,10 +1159,10 @@ static int preallocate(BlockDriverState *bs) * all of the allocated clusters (otherwise we get failing reads after * EOF). Extend the image to the last allocated sector. 
*/ -if (meta.cluster_offset != 0) { +if (host_offset != 0) { uint8_t buf[512]; memset(buf, 0, 512); -ret = bdrv_write(bs-file, (meta.cluster_offset 9) + num - 1, buf, 1); +ret = bdrv_write(bs-file, (host_offset 9) + num - 1, buf, 1); if (ret 0) { return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index 1106b33..24f1001 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -213,9 +213,6 @@ typedef struct QCowL2Meta /** Guest offset of the first newly allocated cluster */ uint64_t offset; -/** Host offset of the first cluster of the request */ -uint64_t cluster_offset; - /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; @@ -336,7 +333,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, -
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
On 2012-09-18 14:23, Markus Armbruster wrote: Jan Kiszka jan.kis...@siemens.com writes: On 2012-09-18 03:52, Wen Congyang wrote: At 09/18/2012 01:56 AM, Luiz Capitulino Wrote: Hi Wen, We've re-reviewed the dump-guest-memory command and found some possible issues with the -p option. The main issue is that it seems possible for a malicious guest to set page tables in a way that we allocate a MemoryMapping structure for each possible PTE. If IA-32e paging is used, this could lead to the allocation of dozens of gigabytes by qemu. Of course that this is not expected for the regular case, where a MemoryMapping allocation can be skipped for several reasons (I/O memory, page not present, contiguous/in same range addresses etc), but the point is what a malicious guest can do. Another problem is that the -p option seems to be broken for SMP guests. The problem is in qemu_get_guest_memory_mapping(): first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); if (first_paging_enabled_cpu) { for (env = first_paging_enabled_cpu; env != NULL; env = env-next_cpu) { ret = cpu_get_memory_mapping(list, env); if (ret 0) { return -1; } } return 0; } This looks for the first vCPU with paging enabled, and then assumes that all the following vCPUs also have paging enabled. How does this hold? cpu_get_memory_mapping re-validates that paging is one. In fact, cpu_get_memory_mapping should handle both cases so that the generic code need not worry about paging on/off. The loop Luiz quoted is confusing. Actually, the whole function is confusing. 
Here's how I understand it: if there is a CPU that has paging enabled there is a proper prefix of env whose members don't have paging enabled; ignore them all [WTF#1] for all members of env not in that prefix (the suffix): get memory mapping for a CPU with paging enabled [WTF#2], and merge it into list else get memory mapping for ram_list, and merge it into list WTF#1: Why is it okay to ignore leading CPUs with paging disabled, but only if there's at least one CPU with paging enabled? WTF#2: What if a CPU in the suffix doesn't have paging enabled? Oh, the arch-specific function to get its memory map is expected to do nothing then. Bonus WTF#3: What if a guest enables/disables paging between find_paging_enabled_cpu() and the loop? What if it changes page tables while we walk them? In fact, the dump should be taken in a consistent state, means it should run synchronously /wrt at least the CPU we refer to. So we need to run the dump over the VCPU thread or with that VCPU stopped. WTF is this function supposed to do? Associate virtual and physical addresses for the whole machine at a given time. The picture is not fully consistent as we cannot express yet that different CPUs have different views on memory. IIRC, the first view is taken, the rests are dropped - correct me if I'm wrong, Wen. Assuming that this last issue is fixable (ie. we can make the -p option work well with SMP guests), we should at least document that -p can make QEMU allocates lots of memory and end up being killed by the OS. However, I also think that we should consider if having the -p feature is really worth it. It's a complex feature and has a number of limitations*. If libvirt doesn't use this, dropping it shouldn't be a big deal (we can return an error when -p is used). libvirt should surely not be the only reference for debugging features. No, it's just a user, albeit an important one. We don't break known users cavalierly. That is not what is being discussed here. 
It's about dropping a feature because that one user doesn't expose it. * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. While I don't care for supporting alternate dump processing tools myself, I certainly don't mind supporting them, as long as the code satisfies basic safety and reliability requirements. This code doesn't, as far as I can tell. It works, thought not under all circumstances. If that's correct, we should either rip it out until a satisfactory replacemnt is available, or at least document -p as unsafe and unreliable debugging feature (master stable). I'm sure the memory allocation can be avoided by writing out any found virt-phys mapping directly to the vmcore file. We know where physical RAM will be, we only need the corresponding virtual addresses - IIUC. So first prepare the section according to the guest's RAM size and then, once we
[Qemu-devel] [RFC PATCH 03/16] qcow2: Allocate l2meta dynamically
As soon as delayed COW is introduced, the l2meta struct is needed even after completion of the request, so it can't live on the stack. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2.c | 26 +++--- 1 files changed, 15 insertions(+), 11 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 8f183f1..9e4d440 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -765,15 +765,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QEMUIOVector hd_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; -QCowL2Meta l2meta = { -.nb_clusters = 0, -}; +QCowL2Meta *l2meta; trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, remaining_sectors); -qemu_co_queue_init(l2meta.dependent_requests); - qemu_iovec_init(hd_qiov, qiov-niov); s-cluster_cache_offset = -1; /* disable compressed cache */ @@ -782,6 +778,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { +l2meta = g_malloc0(sizeof(*l2meta)); +qemu_co_queue_init(l2meta-dependent_requests); + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num (s-cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; @@ -791,17 +790,17 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num 9, -index_in_cluster, n_end, cur_nr_sectors, l2meta); +index_in_cluster, n_end, cur_nr_sectors, l2meta); if (ret 0) { goto fail; } -if (l2meta.nb_clusters 0 +if (l2meta-nb_clusters 0 (s-compatible_features QCOW2_COMPAT_LAZY_REFCOUNTS)) { qcow2_mark_dirty(bs); } -cluster_offset = l2meta.cluster_offset; +cluster_offset = l2meta-cluster_offset; assert((cluster_offset 511) == 0); qemu_iovec_reset(hd_qiov); @@ -838,12 +837,14 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } -ret = qcow2_alloc_cluster_link_l2(bs, l2meta); +ret = qcow2_alloc_cluster_link_l2(bs, l2meta); if (ret 0) { goto fail; } -run_dependent_requests(s, l2meta); 
+run_dependent_requests(s, l2meta); +g_free(l2meta); +l2meta = NULL; remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; @@ -853,7 +854,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, ret = 0; fail: -run_dependent_requests(s, l2meta); +if (l2meta != NULL) { +run_dependent_requests(s, l2meta); +g_free(l2meta); +} qemu_co_mutex_unlock(s-lock); -- 1.7.6.5
Re: [Qemu-devel] [PATCH 5/9] fbdev: add monitor command to enable/disable
On Tue, 18 Sep 2012 09:17:10 +0200 Gerd Hoffmann kra...@redhat.com wrote: This patch adds a fbdev monitor command to enable/disable the fbdev display at runtime to both qmp and hmp. qmp: fbdev enable=on|off hmp: fbdev on|off Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hmp-commands.hx | 15 +++ hmp.c|9 + hmp.h|1 + qapi-schema.json | 14 ++ qmp-commands.hx |6 ++ qmp.c| 17 + 6 files changed, 62 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index ed67e99..366a92b 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1377,6 +1377,21 @@ passed since 1970, i.e. unix epoch. ETEXI { +.name = fbdev, +.args_type = enable:b, +.params = on|off, +.help = enable/disable fbdev, +.mhandler.cmd = hmp_fbdev, +}, + +STEXI +@item fbdev on | off +@findex fbdev + +enable/disable fbdev +ETEXI + +{ .name = info, .args_type = item:s?, .params = [subcommand], diff --git a/hmp.c b/hmp.c index ba6fbd3..a7feec5 100644 --- a/hmp.c +++ b/hmp.c @@ -1168,3 +1168,12 @@ void hmp_screen_dump(Monitor *mon, const QDict *qdict) qmp_screendump(filename, err); hmp_handle_error(mon, err); } + +void hmp_fbdev(Monitor *mon, const QDict *qdict) +{ +int enable = qdict_get_bool(qdict, enable); +Error *errp = NULL; + +qmp_fbdev(enable, errp); +hmp_handle_error(mon, errp); +} diff --git a/hmp.h b/hmp.h index 48b9c59..9c3d315 100644 --- a/hmp.h +++ b/hmp.h @@ -73,5 +73,6 @@ void hmp_getfd(Monitor *mon, const QDict *qdict); void hmp_closefd(Monitor *mon, const QDict *qdict); void hmp_send_key(Monitor *mon, const QDict *qdict); void hmp_screen_dump(Monitor *mon, const QDict *qdict); +void hmp_fbdev(Monitor *mon, const QDict *qdict); #endif diff --git a/qapi-schema.json b/qapi-schema.json index 14e4419..901c2e8 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2619,3 +2619,17 @@ # Since: 0.14.0 ## { 'command': 'screendump', 'data': {'filename': 'str'} } + +# @fbdev: Please, use more descriptive names for qmp. Maybe something like frame-buffer-device-add/-enable. 
+# +# Enable/disable fbdev. +# +# @enable: whenever fbdev should be enabled or disabled. +# +# Returns: Nothing on success +# GenericError on failure. It's not needed to list GenericError as an error. +# +# Since: 1.3 +# +## +{ 'command': 'fbdev', 'data': {'enable': 'bool'} } diff --git a/qmp-commands.hx b/qmp-commands.hx index 6e21ddb..4b95fd0 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2539,3 +2539,9 @@ EQMP .args_type = , .mhandler.cmd_new = qmp_marshal_input_query_target, }, + +{ +.name = fbdev, +.args_type = enable:b, +.mhandler.cmd_new = qmp_marshal_input_fbdev, +}, diff --git a/qmp.c b/qmp.c index 8463922..7f6cc0b 100644 --- a/qmp.c +++ b/qmp.c @@ -391,6 +391,23 @@ void qmp_change(const char *device, const char *target, } } +void qmp_fbdev(bool enable, Error **errp) +{ +#if defined(CONFIG_LINUX) +DisplayState *ds = get_displaystate(); + +if (enable) { +if (fbdev_display_init(ds, NULL) != 0) { +error_setg(errp, fbdev initialization failed); Would be nice to tell the reason if you have it (error_setg() has printf()-likeformat). +} +} else { +fbdev_display_uninit(ds); +} +#else +error_set(errp, QERR_FEATURE_DISABLED, fbdev); We shouldn't use QERR_ macros in new code. You have two options: 1. use error_setg() 2. add error_set_disabled() in error.h, similar to error_setg(), and use it +#endif +} + static void qom_list_types_tramp(ObjectClass *klass, void *data) { ObjectTypeInfoList *e, **pret = data;
Re: [Qemu-devel] [PATCH v7 1/5] sockets: Make inet_parse() non static.
Am 17.09.2012 17:22, schrieb Bharata B Rao: sockets: Make inet_parse() non static. From: Bharata B Rao bhar...@linux.vnet.ibm.com Make inet_parse() non-static so that other subsystems like gluster can use it to parse inet addresses. As a pre-requisite, define and globalize the qemu_inet_opts. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com --- Makefile.objs |2 +- qemu-config.c | 31 +++ qemu-sockets.c | 42 ++ qemu_socket.h |1 + 4 files changed, 39 insertions(+), 37 deletions(-) Making the option list global sounds fine, but I don't think it should be moved to qemu-config.c or added to vm_config_groups, which is generally used for command line options and config file entries. At least I can't imagine a command line option -inet would be very useful. Kevin
[Qemu-devel] [RFC PATCH 06/16] qcow2: Enable dirty flag in qcow2_alloc_cluster_link_l2
This is closer to where the dirty flag is really needed, and it avoids having checks for special cases related to cluster allocation directly in the writev loop. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2-cluster.c |5 - block/qcow2.c |7 +-- block/qcow2.h |2 ++ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c2b59e7..7a038ac 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -668,11 +668,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) } /* Update L2 table. */ - +if (s-compatible_features QCOW2_COMPAT_LAZY_REFCOUNTS) { +qcow2_mark_dirty(bs); +} if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s-l2_table_cache, s-refcount_block_cache); } + ret = get_cluster_table(bs, m-offset, l2_table, l2_index); if (ret 0) { goto err; diff --git a/block/qcow2.c b/block/qcow2.c index c0a2822..6515fdd 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -221,7 +221,7 @@ static void report_unsupported_feature(BlockDriverState *bs, * updated successfully. Therefore it is not required to check the return * value of this function. 
*/ -static int qcow2_mark_dirty(BlockDriverState *bs) +int qcow2_mark_dirty(BlockDriverState *bs) { BDRVQcowState *s = bs-opaque; uint64_t val; @@ -794,11 +794,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } -if (l2meta-nb_clusters 0 -(s-compatible_features QCOW2_COMPAT_LAZY_REFCOUNTS)) { -qcow2_mark_dirty(bs); -} - assert((cluster_offset 511) == 0); qemu_iovec_reset(hd_qiov); diff --git a/block/qcow2.h b/block/qcow2.h index 6dc79b5..a60fcb4 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -303,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); + +int qcow2_mark_dirty(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); /* qcow2-refcount.c functions */ -- 1.7.6.5
Re: [Qemu-devel] [PATCH] Added LEON MMU ASI mappings and corrected LEON3 MMU masks.
On 09/18/2012 01:47 PM, Ronald Hecht wrote: This patch adds SPARC ASI mappings that are used by the LEON processor. It also corrects the MMU context register and context table pointer mask of the LEON3. Reviewed-by: Fabien Chouteau chout...@adacore.com -- Fabien Chouteau
Re: [Qemu-devel] [PATCH v7 1/5] sockets: Make inet_parse() non static.
Il 18/09/2012 14:47, Kevin Wolf ha scritto: Makefile.objs |2 +- qemu-config.c | 31 +++ qemu-sockets.c | 42 ++ qemu_socket.h |1 + 4 files changed, 39 insertions(+), 37 deletions(-) Making the option list global sounds fine, but I don't think it should be moved to qemu-config.c or added to vm_config_groups, which is generally used for command line options and config file entries. At least I can't imagine a command line option -inet would be very useful. Yes. Also, I'll send a pull-request for http://patchwork.ozlabs.org/patch/180237/ soon, so you might as well take that patch. Paolo
[Qemu-devel] [PATCH] SPARC LEON power-down support added
Signed-off-by: Ronald Hecht address@hidden --- target-sparc/cpu.c |2 +- target-sparc/cpu.h |1 + target-sparc/helper.h |1 + target-sparc/ldst_helper.c |9 + target-sparc/translate.c |5 + 5 files changed, 17 insertions(+), 1 deletions(-) diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c index 0d5abb8..10563c4 100644 --- a/target-sparc/cpu.c +++ b/target-sparc/cpu.c @@ -589,7 +589,7 @@ static const sparc_def_t sparc_defs[] = { .mmu_trcr_mask = 0x, .nwindows = 8, .features = CPU_DEFAULT_FEATURES | CPU_FEATURE_TA0_SHUTDOWN | -CPU_FEATURE_ASR17 | CPU_FEATURE_CACHE_CTRL, +CPU_FEATURE_ASR17 | CPU_FEATURE_CACHE_CTRL | CPU_FEATURE_POWERDOWN, }, #endif }; diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index e16b7b3..eadc445 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -270,6 +270,7 @@ typedef struct sparc_def_t { #define CPU_FEATURE_TA0_SHUTDOWN (1 14) /* Shutdown on ta 0x0 */ #define CPU_FEATURE_ASR17(1 15) #define CPU_FEATURE_CACHE_CTRL (1 16) +#define CPU_FEATURE_POWERDOWN(1 17) #ifndef TARGET_SPARC64 #define CPU_DEFAULT_FEATURES (CPU_FEATURE_FLOAT | CPU_FEATURE_SWAP | \ diff --git a/target-sparc/helper.h b/target-sparc/helper.h index e3c7fdd..74ecad1 100644 --- a/target-sparc/helper.h +++ b/target-sparc/helper.h @@ -4,6 +4,7 @@ DEF_HELPER_1(rett, void, env) DEF_HELPER_2(wrpsr, void, env, tl) DEF_HELPER_1(rdpsr, tl, env) +DEF_HELPER_1(power_down, void, env) #else DEF_HELPER_2(wrpil, void, env, tl) DEF_HELPER_2(wrpstate, void, env, tl) diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index 684b73b..bb5016c 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -2317,6 +2317,15 @@ void helper_stqf(CPUSPARCState *env, target_ulong addr, int mem_idx) #endif } +void helper_power_down(CPUSPARCState *env) +{ +env-halted = 1; +env-exception_index = EXCP_HLT; +env-pc = env-npc; +env-npc = env-pc + 4; +cpu_loop_exit(env); +} + #if !defined(CONFIG_USER_ONLY) #ifndef TARGET_SPARC64 void cpu_unassigned_access(CPUSPARCState 
*env, target_phys_addr_t addr, diff --git a/target-sparc/translate.c b/target-sparc/translate.c index b95f91c..9babaa8 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -3671,6 +3671,11 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) in the SPARCv8 manual, nop on the microSPARC II */ +if ((rd == 0x13) (dc-def-features + CPU_FEATURE_POWERDOWN)) { +/* LEON3 power-down */ +gen_helper_power_down(cpu_env); +} break; #else case 0x2: /* V9 wrccr */ -- 1.7.2.5
Re: [Qemu-devel] Will the ELI incorporated in the KVM?
On 2012-09-18 14:50, GaoYi wrote: Hi Jan, I have followed a previous thread about ELI proposed by Abel Gordon, http://www.spinics.net/lists/kvm/msg73907.html. I wonder whether this mechanism will be incorporated in KVM someday. Likely not. Both Intel and AMD will soon ship hardware that obsoletes this invasive and imperfect software solution, see also [1]. Jan [1] http://thread.gmane.org/gmane.comp.emulators.kvm.devel/97715 -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
[Qemu-devel] guest clock management for testing real-time systems
Hello all, In order to test and debug a linux-based real-time system, I'd like to hook it up to a software simulator, which sadly does not run fast enough to meet the real-time constraints. To work around this problem, I had the idea of running the real-time system in a guest whose clock would be controlled by the simulator, running on the host. Alas, I found no API for doing so with qemu. Yet, it seems qtest does it (I found qtest_clock_step [1] etc.) Is it possible or at least conceivable to control (and slow down) the guest time externally? Where should I look to get some insight on that matter? Note that this question was asked previously [2], [3], but never answered. [1] http://lists.nongnu.org/archive/html/qemu-devel/2012-03/msg05326.html [2] http://lists.gnu.org/archive/html/qemu-discuss/2012-05/msg00034.html [3] http://lists.gnu.org/archive/html/qemu-devel/2008-08/msg01109.html
Re: [Qemu-devel] [PATCH v7 2/5] sockets: Change inet_parse() to accept address specification without port
Am 17.09.2012 17:23, schrieb Bharata B Rao: sockets: Change inet_parse() to accept address specification without port From: Bharata B Rao bhar...@linux.vnet.ibm.com inet_parse() expects address:port. Change it to work without explicit port specification. In addition, don't depend solely on the return value of Things like "in addition" in a commit message are almost always a sign that the patch should be split in two. sscanf but also consider the value obtained for %n directive used in sscanf. This ensures that the scanning of malformed inet address isn't flagged as success. Can you give an example string that would be falsely accepted? To me the old checks look fine (even though the new ones are a little bit easier to read, so even if they don't fix anything, they might be worth doing). Anyway, it does look correct. Kevin
Re: [Qemu-devel] KVM call for agenda for Tuesday, September 18th
Juan Quintela quint...@redhat.com wrote: Hi Please send in any agenda items you are interested in covering. There is no agenda, so the call is cancelled. Happy hacking, Juan.
[Qemu-devel] [PATCH] Added more entries to the LEON processor configuration register
Signed-off-by: Ronald Hecht ronald.he...@gmx.de --- target-sparc/helper.h |1 + target-sparc/ldst_helper.c |6 ++ target-sparc/translate.c | 10 +++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/target-sparc/helper.h b/target-sparc/helper.h index 74ecad1..23a2ad4 100644 --- a/target-sparc/helper.h +++ b/target-sparc/helper.h @@ -5,6 +5,7 @@ DEF_HELPER_1(rett, void, env) DEF_HELPER_2(wrpsr, void, env, tl) DEF_HELPER_1(rdpsr, tl, env) DEF_HELPER_1(power_down, void, env) +DEF_HELPER_1(rdasr17, tl, env) #else DEF_HELPER_2(wrpil, void, env, tl) DEF_HELPER_2(wrpstate, void, env, tl) diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index bb5016c..d620d8f 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -2326,6 +2326,12 @@ void helper_power_down(CPUSPARCState *env) cpu_loop_exit(env); } +target_ulong helper_rdasr17(CPUSPARCState *env) +{ +/* CPU ID, Meiko FPU, SPARC V8, Number of register windows */ +return env-cpu_index 28 | (2 10) | (1 8) | (env-nwindows - 1); +} + #if !defined(CONFIG_USER_ONLY) #ifndef TARGET_SPARC64 void cpu_unassigned_access(CPUSPARCState *env, target_phys_addr_t addr, diff --git a/target-sparc/translate.c b/target-sparc/translate.c index 9babaa8..c0f7887 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -2590,13 +2590,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) microSPARC II */ /* Read Asr17 */ if (rs1 == 0x11 dc-def-features CPU_FEATURE_ASR17) { -TCGv r_const; - -/* Read Asr17 for a Leon3 monoprocessor */ -r_const = tcg_const_tl((1 8) - | (dc-def-nwindows - 1)); -gen_movl_TN_reg(rd, r_const); -tcg_temp_free(r_const); +/* Read Asr17 on LEON3 */ +gen_helper_rdasr17(cpu_dst, cpu_env); +gen_movl_TN_reg(rd, cpu_dst); break; } #endif -- 1.7.2.5
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
On Tue, 18 Sep 2012 14:41:53 +0200 Jan Kiszka jan.kis...@siemens.com wrote: On 2012-09-18 14:23, Markus Armbruster wrote: Jan Kiszka jan.kis...@siemens.com writes: On 2012-09-18 03:52, Wen Congyang wrote: At 09/18/2012 01:56 AM, Luiz Capitulino Wrote: Hi Wen, We've re-reviewed the dump-guest-memory command and found some possible issues with the -p option. The main issue is that it seems possible for a malicious guest to set page tables in a way that we allocate a MemoryMapping structure for each possible PTE. If IA-32e paging is used, this could lead to the allocation of dozens of gigabytes by qemu. Of course that this is not expected for the regular case, where a MemoryMapping allocation can be skipped for several reasons (I/O memory, page not present, contiguous/in same range addresses etc), but the point is what a malicious guest can do. Another problem is that the -p option seems to be broken for SMP guests. The problem is in qemu_get_guest_memory_mapping(): first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); if (first_paging_enabled_cpu) { for (env = first_paging_enabled_cpu; env != NULL; env = env-next_cpu) { ret = cpu_get_memory_mapping(list, env); if (ret 0) { return -1; } } return 0; } This looks for the first vCPU with paging enabled, and then assumes that all the following vCPUs also have paging enabled. How does this hold? cpu_get_memory_mapping re-validates that paging is one. In fact, cpu_get_memory_mapping should handle both cases so that the generic code need not worry about paging on/off. The loop Luiz quoted is confusing. Actually, the whole function is confusing. 
Here's how I understand it: if there is a CPU that has paging enabled there is a proper prefix of env whose members don't have paging enabled; ignore them all [WTF#1] for all members of env not in that prefix (the suffix): get memory mapping for a CPU with paging enabled [WTF#2], and merge it into list else get memory mapping for ram_list, and merge it into list WTF#1: Why is it okay to ignore leading CPUs with paging disabled, but only if there's at least one CPU with paging enabled? WTF#2: What if a CPU in the suffix doesn't have paging enabled? Oh, the arch-specific function to get its memory map is expected to do nothing then. Bonus WTF#3: What if a guest enables/disables paging between find_paging_enabled_cpu() and the loop? What if it changes page tables while we walk them? In fact, the dump should be taken in a consistent state, means it should run synchronously /wrt at least the CPU we refer to. So we need to run the dump over the VCPU thread or with that VCPU stopped. This command stops all vCPUs, so yes, you're right here. Any idea about WTF#1? WTF is this function supposed to do? Associate virtual and physical addresses for the whole machine at a given time. The picture is not fully consistent as we cannot express yet that different CPUs have different views on memory. IIRC, the first view is taken, the rests are dropped - correct me if I'm wrong, Wen. Assuming that this last issue is fixable (ie. we can make the -p option work well with SMP guests), we should at least document that -p can make QEMU allocates lots of memory and end up being killed by the OS. However, I also think that we should consider if having the -p feature is really worth it. It's a complex feature and has a number of limitations*. If libvirt doesn't use this, dropping it shouldn't be a big deal (we can return an error when -p is used). libvirt should surely not be the only reference for debugging features. No, it's just a user, albeit an important one. 
We don't break known users cavalierly. That is not what is being discussed here. It's about dropping a feature because that one user doesn't expose it. No, let me clarify. First, what's being discussed is whether or not to drop an unsafe feature. Having an important user relying on the feature would be a strong indication that feature should not be dropped. As it turns out, libvirt is the only known open-source project that is making use of this feature (although they don't use the -p option). We'd give the same importance to any other project that makes themselves heard. Of course, the rule is never to drop anything. There are exceptions though, and this is one of them as it puts the host in danger. * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to
Re: [Qemu-devel] qmp: dump-guest-memory: -p option has issues, fix it or drop it?
Jan Kiszka jan.kis...@siemens.com writes: On 2012-09-18 14:23, Markus Armbruster wrote: Jan Kiszka jan.kis...@siemens.com writes: On 2012-09-18 03:52, Wen Congyang wrote: At 09/18/2012 01:56 AM, Luiz Capitulino Wrote: Hi Wen, We've re-reviewed the dump-guest-memory command and found some possible issues with the -p option. The main issue is that it seems possible for a malicious guest to set page tables in a way that we allocate a MemoryMapping structure for each possible PTE. If IA-32e paging is used, this could lead to the allocation of dozens of gigabytes by qemu. Of course that this is not expected for the regular case, where a MemoryMapping allocation can be skipped for several reasons (I/O memory, page not present, contiguous/in same range addresses etc), but the point is what a malicious guest can do. Another problem is that the -p option seems to be broken for SMP guests. The problem is in qemu_get_guest_memory_mapping(): first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); if (first_paging_enabled_cpu) { for (env = first_paging_enabled_cpu; env != NULL; env = env-next_cpu) { ret = cpu_get_memory_mapping(list, env); if (ret 0) { return -1; } } return 0; } This looks for the first vCPU with paging enabled, and then assumes that all the following vCPUs also have paging enabled. How does this hold? cpu_get_memory_mapping re-validates that paging is one. In fact, cpu_get_memory_mapping should handle both cases so that the generic code need not worry about paging on/off. The loop Luiz quoted is confusing. Actually, the whole function is confusing. 
Here's how I understand it: if there is a CPU that has paging enabled there is a proper prefix of env whose members don't have paging enabled; ignore them all [WTF#1] for all members of env not in that prefix (the suffix): get memory mapping for a CPU with paging enabled [WTF#2], and merge it into list else get memory mapping for ram_list, and merge it into list WTF#1: Why is it okay to ignore leading CPUs with paging disabled, but only if there's at least one CPU with paging enabled? WTF#2: What if a CPU in the suffix doesn't have paging enabled? Oh, the arch-specific function to get its memory map is expected to do nothing then. Bonus WTF#3: What if a guest enables/disables paging between find_paging_enabled_cpu() and the loop? What if it changes page tables while we walk them? In fact, the dump should be taken in a consistent state, means it should run synchronously /wrt at least the CPU we refer to. So we need to run the dump over the VCPU thread or with that VCPU stopped. Makes sense. Unfortunately, it's not what the code does. WTF is this function supposed to do? Associate virtual and physical addresses for the whole machine at a given time. The picture is not fully consistent as we cannot express yet that different CPUs have different views on memory. IIRC, the first view is taken, the rests are dropped - correct me if I'm wrong, Wen. As far as I can tell, the code merges the views of all CPUs that have paging enabled, which makes no sense to me. Assuming that this last issue is fixable (ie. we can make the -p option work well with SMP guests), we should at least document that -p can make QEMU allocates lots of memory and end up being killed by the OS. However, I also think that we should consider if having the -p feature is really worth it. It's a complex feature and has a number of limitations*. If libvirt doesn't use this, dropping it shouldn't be a big deal (we can return an error when -p is used). 
libvirt should surely not be the only reference for debugging features. No, it's just a user, albeit an important one. We don't break known users cavalierly. That is not what is being discussed here. It's about dropping a feature because that one user doesn't expose it. Let's not get into that fruitless discussion again. I'm sure Luiz didn't mean to suggest that other users (including you) don't matter. * The issues discussed in this email plus the fact that the guest memory may be corrupted, and the guest may be in real-mode even when paging is enabled Yes, there are some limitations with this option. Jan said that he always use gdb to deal with vmcore, so he needs such information. The point is to overcome the focus on Linux-only dump processing tools. While I don't care for supporting alternate dump processing tools myself, I certainly don't mind supporting them, as long as the code satisfies basic safety and reliability requirements. This code doesn't, as far as I can tell. It works, thought not under all circumstances. I don't doubt it works often enough to be useful to somebody. But basic safety and reliability requirements are a bit more
Re: [Qemu-devel] [PATCH v7 2/5] sockets: Change inet_parse() to accept address specification without port
Il 18/09/2012 15:22, Kevin Wolf ha scritto: Am 17.09.2012 17:23, schrieb Bharata B Rao: sockets: Change inet_parse() to accept address specification without port From: Bharata B Rao bhar...@linux.vnet.ibm.com inet_parse() expects address:port. Change it to work without explicit port specification. In addition, don't depend solely on the return value of Things like in addition in a commit message are almost always a sign that the patch should be split in two. sscanf but also consider the value obtained for %n directive used in sscanf. This ensures that the scanning of malformed inet address isn't flagged as success. Can you give an example string that would be falsely accepted? To me the old checks look fine (even though the new ones are a little bit easier to read, so even if they don't fix anything, they might be worth doing). localhost would fail to be parsed: -if (2 != sscanf(str,%64[^:]:%32[^,]%n,addr,port,pos)) { +ret = sscanf(str, %64[^:]%n:%32[^,]%n, addr, addr_pos, +port, port_pos); +if (addr_pos == -1 || ret == EOF) { because the : in the format string would not match and sscanf would return 1. However, is it correct to set the port unconditionally to an empty string? Your usecase makes sense, but perhaps the default port be passed as an extra parameter to inet_parse instead. Paolo Anyway, it does look correct. Kevin
Re: [Qemu-devel] [PATCH v2 2/2] Versatile Express: add modelling of NOR flash
On 17 September 2012 21:08, Francesco Lavra francescolavra...@gmail.com wrote: This patch adds modelling of the two NOR flash banks found on the Versatile Express motherboard. Tested with U-Boot running on an emulated Versatile Express, with either A9 or A15 CoreTile. Signed-off-by: Francesco Lavra francescolavra...@gmail.com --- Changes in v2: Use drive_get_next() instead of drive_get() to get a backing storage for each flash bank. hw/vexpress.c | 24 ++-- 1 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hw/vexpress.c b/hw/vexpress.c index 454c2bb..2ffeab1 100644 --- a/hw/vexpress.c +++ b/hw/vexpress.c @@ -29,8 +29,12 @@ #include sysemu.h #include boards.h #include exec-memory.h +#include blockdev.h +#include flash.h #define VEXPRESS_BOARD_ID 0x8e0 +#define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024) +#define VEXPRESS_FLASH_SECT_SIZE (256 * 1024) static struct arm_boot_info vexpress_binfo; @@ -355,6 +359,7 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, Something in your email send path is wrapping long lines, which means 'git am' doesn't work cleanly. If you're planning on sending more QEMU patches you might want to look into getting this fixed. qemu_irq pic[64]; uint32_t proc_id; uint32_t sys_id; +DriveInfo *dinfo; ram_addr_t vram_size, sram_size; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *vram = g_new(MemoryRegion, 1); @@ -410,8 +415,23 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, sysbus_create_simple(pl111, map[VE_CLCD], pic[14]); -/* VE_NORFLASH0: not modelled */ -/* VE_NORFLASH1: not modelled */ +dinfo = drive_get_next(IF_PFLASH); +if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, vexpress.flash0, +VEXPRESS_FLASH_SIZE, dinfo ? dinfo-bdrv : NULL, +VEXPRESS_FLASH_SECT_SIZE, +VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4, +0x00, 0x89, 0x00, 0x18, 0)) { +fprintf(stderr, vexpress: error registering flash 0.\n); Shouldn't these errors be fatal? 
+} + +dinfo = drive_get_next(IF_PFLASH); +if (!pflash_cfi01_register(map[VE_NORFLASH1], NULL, vexpress.flash1, +VEXPRESS_FLASH_SIZE, dinfo ? dinfo-bdrv : NULL, +VEXPRESS_FLASH_SECT_SIZE, +VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4, +0x00, 0x89, 0x00, 0x18, 0)) { +fprintf(stderr, vexpress: error registering flash 1.\n); +} sram_size = 0x200; memory_region_init_ram(sram, vexpress.sram, sram_size); -- 1.7.5.4 Otherwise looks OK. -- PMM
[Qemu-devel] [PATCH] hw/pflash_cfi0[12]: Use host-utils.h ctz32()
Drop the private reimplementation of ctz32() from pflash_cfi0[12] in favour of using the standard version from host-utils.h. Signed-off-by: Peter Maydell peter.mayd...@linaro.org --- hw/pflash_cfi01.c | 37 + hw/pflash_cfi02.c | 37 + 2 files changed, 2 insertions(+), 72 deletions(-) diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c index d1c7423..00f1cdd 100644 --- a/hw/pflash_cfi01.c +++ b/hw/pflash_cfi01.c @@ -41,6 +41,7 @@ #include block.h #include qemu-timer.h #include exec-memory.h +#include host-utils.h #define PFLASH_BUG(fmt, ...) \ do { \ @@ -543,42 +544,6 @@ static const MemoryRegionOps pflash_cfi01_ops_le = { .endianness = DEVICE_NATIVE_ENDIAN, }; -/* Count trailing zeroes of a 32 bits quantity */ -static int ctz32 (uint32_t n) -{ -int ret; - -ret = 0; -if (!(n 0x)) { -ret += 16; -n = n 16; -} -if (!(n 0xFF)) { -ret += 8; -n = n 8; -} -if (!(n 0xF)) { -ret += 4; -n = n 4; -} -if (!(n 0x3)) { -ret += 2; -n = n 2; -} -if (!(n 0x1)) { -ret++; -#if 0 /* This is not necessary as n is never 0 */ -n = n 1; -#endif -} -#if 0 /* This is not necessary as n is never 0 */ -if (!n) -ret++; -#endif - -return ret; -} - pflash_t *pflash_cfi01_register(target_phys_addr_t base, DeviceState *qdev, const char *name, target_phys_addr_t size, diff --git a/hw/pflash_cfi02.c b/hw/pflash_cfi02.c index 3e2002e..8cb1549 100644 --- a/hw/pflash_cfi02.c +++ b/hw/pflash_cfi02.c @@ -40,6 +40,7 @@ #include qemu-timer.h #include block.h #include exec-memory.h +#include host-utils.h //#define PFLASH_DEBUG #ifdef PFLASH_DEBUG @@ -575,42 +576,6 @@ static const MemoryRegionOps pflash_cfi02_ops_le = { .endianness = DEVICE_NATIVE_ENDIAN, }; -/* Count trailing zeroes of a 32 bits quantity */ -static int ctz32 (uint32_t n) -{ -int ret; - -ret = 0; -if (!(n 0x)) { -ret += 16; -n = n 16; -} -if (!(n 0xFF)) { -ret += 8; -n = n 8; -} -if (!(n 0xF)) { -ret += 4; -n = n 4; -} -if (!(n 0x3)) { -ret += 2; -n = n 2; -} -if (!(n 0x1)) { -ret++; -#if 0 /* This is not necessary as n is never 0 */ -n = n 1; 
-#endif -} -#if 0 /* This is not necessary as n is never 0 */ -if (!n) -ret++; -#endif - -return ret; -} - pflash_t *pflash_cfi02_register(target_phys_addr_t base, DeviceState *qdev, const char *name, target_phys_addr_t size, -- 1.7.9.5
Re: [Qemu-devel] [PATCH v7 5/5] block: Support GlusterFS as a QEMU block backend.
Am 17.09.2012 17:26, schrieb Bharata B Rao: block: Support GlusterFS as a QEMU block backend. From: Bharata B Rao bhar...@linux.vnet.ibm.com This patch adds gluster as the new block backend in QEMU. This gives QEMU the ability to boot VM images from gluster volumes. Its already possible to boot from VM images on gluster volumes using FUSE mount, but this patchset provides the ability to boot VM images from gluster volumes by by-passing the FUSE layer in gluster. This is made possible by using libgfapi routines to perform IO on gluster volumes directly. VM Image on gluster volume is specified like this: file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] 'gluster' is the protocol. 'transport' specifies the transport type used to connect to gluster management daemon (glusterd). Valid transport types are tcp, unix and rdma. If the transport type isn't specified, then tcp type is assumed. 'server' specifies the server where the volume file specification for the given volume resides. This can be either hostname or ipv4 address or ipv6 address. ipv6 address needs to be with in square brackets [ ]. If transport type is 'unix', then server field is ignored, but the 'socket' field needs to be populated with the path to unix domain socket. 'port' is the port number on which glusterd is listening. This is optional and if not specified, QEMU will send 0 which will make gluster to use the default port. port is ignored for unix type of transport. 'volname' is the name of the gluster volume which contains the VM image. 'image' is the path to the actual VM image that resides on gluster volume. 
Examples: file=gluster://1.2.3.4/testvol/a.img file=gluster+tcp://1.2.3.4/testvol/a.img file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket file=gluster+rdma://1.2.3.4:24007/testvol/a.img Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com --- block/Makefile.objs |1 block/gluster.c | 694 +++ 2 files changed, 695 insertions(+), 0 deletions(-) create mode 100644 block/gluster.c diff --git a/block/Makefile.objs b/block/Makefile.objs index b5754d3..a1ae67f 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o +block-obj-$(CONFIG_GLUSTERFS) += gluster.o diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 000..0de3286 --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,694 @@ +/* + * GlusterFS backend for QEMU + * + * Copyright (C) 2012 Bharata B Rao bhar...@linux.vnet.ibm.com + * + * Pipe handling mechanism in AIO implementation is derived from + * block/rbd.c. Hence, + * + * Copyright (C) 2010-2011 Christian Brunner c...@muc.de, + * Josh Durgin josh.dur...@dreamhost.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ +#include glusterfs/api/glfs.h +#include block_int.h +#include qemu_socket.h + +typedef struct GlusterAIOCB { +BlockDriverAIOCB common; +int64_t size; +int ret; +bool *finished; +QEMUBH *bh; +} GlusterAIOCB; + +typedef struct BDRVGlusterState { +struct glfs *glfs; +int fds[2]; +struct glfs_fd *fd; +int qemu_aio_count; +int event_reader_pos; +GlusterAIOCB *event_acb; +} BDRVGlusterState; + +#define GLUSTER_FD_READ 0 +#define GLUSTER_FD_WRITE 1 + +#define GLUSTER_TRANSPORT_DEFAULTgluster:// +#define GLUSTER_TRANSPORT_DEFAULT_SZ strlen(GLUSTER_TRANSPORT_DEFAULT) +#define GLUSTER_TRANSPORT_TCPgluster+tcp:// +#define GLUSTER_TRANSPORT_TCP_SZ strlen(GLUSTER_TRANSPORT_TCP) +#define GLUSTER_TRANSPORT_UNIX gluster+unix:// +#define GLUSTER_TRANSPORT_UNIX_SZstrlen(GLUSTER_TRANSPORT_UNIX) +#define GLUSTER_TRANSPORT_RDMA gluster+rdma:// +#define GLUSTER_TRANSPORT_RDMA_SZstrlen(GLUSTER_TRANSPORT_RDMA) + +typedef struct GlusterURI { +char *server; +int port; +char *volname; +char *image; +char *transport; +bool is_unix; +} GlusterURI; + +static void qemu_gluster_uri_free(GlusterURI *uri) +{ +g_free(uri-server); +g_free(uri-volname); +g_free(uri-image); +g_free(uri-transport); +g_free(uri); +} + +static int parse_socket(GlusterURI *uri, char *socket) +{ +char *token,
Re: [Qemu-devel] [PATCH v7 2/5] sockets: Change inet_parse() to accept address specification without port
Am 18.09.2012 15:31, schrieb Paolo Bonzini: Il 18/09/2012 15:22, Kevin Wolf ha scritto: Am 17.09.2012 17:23, schrieb Bharata B Rao: sockets: Change inet_parse() to accept address specification without port From: Bharata B Rao bhar...@linux.vnet.ibm.com inet_parse() expects address:port. Change it to work without explicit port specification. In addition, don't depend solely on the return value of Things like in addition in a commit message are almost always a sign that the patch should be split in two. sscanf but also consider the value obtained for %n directive used in sscanf. This ensures that the scanning of malformed inet address isn't flagged as success. Can you give an example string that would be falsely accepted? To me the old checks look fine (even though the new ones are a little bit easier to read, so even if they don't fix anything, they might be worth doing). localhost would fail to be parsed: -if (2 != sscanf(str,%64[^:]:%32[^,]%n,addr,port,pos)) { +ret = sscanf(str, %64[^:]%n:%32[^,]%n, addr, addr_pos, +port, port_pos); +if (addr_pos == -1 || ret == EOF) { because the : in the format string would not match and sscanf would return 1. Yes, that's the part with making the port optional. Bharata also claims that scanning of malformed inet address could falsely succeed before, which I can't see (but which I suspect is what the first two hunks of the patch are meant to address). However, is it correct to set the port unconditionally to an empty string? Your usecase makes sense, but perhaps the default port be passed as an extra parameter to inet_parse instead. I thought about this, too, but didn't care enough to mention it. Now that we're two, yes, I'd like adding a default port parameter. Kevin
Re: [Qemu-devel] [PATCH 3/9] move set_mouse + cursor_define callbacks
On Tue, 18 Sep 2012, Gerd Hoffmann wrote: When adding DisplayChangeListeners the set_mouse and cursor_define callbacks have been left in DisplayState for some reason. Fix it. Signed-off-by: Gerd Hoffmann kra...@redhat.com This patch is good. The one thing I don't like is dpy_cursor_define_supported, because it enforces the idea that we cannot register/deregister DisplayChangeListeners at run time. Theoretically a new DisplayChangeListener that support cursor_define could show up at any time. Is dpy_cursor_define_supported really necessary? console.c |2 +- console.h | 39 +++ hw/jazz_led.c |2 +- hw/qxl-render.c|2 +- hw/vga.c | 10 +- hw/vmware_vga.c| 11 ++- ui/sdl.c |8 ui/spice-display.c |4 ++-- ui/vnc.c |8 9 files changed, 59 insertions(+), 27 deletions(-) diff --git a/console.c b/console.c index a8bcc42..cc0479b 100644 --- a/console.c +++ b/console.c @@ -1239,7 +1239,7 @@ static void text_console_update(void *opaque, console_ch_t *chardata) s-text_y[1] = 0; } if (s-cursor_invalidate) { -dpy_cursor(s-ds, s-x, s-y); +dpy_text_cursor(s-ds, s-x, s-y); s-cursor_invalidate = 0; } } diff --git a/console.h b/console.h index 48fef22..bef2d2d 100644 --- a/console.h +++ b/console.h @@ -164,6 +164,9 @@ struct DisplayChangeListener { int w, int h, uint32_t c); void (*dpy_text_cursor)(struct DisplayState *s, int x, int y); +void (*dpy_mouse_set)(struct DisplayState *s, int x, int y, int on); +void (*dpy_cursor_define)(struct DisplayState *s, QEMUCursor *cursor); + QLIST_ENTRY(DisplayChangeListener) next; }; @@ -181,9 +184,6 @@ struct DisplayState { struct DisplayAllocator* allocator; QLIST_HEAD(, DisplayChangeListener) listeners; -void (*mouse_set)(int x, int y, int on); -void (*cursor_define)(QEMUCursor *cursor); - struct DisplayState *next; }; @@ -304,7 +304,7 @@ static inline void dpy_fill(struct DisplayState *s, int x, int y, } } -static inline void dpy_cursor(struct DisplayState *s, int x, int y) +static inline void dpy_text_cursor(struct DisplayState *s, int x, int y) { 
struct DisplayChangeListener *dcl; QLIST_FOREACH(dcl, s-listeners, next) { @@ -314,6 +314,37 @@ static inline void dpy_cursor(struct DisplayState *s, int x, int y) } } +static inline void dpy_mouse_set(struct DisplayState *s, int x, int y, int on) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_mouse_set) { +dcl-dpy_mouse_set(s, x, y, on); +} +} +} + +static inline void dpy_cursor_define(struct DisplayState *s, QEMUCursor *cursor) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_cursor_define) { +dcl-dpy_cursor_define(s, cursor); +} +} +} + +static inline bool dpy_cursor_define_supported(struct DisplayState *s) +{ +struct DisplayChangeListener *dcl; +QLIST_FOREACH(dcl, s-listeners, next) { +if (dcl-dpy_cursor_define) { +return true; +} +} +return false; +} + static inline int ds_get_linesize(DisplayState *ds) { return ds-surface-linesize; diff --git a/hw/jazz_led.c b/hw/jazz_led.c index 6486523..c4d54e2 100644 --- a/hw/jazz_led.c +++ b/hw/jazz_led.c @@ -210,7 +210,7 @@ static void jazz_led_text_update(void *opaque, console_ch_t *chardata) LedState *s = opaque; char buf[2]; -dpy_cursor(s-ds, -1, -1); +dpy_text_cursor(s-ds, -1, -1); qemu_console_resize(s-ds, 2, 1); /* TODO: draw the segments */ diff --git a/hw/qxl-render.c b/hw/qxl-render.c index e2e3fe2..085a090 100644 --- a/hw/qxl-render.c +++ b/hw/qxl-render.c @@ -238,7 +238,7 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext) return 1; } -if (!qxl-ssd.ds-mouse_set || !qxl-ssd.ds-cursor_define) { +if (!dpy_cursor_define_supported(qxl-ssd.ds)) { return 0; } diff --git a/hw/vga.c b/hw/vga.c index afaef0d..ec4f0c5 100644 --- a/hw/vga.c +++ b/hw/vga.c @@ -2081,11 +2081,11 @@ static void vga_update_text(void *opaque, console_ch_t *chardata) s-cr[VGA_CRTC_CURSOR_END] != s-cursor_end || full_update) { cursor_visible = !(s-cr[VGA_CRTC_CURSOR_START] 0x20); if (cursor_visible cursor_offset size cursor_offset = 0) 
-dpy_cursor(s-ds, - TEXTMODE_X(cursor_offset), - TEXTMODE_Y(cursor_offset)); +dpy_text_cursor(s-ds, +TEXTMODE_X(cursor_offset), +
Re: [Qemu-devel] [PATCH] SPARC LEON power-down support added
Hi, Am 18.09.2012 14:58, schrieb Ronald Hecht: Signed-off-by: Ronald Hecht address@hidden That's not a valid SoB, please fix. :) Also please use a target-sparc: prefix in your subjects to make clear what the patch is about (applies to all three patches). If the patches are otherwise okay, hopefully Blue can fix that. Regards, Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Re: [Qemu-devel] [PATCH] Added more entries to the LEON processor configuration register
On 09/18/2012 03:29 PM, Ronald Hecht wrote: Signed-off-by: Ronald Hecht ronald.he...@gmx.de Reviewed-by: Fabien Chouteau chout...@adacore.com -- Fabien Chouteau
[Qemu-devel] [PATCH 5/5] tcg: Optimize two-address commutative operations
While swapping constants to the second operand, swap sources matching destinations to the first operand. Signed-off-by: Richard Henderson r...@twiddle.net --- tcg/optimize.c | 22 +++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 5b0a8ce..becc408 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -337,6 +337,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, const TCGOpDef *def; TCGArg *gen_args; TCGArg tmp; +TCGCond cond; + /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. If this temp is a copy of other ones then this equivalence class' @@ -363,7 +365,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } } -/* For commutative operations make constant second argument */ +/* For commutative operations make constant second argument, or + if the destination is an input, make it the first argument. */ switch (op) { CASE_OP_32_64(add): CASE_OP_32_64(mul): @@ -373,7 +376,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): -if (temps[args[1]].state == TCG_TEMP_CONST) { +if (temps[args[1]].state == TCG_TEMP_CONST +|| (args[0] == args[2] + temps[args[1]].state != TCG_TEMP_CONST)) { tmp = args[1]; args[1] = args[2]; args[2] = tmp; @@ -398,13 +403,24 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } break; CASE_OP_32_64(movcond): +cond = args[5]; if (temps[args[1]].state == TCG_TEMP_CONST temps[args[2]].state != TCG_TEMP_CONST) { tmp = args[1]; args[1] = args[2]; args[2] = tmp; -args[5] = tcg_swap_cond(args[5]); +cond = tcg_swap_cond(cond); +} +/* For movcond, we canonicalize the false input reg to match + the destination reg so that the tcg backend can implement + a move if true operation. 
*/ +if (args[0] == args[3]) { +tmp = args[3]; +args[3] = args[4]; +args[4] = tmp; +cond = tcg_invert_cond(cond); } +args[5] = cond; default: break; } -- 1.7.11.4
Re: [Qemu-devel] [RFC PATCH 10/16] qcow2: Delay the COW
Il 18/09/2012 13:40, Kevin Wolf ha scritto: +again: +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { +if (m-sleeping) { +qemu_coroutine_enter(m-co, NULL); +/* next_in_flight link could have become invalid */ +goto again; +} +} + qemu_co_rwlock_wrlock(s-l2meta_flush); } static inline coroutine_fn void resume_l2meta(BDRVQcowState *s) { +s-in_l2meta_flush = false; qemu_co_rwlock_unlock(s-l2meta_flush); } static bool qcow2_drain(BlockDriverState *bs) { BDRVQcowState *s = bs-opaque; +QCowL2Meta *m; + +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { +if (m-sleeping) { +qemu_coroutine_enter(m-co, NULL); +} +} Why are the goto and in_l2meta_flush not needed here? If they are, perhaps stop_l2meta can just use qcow2_drain? Paolo
Re: [Qemu-devel] [RFC PATCH 11/16] qcow2: Add error handling to the l2meta coroutine
Il 18/09/2012 13:40, Kevin Wolf ha scritto: Not exactly bisectable, but one large patch isn't much better either For better bisectability you could add the co_sleep_ns in a separate patch, later in the series. Paolo
Re: [Qemu-devel] [Qemu-stable] [PATCH] make_device_config.sh: Fix target path in generated dependency file
Am 18.09.2012 14:29, schrieb Michael Tokarev: Has it been applied to anything? I don't think so. Is it still needed? Not in qemu.git yet, still applicable AFAICT. CC'ing Paolo. /-F Thanks, /mjt On 07.06.2012 20:23, Andreas Färber wrote: config-devices.mak.d is included from Makefile.target, i.e. from inside the *-softmmu/ directory. It included the directory path, so never applied to the actual config-devices.mak. Symptoms were spurious dependency issues with default-configs/pci.mak. Fix by using `basename` to strip the directory path. Reported-by: Gerhard Wiesinger li...@wiesinger.com Signed-off-by: Andreas Färber afaer...@suse.de --- Seems I forgot to send this out before 1.1... scripts/make_device_config.sh |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/scripts/make_device_config.sh b/scripts/make_device_config.sh index 5d14885..0778fe2 100644 --- a/scripts/make_device_config.sh +++ b/scripts/make_device_config.sh @@ -25,4 +25,4 @@ done process_includes $src $dest cat $src $all_includes | grep -v '^include' $dest -echo $1: $all_includes $dep +echo `basename $1`: $all_includes $dep -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg
Re: [Qemu-devel] [RFC PATCH 14/16] qcow2: Execute run_dependent_requests() without lock
Il 18/09/2012 13:40, Kevin Wolf ha scritto: static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) { -/* Take the request off the list of running requests */ -if (m-nb_clusters != 0) { -QLIST_REMOVE(m, next_in_flight); -} - /* Restart all dependent requests */ if (!qemu_co_queue_empty(m-dependent_requests)) { -qemu_co_mutex_unlock(s-lock); qemu_co_queue_restart_all(m-dependent_requests); -qemu_co_mutex_lock(s-lock); } The comment and if can go away. Perhaps this patch could be moved earlier in the series? (Just asking, in case the rebase is not too painful). Paolo } @@ -800,10 +793,18 @@ again: goto again; } +qemu_co_mutex_unlock(s-lock); + +/* Take the request off the list of running requests */ +if (m-nb_clusters != 0) { +QLIST_REMOVE(m, next_in_flight); +} + +/* Meanwhile some new dependencies could have accumulated */
[Qemu-devel] [PATCH 1/5] tcg: Introduce movcond
Implemented with setcond if the target does not provide the optional opcode. Signed-off-by: Richard Henderson r...@twiddle.net --- tcg/README | 6 ++ tcg/arm/tcg-target.h | 1 + tcg/hppa/tcg-target.h | 1 + tcg/i386/tcg-target.h | 2 ++ tcg/ia64/tcg-target.h | 2 ++ tcg/mips/tcg-target.h | 1 + tcg/ppc/tcg-target.h | 1 + tcg/ppc64/tcg-target.h | 2 ++ tcg/s390/tcg-target.h | 2 ++ tcg/sparc/tcg-target.h | 2 ++ tcg/tcg-op.h | 41 + tcg/tcg-opc.h | 2 ++ tcg/tcg.c | 11 +-- tcg/tcg.h | 1 + tcg/tci/tcg-target.h | 2 ++ 15 files changed, 71 insertions(+), 6 deletions(-) diff --git a/tcg/README b/tcg/README index cfdfd96..d03ae05 100644 --- a/tcg/README +++ b/tcg/README @@ -307,6 +307,12 @@ dest = (t1 cond t2) Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. +* movcond_i32/i64 cond, dest, c1, c2, v1, v2 + +dest = (c1 cond c2 ? v1 : v2) + +Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2. + * Type conversions * ext_i32_i64 t0, t1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index c0b8f72..e2299ca 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -73,6 +73,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_GUEST_BASE diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index 01ef960..4defd28 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -96,6 +96,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_movcond_i32 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 8be42f3..504f953 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -86,6 +86,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define 
TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_movcond_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -107,6 +108,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_movcond_i64 0 #endif #define TCG_TARGET_deposit_i32_valid(ofs, len) \ diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index c22962a..368aee4 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -133,6 +133,8 @@ typedef enum { #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i64 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub r1, r0, r3 */ diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 1c61931..9c68a32 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -90,6 +90,7 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 2f37fd2..177eea1 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -92,6 +92,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 1 #define TCG_TARGET_HAS_nor_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_movcond_i32 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 97eec08..57569e8 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -83,6 +83,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rot_i64 
0 @@ -103,6 +104,7 @@ typedef enum { #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_movcond_i64 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 4f7dfab..ed55c33 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -63,6 +63,7 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0
[Qemu-devel] [PATCH] target-sparc: SPARC LEON power-down support added
Signed-off-by: Ronald Hecht ronald.he...@gmx.de --- target-sparc/cpu.c |2 +- target-sparc/cpu.h |1 + target-sparc/helper.h |1 + target-sparc/ldst_helper.c |9 + target-sparc/translate.c |5 + 5 files changed, 17 insertions(+), 1 deletions(-) diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c index 0d5abb8..10563c4 100644 --- a/target-sparc/cpu.c +++ b/target-sparc/cpu.c @@ -589,7 +589,7 @@ static const sparc_def_t sparc_defs[] = { .mmu_trcr_mask = 0xffffffff, .nwindows = 8, .features = CPU_DEFAULT_FEATURES | CPU_FEATURE_TA0_SHUTDOWN | -CPU_FEATURE_ASR17 | CPU_FEATURE_CACHE_CTRL, +CPU_FEATURE_ASR17 | CPU_FEATURE_CACHE_CTRL | CPU_FEATURE_POWERDOWN, }, #endif }; diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index e16b7b3..eadc445 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -270,6 +270,7 @@ typedef struct sparc_def_t { #define CPU_FEATURE_TA0_SHUTDOWN (1 << 14) /* Shutdown on ta 0x0 */ #define CPU_FEATURE_ASR17 (1 << 15) #define CPU_FEATURE_CACHE_CTRL (1 << 16) +#define CPU_FEATURE_POWERDOWN (1 << 17) #ifndef TARGET_SPARC64 #define CPU_DEFAULT_FEATURES (CPU_FEATURE_FLOAT | CPU_FEATURE_SWAP | \ diff --git a/target-sparc/helper.h b/target-sparc/helper.h index e3c7fdd..74ecad1 100644 --- a/target-sparc/helper.h +++ b/target-sparc/helper.h @@ -4,6 +4,7 @@ DEF_HELPER_1(rett, void, env) DEF_HELPER_2(wrpsr, void, env, tl) DEF_HELPER_1(rdpsr, tl, env) +DEF_HELPER_1(power_down, void, env) #else DEF_HELPER_2(wrpil, void, env, tl) DEF_HELPER_2(wrpstate, void, env, tl) diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index 684b73b..bb5016c 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -2317,6 +2317,15 @@ void helper_stqf(CPUSPARCState *env, target_ulong addr, int mem_idx) #endif } +void helper_power_down(CPUSPARCState *env) +{ +env->halted = 1; +env->exception_index = EXCP_HLT; +env->pc = env->npc; +env->npc = env->pc + 4; +cpu_loop_exit(env); +} + #if !defined(CONFIG_USER_ONLY) #ifndef TARGET_SPARC64 void 
cpu_unassigned_access(CPUSPARCState *env, target_phys_addr_t addr, diff --git a/target-sparc/translate.c b/target-sparc/translate.c index b95f91c..9babaa8 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -3671,6 +3671,11 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) in the SPARCv8 manual, nop on the microSPARC II */ +if ((rd == 0x13) && (dc->def->features & + CPU_FEATURE_POWERDOWN)) { +/* LEON3 power-down */ +gen_helper_power_down(cpu_env); +} break; #else case 0x2: /* V9 wrccr */ -- 1.7.2.5
[Qemu-devel] [PATCH 4/5] tcg: Optimize movcond for constant comparisons
Signed-off-by: Richard Henderson r...@twiddle.net --- tcg/optimize.c | 29 + 1 file changed, 29 insertions(+) diff --git a/tcg/optimize.c b/tcg/optimize.c index fba0ed9..5b0a8ce 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -397,6 +397,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args[3] = tcg_swap_cond(args[3]); } break; +CASE_OP_32_64(movcond): +if (temps[args[1]].state == TCG_TEMP_CONST + temps[args[2]].state != TCG_TEMP_CONST) { +tmp = args[1]; +args[1] = args[2]; +args[2] = tmp; +args[5] = tcg_swap_cond(args[5]); +} default: break; } @@ -617,6 +625,27 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } args += 4; break; +CASE_OP_32_64(movcond): +if (temps[args[1]].state == TCG_TEMP_CONST + temps[args[2]].state == TCG_TEMP_CONST) { +gen_opc_buf[op_index] = op_to_mov(op); +tmp = do_constant_folding_cond(op, temps[args[1]].val, + temps[args[2]].val, args[5]); +tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp], +nb_temps, nb_globals); +gen_args += 2; +} else { +reset_temp(args[0], nb_temps, nb_globals); +gen_args[0] = args[0]; +gen_args[1] = args[1]; +gen_args[2] = args[2]; +gen_args[3] = args[3]; +gen_args[4] = args[4]; +gen_args[5] = args[5]; +gen_args += 6; +} +args += 6; +break; case INDEX_op_call: nb_call_args = (args[0] 16) + (args[0] 0x); if (!(args[nb_call_args + 1] (TCG_CALL_CONST | TCG_CALL_PURE))) { -- 1.7.11.4
Re: [Qemu-devel] [RFC PATCH 09/16] qcow2: Move COW and L2 update into own coroutine
Am 18.09.2012 16:24, schrieb Paolo Bonzini: Il 18/09/2012 13:40, Kevin Wolf ha scritto: +qemu_co_mutex_unlock(s-lock); +qemu_co_rwlock_rdlock(s-l2meta_flush); Should this lock be taken in process_l2meta? It's a bit easier to follow. I'm pretty sure there was a reason, but it isn't obvious any more. I guess I should have put a comment there... Maybe it doesn't exist any more, or maybe it's not that obvious. The difference would be that while waiting for the lock, the original write request could complete instead of waiting as well, and that the lock is potentially taken only in a BH instead of immediately. What happens if bdrv_aio_flush() and bdrv_aio_writev() are both in flight? If the flush runs its stop_l2meta() after the write request has signalled completion, but before the COW coroutine has started, it gets the lock even though a COW must still be processed. I believe we could then return a successful flush when the metadata isn't really on disk yet. So if you agree, I think we need to leave it where it is. Kevin
Re: [Qemu-devel] [RFC PATCH 15/16] qcow2: Cancel COW when overwritten
Il 18/09/2012 13:40, Kevin Wolf ha scritto: +qemu_co_mutex_unlock(s-lock); +qemu_co_rwlock_wrlock(m-l2_writeback_lock); Can anybody else take the lock as reader again at this point? If not, I wonder if this is more clear if you write it as a CoQueue. Paolo +has_wr_lock = true; +qemu_co_mutex_lock(s-lock);
[Qemu-devel] [PATCH 2/5] target-alpha: Use movcond
For proper cmov insns, as well as the non-goto-tb case of conditional branch. Signed-off-by: Richard Henderson r...@twiddle.net --- target-alpha/translate.c | 102 ++- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 12de6a3..4a9011a 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -426,27 +426,15 @@ static ExitStatus gen_bcond_internal(DisasContext *ctx, TCGCond cond, return EXIT_GOTO_TB; } else { -int lab_over = gen_new_label(); - -/* ??? Consider using either - movi pc, next - addi tmp, pc, disp - movcond pc, cond, 0, tmp, pc - or - setcond tmp, cond, 0 - movi pc, next - neg tmp, tmp - andi tmp, tmp, disp - add pc, pc, tmp - The current diamond subgraph surely isn't efficient. */ +TCGv_i64 z = tcg_const_i64(0); +TCGv_i64 d = tcg_const_i64(dest); +TCGv_i64 p = tcg_const_i64(ctx-pc); -tcg_gen_brcondi_i64(cond, cmp, 0, lab_true); -tcg_gen_movi_i64(cpu_pc, ctx-pc); -tcg_gen_br(lab_over); -gen_set_label(lab_true); -tcg_gen_movi_i64(cpu_pc, dest); -gen_set_label(lab_over); +tcg_gen_movcond_i64(cond, cpu_pc, cmp, z, d, p); +tcg_temp_free_i64(z); +tcg_temp_free_i64(d); +tcg_temp_free_i64(p); return EXIT_PC_UPDATED; } } @@ -521,61 +509,67 @@ static ExitStatus gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, static void gen_cmov(TCGCond cond, int ra, int rb, int rc, int islit, uint8_t lit, int mask) { -TCGCond inv_cond = tcg_invert_cond(cond); -int l1; +TCGv_i64 c1, z, v1; -if (unlikely(rc == 31)) +if (unlikely(rc == 31)) { return; +} -l1 = gen_new_label(); - -if (ra != 31) { -if (mask) { -TCGv tmp = tcg_temp_new(); -tcg_gen_andi_i64(tmp, cpu_ir[ra], 1); -tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1); -tcg_temp_free(tmp); -} else -tcg_gen_brcondi_i64(inv_cond, cpu_ir[ra], 0, l1); -} else { +if (ra == 31) { /* Very uncommon case - Do not bother to optimize. 
*/ -TCGv tmp = tcg_const_i64(0); -tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1); -tcg_temp_free(tmp); +c1 = tcg_const_i64(0); +} else if (mask) { +c1 = tcg_const_i64(1); +tcg_gen_and_i64(c1, c1, cpu_ir[ra]); +} else { +c1 = cpu_ir[ra]; } +if (islit) { +v1 = tcg_const_i64(lit); +} else { +v1 = cpu_ir[rb]; +} +z = tcg_const_i64(0); -if (islit) -tcg_gen_movi_i64(cpu_ir[rc], lit); -else -tcg_gen_mov_i64(cpu_ir[rc], cpu_ir[rb]); -gen_set_label(l1); +tcg_gen_movcond_i64(cond, cpu_ir[rc], c1, z, v1, cpu_ir[rc]); + +tcg_temp_free_i64(z); +if (ra == 31 || mask) { +tcg_temp_free_i64(c1); +} +if (islit) { +tcg_temp_free_i64(v1); +} } static void gen_fcmov(TCGCond cond, int ra, int rb, int rc) { -TCGv cmp_tmp; -int l1; +TCGv_i64 c1, z, v1; if (unlikely(rc == 31)) { return; } -cmp_tmp = tcg_temp_new(); +c1 = tcg_temp_new_i64(); if (unlikely(ra == 31)) { -tcg_gen_movi_i64(cmp_tmp, 0); +tcg_gen_movi_i64(c1, 0); +} else { +gen_fold_mzero(cond, c1, cpu_fir[ra]); +} +if (rb == 31) { +v1 = tcg_const_i64(0); } else { -gen_fold_mzero(cond, cmp_tmp, cpu_fir[ra]); +v1 = cpu_fir[rb]; } +z = tcg_const_i64(0); -l1 = gen_new_label(); -tcg_gen_brcondi_i64(tcg_invert_cond(cond), cmp_tmp, 0, l1); -tcg_temp_free(cmp_tmp); +tcg_gen_movcond_i64(cond, cpu_fir[rc], c1, z, v1, cpu_fir[rc]); -if (rb != 31) -tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]); -else -tcg_gen_movi_i64(cpu_fir[rc], 0); -gen_set_label(l1); +tcg_temp_free_i64(z); +tcg_temp_free_i64(c1); +if (rb == 31) { +tcg_temp_free_i64(v1); +} } #define QUAL_RM_N 0x080 /* Round mode nearest even */ -- 1.7.11.4
Re: [Qemu-devel] [RFC PATCH 10/16] qcow2: Delay the COW
Am 18.09.2012 16:27, schrieb Paolo Bonzini: Il 18/09/2012 13:40, Kevin Wolf ha scritto: +again: +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { +if (m-sleeping) { +qemu_coroutine_enter(m-co, NULL); +/* next_in_flight link could have become invalid */ +goto again; +} +} + qemu_co_rwlock_wrlock(s-l2meta_flush); } static inline coroutine_fn void resume_l2meta(BDRVQcowState *s) { +s-in_l2meta_flush = false; qemu_co_rwlock_unlock(s-l2meta_flush); } static bool qcow2_drain(BlockDriverState *bs) { BDRVQcowState *s = bs-opaque; +QCowL2Meta *m; + +QLIST_FOREACH(m, s-cluster_allocs, next_in_flight) { +if (m-sleeping) { +qemu_coroutine_enter(m-co, NULL); +} +} Why are the goto and in_l2meta_flush not needed here? If they are, perhaps stop_l2meta can just use qcow2_drain? I think you're right, thanks. Kevin
[Qemu-devel] [PATCH 0/2] target-i386: Fix default Hypervisor level for kvm
Looking at http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html The new value for EAX is 0x40000001. This depends on http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg02497.html As far as I know, it is #5. It depends on (1), (2), (3) and (4). Based on cpu-queue[1] branch. (From http://lists.gnu.org/archive/html/qemu-devel/2012-09/msg02639.html) [1] https://github.com/ehabkost/qemu/commits/cpu-queue My branch is now based on Andreas's qom-cpu branch from https://github.com/afaerber/qemu-cpu/commits/qom-cpu Don Slutz (2): target-i386: Fix default Hypervisor level for accel=kvm. target-i386: Fix default Hypervisor level for hypervisor-vendor=kvm. target-i386/cpu.c | 12 +++- target-i386/kvm.c |2 +- 2 files changed, 12 insertions(+), 2 deletions(-)
[Qemu-devel] [PATCH 1/2] target-i386: Fix default Hypervisor level for accel=kvm.
From http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html EAX should be KVM_CPUID_FEATURES (0x40000001) not 0. --- target-i386/kvm.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 761a9b1..0c9f5dd 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -392,7 +392,7 @@ int kvm_arch_init_vcpu(CPUX86State *env) c->function = KVM_CPUID_SIGNATURE; if (env->cpuid_hv_level == 0) { memcpy(signature, "KVMKVMKVM\0\0\0", 12); -c->eax = 0; +c->eax = KVM_CPUID_FEATURES; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; -- 1.7.1
Re: [Qemu-devel] [RFC PATCH 09/16] qcow2: Move COW and L2 update into own coroutine
Il 18/09/2012 13:40, Kevin Wolf ha scritto: +qemu_co_mutex_unlock(s-lock); +qemu_co_rwlock_rdlock(s-l2meta_flush); Should this lock be taken in process_l2meta? It's a bit easier to follow. Paolo +l2meta-is_written = true; +co = qemu_coroutine_create(process_l2meta); +qemu_coroutine_enter(co, p);
[Qemu-devel] [PATCH 2/2] target-i386: Fix default Hypervisor level for hypervisor-vendor=kvm.
From http://lkml.indiana.edu/hypermail/linux/kernel/1205.0/00100.html EAX should be KVM_CPUID_FEATURES (0x40000001) not 0. If kvm is not configured, the additional option of hypervisor-level=1 (or hypervisor-level=0x40000001) needs to be specified to get this. --- target-i386/cpu.c | 12 +++- 1 files changed, 11 insertions(+), 1 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 6e43eff..d73b0a8 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -1248,7 +1248,12 @@ static char *x86_cpuid_get_hv_vendor(Object *obj, Error **errp) env->cpuid_hv_level == CPUID_HV_LEVEL_XEN) { pstrcpy(value, sizeof(value), "xen"); } else if (!strcmp(value, CPUID_HV_VENDOR_KVM) && - env->cpuid_hv_level == 0) { +#if defined(CONFIG_KVM) + env->cpuid_hv_level == KVM_CPUID_FEATURES +#else + env->cpuid_hv_level == 0 +#endif + ) { pstrcpy(value, sizeof(value), "kvm"); } return value; @@ -1281,6 +1286,11 @@ static void x86_cpuid_set_hv_vendor(Object *obj, const char *value, } pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_XEN); } else if (!strcmp(value, "kvm")) { +#if defined(CONFIG_KVM) +if (env->cpuid_hv_level == 0) { +env->cpuid_hv_level = KVM_CPUID_FEATURES; +} +#endif pstrcpy(adj_value, sizeof(adj_value), CPUID_HV_VENDOR_KVM); } else { pstrcpy(adj_value, sizeof(adj_value), value); -- 1.7.1