Re: [Qemu-devel] [PATCHv5 01/15] Introduce fw_name field to DeviceInfo structure.
On Mon, Nov 15, 2010 at 04:30:55PM +0200, Gleb Natapov wrote: diff --git a/hw/piix_pci.c b/hw/piix_pci.c index b5589b9..38f9d9e 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -365,6 +365,7 @@ static PCIDeviceInfo i440fx_info[] = { static SysBusDeviceInfo i440fx_pcihost_info = { .init = i440fx_pcihost_initfn, .qdev.name= i440FX-pcihost, +.qdev.fw_name = pci, .qdev.size= sizeof(I440FXState), .qdev.no_user = 1, }; There are other pci host bridges for not pc architecture which needs similar modification. Please grep by pci_register_bus(). - apb_pci.c - bonito.c - grackle_pci.c - unin_pci.c - versatile_pci.c BTW, what happens for non-qdevfied pci host bridge? -- yamahata
[Qemu-devel] How to debug BIOS with Qemu?
Hi, I'm a newbie to qemu and recently I was trying to debug the BIOS (the VGA BIOS in qemu, and maybe the BIOS part in qemu from the bochs project), and I found that the guide in the bios dir seemed to be a bit old and didn't work. I have built the latest bios.bin with symbols, but how on earth do I debug it? By looking at the data from the in_asm/out_asm options? Has anybody done the same work? Could you please send me some info about this? Thanks in advance.
[Qemu-devel] [PATCH v9 3/8] pci: clean up of pci status register
This patch refine the initialization/reset of pci status registers. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- hw/pci.c | 41 +++-- 1 files changed, 39 insertions(+), 2 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index 52fe655..fba765b 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -145,6 +145,9 @@ static void pci_device_reset(PCIDevice *dev) pci_word_test_and_clear_mask(dev-config + PCI_COMMAND, pci_get_word(dev-wmask + PCI_COMMAND) | pci_get_word(dev-w1cmask + PCI_COMMAND)); +pci_word_test_and_clear_mask(dev-config + PCI_STATUS, + pci_get_word(dev-wmask + PCI_STATUS) | + pci_get_word(dev-w1cmask + PCI_STATUS)); dev-config[PCI_CACHE_LINE_SIZE] = 0x0; dev-config[PCI_INTERRUPT_LINE] = 0x0; for (r = 0; r PCI_NUM_REGIONS; ++r) { @@ -540,7 +543,7 @@ static void pci_init_cmask(PCIDevice *dev) dev-cmask[PCI_CAPABILITY_LIST] = 0xff; } -static void pci_init_wmask(PCIDevice *dev) +static void pci_init_wmask_w1cmask(PCIDevice *dev) { int config_size = pci_config_size(dev); @@ -595,6 +598,40 @@ static void pci_init_wmask(PCIDevice *dev) PCI_COMMAND_MASTER | PCI_COMMAND_PARITY | PCI_COMMAND_SERR | PCI_COMMAND_INTX_DISABLE); +/* + * bit 0-2: reserved + * bit 3: PCI_STATUS_INTERRUPT: RO + * bit 4: PCI_STATUS_CAP_LIST: RO + * bit 5: PCI_STATUS_66MHZ: RO + * bit 6: PCI_STATUS_UDF: reserved (PCI 2.2-) + * bit 7: PCI_STATUS_FAST_BACK: RO + * bit 8: PCI_STATUS_PARITY + *type 0: RW for bus master + *type 1: RW1C + * bit 9-10: PCI_STATUS_DEVSEL: RO + * bit 11: PCI_STATUS_SIG_TARGET_ABORT + * type 0: RW1C for targets that is capable of terminating + * a transaction. + * type 1: RW1C + * bit 12: PCI_STATUS_REC_TARGET_ABORT + * type 0: RW1C for masters + * type 1: RW1C + * bit 13: PCI_STATUS_REC_MASTER_ABORT + * type 0: RW1C for masters + * type 1: RW1C + * bit 14: PCI_STATUS_SIG_SYSTEM_ERROR + * type 0: RW1C with execptions + * type 1: RW1C + * bit : PCI_STATUS_DETECTED_PARITY: RW1C + * + * It's okay to set w1mask even for RO=0(i.e. 
reserved) because + * writing value 1 to w1c bit whose value is 0 has no effect. + */ +pci_set_word(dev-w1cmask + PCI_STATUS, + PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | + PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY); + memset(dev-wmask + PCI_CONFIG_HEADER_SIZE, 0xff, config_size - PCI_CONFIG_HEADER_SIZE); } @@ -725,7 +762,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, pci_set_default_subsystem_id(pci_dev); } pci_init_cmask(pci_dev); -pci_init_wmask(pci_dev); +pci_init_wmask_w1cmask(pci_dev); if (is_bridge) { pci_init_wmask_bridge(pci_dev); } -- 1.7.1.1
[Qemu-devel] [PATCH v9 7/8] x3130/upstream: support aer
add aer support. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- Changes v8 - v9 - error path in initialization. --- hw/xio3130_upstream.c | 33 - 1 files changed, 24 insertions(+), 9 deletions(-) diff --git a/hw/xio3130_upstream.c b/hw/xio3130_upstream.c index d9d637f..387bf6c 100644 --- a/hw/xio3130_upstream.c +++ b/hw/xio3130_upstream.c @@ -41,7 +41,7 @@ static void xio3130_upstream_write_config(PCIDevice *d, uint32_t address, pci_bridge_write_config(d, address, val, len); pcie_cap_flr_write_config(d, address, val, len); msi_write_config(d, address, val, len); -/* TODO: AER */ +pcie_aer_write_config(d, address, val, len); } static void xio3130_upstream_reset(DeviceState *qdev) @@ -57,6 +57,7 @@ static int xio3130_upstream_initfn(PCIDevice *d) PCIBridge* br = DO_UPCAST(PCIBridge, dev, d); PCIEPort *p = DO_UPCAST(PCIEPort, br, br); int rc; +int tmp; rc = pci_bridge_initfn(d); if (rc 0) { @@ -72,33 +73,45 @@ static int xio3130_upstream_initfn(PCIDevice *d) XIO3130_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_64BIT, XIO3130_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_MASKBIT); if (rc 0) { -return rc; +goto err_bridge; } rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET, XIO3130_SSVID_SVID, XIO3130_SSVID_SSID); if (rc 0) { -return rc; +goto err_bridge; } rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_UPSTREAM, p-port); if (rc 0) { -return rc; +goto err_msi; } /* TODO: implement FLR */ pcie_cap_flr_init(d); pcie_cap_deverr_init(d); -/* TODO: AER */ +rc = pcie_aer_init(d, XIO3130_AER_OFFSET); +if (rc 0) { +goto err; +} return 0; + +err: +pcie_cap_exit(d); +err_msi: +msi_uninit(d); +err_bridge: +tmp = pci_bridge_exitfn(d); +assert(!tmp); +return rc; } static int xio3130_upstream_exitfn(PCIDevice *d) { -/* TODO: AER */ -msi_uninit(d); +pcie_aer_exit(d); pcie_cap_exit(d); +msi_uninit(d); return pci_bridge_exitfn(d); } @@ -131,7 +144,8 @@ static const VMStateDescription vmstate_xio3130_upstream = { .minimum_version_id_old = 1, .fields = (VMStateField[]) { 
VMSTATE_PCIE_DEVICE(br.dev, PCIEPort), -/* TODO: AER */ +VMSTATE_STRUCT(br.dev.exp.aer_log, PCIEPort, 0, vmstate_pcie_aer_log, + PCIEAERLog), VMSTATE_END_OF_LIST() } }; @@ -151,7 +165,8 @@ static PCIDeviceInfo xio3130_upstream_info = { .qdev.props = (Property[]) { DEFINE_PROP_UINT8(port, PCIEPort, port, 0), -/* TODO: AER */ +DEFINE_PROP_UINT16(aer_log_max, PCIEPort, br.dev.exp.aer_log.log_max, + PCIE_AER_LOG_MAX_DEFAULT), DEFINE_PROP_END_OF_LIST(), } }; -- 1.7.1.1
[Qemu-devel] [PATCH v9 0/8] pcie port switch emulators
Now v9 of pcie aer patch series. I dropped the qmp patch to inject aer errors because it depends on Gleb's openfirmware path patches. Once his patches are merged, the glue patch will be respun. Patch description: The patch series adds pcie/aer functionality to the pcie port emulators and adds new qemu command to inject aer into the guest. Changes v8 -> v9: - dropped qmp glue aer error injection. - folded pci command register initialization patches - make pcie_aer_init() return error Changes v7 -> v8: - Added command to the forward declaration. - revise pci command/status register initialization - various changes to follow the review - use domain:slot.func:slot.func..:slot.func to specify pci function instead of domain:bus:slot.func - allow symbolic name for aer error name in addition to 32 bit value Changes v6 -> v7: - the glue patch for pushing attention button is dropped for now. This will be addressed later. - various clean up of aer helper functions. Changes v5 -> v6: - dropped already merged patches. - add comment on hpev_intx - updated the bridge fix patch - update the aer patch. - reordered the patch series to remove the aer dependency. Changes v4 -> v5: - introduced pci_xxx_test_and_clear/set_mask - eliminated xxx_notify(msi_trigger, int_level) - eliminated FLR bits. FLR will be addressed at the next phase. Changes v3 -> v4: - introduced new pci config helper functions (clear/set bit). - various clean up and some bug fixes. - dropped pci_shift_xxx(). - dropped function pointer in pcie_aer.h - dropped pci_exp_cap(), pcie_aer_cap(). - file rename (pcie_{root, upstream, downstream} => ioh3420, x3130). Changes v2 -> v3: - msi: improved comment and simplified shift/ffs dance - pci w1c config register framework - split pcie.[ch] into pcie_regs.h, pcie.[ch] and pcie_aer.[ch] - pcie, aer: many changes by following reviews. Changes v1 -> v2: - update msi - dropped already pushed out patches. - added msix patches. 
Isaku Yamahata (8): pci: revise pci command register initialization pci: fix accesses to pci status register pci: clean up of pci status register pcie_regs.h: more constants pcie/aer: helper functions for pcie aer capability ioh3420: support aer x3130/upstream: support aer x3130/downstream: support aer. Makefile.objs |2 +- hw/ioh3420.c| 80 - hw/pci.c| 120 +++- hw/pcie.h | 14 + hw/pcie_aer.c | 827 +++ hw/pcie_aer.h | 106 ++ hw/pcie_regs.h |2 + hw/xio3130_downstream.c | 43 ++- hw/xio3130_upstream.c | 33 ++- qemu-common.h |3 + 10 files changed, 1189 insertions(+), 41 deletions(-) create mode 100644 hw/pcie_aer.c create mode 100644 hw/pcie_aer.h
[Qemu-devel] [PATCH v9 1/8] pci: revise pci command register initialization
This patch cleans up command register initialization with comments. It also fixes the initialization of io/memory bit of command register. Those bits for type 1 device is RW. Those bits for type 0 device is RO = 0 if it has no io/memory BAR RW if it has io/memory BAR Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- Changes v8 - v9 - patch squash --- hw/pci.c | 58 +- 1 files changed, 57 insertions(+), 1 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index 962886e..2fc8ab1 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -544,8 +544,53 @@ static void pci_init_wmask(PCIDevice *dev) dev-wmask[PCI_CACHE_LINE_SIZE] = 0xff; dev-wmask[PCI_INTERRUPT_LINE] = 0xff; + +/* + * bit 0: PCI_COMMAND_IO + *type 0: if IO BAR is used, RW + *This is handled by pci_register_bar() + *type 1: RW: + *This is fixed by pci_init_wmask_bridge() + * bit 1: PCI_COMMAND_MEMORY + *type 0: if IO BAR is used, RW + *This is handled by pci_register_bar() + *type 1: RW + *This is fixed by pci_init_wmask_bridge() + * bit 2: PCI_COMMAND_MASTER + *type 0: RW if bus master + *type 1: RW + * bit 3: PCI_COMMAND_SPECIAL + *RO=0, optionally RW: Such device should set this bit itself + * bit 4: PCI_COMMAND_INVALIDATE + *RO=0, optionally RW: Such device should set this bit itself + * bit 5: PCI_COMMAND_VGA_PALETTE + *RO=0, optionally RW: Such device should set this bit itself + * bit 6: PCI_COMMAND_PARITY + *RW with exceptions: Such device should clear this bit itself + *Given that qemu doesn't emulate pci bus cycles, so that there + *is no place to generate parity error. So just making this + *register RW is okay because there is no place which refers + *this bit. + *TODO: When device assignment tried to inject PERR# into qemu, + * some extra work would be needed. + * bit 7: PCI_COMMAND_WAIT: reserved (PCI 3.0) + *RO=0 + * bit 8: PCI_COMMAND_SERR + *RW with exceptions: Such device should clear this bit itself + *Given that qemu doesn't emulate pci bus cycles, so that there + *is no place to generate system error. 
So just making this + *register RW is okay because there is no place which refers + *this bit. + *TODO: When device assignment tried to inject SERR# into qemu, + * some extra work would be needed. + * bit 9: PCI_COMMAND_FAST_BACK + *RO=0, optionally RW: Such device should set this bit itself + * bit 10: PCI_COMMAND_INTX_DISABLE + * RW + * bit 11-15: reserved + */ pci_set_word(dev-wmask + PCI_COMMAND, - PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | + PCI_COMMAND_MASTER | PCI_COMMAND_PARITY | PCI_COMMAND_SERR | PCI_COMMAND_INTX_DISABLE); memset(dev-wmask + PCI_CONFIG_HEADER_SIZE, 0xff, @@ -554,6 +599,9 @@ static void pci_init_wmask(PCIDevice *dev) static void pci_init_wmask_bridge(PCIDevice *d) { +pci_word_test_and_set_mask(d-wmask + PCI_COMMAND, + PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + /* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and PCI_SEC_LETENCY_TIMER */ memset(d-wmask + PCI_PRIMARY_BUS, 0xff, 4); @@ -791,6 +839,14 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, if (region_num == PCI_ROM_SLOT) { /* ROM enable bit is writeable */ wmask |= PCI_ROM_ADDRESS_ENABLE; +} else { +if (r-type PCI_BASE_ADDRESS_SPACE_IO) { +pci_word_test_and_set_mask(pci_dev-wmask + PCI_COMMAND, + PCI_COMMAND_IO); +} else { +pci_word_test_and_set_mask(pci_dev-wmask + PCI_COMMAND, + PCI_COMMAND_MEMORY); +} } pci_set_long(pci_dev-config + addr, type); if (!(r-type PCI_BASE_ADDRESS_SPACE_IO) -- 1.7.1.1
[Qemu-devel] [PATCH v9 2/8] pci: fix accesses to pci status register
pci status register is 16 bit, not 8 bit. So use helper function to manipulate status register. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- hw/pci.c | 21 + 1 files changed, 13 insertions(+), 8 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index 2fc8ab1..52fe655 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -127,9 +127,11 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) static void pci_update_irq_status(PCIDevice *dev) { if (dev-irq_state) { -dev-config[PCI_STATUS] |= PCI_STATUS_INTERRUPT; +pci_word_test_and_set_mask(dev-config + PCI_STATUS, + PCI_STATUS_INTERRUPT); } else { -dev-config[PCI_STATUS] = ~PCI_STATUS_INTERRUPT; +pci_word_test_and_clear_mask(dev-config + PCI_STATUS, + PCI_STATUS_INTERRUPT); } } @@ -404,7 +406,7 @@ void pci_device_save(PCIDevice *s, QEMUFile *f) * in irq_state which we are saving. * This makes us compatible with old devices * which never set or clear this bit. */ -s-config[PCI_STATUS] = ~PCI_STATUS_INTERRUPT; +pci_word_test_and_clear_mask(s-config + PCI_STATUS, PCI_STATUS_INTERRUPT); vmstate_save_state(f, pci_get_vmstate(s), s); /* Restore the interrupt status bit. 
*/ pci_update_irq_status(s); @@ -530,7 +532,7 @@ static void pci_init_cmask(PCIDevice *dev) { pci_set_word(dev-cmask + PCI_VENDOR_ID, 0x); pci_set_word(dev-cmask + PCI_DEVICE_ID, 0x); -dev-cmask[PCI_STATUS] = PCI_STATUS_CAP_LIST; +pci_set_word(dev-cmask + PCI_STATUS, PCI_STATUS_CAP_LIST); dev-cmask[PCI_REVISION_ID] = 0xff; dev-cmask[PCI_CLASS_PROG] = 0xff; pci_set_word(dev-cmask + PCI_CLASS_DEVICE, 0x); @@ -1697,8 +1699,9 @@ static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id, { uint8_t next, prev; -if (!(pdev-config[PCI_STATUS] PCI_STATUS_CAP_LIST)) +if (!(pci_get_word(pdev-config + PCI_STATUS) PCI_STATUS_CAP_LIST)) { return 0; +} for (prev = PCI_CAPABILITY_LIST; (next = pdev-config[prev]); prev = next + PCI_CAP_LIST_NEXT) @@ -1804,7 +1807,7 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, config[PCI_CAP_LIST_ID] = cap_id; config[PCI_CAP_LIST_NEXT] = pdev-config[PCI_CAPABILITY_LIST]; pdev-config[PCI_CAPABILITY_LIST] = offset; -pdev-config[PCI_STATUS] |= PCI_STATUS_CAP_LIST; +pci_word_test_and_set_mask(pdev-config + PCI_STATUS, PCI_STATUS_CAP_LIST); memset(pdev-used + offset, 0xFF, size); /* Make capability read-only by default */ memset(pdev-wmask + offset, 0, size); @@ -1827,8 +1830,10 @@ void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size) memset(pdev-cmask + offset, 0, size); memset(pdev-used + offset, 0, size); -if (!pdev-config[PCI_CAPABILITY_LIST]) -pdev-config[PCI_STATUS] = ~PCI_STATUS_CAP_LIST; +if (!pdev-config[PCI_CAPABILITY_LIST]) { +pci_word_test_and_clear_mask(pdev-config + PCI_STATUS, + PCI_STATUS_CAP_LIST); +} } /* Reserve space for capability at a known offset (to call after load). */ -- 1.7.1.1
[Qemu-devel] [PATCH v9 8/8] x3130/downstream: support aer.
add aer support. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- Changes v8 - v9: - error path in initialization --- hw/xio3130_downstream.c | 43 +-- 1 files changed, 33 insertions(+), 10 deletions(-) diff --git a/hw/xio3130_downstream.c b/hw/xio3130_downstream.c index 854eba8..1a2d258 100644 --- a/hw/xio3130_downstream.c +++ b/hw/xio3130_downstream.c @@ -42,7 +42,7 @@ static void xio3130_downstream_write_config(PCIDevice *d, uint32_t address, pcie_cap_flr_write_config(d, address, val, len); pcie_cap_slot_write_config(d, address, val, len); msi_write_config(d, address, val, len); -/* TODO: AER */ +pcie_aer_write_config(d, address, val, len); } static void xio3130_downstream_reset(DeviceState *qdev) @@ -61,6 +61,7 @@ static int xio3130_downstream_initfn(PCIDevice *d) PCIEPort *p = DO_UPCAST(PCIEPort, br, br); PCIESlot *s = DO_UPCAST(PCIESlot, port, p); int rc; +int tmp; rc = pci_bridge_initfn(d); if (rc 0) { @@ -76,17 +77,17 @@ static int xio3130_downstream_initfn(PCIDevice *d) XIO3130_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_64BIT, XIO3130_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_MASKBIT); if (rc 0) { -return rc; +goto err_bridge; } rc = pci_bridge_ssvid_init(d, XIO3130_SSVID_OFFSET, XIO3130_SSVID_SVID, XIO3130_SSVID_SSID); if (rc 0) { -return rc; +goto err_bridge; } rc = pcie_cap_init(d, XIO3130_EXP_OFFSET, PCI_EXP_TYPE_DOWNSTREAM, p-port); if (rc 0) { -return rc; +goto err_msi; } pcie_cap_flr_init(d); /* TODO: implement FLR */ pcie_cap_deverr_init(d); @@ -94,19 +95,38 @@ static int xio3130_downstream_initfn(PCIDevice *d) pcie_chassis_create(s-chassis); rc = pcie_chassis_add_slot(s); if (rc 0) { -return rc; +goto err_pcie_cap; } pcie_cap_ari_init(d); -/* TODO: AER */ +rc = pcie_aer_init(d, XIO3130_AER_OFFSET); +if (rc 0) { +goto err; +} return 0; + +err: +pcie_chassis_del_slot(s); +err_pcie_cap: +pcie_cap_exit(d); +err_msi: +msi_uninit(d); +err_bridge: +tmp = pci_bridge_exitfn(d); +assert(!tmp); +return rc; } static int xio3130_downstream_exitfn(PCIDevice *d) { -/* 
TODO: AER */ -msi_uninit(d); +PCIBridge* br = DO_UPCAST(PCIBridge, dev, d); +PCIEPort *p = DO_UPCAST(PCIEPort, br, br); +PCIESlot *s = DO_UPCAST(PCIESlot, port, p); + +pcie_aer_exit(d); +pcie_chassis_del_slot(s); pcie_cap_exit(d); +msi_uninit(d); return pci_bridge_exitfn(d); } @@ -144,7 +164,8 @@ static const VMStateDescription vmstate_xio3130_downstream = { .post_load = pcie_cap_slot_post_load, .fields = (VMStateField[]) { VMSTATE_PCIE_DEVICE(port.br.dev, PCIESlot), -/* TODO: AER */ +VMSTATE_STRUCT(port.br.dev.exp.aer_log, PCIESlot, 0, + vmstate_pcie_aer_log, PCIEAERLog), VMSTATE_END_OF_LIST() } }; @@ -166,7 +187,9 @@ static PCIDeviceInfo xio3130_downstream_info = { DEFINE_PROP_UINT8(port, PCIESlot, port.port, 0), DEFINE_PROP_UINT8(chassis, PCIESlot, chassis, 0), DEFINE_PROP_UINT16(slot, PCIESlot, slot, 0), -/* TODO: AER */ +DEFINE_PROP_UINT16(aer_log_max, PCIESlot, + port.br.dev.exp.aer_log.log_max, + PCIE_AER_LOG_MAX_DEFAULT), DEFINE_PROP_END_OF_LIST(), } }; -- 1.7.1.1
[Qemu-devel] [PATCH v9 6/8] ioh3420: support aer
Add aer support. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- Changes v8 - v9: - error path in initialization --- hw/ioh3420.c | 80 ++--- 1 files changed, 70 insertions(+), 10 deletions(-) diff --git a/hw/ioh3420.c b/hw/ioh3420.c index 3cc129f..95adf09 100644 --- a/hw/ioh3420.c +++ b/hw/ioh3420.c @@ -36,25 +36,59 @@ #define IOH_EP_EXP_OFFSET 0x90 #define IOH_EP_AER_OFFSET 0x100 +/* + * If two MSI vector are allocated, Advanced Error Interrupt Message Number + * is 1. otherwise 0. + * 17.12.5.10 RPERRSTS, 32:27 bit Advanced Error Interrupt Message Number. + */ +static uint8_t ioh3420_aer_vector(const PCIDevice *d) +{ +switch (msi_nr_vectors_allocated(d)) { +case 1: +return 0; +case 2: +return 1; +case 4: +case 8: +case 16: +case 32: +default: +break; +} +abort(); +return 0; +} + +static void ioh3420_aer_vector_update(PCIDevice *d) +{ +pcie_aer_root_set_vector(d, ioh3420_aer_vector(d)); +} + static void ioh3420_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { +uint32_t root_cmd = +pci_get_long(d-config + d-exp.aer_cap + PCI_ERR_ROOT_COMMAND); + pci_bridge_write_config(d, address, val, len); msi_write_config(d, address, val, len); +ioh3420_aer_vector_update(d); pcie_cap_slot_write_config(d, address, val, len); -/* TODO: AER */ +pcie_aer_write_config(d, address, val, len); +pcie_aer_root_write_config(d, address, val, len, root_cmd); } static void ioh3420_reset(DeviceState *qdev) { PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev); msi_reset(d); +ioh3420_aer_vector_update(d); pcie_cap_root_reset(d); pcie_cap_deverr_reset(d); pcie_cap_slot_reset(d); +pcie_aer_root_reset(d); pci_bridge_reset(qdev); pci_bridge_disable_base_limit(d); -/* TODO: AER */ } static int ioh3420_initfn(PCIDevice *d) @@ -63,6 +97,7 @@ static int ioh3420_initfn(PCIDevice *d) PCIEPort *p = DO_UPCAST(PCIEPort, br, br); PCIESlot *s = DO_UPCAST(PCIESlot, port, p); int rc; +int tmp; rc = pci_bridge_initfn(d); if (rc 0) { @@ -78,35 +113,57 @@ static int 
ioh3420_initfn(PCIDevice *d) rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET, IOH_EP_SSVID_SVID, IOH_EP_SSVID_SSID); if (rc 0) { -return rc; +goto err_bridge; } rc = msi_init(d, IOH_EP_MSI_OFFSET, IOH_EP_MSI_NR_VECTOR, IOH_EP_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_64BIT, IOH_EP_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_MASKBIT); if (rc 0) { -return rc; +goto err_bridge; } rc = pcie_cap_init(d, IOH_EP_EXP_OFFSET, PCI_EXP_TYPE_ROOT_PORT, p-port); if (rc 0) { -return rc; +goto err_msi; } pcie_cap_deverr_init(d); pcie_cap_slot_init(d, s-slot); pcie_chassis_create(s-chassis); rc = pcie_chassis_add_slot(s); if (rc 0) { +goto err_pcie_cap; return rc; } pcie_cap_root_init(d); -/* TODO: AER */ +rc = pcie_aer_init(d, IOH_EP_AER_OFFSET); +if (rc 0) { +goto err; +} +pcie_aer_root_init(d); +ioh3420_aer_vector_update(d); return 0; + +err: +pcie_chassis_del_slot(s); +err_pcie_cap: +pcie_cap_exit(d); +err_msi: +msi_uninit(d); +err_bridge: +tmp = pci_bridge_exitfn(d); +assert(!tmp); +return rc; } static int ioh3420_exitfn(PCIDevice *d) { -/* TODO: AER */ -msi_uninit(d); +PCIBridge* br = DO_UPCAST(PCIBridge, dev, d); +PCIEPort *p = DO_UPCAST(PCIEPort, br, br); +PCIESlot *s = DO_UPCAST(PCIESlot, port, p); + +pcie_aer_exit(d); +pcie_chassis_del_slot(s); pcie_cap_exit(d); +msi_uninit(d); return pci_bridge_exitfn(d); } @@ -142,7 +199,8 @@ static const VMStateDescription vmstate_ioh3420 = { .post_load = pcie_cap_slot_post_load, .fields = (VMStateField[]) { VMSTATE_PCIE_DEVICE(port.br.dev, PCIESlot), -/* TODO: AER */ +VMSTATE_STRUCT(port.br.dev.exp.aer_log, PCIESlot, 0, + vmstate_pcie_aer_log, PCIEAERLog), VMSTATE_END_OF_LIST() } }; @@ -164,7 +222,9 @@ static PCIDeviceInfo ioh3420_info = { DEFINE_PROP_UINT8(port, PCIESlot, port.port, 0), DEFINE_PROP_UINT8(chassis, PCIESlot, chassis, 0), DEFINE_PROP_UINT16(slot, PCIESlot, slot, 0), -/* TODO: AER */ +DEFINE_PROP_UINT16(aer_log_max, PCIESlot, + port.br.dev.exp.aer_log.log_max, + PCIE_AER_LOG_MAX_DEFAULT), DEFINE_PROP_END_OF_LIST(), } }; -- 1.7.1.1
[Qemu-devel] [PATCH v9 4/8] pcie_regs.h: more constants
remove unnecessary sizeof. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- hw/pcie_regs.h |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/hw/pcie_regs.h b/hw/pcie_regs.h index 3461a1b..4d123d9 100644 --- a/hw/pcie_regs.h +++ b/hw/pcie_regs.h @@ -94,7 +94,9 @@ #define PCI_ERR_CAP_MHRE0x0400 #define PCI_ERR_CAP_TLP 0x0800 +#define PCI_ERR_HEADER_LOG_SIZE 16 #define PCI_ERR_TLP_PREFIX_LOG 0x38 +#define PCI_ERR_TLP_PREFIX_LOG_SIZE 16 #define PCI_SEC_STATUS_RCV_SYSTEM_ERROR 0x4000 -- 1.7.1.1
[Qemu-devel] [PATCH 1/2] [V2] virtio-9p: Use chroot to safely access files in passthrough model
In the passthrough security model, following symbolic links on the server side could result in accessing files outside the guest's export path. This could happen under two conditions: 1) If a modified guest kernel sends a symbolic link as part of the file path, then resolving that symbolic link on the server side could result in accessing files outside the export path. 2) If the same path is exported to multiple guests and if guest1 tries to open a file a/b/c/passwd and meanwhile guest2 did this rm -rf a/b/c; cd a/b; ln -s ../../etc c. If guest1's lookup happened and guest2 completed these operations just before guest1 opened the file, this operation could result in opening the host's /etc/passwd. The following approach is used to avoid the security issue involved in following symbolic links in the passthrough model. Create a sub-process which will chroot into the export path, so that even if there is a symbolic link in the path it could never go beyond the share path. When qemu is started with the passthrough security model, a process is forked and this sub-process takes care of accessing files in the passthrough share path. It does the following: * Create socketpair * Chroot into share path * Read file open request from socket descriptor * Open request contains file path, flags, mode, uid, gid, dev etc * Based on the request type it creates/opens file/directory/device node * Return the file descriptor to main process using socket with SCM_RIGHTS as cmsg type. Whenever there is a request for a resource to be opened/created, the main process constructs the open request, writes it into its socket descriptor, and reads from the chroot process socket to get the file descriptor. This patch implements the chroot environment and provides the necessary functions that can be used by the passthrough function calls. 
Changes from previous version * Do not send ancillary data if a 9p open/create request fails * Handle error condition in socket read/write * Chroot subprocess exits when main qemu process exits * Return EIO for failed 9p open/create requests Signed-off-by: M. Mohan Kumar mo...@in.ibm.com --- Makefile.objs |1 + hw/file-op-9p.h |2 + hw/virtio-9p-chroot.c | 310 + hw/virtio-9p.c| 25 hw/virtio-9p.h| 21 5 files changed, 359 insertions(+), 0 deletions(-) create mode 100644 hw/virtio-9p-chroot.c diff --git a/Makefile.objs b/Makefile.objs index cd5a24b..134da8e 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -251,6 +251,7 @@ hw-obj-$(CONFIG_SOUND) += $(sound-obj-y) hw-obj-$(CONFIG_VIRTFS) += virtio-9p-debug.o virtio-9p-local.o virtio-9p-xattr.o hw-obj-$(CONFIG_VIRTFS) += virtio-9p-xattr-user.o virtio-9p-posix-acl.o +hw-obj-$(CONFIG_VIRTFS) += virtio-9p-chroot.o ## # libdis diff --git a/hw/file-op-9p.h b/hw/file-op-9p.h index c7731c2..149a915 100644 --- a/hw/file-op-9p.h +++ b/hw/file-op-9p.h @@ -55,6 +55,8 @@ typedef struct FsContext SecModel fs_sm; uid_t uid; struct xattr_operations **xops; +pthread_mutex_t chroot_mutex; +int chroot_socket; } FsContext; extern void cred_init(FsCred *); diff --git a/hw/virtio-9p-chroot.c b/hw/virtio-9p-chroot.c new file mode 100644 index 000..202c8e5 --- /dev/null +++ b/hw/virtio-9p-chroot.c @@ -0,0 +1,310 @@ +/* + * Virtio 9p chroot environment for secured access to exported file + * system + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * M. Mohan Kumar mo...@in.ibm.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the copying file in the top-level directory + * + */ + +#include virtio.h +#include qemu_socket.h +#include qemu-thread.h +#include virtio-9p.h +#include sys/fsuid.h +#include sys/resource.h + +/* Structure used to transfer file descriptor and error info to the main + * process. fd will be zero if there was an error(in this case error + * will hold the errno). 
error will be zero and fd will be a valid + * identifier when open was success + */ +typedef struct { +int fd; +int error; +} FdInfo; + +static int sendfd(int sockfd, FdInfo fd_info) +{ +struct msghdr msg = { }; +struct iovec iov; +union { +struct cmsghdr cmsg; +char control[CMSG_SPACE(sizeof(int))]; +} msg_control; +struct cmsghdr *cmsg; + +iov.iov_base = fd_info; +iov.iov_len = sizeof(fd_info); + +memset(msg, 0, sizeof(msg)); +msg.msg_iov = iov; +msg.msg_iovlen = 1; +/* Don't send ancillary data if there is an error */ +if (!fd_info.error) { +msg.msg_control = msg_control; +msg.msg_controllen = sizeof(msg_control); + +cmsg = msg_control.cmsg; +cmsg-cmsg_len = CMSG_LEN(sizeof(fd_info.fd)); +cmsg-cmsg_level = SOL_SOCKET; +cmsg-cmsg_type = SCM_RIGHTS; +memcpy(CMSG_DATA(cmsg), fd_info.fd, sizeof(fd_info.fd)); +} +return sendmsg(sockfd, msg, 0); +} +
[Qemu-devel] [PATCH 2/2] [V2] virtio-9p: Use chroot interface in passthrough model
Make use of chroot interfaces for passthrough security model to fix the vulnerability in following symbolic links. Signed-off-by: M. Mohan Kumar mo...@in.ibm.com --- hw/virtio-9p-local.c | 294 ++--- 1 files changed, 228 insertions(+), 66 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index 656bfb3..cd55db5 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -19,16 +19,101 @@ #include sys/socket.h #include sys/un.h #include attr/xattr.h +#include libgen.h + +static int get_fd(FsContext *fs_ctx, const char *path, int flags, FsCred *credp) +{ +V9fsOpenRequest request; +int fd, error = 0; + +memset(request, 0, sizeof(request)); +request.data.path_len = strlen(path); +request.path.path = qemu_strdup(path); +request.data.flags = flags; +if (credp) { +request.data.mode = credp-fc_mode; +request.data.uid = credp-fc_uid; +request.data.gid = credp-fc_gid; +request.data.dev = credp-fc_rdev; +} +fd = v9fs_getfd(request, error, fs_ctx); +if (fd == -2) { +/* Socket read/write fail */ +errno = EIO; +return -1; +} +if (error) { +errno = error; +} else { +errno = error; +} +qemu_strdup(request.path.path); +return fd; +} +static int get_pfd(FsContext *fs_ctx, const char *path) +{ +V9fsOpenRequest request; +int fd, error = 0; +char *dpath = qemu_strdup(path); + +memset(request, 0, sizeof(request)); +request.path.path = dirname(dpath); +request.data.path_len = strlen(request.path.path); +request.data.flags = O_RDONLY | O_DIRECTORY | O_NOFOLLOW; +fd = v9fs_getfd(request, error, fs_ctx); +if (fd == -2) { +/* Socket read/write fail */ +errno = EIO; +return -1; +} +if (error) { +errno = error; +} else { +errno = 0; +} +qemu_free(dpath); +return fd; +} + +static int do_symlink(FsContext *fs_ctx, const char *oldpath, +const char *newpath, FsCred *credp) +{ +V9fsOpenRequest request; +int fd, error = 0; + +memset(request, 0, sizeof(request)); +request.data.path_len = strlen(newpath); +request.path.path = qemu_strdup(newpath); +request.data.oldpath_len = 
strlen(oldpath); +request.path.old_path = qemu_strdup(oldpath); +request.data.flags = S_IFLNK | O_CREAT; + +if (credp) { +request.data.mode = credp-fc_mode; +request.data.uid = credp-fc_uid; +request.data.gid = credp-fc_gid; +request.data.dev = credp-fc_rdev; +} +fd = v9fs_getfd(request, error, fs_ctx); +if (error) { +errno = error; +} else { +errno = error; +} +qemu_strdup(request.path.path); +return fd; +} static int local_lstat(FsContext *fs_ctx, const char *path, struct stat *stbuf) { int err; -err = lstat(rpath(fs_ctx, path), stbuf); -if (err) { -return err; -} + if (fs_ctx-fs_sm == SM_MAPPED) { +err = lstat(rpath(fs_ctx, path), stbuf); +if (err) { +return err; +} /* Actual credentials are part of extended attrs */ uid_t tmp_uid; gid_t tmp_gid; @@ -50,6 +135,22 @@ static int local_lstat(FsContext *fs_ctx, const char *path, struct stat *stbuf) sizeof(dev_t)) 0) { stbuf-st_rdev = tmp_dev; } +} else if (fs_ctx-fs_sm == SM_PASSTHROUGH) { +int pfd; +char *base, *basep; + +base = qemu_strdup(path); +basep = basename(base); + +pfd = get_pfd(fs_ctx, path); +err = fstatat(pfd, basep, stbuf, AT_SYMLINK_NOFOLLOW); +close(pfd); +free(base); +} else { +err = lstat(rpath(fs_ctx, path), stbuf); +if (err) { +return err; +} } return err; } @@ -88,21 +189,13 @@ static int local_set_xattr(const char *path, FsCred *credp) return 0; } -static int local_post_create_passthrough(FsContext *fs_ctx, const char *path, -FsCred *credp) +static int local_post_create_none(FsContext *fs_ctx, const char *path, +FsCred *credp) { if (chmod(rpath(fs_ctx, path), credp-fc_mode 0) 0) { return -1; } -if (lchown(rpath(fs_ctx, path), credp-fc_uid, credp-fc_gid) 0) { -/* - * If we fail to change ownership and if we are - * using security model none. 
Ignore the error - */ -if (fs_ctx-fs_sm != SM_NONE) { -return -1; -} -} +lchown(rpath(fs_ctx, path), credp-fc_uid, credp-fc_gid); return 0; } @@ -121,9 +214,16 @@ static ssize_t local_readlink(FsContext *fs_ctx, const char *path, } while (tsize == -1 errno == EINTR); close(fd); return tsize; -} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) || - (fs_ctx-fs_sm == SM_NONE)) { +
Re: [Qemu-devel] [PATCHv5 01/15] Introduce fw_name field to DeviceInfo structure.
On Tue, Nov 16, 2010 at 05:09:35PM +0900, Isaku Yamahata wrote: On Mon, Nov 15, 2010 at 04:30:55PM +0200, Gleb Natapov wrote: diff --git a/hw/piix_pci.c b/hw/piix_pci.c index b5589b9..38f9d9e 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -365,6 +365,7 @@ static PCIDeviceInfo i440fx_info[] = { static SysBusDeviceInfo i440fx_pcihost_info = { .init = i440fx_pcihost_initfn, .qdev.name= i440FX-pcihost, +.qdev.fw_name = pci, .qdev.size= sizeof(I440FXState), .qdev.no_user = 1, }; There are other pci host bridges for not pc architecture which needs similar modification. Yes, I know. This patch doesn't try to add fw_name everywhere it is needed, just for x86 for now. Things can be added incrementally. Please grep by pci_register_bus(). - apb_pci.c - bonito.c - grackle_pci.c - unin_pci.c - versatile_pci.c I'll look into using BusInfo-name if fw_name is not available. Then all pci buses will work. BTW, what happens for non-qdevfied pci host bridge? They should be converted someday. -- Gleb.
[Qemu-devel] Re: KVM call agenda for Nov 16
On 11/15/2010 10:20 PM, Chris Wright wrote: Please send in any agenda items you are interested in covering. qemu.git master and stable-0.13 stalled? -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH] add a command line option to specify the interface to send multicast packets on
On 11/15/2010 10:34 PM, Mike Ryan wrote: Given that it is unavailable on at least one platform (mingw32) it sounds like it should be detected in configure. However configure doesn't appear to be generated from autoconf/automake. Is it a custom build system? Yes, but I think #ifndef _WIN32 is enough in this case, as it is what is used in the rest of QEMU for sys/ioctl.h stuff. In Win32 you would use this: http://msdn.microsoft.com/en-us/library/aa365915%28v=VS.85%29.aspx Paolo
Re: [Qemu-devel] [PATCH 2/4] virtio: Convert fprintf() to error_report()
Worth mentioning that this fixes several messages lacking newline.
[Qemu-devel] Is there any approach to setup guest only network
Dear all, I have run QEMU with Debian. I can use TUN/TAP to connect to the internet or the host when running a single QEMU. However, when I run two QEMUs with two qemu-ifup scripts and different addresses (on the same host) at the same time, the second QEMU cannot connect to the host... Is there any approach to set up a guest-only network? I ask because I want to run ssh among the QEMUs on the same host. Thanks a lot. Best Regards, Sn
[Qemu-devel] Re: [PATCH 0/6] Save state error handling (kill off no_migrate)
Alex Williamson alex.william...@redhat.com wrote: Our code paths for saving or migrating a VM are full of functions that return void, leaving no opportunity for a device to cancel a migration, either from error or incompatibility. The ivshmem driver attempted to solve this with a no_migrate flag on the save state entry. I think the more generic and flexible way to solve this is to allow driver save functions to fail. This series implements that and converts ivshmem to use a set_params function to NAK migration much earlier in the process. This touches a lot of files, but the bulk of those changes are simply s/void/int/ and tacking a return 0 to the end of functions. Thanks, Alex Reviewed-by: Juan Quintela quint...@redhat.com Just to address some of mst's concerns: - no_migrate was wrong from the beginning. We have enough setup to disable things. - I did save handlers that didn't return any error because they didn't have it when I started; it would have been way better if I had done it the other way around. I was going to need this change done _anyways_, but didn't start from there because there were other things to fix. - we really need to be able to return errors in save paths: * ihvm device, it can migrate sometimes, and not others (we can discuss the details) * device assignment: we can't migrate, and we need a way to say so. * if we want reliable migration machine definitions, we are going to have to implement device versions at some point. This clearly requires failure of save migration (i.e. we ask to save a device with version n-1 (or without some subsection) and it finds that this would break). So I woh Later, Juan.
[Qemu-devel] Re: [PATCHv2 1/2] char: separate device and system fd handlers
Michael S. Tsirkin m...@redhat.com wrote: Create separate lists for system and device fd handlers. Device handlers will not run while vm is stopped. By default all fds are assumed system so they will keep running as before. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- qemu-char.h |6 +++- qemu-kvm.c |2 +- qemu-tool.c | 10 + vl.c| 117 ++- 4 files changed, 92 insertions(+), 43 deletions(-) diff --git a/qemu-char.h b/qemu-char.h index 18ad12b..ad09f56 100644 --- a/qemu-char.h +++ b/qemu-char.h @@ -101,7 +101,11 @@ CharDriverState *qemu_chr_open_eventfd(int eventfd); extern int term_escape_char; /* async I/O support */ - +int qemu_set_fd_handler3(bool device, int fd, + IOCanReadHandler *fd_read_poll, + IOHandler *fd_read, + IOHandler *fd_write, + void *opaque); No this horror. Can't we just create: qemu_set_device_handler() qemu_set_system_handler() or whatever named you like? qemu_set_fd_handler2 is already bad enough, adding another one just make things worse in my humble opinion. +int qemu_set_fd_handler3(bool device, + int fd, IOCanReadHandler *fd_read_poll, IOHandler *fd_read, IOHandler *fd_write, void *opaque) { +IOHandlerRecordList *list; IOHandlerRecord *ioh; +list = device ? device_io_handlers: system_io_handlers; + If you are going to use this, passing list paramenter instead of device looks like a much better option. It would indeed make things go better. 
if (!fd_read !fd_write) { -QLIST_FOREACH(ioh, io_handlers, next) { +QLIST_FOREACH(ioh, list, next) { if (ioh-fd == fd) { ioh-deleted = 1; break; } } } else { -QLIST_FOREACH(ioh, io_handlers, next) { +QLIST_FOREACH(ioh, list, next) { if (ioh-fd == fd) goto found; } ioh = qemu_mallocz(sizeof(IOHandlerRecord)); -QLIST_INSERT_HEAD(io_handlers, ioh, next); +QLIST_INSERT_HEAD(list, ioh, next); found: ioh-fd = fd; ioh-fd_read_poll = fd_read_poll; @@ -998,6 +1003,19 @@ int qemu_set_fd_handler2(int fd, return 0; } + +/* XXX: fd_read_poll should be suppressed, but an API change is + necessary in the character devices to suppress fd_can_read(). */ +int qemu_set_fd_handler2(int fd, + IOCanReadHandler *fd_read_poll, + IOHandler *fd_read, + IOHandler *fd_write, + void *opaque) +{ +return qemu_set_fd_handler3(false, fd, fd_read_poll, fd_read, fd_write, +opaque); +} + int qemu_set_fd_handler(int fd, IOHandler *fd_read, IOHandler *fd_write, @@ -1242,9 +1260,52 @@ void qemu_system_powerdown_request(void) qemu_notify_event(); } -void main_loop_wait(int nonblocking) +static inline int get_ioh_fds(IOHandlerRecordList *list, + int nfds, fd_set *rfds, fd_set *wfds) { IOHandlerRecord *ioh; +QLIST_FOREACH(ioh, list, next) { +if (ioh-deleted) +continue; +if (ioh-fd_read +(!ioh-fd_read_poll || + ioh-fd_read_poll(ioh-opaque) != 0)) { +FD_SET(ioh-fd, rfds); +if (ioh-fd nfds) +nfds = ioh-fd; +} +if (ioh-fd_write) { +FD_SET(ioh-fd, wfds); +if (ioh-fd nfds) +nfds = ioh-fd; +} +} +return nfds; +} + +static inline void call_ioh_fds(IOHandlerRecordList *list, + fd_set *rfds, fd_set *wfds) +{ +IOHandlerRecord *ioh, *pioh; + +QLIST_FOREACH_SAFE(ioh, list, next, pioh) { +if (ioh-deleted) { +QLIST_REMOVE(ioh, next); +qemu_free(ioh); +continue; +} +if (ioh-fd_read FD_ISSET(ioh-fd, rfds)) { +ioh-fd_read(ioh-opaque); +if (!(ioh-fd_read_poll ioh-fd_read_poll(ioh-opaque))) +FD_CLR(ioh-fd, rfds); +} +if (ioh-fd_write FD_ISSET(ioh-fd, wfds)) { +ioh-fd_write(ioh-opaque); +} +} +} +void main_loop_wait(int 
nonblocking) +{ fd_set rfds, wfds, xfds; int ret, nfds; struct timeval tv; @@ -1260,26 +1321,13 @@ void main_loop_wait(int nonblocking) os_host_main_loop_wait(timeout); /* poll any events */ -/* XXX: separate device handlers from system ones */ nfds = -1; FD_ZERO(rfds); FD_ZERO(wfds);
[Qemu-devel] Re: [PATCH 1/2] [V2] virtio-9p: Use chroot to safely access files in passthrough model
On Tue, Nov 16, 2010 at 8:54 AM, M. Mohan Kumar mo...@in.ibm.com wrote: +static int read_openrequest(int sockfd, V9fsOpenRequest *request) +{ + int bytes, retval; + retval = recv(sockfd, request, sizeof(request-data), 0); + if (retval = 0) { + return -1; + } + bytes = retval; + request-path.path = qemu_mallocz(request-data.path_len + 1); Leaked on error. + retval = recv(sockfd, (void *)request-path.path, + request-data.path_len, 0); + if (retval = 0) { + return -1; + } + bytes += retval; + if (request-data.oldpath_len) { + request-path.old_path = + qemu_mallocz(request-data.oldpath_len + 1); Leaked on error. send/recv/read/write could be interrupted by a signal. The patch does not handle this. There is a qemu_write_full() function available to read a number of bytes and handle EINTR. Speaking of signals, what about signal handlers that the main qemu process has set up? If a signal comes in then we'll start executing the qemu signal handler code, which is wrong. The subprocess needs to either block signals or ignore all the possibly registered signals. atexit(3) handlers will run when the forked process exits. This could also lead to weird behavior. How does the subprocess terminate? I only see error exit cases that print something to stderr in the v9fs_chroot() main loop. Stefan
[Qemu-devel] Re: [PATCH v9 1/8] pci: revise pci command register initialization
On Tue, Nov 16, 2010 at 05:26:05PM +0900, Isaku Yamahata wrote: This patch cleans up command register initialization with comments. It also fixes the initialization of io/memory bit of command register. Those bits for type 1 device is RW. Those bits for type 0 device is RO = 0 if it has no io/memory BAR RW if it has io/memory BAR Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp There's a bug here: you can not assume that device that has no io BAR claims no io transactions. Another bug is that migrating from qemu where a bit is writeable to one where it's RO creates a situation where a RW bit becomes RO, or the reverse, which might confuse guests. So we will need a compatibility flag and set it for old machine types. --- Changes v8 - v9 - patch squash --- hw/pci.c | 58 +- 1 files changed, 57 insertions(+), 1 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index 962886e..2fc8ab1 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -544,8 +544,53 @@ static void pci_init_wmask(PCIDevice *dev) dev-wmask[PCI_CACHE_LINE_SIZE] = 0xff; dev-wmask[PCI_INTERRUPT_LINE] = 0xff; + +/* + * bit 0: PCI_COMMAND_IO + *type 0: if IO BAR is used, RW + *This is handled by pci_register_bar() + *type 1: RW: + *This is fixed by pci_init_wmask_bridge() + * bit 1: PCI_COMMAND_MEMORY + *type 0: if IO BAR is used, RW + *This is handled by pci_register_bar() + *type 1: RW + *This is fixed by pci_init_wmask_bridge() + * bit 2: PCI_COMMAND_MASTER + *type 0: RW if bus master + *type 1: RW + * bit 3: PCI_COMMAND_SPECIAL + *RO=0, optionally RW: Such device should set this bit itself + * bit 4: PCI_COMMAND_INVALIDATE + *RO=0, optionally RW: Such device should set this bit itself + * bit 5: PCI_COMMAND_VGA_PALETTE + *RO=0, optionally RW: Such device should set this bit itself + * bit 6: PCI_COMMAND_PARITY + *RW with exceptions: Such device should clear this bit itself + *Given that qemu doesn't emulate pci bus cycles, so that there + *is no place to generate parity error. 
So just making this + *register RW is okay because there is no place which refers + *this bit. + *TODO: When device assignment tried to inject PERR# into qemu, + * some extra work would be needed. + * bit 7: PCI_COMMAND_WAIT: reserved (PCI 3.0) + *RO=0 + * bit 8: PCI_COMMAND_SERR + *RW with exceptions: Such device should clear this bit itself + *Given that qemu doesn't emulate pci bus cycles, so that there + *is no place to generate system error. So just making this + *register RW is okay because there is no place which refers + *this bit. + *TODO: When device assignment tried to inject SERR# into qemu, + * some extra work would be needed. + * bit 9: PCI_COMMAND_FAST_BACK + *RO=0, optionally RW: Such device should set this bit itself + * bit 10: PCI_COMMAND_INTX_DISABLE + * RW + * bit 11-15: reserved + */ Let's document non-obvious things, like maybe COMMAND_PARITY/COMMAND_SERR. I don't cherish writing each bit up in two places. pci_set_word(dev-wmask + PCI_COMMAND, - PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | + PCI_COMMAND_MASTER | PCI_COMMAND_PARITY | PCI_COMMAND_SERR | PCI_COMMAND_INTX_DISABLE); memset(dev-wmask + PCI_CONFIG_HEADER_SIZE, 0xff, @@ -554,6 +599,9 @@ static void pci_init_wmask(PCIDevice *dev) static void pci_init_wmask_bridge(PCIDevice *d) { +pci_word_test_and_set_mask(d-wmask + PCI_COMMAND, + PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + /* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and PCI_SEC_LETENCY_TIMER */ memset(d-wmask + PCI_PRIMARY_BUS, 0xff, 4); @@ -791,6 +839,14 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, if (region_num == PCI_ROM_SLOT) { /* ROM enable bit is writeable */ wmask |= PCI_ROM_ADDRESS_ENABLE; +} else { +if (r-type PCI_BASE_ADDRESS_SPACE_IO) { +pci_word_test_and_set_mask(pci_dev-wmask + PCI_COMMAND, + PCI_COMMAND_IO); +} else { +pci_word_test_and_set_mask(pci_dev-wmask + PCI_COMMAND, + PCI_COMMAND_MEMORY); +} } pci_set_long(pci_dev-config + addr, type); if (!(r-type 
PCI_BASE_ADDRESS_SPACE_IO) -- 1.7.1.1
[Qemu-devel] Re: [PATCH v9 2/8] pci: fix accesses to pci status register
On Tue, Nov 16, 2010 at 05:26:06PM +0900, Isaku Yamahata wrote: pci status register is 16 bit, not 8 bit. So use helper function to manipulate status register. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp At least the subject is wrong: the relevant bit is in the low byte. So the code is correct as written on BE machines. --- hw/pci.c | 21 + 1 files changed, 13 insertions(+), 8 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index 2fc8ab1..52fe655 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -127,9 +127,11 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) static void pci_update_irq_status(PCIDevice *dev) { if (dev-irq_state) { -dev-config[PCI_STATUS] |= PCI_STATUS_INTERRUPT; +pci_word_test_and_set_mask(dev-config + PCI_STATUS, + PCI_STATUS_INTERRUPT); } else { -dev-config[PCI_STATUS] = ~PCI_STATUS_INTERRUPT; +pci_word_test_and_clear_mask(dev-config + PCI_STATUS, + PCI_STATUS_INTERRUPT); } } @@ -404,7 +406,7 @@ void pci_device_save(PCIDevice *s, QEMUFile *f) * in irq_state which we are saving. * This makes us compatible with old devices * which never set or clear this bit. */ -s-config[PCI_STATUS] = ~PCI_STATUS_INTERRUPT; +pci_word_test_and_clear_mask(s-config + PCI_STATUS, PCI_STATUS_INTERRUPT); vmstate_save_state(f, pci_get_vmstate(s), s); /* Restore the interrupt status bit. 
*/ pci_update_irq_status(s); @@ -530,7 +532,7 @@ static void pci_init_cmask(PCIDevice *dev) { pci_set_word(dev-cmask + PCI_VENDOR_ID, 0x); pci_set_word(dev-cmask + PCI_DEVICE_ID, 0x); -dev-cmask[PCI_STATUS] = PCI_STATUS_CAP_LIST; +pci_set_word(dev-cmask + PCI_STATUS, PCI_STATUS_CAP_LIST); dev-cmask[PCI_REVISION_ID] = 0xff; dev-cmask[PCI_CLASS_PROG] = 0xff; pci_set_word(dev-cmask + PCI_CLASS_DEVICE, 0x); @@ -1697,8 +1699,9 @@ static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id, { uint8_t next, prev; -if (!(pdev-config[PCI_STATUS] PCI_STATUS_CAP_LIST)) +if (!(pci_get_word(pdev-config + PCI_STATUS) PCI_STATUS_CAP_LIST)) { return 0; +} for (prev = PCI_CAPABILITY_LIST; (next = pdev-config[prev]); prev = next + PCI_CAP_LIST_NEXT) @@ -1804,7 +1807,7 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, config[PCI_CAP_LIST_ID] = cap_id; config[PCI_CAP_LIST_NEXT] = pdev-config[PCI_CAPABILITY_LIST]; pdev-config[PCI_CAPABILITY_LIST] = offset; -pdev-config[PCI_STATUS] |= PCI_STATUS_CAP_LIST; +pci_word_test_and_set_mask(pdev-config + PCI_STATUS, PCI_STATUS_CAP_LIST); memset(pdev-used + offset, 0xFF, size); /* Make capability read-only by default */ memset(pdev-wmask + offset, 0, size); @@ -1827,8 +1830,10 @@ void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size) memset(pdev-cmask + offset, 0, size); memset(pdev-used + offset, 0, size); -if (!pdev-config[PCI_CAPABILITY_LIST]) -pdev-config[PCI_STATUS] = ~PCI_STATUS_CAP_LIST; +if (!pdev-config[PCI_CAPABILITY_LIST]) { +pci_word_test_and_clear_mask(pdev-config + PCI_STATUS, + PCI_STATUS_CAP_LIST); +} } /* Reserve space for capability at a known offset (to call after load). */ -- 1.7.1.1
Re: [Qemu-devel] Is there any approach to setup guest only network
On Tue, Nov 16, 2010 at 17:11, 郭沐錫 maxgreg13...@gmail.com wrote: Dear all I have run QEMU with debian I can use TUN/TAP to connect the internet or host when single QEMU However when I run two QEMU with two qemu-ifup and different address (on the same host) at the same time, the second QEMU cannot connect to host... Is there are any approach to setup guest only network. Because I want to run ssh among the QEMUs on the same host. Try to paste your complete command together with the parameters you used in this list. Hopefully then we could analyze it. PS: have you assigned different mac address on both guests? -- regards, Mulyadi Santosa Freelance Linux trainer and consultant blog: the-hydra.blogspot.com training: mulyaditraining.blogspot.com
[Qemu-devel] [Bug 563582] Re: KVM 9.10 crashes for suse-10 as guest
I'm guessing that by crash he means lost connectivity to VM. Then it could be bug 579276 or bug 584048. -- KVM 9.10 crashes for suse-10 as guest https://bugs.launchpad.net/bugs/563582 You received this bug notification because you are a member of qemu-devel-ml, which is subscribed to QEMU. Status in QEMU: New Status in “qemu-kvm” package in Ubuntu: Incomplete Bug description: Binary package hint: qemu-kvm I have tried KVM of Ubuntu 9.10 Karmic, Koala, Version for virtualization; on 64 bit hardware platform I have installed 4 VM's (Win2k3, TWO WIN-XP's, One Suse-10 Enterprise Edition); I observe that suse-10 virtual machine crashes after about 2-5 hours; Now after I have installed oracle, in suse-10 PC seems to crash (KVM crash after 1 hour); Anyway to see the logs after the crash. How to analyse such abnormal activities; Pls guide me I read your post on changing the network card interface to e1000, r8139 it does not work for me; Can you please suggest how to resolve this issue since it is very urgent and important; Many thanks Raghav
Re:Re: Re: [Qemu-devel] How to make shadow memory for a process? and how to trace the data propation from the instruction level in QEMU?
Hi! Hi OK it's getting interesting perhaps it would lead into instrumentation topic, which is quite hot topic in qemu-devel quite recently, so you jump into the wagon just about the right time :) OK, one thing for sure here is, I think you can implement your idea on top of several (not so complete) existing frameworks in Qemu. Tracing...is one of them...not sure about the rest... You are right! I have considered implementing my idea based on some existing framework such as TEMU. But on the other hand, I think I should understand how QEMU works first (TEMU is based on QEMU), in order that once I want to add some other features, I know, at leats, where to modify/add code to implement new functions. That’s also the reason why I started this topic here. Thanks for sharing that...it's new stuff for me. So, why don't you just pick TEMU and improve it instead of...uhm...sorry if I am wrong, working from scratch? After all, I believe in both Argos and TEMU (and maybe other similar projects), they share common codes here and there. But ehm...CMIIW, seems like TEMU is based on Qemu 0.9,x, right? So it's sorry I forgot the name, the generated code is mostly a constructed by fragments of small codes generated by gcc. Now, it is qemu which does it by itself. So, a lot of things change (substantially). L If things change a lot, it is more important for me to figure out how things work in QEMU. Especially something discussed in this topic. I agree that should be the way it worksbut. (see below) How about using unused one of unused PTE flags for such tag? Sorry, what is the PTE flag? Page Table Entry...i believe not all flags are really used by the OS nowadays, so I guess you can utilize 1 or 2 bits there whenever possible... This is the problem of granularity. In fact, one-tag-per-page is too coarse. One tag per byte, or one tag per word, or one tag per double words may be often-seen. 
As we know, commonly, only a few bytes in a page are malicious, and alarm is raised only when these bytes are used, not when the page is used. Thus the fine-grained tag is necessary, which requires, according to my knowledge, the shadow memory. May we know, what kind of information do you plan to store in such tag? Since the shadow memory exists, you can store any information in the tag as you want. For example, you can use one bit as the tag to indicate whether a byte is tainted or not; or you can use a C-language-like structure as the tag to contain more information, including stack and heap and so on. This depends on your analysis requirement. I think you should hook all the memory operation related opcode (or to be precise, Qemu opcode). That way, you won't miss any.. J That’s the critical problem. Could you tell me how to hook the opcode? I thought before to modify the instruction-translation functions and recompile QEMU, is that right? Is there any better way? Yes, I wanna make QEMU cooperate with the GUEST OS. In fact, malware under analysis is run within the GUEST OS. Hm, I thought it would be host OS + qemudon't you think, if it is guest OS +qemu, while there is a chance guest OS is compromised first, then we get such unreliable data? Or am I missing something here? This is a good question! The scenario that we use the information provided by guest OS is limited within the following scope: “the being analyzed software’s major purpose is NOT to attack OS kernel, but to implement the malicious behavior in the user space”. Since the software under analysis is in the user space and does not touch the kernel, we can safely use the information provided by OS kernel. However, if the main purpose of the software is to attack the OS (for example, rootks), the information got from OS kernel is not reliable. Fortunately, in this case, the target of taint analysis is NOT the user space software, but the kernel. 
That is to say, we need not any information provided by kernel. For the kernel related dynamic analysis, you can reference the paper “HookScout: Proactive Binary-Centric Hook Detection”. The guest os collects “higher” semantic from the OS level, and the QEMU collects “lower” semantic from the instruction level. Combination of both semantics is necessary in the analysis process. The question is, in a situation where malware already compromise the higher semantic, could we trust the analysis? The question is: how to communicate between the QEMU and the guest OS, so that they can cooperate with each other? OK, so let's assume it's really guest OS +qemu...i think, uhm, better create pseudo device, quite similar with virtioor you can think it's like /dev/sda, /dev/rtc etc... the guest OS must somewhat be installed with a driver which knows how to read and talk to this device. Via the driver, fed any analysis resultqemu collects it...and finally pass it to host OS. Other
Re:Re: [Qemu-devel] How to make shadow memory for a process? and how to trace the data propation from the instruction level in QEMU?
Mulyadi Santosa writes: Yes, I have read that paper, it’s wonderful! Besides the Argos, the bitblaze group, led by Dawn Song in Berkeley, has achieved great success in the taint analysis. The website about their dynamic analysis work (called TEMU) can be found at: http://bitblaze.cs.berkeley.edu/temu.html And TEMU is now open-source. Thanks for sharing that...it's new stuff for me. So, why don't you just pick TEMU and improve it instead of...uhm...sorry if I am wrong, working from scratch? After all, I believe in both Argos and TEMU (and maybe other similar projects), they share common codes here and there. But ehm...CMIIW, seems like TEMU is based on Qemu 0.9,x, right? So it's sorry I forgot the name, the generated code is mostly a constructed by fragments of small codes generated by gcc. Now, it is qemu which does it by itself. So, a lot of things change (substantially). I haven't read the TEMU work, but from the problem description I think you want something similar to Practical Taint-Based Protection using Demand Emulation or many others (I remember reading some of them a few years ago on the ISCA, MICRO and/or ASPLOS conferences). Yes! That is just what I want. A practical taint-analysis environment plus a demand emulation. This topic includes things that I recognized as critical. Have you any suggestions? Yes. For each process’s memory space A, I wanna make a shadow memory B. The shadow memory is used to store the tag of data. In other words, if addr in memory A is tainted, then the corresponding byte in B should be marked to indicate that addr in A is tainted. The main question here is... what is the granularity that you want to track with? Bytes? Words? Pages? This will greatly influence which is your best approach. I think one byte per tag is necessary for malware analysis in most cases, because only a few bytes are used to launch an attack. For example, a few tainted bytes sent to EIP register will cause CPU to do bad things. 
Now that I think of it, you could use the tracing points I sent for guest virtual memory accesses, and instrument them instead of calling a file-tracing backend (this should provide a hook for an arbitrary granularity). Then, simply keep track also of address-space changes and your instrumentation code can always know when to activate propagation. Sorry, what is “a file-tracing backend”? Could you be a little more detailed? I think I need byte-level granularity. Thanks! This, together with the optimization I sent for dynamic control of trace generation in TCG emulation code should get you on tracks. Of course, you should still modify all register-accessing instructions to propagate information passing through the register set. For that, maybe you could start with the fetch tracing/instrumentation point I sent long time ago, which keeps track of general-purpose register usage/definition on x86 (although I'm sure I left some astray usages due to the decoding complexity in x86). Thanks! I will read that code first, though I am currently just a newbie.L The guest os collects “higher” semantic from the OS level, and the QEMU collects “lower” semantic from the instruction level. Combination of both semantics is necessary in the analysis process. The question is, in a situation where malware already compromise the higher semantic, could we trust the analysis? Beware, I've read exactly this kind of scheme on previous top-tier conferences (but I think tests were using an architectural simulator, so it's not for a current production environment). I've found it :) Secure program execution via dynamic information flow tracking ASPLOS 2004 That is a significant paper, which is cited for more than 300 times! The question is: how to communicate between the QEMU and the guest OS, so that they can cooperate with each other? 
A few choices here, but you should first define if the communication must be based just on control signals, and/or providing memory storage: * virtual device : If you need some kind of storage that the guest OS must access, you could look at the ivshmem device * backdoor instruction : It's the simplest option; I sent some patch series recently with two different implementations for x86. Both of control signals and (shadow) memory storage are required. So, the virtual device may be the right choice. In this year’s top security conferences (Oakland, CCS, Usenix Security, NDSS and so on), many works are based on virtual technology. So I think QEMU is a good choice for future academic research. Thank you very much for your time and help! Best regards! F. Zhang
[Qemu-devel] [PATCH] trace: Trace vm_start()/vm_stop()
VM state change notifications are invoked from vm_start()/vm_stop(). Trace these state changes so we can reason about the state of the VM from trace output. Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com --- trace-events |3 +++ vl.c |3 +++ 2 files changed, 6 insertions(+), 0 deletions(-) diff --git a/trace-events b/trace-events index 947f8b0..da03d4b 100644 --- a/trace-events +++ b/trace-events @@ -189,3 +189,6 @@ disable sun4m_iommu_mem_writel_pgflush(uint32_t val) page flush %x disable sun4m_iommu_page_get_flags(uint64_t pa, uint64_t iopte, uint32_t ret) get flags addr %PRIx64 = pte %PRIx64, *pte = %x disable sun4m_iommu_translate_pa(uint64_t addr, uint64_t pa, uint32_t iopte) xlate dva %PRIx64 = pa %PRIx64 iopte = %x disable sun4m_iommu_bad_addr(uint64_t addr) bad addr %PRIx64 + +# vl.c +disable vm_state_notify(int running, int reason) running %d reason %d diff --git a/vl.c b/vl.c index c58583d..87e76ad 100644 --- a/vl.c +++ b/vl.c @@ -158,6 +158,7 @@ int main(int argc, char **argv) #include slirp/libslirp.h +#include trace.h #include qemu-queue.h #include cpus.h #include arch_init.h @@ -1074,6 +1075,8 @@ void vm_state_notify(int running, int reason) { VMChangeStateEntry *e; +trace_vm_state_notify(running, reason); + for (e = vm_change_state_head.lh_first; e; e = e-entries.le_next) { e-cb(e-opaque, running, reason); } -- 1.7.2.3
[Qemu-devel] [PATCH v2] pc: disable the BOCHS BIOS panic port
We have an OS which writes to port 0x400 when probing for special hardware. This causes an exit of the VM. With SeaBIOS this port isn't used anyway. Signed-off-by: Bernhard Kohl bernhard.k...@nsn.com --- Changes v1 - v2: Keep the ports silent. Don't print debug output if DEBUG_BIOS is enabled which might be confusing. --- hw/pc.c |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index 3bf3862..76eabe8 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -434,8 +434,8 @@ static void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val) /* Bochs BIOS messages */ case 0x400: case 0x401: -fprintf(stderr, BIOS panic at rombios.c, line %d\n, val); -exit(1); +/* used to be panic, now unused */ +break; case 0x402: case 0x403: #ifdef DEBUG_BIOS -- 1.7.2.3
[Qemu-devel] Re: [PATCH] pci: allow hotplug removal of cold-plugged devices
On Mon, Nov 15, 2010 at 11:23:44AM +0900, Isaku Yamahata wrote: Thank you for catching pcie part. The following fix is necessary. diff --git a/hw/pcie.c b/hw/pcie.c index 4df48b8..f461c1c 100644 --- a/hw/pcie.c +++ b/hw/pcie.c @@ -221,7 +221,7 @@ static int pcie_cap_slot_hotplug(DeviceState *qdev, */ assert(PCI_FUNC(pci_dev-devfn) == 0); -if (state) { +if (state == PCI_HOTPLUG_ENABLED) { pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC); Okay. Although note that the enum values were carefully selected to make the old code work as well. On Sun, Nov 14, 2010 at 04:18:04PM +0200, Michael S. Tsirkin wrote: This patch fixes 5beb8ad503c88a76f2b8106c3b74b4ce485a60e1 which broke hotplug removal of cold plugged devices: - pass addition/removal state to hotplug callbacks - use that in piix and pcie This also fixes an assert on hotplug removal of coldplugged express devices. Reported-by: by Cam Macdonell c...@cs.ualberta.ca. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp Signed-off-by: Michael S. Tsirkin m...@redhat.com --- So I think the below would be the cleanest way to fix the bug as we keep the hot/cold plug logic in a central palce. Untested. Comments? Cam? 
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 66c7885..f549089 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -585,7 +585,8 @@ static void pciej_write(void *opaque, uint32_t addr, uint32_t val) PIIX4_DPRINTF(pciej write %x == %d\n, addr, val); } -static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, int state); +static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, +PCIHotplugState state); static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s) { @@ -615,18 +616,23 @@ static void disable_device(PIIX4PMState *s, int slot) s-pci0_status.down |= (1 slot); } -static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, int state) +static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, + PCIHotplugState state) { int slot = PCI_SLOT(dev-devfn); PIIX4PMState *s = DO_UPCAST(PIIX4PMState, dev, DO_UPCAST(PCIDevice, qdev, qdev)); -if (!dev-qdev.hotplugged) +/* Don't send event when device is enabled during qemu machine creation: + * it is present on boot, no hotplug event is necessary. We do send an + * event when the device is disabled later. */ +if (state == PCI_COLDPLUG_ENABLED) { return 0; +} s-pci0_status.up = 0; s-pci0_status.down = 0; -if (state) { +if (state == PCI_HOTPLUG_ENABLED) { enable_device(s, slot); } else { disable_device(s, slot); diff --git a/hw/pci.c b/hw/pci.c index 30e1603..316b24f 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1566,8 +1566,11 @@ static int pci_qdev_init(DeviceState *qdev, DeviceInfo *base) pci_add_option_rom(pci_dev); if (bus-hotplug) { -/* lower layer must check qdev-hotplugged */ -rc = bus-hotplug(bus-hotplug_qdev, pci_dev, 1); +/* Let buses differentiate between hotplug and when device is + * enabled during qemu machine creation. */ +rc = bus-hotplug(bus-hotplug_qdev, pci_dev, + qdev-hotplugged ? 
PCI_HOTPLUG_ENABLED: + PCI_COLDPLUG_ENABLED); if (rc != 0) { int r = pci_unregister_device(pci_dev-qdev); assert(!r); @@ -1581,7 +1584,8 @@ static int pci_unplug_device(DeviceState *qdev) { PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev); -return dev-bus-hotplug(dev-bus-hotplug_qdev, dev, 0); +return dev-bus-hotplug(dev-bus-hotplug_qdev, dev, + PCI_HOTPLUG_DISABLED); } void pci_qdev_register(PCIDeviceInfo *info) diff --git a/hw/pci.h b/hw/pci.h index 7100804..09b3e4c 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -214,7 +214,15 @@ int pci_device_load(PCIDevice *s, QEMUFile *f); typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level); typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); -typedef int (*pci_hotplug_fn)(DeviceState *qdev, PCIDevice *pci_dev, int state); + +typedef enum { +PCI_HOTPLUG_DISABLED, +PCI_HOTPLUG_ENABLED, +PCI_COLDPLUG_ENABLED, +} PCIHotplugState; + +typedef int (*pci_hotplug_fn)(DeviceState *qdev, PCIDevice *pci_dev, + PCIHotplugState state); void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent, const char *name, int devfn_min); PCIBus *pci_bus_new(DeviceState
[Qemu-devel] [PATCH comment tweaked] msix: allow byte and word reading from mmio
Although explicitly disallowed by the PCI spec, some guests read a single byte or word from mmio. Likely a guest OS bug, but I have an OS which reads single bytes and it works fine on real hardware. Signed-off-by: Bernhard Kohl bernhard.k...@nsn.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- OK so it could look something like the below. However, my question is: do we need to put this in or can the guest simply be fixed? hw/msix.c | 31 +++ 1 files changed, 27 insertions(+), 4 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index f66d255..38dff59 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -102,10 +102,28 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) return pci_get_long(page + offset); } -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) + /* Note: + * PCI spec requires that all MSI-X table accesses are either DWORD or QWORD, + * size aligned. Some guests seem to violate this rule for read accesses, + * performing single byte reads. Since it's easy to support this, let's do so. + * Also support 16 bit size aligned reads, just in case. 
+ */ +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) { -fprintf(stderr, MSI-X: only dword read is allowed!\n); -return 0; +PCIDevice *dev = opaque; +unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x1; +void *page = dev-msix_table_page; + +return pci_get_word(page + offset); +} + +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) +{ +PCIDevice *dev = opaque; +unsigned int offset = addr (MSIX_PAGE_SIZE - 1); +void *page = dev-msix_table_page; + +return pci_get_byte(page + offset); } static uint8_t msix_pending_mask(int vector) @@ -192,6 +210,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, msix_handle_mask_update(dev, vector); } +/* PCI spec: + * For all accesses to MSI-X Table and MSI-X PBA fields, software must use + * aligned full DWORD or aligned full QWORD transactions; otherwise, the result + * is undefined. + */ static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, uint32_t val) { @@ -203,7 +226,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = { }; static CPUReadMemoryFunc * const msix_mmio_read[] = { -msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl +msix_mmio_readb, msix_mmio_readw, msix_mmio_readl }; /* Should be called from device's map method. */ -- 1.7.3.2.91.g446ac
[Qemu-devel] Re: [PATCH v2] pc: disable the BOCHS BIOS panic port
On 16.11.2010, at 13:28, Bernhard Kohl wrote: We have an OS which writes to port 0x400 when probing for special hardware. This causes an exit of the VM. With SeaBIOS this port isn't used anyway. Signed-off-by: Bernhard Kohl bernhard.k...@nsn.com Signed-off-by: Alexander Graf ag...@suse.de Alex
Re: [Qemu-devel] Re: [PATCH 0/2] v8 Decouple block device removal from device removal
* Kevin Wolf kw...@redhat.com [2010-11-16 08:05]: Am 16.11.2010 14:51, schrieb Luiz Capitulino: On Fri, 12 Nov 2010 18:38:57 +0100 Kevin Wolf kw...@redhat.com wrote: Am 12.11.2010 18:07, schrieb Ryan Harper: details, details, v8 This patch series decouples the detachment of a block device from the removal of the backing pci-device. Removal of a hotplugged pci device requires the guest to respond before qemu tears down the block device. In some cases, the guest may not respond leaving the guest with continued access to the block device. Mgmt layer doesn't have a reliable method to force a disconnect. The new monitor command, drive_del, will revoke a guests access to the block device independently of the removal of the pci device. The first patch implements drive_del, the second patch implements the qmp version of the monitor command. Changes since v7: - Fixed up doc strings (delete - drive_del) Changes since v6: - Updated patch description - Dropped bdrv_unplug and inlined in drive_del - Explicitly invoke drive_uninit() Changes since v5: - Removed dangling pointers in guest and host state. This ensures things like info block no longer displays the deleted drive; though info pci will continue to display the pci device until the guest responds to the removal request. - Renamed drive_unplug - drive_del Changes since v4: - Droppped drive_get_by_id patch and use bdrv_find() instead - Added additional details about drive_unplug to hmp/qmp interface Changes since v3: - Moved QMP command for drive_unplug() to separate patch Changes since v2: - Added QMP command for drive_unplug() Changes since v1: - CodingStyle fixes - Added qemu_aio_flush() to bdrv_unplug() Signed-off-by: Ryan Harper ry...@us.ibm.com Thanks, applied both to the block branch. I guess the conclusion was that we don't want the new command in QMP? 
http://lists.gnu.org/archive/html/qemu-devel/2010-11/msg01084.html If you compare the time of these mails, Markus sent his mail only a few minutes after I had applied the patches and posted this. Ryan split the patch in two parts only to allow dropping the QMP part if we decided so, so I think he'll agree. I'm going to drop the second Indeed. patch from my queue again before I send a pull request. Kevin -- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx ry...@us.ibm.com
Re: [Qemu-devel] [PATCH v2 1/2] Minimal RAM API support
On 11/16/2010 09:02 AM, Alexander Graf wrote: +static QemuRamSlot *qemu_ram_find_slot(target_phys_addr_t start_addr, + ram_addr_t size) +{ +QemuRamSlot *slot; + +QLIST_FOREACH(slot,ram_slots.slots, next) { +if (slot-start_addr == start_addr slot-size == size) { +return slot; +} + +if (ranges_overlap(start_addr, size, slot-start_addr, slot-size)) { +abort(); Should display a message before aborting. Why not use hw_error? Another good suggestion. Regards, Anthony Liguori Alex-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [RESEND][PATCH] Remove 16-character limit on process title
On 11/05/2010 11:35 AM, John Morrissey wrote: qemu uses prctl() to set its process title. I bumped up against prctl()'s 16-character limit recently, when adding process title support to libvirt[1][2]. The attached patch overwrites argv instead. Linux seems to maintain the length of the original args, even when the new args are shorter and NULL-terminated, so the trailing whitespace in ps(1) output is probably unavoidable. I've seen the same result with other daemons that overwrite argv. john [1] https://www.redhat.com/archives/libvir-list/2010-October/msg00565.html [2] http://libvirt.org/git/?p=libvirt.git;a=commit;h=c08c7b0143b8cdc542e5f4137623d412340c5cf2 Needs a Signed-off-by. Regards, Anthony Liguori
Re: [Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability
On 11/08/2010 01:33 PM, Daniel P. Berrange wrote: This introduces a new tracing backend that targets the SystemTAP implementation of DTrace userspace tracing. The core functionality should be applicable and standard across any DTrace implementation on Solaris, OS-X, *BSD, but the Makefile rules will likely need some small additional changes to cope with OS specific build requirements. This backend builds a little differently from the other tracing backends. Specifically there is no 'trace.c' file, because the 'dtrace' command line tool generates a '.o' file directly from the dtrace probe definition file. The probe definition is usually named with a '.d' extension but QEMU uses '.d' files for its external makefile dependancy tracking, so this uses '.dtrace' as the extension for the probe definition file. The 'tracetool' program gains the ability to generate a trace.h file for DTrace, and also to generate the trace.d file containing the dtrace probe definition. Example usage of a dtrace probe in systemtap looks like: probe process(qemu).mark(qemu_malloc) { printf(Malloc %d %p\n, $arg1, $arg2); } * .gitignore: Ignore trace-dtrace.* * Makefile: Extra rules for generating DTrace files * Makefile.obj: Don't build trace.o for DTrace, use trace-dtrace.o generated by 'dtrace' instead * tracetool: Support for generating DTrace data files Signed-off-by: Daniel P. Berrangeberra...@redhat.com Applied both. Thanks. Regards, Anthony Liguori --- .gitignore|2 + Makefile | 23 +++ Makefile.objs |4 ++ configure | 14 ++- tracetool | 116 - 5 files changed, 148 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index a43e4d1..3efb4ec 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ config-host.* config-target.* trace.h trace.c +trace-dtrace.h +trace-dtrace.dtrace *-timestamp *-softmmu *-darwin-user diff --git a/Makefile b/Makefile index 02698e9..554ad97 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,9 @@ # Makefile for QEMU. 
GENERATED_HEADERS = config-host.h trace.h qemu-options.def +ifeq ($(TRACE_BACKEND),dtrace) +GENERATED_HEADERS += trace-dtrace.h +endif ifneq ($(wildcard config-host.mak),) # Put the all: rule here so that config-host.mak can contain dependencies. @@ -108,7 +111,11 @@ ui/vnc.o: QEMU_CFLAGS += $(VNC_TLS_CFLAGS) bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS) +ifeq ($(TRACE_BACKEND),dtrace) +trace.h: trace.h-timestamp trace-dtrace.h +else trace.h: trace.h-timestamp +endif trace.h-timestamp: $(SRC_PATH)/trace-events config-host.mak $(call quiet-command,sh $(SRC_PATH)/tracetool --$(TRACE_BACKEND) -h $ $@, GEN trace.h) @cmp -s $@ trace.h || cp $@ trace.h @@ -120,6 +127,20 @@ trace.c-timestamp: $(SRC_PATH)/trace-events config-host.mak trace.o: trace.c $(GENERATED_HEADERS) +trace-dtrace.h: trace-dtrace.dtrace + $(call quiet-command,dtrace -o $@ -h -s $, GEN trace-dtrace.h) + +# Normal practice is to name DTrace probe file with a '.d' extension +# but that gets picked up by QEMU's Makefile as an external dependancy +# rule file. 
So we use '.dtrace' instead +trace-dtrace.dtrace: trace-dtrace.dtrace-timestamp +trace-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events config-host.mak + $(call quiet-command,sh $(SRC_PATH)/tracetool --$(TRACE_BACKEND) -d $ $@, GEN trace-dtrace.dtrace) + @cmp -s $@ trace-dtrace.dtrace || cp $@ trace-dtrace.dtrace + +trace-dtrace.o: trace-dtrace.dtrace $(GENERATED_HEADERS) + $(call quiet-command,dtrace -o $@ -G -s $, GEN trace-dtrace.o) + simpletrace.o: simpletrace.c $(GENERATED_HEADERS) version.o: $(SRC_PATH)/version.rc config-host.mak @@ -157,6 +178,8 @@ clean: rm -f slirp/*.o slirp/*.d audio/*.o audio/*.d block/*.o block/*.d net/*.o net/*.d fsdev/*.o fsdev/*.d ui/*.o ui/*.d rm -f qemu-img-cmds.h rm -f trace.c trace.h trace.c-timestamp trace.h-timestamp + rm -f trace-dtrace.dtrace trace-dtrace.dtrace-timestamp + rm -f trace-dtrace.h trace-dtrace.h-timestamp $(MAKE) -C tests clean for d in $(ALL_SUBDIRS) libhw32 libhw64 libuser libdis libdis-user; do \ if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \ diff --git a/Makefile.objs b/Makefile.objs index faf485e..84fc80e 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -285,11 +285,15 @@ libdis-$(CONFIG_SPARC_DIS) += sparc-dis.o ## # trace +ifeq ($(TRACE_BACKEND),dtrace) +trace-obj-y = trace-dtrace.o +else trace-obj-y = trace.o ifeq ($(TRACE_BACKEND),simple) trace-obj-y += simpletrace.o user-obj-y += qemu-timer-common.o endif +endif vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) diff --git a/configure b/configure index 7025d2b..f8dad3e 100755 --- a/configure +++ b/configure @@ -929,7 +929,7 @@ echo --enable-docsenable documentation
Re: [Qemu-devel] [PATCH RESEND] apic: Don't iterate past last used apic
On 11/05/2010 05:01 PM, Alex Williamson wrote: local_apics are allocated sequentially and never removed, so we can stop any iterations that go to MAX_APICS as soon as we hit the first NULL. Looking at a small guest running a virtio-net workload with oprofile, this drops apic_get_delivery_bitmask() from #3 in the profile to down in the noise. Signed-off-by: Alex Williamsonalex.william...@redhat.com Applied. Thanks. Regards, Anthony LIguori --- hw/apic.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index 63d62c7..5f4a87c 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -437,6 +437,8 @@ static int apic_find_dest(uint8_t dest) apic = local_apics[i]; if (apic apic-id == dest) return i; +if (!apic) +break; } return -1; @@ -472,6 +474,8 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, set_bit(deliver_bitmask, i); } } +} else { +break; } } }
[Qemu-devel] [RFC][PATCH v4 00/18] virtagent: host/guest RPC communication agent
This set of patches is meant to be applied on top of the recently submitted Virtproxy v3 patchset. It can also be obtained at: git://repo.or.cz/qemu/mdroth.git virtproxy_v3 OVERVIEW: There are a wide range of use cases motivating the need for a guest agent of some sort to extend the functionality/usability/control offered by QEMU. Some examples include graceful guest shutdown/reboot and notifications thereof, copy/paste syncing between host/guest, guest statistics gathering, file access, etc. Ideally these would all be served by a single, easily extensible agent that can be deployed in a wide range of guests. Virtagent is an XMLRPC server, integrated into QEMU and the Virtproxy guest daemon, aimed at providing this type of functionality. NOTE: The guest agent can potentially be implemented independently of virtproxy depending on the feedback, we simply make use of it to provide an abstraction from the actual transport layer (ISA vs. Virtio serial) and use its multiplexing capabilities to avoid having to dedicate 2 isa/virtio serial ports to the virtagent service. Please evaluate these patches as being separate from virtproxy. CHANGES IN V4: - Added guest agent capabilities negotiation - Added RPC/monitor command to invoke guest shutdown/reboot/powerdown - Added RPC/monitor command to the guest agent - Added guest startup notification (hello) - Added syslog()'ing of guest agent RPCs - Various cleanups CHANGES IN V3: - Integrated virtagent server into virtproxy chardev. Usage examples below. 
- Consolidated RPC server/client setup into a pair of init routines - Fixed buffer overflow in agent_viewfile() and various memory leaks CHANGES IN V2: - All RPC communication is now done using asynchronous/non-blocking read/write handlers - Previously fork()'d RPC server loop is now integrated into qemu-vp/virtproxy i/o loop - Cleanups/suggestions from previous RFC DESIGN: There are actually 2 RPC servers: 1) a server in the guest integrated into qemu-vp, the Virtproxy guest daemon, which handles RPC requests from QEMU 2) a server in the host, integrated into the virtproxy chardev, to handle RPC requests sent by the guest agent (mainly for handling asynchronous events reported by the agent). At the Virtagent level, communication is done via standard RPCs (HTTP between host and guest). Virtproxy transparently handles transport over a network or isa/virtio serial channel, allowing the agent to be deployed on older guests which may not support virtio-serial. EXAMPLE USAGE: - Configure guest agent to talk to host via virtio-serial # start guest with virtio-serial/virtproxy/virtagent. for example (RHEL6rc1): qemu \ -chardev virtproxy,id=test0,virtagent=on \ -device virtio-serial \ -device virtserialport,chardev=test0,name=virtagent0 \ -monitor stdio ... # in the guest: ./qemu-vp -c virtserial-open:/dev/virtio-ports/virtagent0:- -g ... # monitor commands (qemu) agent_viewdmesg [139311.710326] wlan0: deauthenticating from 00:30:bd:f7:12:d5 by local choice (reason=3) [139323.469857] wlan0: deauthenticating from 00:21:29:cd:41:ee by local choice (reason=3) ... [257683.375646] wlan0: authenticated [257683.375684] wlan0: associate with AP 00:30:bd:f7:12:d5 (try 1) [257683.377932] wlan0: RX AssocResp from 00:30:bd:f7:12:d5 (capab=0x411 status=0 aid=4) [257683.377940] wlan0: associated (qemu) agent_viewfile /proc/meminfo MemTotal:3985488 kB MemFree: 400524 kB Buffers: 220556 kB Cached: 2073160 kB SwapCached:0 kB ... 
Hugepagesize: 2048 kB DirectMap4k:8896 kB DirectMap2M: 4110336 kB (qemu) agent_shutdown powerdown (qemu) KNOWN ISSUES/PLANS: - the client socket that qemu connects to send RPCs is a hardcoded filepath. This is unacceptable as the socket is channel/process specific and things will break when multiple guests are started. - proper channel negotiation is needed to avoid hung monitors and such when a guest reboots or the guest agent is stopped for whatever reason. additionally, a timeout may need to be imposed on the amount of time the http read handler can block the monitor. [RFC][PATCH v4 01/18] virtagent: add common rpc transport defs [RFC][PATCH v4 02/18] virtagent: base definitions for host/guest RPC server [RFC][PATCH v4 03/18] virtagent: qemu-vp, integrate virtagent server [RFC][PATCH v4 04/18] virtagent: base RPC client definitions [RFC][PATCH v4 05/18] virtagent: add getfile RPC [RFC][PATCH v4 06/18] virtagent: add agent_viewfile command [RFC][PATCH v4 07/18] virtagent: add getdmesg RPC [RFC][PATCH v4 08/18] virtagent: add agent_viewdmesg command [RFC][PATCH v4 09/18] virtagent: add va_shutdown RPC [RFC][PATCH v4 10/18] virtagent: add agent_shutdown monitor command [RFC][PATCH v4 11/18] virtagent: add va_ping RPC [RFC][PATCH v4 12/18] virtagent: add agent_ping
[Qemu-devel] [RFC][PATCH v4 03/18] virtagent: qemu-vp, integrate virtagent server
This allows the guest RPC server to be integrated into the qemu-vp/virtproxy i/o loop Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- qemu-char.c | 26 qemu-config.c |3 ++ qemu-vp.c | 94 +++- 3 files changed, 114 insertions(+), 9 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index bc7925c..fd02640 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -1905,6 +1905,8 @@ return_err: /* Virtproxy chardev driver */ #include virtproxy.h +#include virtagent.h +#include virtagent-daemon.h static int vp_init_oforward(VPDriver *drv, QemuOpts *opts) { @@ -2016,6 +2018,8 @@ static CharDriverState *qemu_chr_open_virtproxy(QemuOpts *opts) { CharDriverState *chr = qemu_mallocz(sizeof(CharDriverState)); VPDriver *drv = vp_new(VP_CTX_CHARDEV, chr, 0, 0); +int ret; +bool enable_virtagent; chr-opaque = drv; chr-chr_write = vp_chr_write; @@ -2025,9 +2029,31 @@ static CharDriverState *qemu_chr_open_virtproxy(QemuOpts *opts) /* parse socket forwarding options */ qemu_opt_foreach(opts, vp_init_forwards, drv, 1); +/* add forwarding options to enable virtagent server */ +enable_virtagent = qemu_opt_get_bool(opts, virtagent, 0); +if (enable_virtagent) { +/* outbound RPCs */ +ret = va_client_init(drv, true); +if (ret) { +fprintf(stderr, error enabling virtagent client); +goto fail; +} +/* inbound RPCs */ +ret = va_server_init(drv, true); +if (ret) { +fprintf(stderr, error enabling virtagent server); +goto fail; +} +} + /* for info chardev monitor command */ chr-filename = NULL; return chr; + +fail: +qemu_free(drv); +qemu_free(chr); +return NULL; } /***/ diff --git a/qemu-config.c b/qemu-config.c index 400e61a..41ba54d 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -152,6 +152,9 @@ static QemuOptsList qemu_chardev_opts = { },{ .name = iforward, .type = QEMU_OPT_STRING, +},{ +.name = virtagent, +.type = QEMU_OPT_BOOL, }, { /* end of list */ } }, diff --git a/qemu-vp.c b/qemu-vp.c index cfd2a69..38959e5 100644 --- a/qemu-vp.c +++ b/qemu-vp.c @@ -37,6 +37,8 @@ #include qemu-option.h 
#include qemu_socket.h #include virtproxy.h +#include virtagent.h +#include virtagent-daemon.h static bool verbose_enabled = 0; #define DEBUG_ENABLED @@ -219,14 +221,16 @@ static void usage(const char *cmd) [-o oforward_opts ...]\n QEMU virt-proxy communication channel\n \n - -c, --channelchannel options of the form:\n - method:addr:port[:channel_id]\n - -o, --oforward oforward options of the form:\n - service_id:addr:port[:channel_id]\n - -i, --iforward iforward options of the form:\n - service_id:addr:port[:channel_id]\n - -v, --verbosedisplay extra debugging information\n - -h, --help display this help and exit\n + -c, --channel channel options of the form:\n +method:addr:port[:channel_id]\n + -g, --guest-agent guest rpc server, options of the form:\n +[channel_id]\n + -o, --oforwardoforward options of the form:\n +service_id:addr:port[:channel_id]\n + -i, --iforwardiforward options of the form:\n +service_id:addr:port[:channel_id]\n + -v, --verbose display extra debugging information\n + -h, --helpdisplay this help and exit\n \n channels are used to establish a data connection between 2 end-points in\n the host or the guest (connection method specified by method).\n @@ -426,13 +430,52 @@ static int init_iforwards(void) { return 0; } +static int init_agent(const VPData *agent_iforward) { +QemuOpts *opts = agent_iforward-opts; +VPDriver *drv; +int ret, index; + +INFO(initializing agent...); +if (verbose_enabled) { +qemu_opts_print(opts, NULL); +} + +index = qemu_opt_get_number(agent_iforward-opts, index, 0); +drv = get_channel_drv(index); +if (drv == NULL) { +warnx(unable to find channel with index: %d, index); +goto err; +} + +/* outbound RPCs */ +ret = va_client_init(drv, false); +if (ret) { +warnx(error starting RPC server); +goto err; +} + +/* start guest RPC server */ +ret = va_server_init(drv, false); +if (ret != 0) { +warnx(error starting RPC server); +goto err; +} + +return 0; + +err: +return -1; +} + int main(int argc, char **argv) { -const char *sopt = 
hVvi:o:c:; +const char *sopt = hVvi:o:c:g::p::; struct option lopt[] = { { help, 0, NULL, 'h' }, { version, 0, NULL, 'V' }, { verbose, 0, NULL, 'v' }, +{
[Qemu-devel] [RFC][PATCH v4 01/18] virtagent: add common rpc transport defs
Common code for sending/receiving RPCs via http over virtproxy channel. All communication is done via asynchronous read/write handlers and using non-blocking reads/writes. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent-common.c | 440 virtagent-common.h | 73 + 2 files changed, 513 insertions(+), 0 deletions(-) create mode 100644 virtagent-common.c create mode 100644 virtagent-common.h diff --git a/virtagent-common.c b/virtagent-common.c new file mode 100644 index 000..8c4dcd4 --- /dev/null +++ b/virtagent-common.c @@ -0,0 +1,440 @@ +/* + * virt-agent - common host/guest RPC functions + * + * Copyright IBM Corp. 2010 + * + * Authors: + * Adam Litkeagli...@linux.vnet.ibm.com + * Michael Roth mdr...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include virtagent-common.h + +#define VA_READ true +#define VA_SEND false + +enum va_rpc_type { +VA_RPC_REQUEST, +VA_RPC_RESPONSE, +}; + +typedef struct VARPCState { +char hdr[VA_HDR_LEN_MAX]; +int fd; +size_t hdr_len; +size_t hdr_pos; +enum { +VA_READ_START, +VA_READ_HDR, +VA_READ_BODY, +VA_SEND_START, +VA_SEND_HDR, +VA_SEND_BODY, +} state; +enum va_rpc_type rpc_type; +char *content; +size_t content_len; +size_t content_pos; +VARPCData *data; +} VARPCState; + +static void va_rpc_read_handler(void *opaque); +static void va_rpc_send_handler(void *opaque); + +static int end_of_header(char *buf, int end_pos) +{ +return !strncmp(buf+(end_pos-2), \n\r\n, 3); +} + +static void va_rpc_hdr_init(VARPCState *s) { +const char *preamble; + +TRACE(called); +/* essentially ignored in the context of virtagent, but might as well */ +if (s-rpc_type == VA_RPC_REQUEST) { +preamble = POST /RPC2 HTTP/1.1; +} else if (s-rpc_type == VA_RPC_RESPONSE) { +preamble = HTTP/1.1 200 OK; +} else { +s-hdr_len = 0; +return; +} + +s-hdr_len = sprintf(s-hdr, + %s EOL + Content-Type: text/xml EOL + Content-Length: %u EOL EOL, 
+ preamble, + (uint32_t)s-content_len); +} + +static void va_rpc_parse_hdr(VARPCState *s) +{ +int i, line_pos = 0; +char line_buf[4096]; + +for (i = 0; i VA_HDR_LEN_MAX; ++i) { +if (s-hdr[i] != '\n') { +/* read line */ +line_buf[line_pos++] = s-hdr[i]; +} else { +/* process line */ +if (strncmp(line_buf, Content-Length: , 16) == 0) { +s-content_len = atoi(line_buf[16]); +return; +} +line_pos = 0; +} +} +} + +static VARPCState *va_rpc_state_new(VARPCData *data, int fd, +enum va_rpc_type rpc_type, bool read) +{ +VARPCState *s = qemu_mallocz(sizeof(VARPCState)); + +s-rpc_type = rpc_type; +s-fd = fd; +s-data = data; +if (s-data == NULL) { +goto EXIT_BAD; +} + +if (read) { +s-state = VA_READ_START; +s-content = NULL; +} else { +s-state = VA_SEND_START; +if (rpc_type == VA_RPC_REQUEST) { +s-content = XMLRPC_MEMBLOCK_CONTENTS(char, s-data-send_req_xml); +s-content_len = XMLRPC_MEMBLOCK_SIZE(char, s-data-send_req_xml); +} else if (rpc_type == VA_RPC_RESPONSE) { +s-content = XMLRPC_MEMBLOCK_CONTENTS(char, s-data-send_resp_xml); +s-content_len = XMLRPC_MEMBLOCK_SIZE(char, s-data-send_resp_xml); +} else { +LOG(unknown rcp type); +goto EXIT_BAD; +} +va_rpc_hdr_init(s); +if (s-hdr_len == 0) { +LOG(failed to initialize http header); +goto EXIT_BAD; +} +} + +return s; +EXIT_BAD: +qemu_free(s); +return NULL; +} + +/* called by va_rpc_read_handler after reading requests */ +static int va_rpc_send_response(VARPCData *data, int fd) +{ +VARPCState *s = va_rpc_state_new(data, fd, VA_RPC_RESPONSE, VA_SEND); + +TRACE(called); +if (s == NULL) { +LOG(failed to set up RPC state); +return -1; +} +TRACE(setting up send handler for RPC request); +vp_set_fd_handler(fd, NULL, va_rpc_send_handler, s); + +return 0; +} + +static void va_rpc_read_handler_completion(VARPCState *s) { +int ret; + +if (s-rpc_type == VA_RPC_REQUEST) { +/* server read request, call it's cb function then set up + * a send handler for the rpc response if there weren't any + * communication errors + */ +if (s-data-cb) { 
+s-data-cb(s-data); +} +if (s-data-status == VA_RPC_STATUS_OK) { +ret =
[Qemu-devel] [RFC][PATCH v4 02/18] virtagent: base definitions for host/guest RPC server
Basic skeleton code for RPC server. This is shared by both the guest-side RPC server as well as the host-side one (the advertised RPCs for each by guest/host-specific arrays). Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent-daemon.c | 209 virtagent-daemon.h | 22 ++ 2 files changed, 231 insertions(+), 0 deletions(-) create mode 100644 virtagent-daemon.c create mode 100644 virtagent-daemon.h diff --git a/virtagent-daemon.c b/virtagent-daemon.c new file mode 100644 index 000..78d550f --- /dev/null +++ b/virtagent-daemon.c @@ -0,0 +1,209 @@ +/* + * virt-agent - host/guest RPC daemon functions + * + * Copyright IBM Corp. 2010 + * + * Authors: + * Adam Litkeagli...@linux.vnet.ibm.com + * Michael Roth mdr...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#include syslog.h +#include qemu_socket.h +#include virtagent-daemon.h +#include virtagent-common.h +#include virtagent.h + +static bool va_enable_syslog = false; /* enable syslog'ing of RPCs */ + +#define SLOG(msg, ...) 
do { \ +char msg_buf[1024]; \ +if (!va_enable_syslog) { \ +break; \ +} \ +sprintf(msg_buf, msg, ## __VA_ARGS__); \ +syslog(LOG_INFO, virtagent, %s, msg_buf); \ +} while(0) + +static int va_accept(int listen_fd) { +struct sockaddr_in saddr; +struct sockaddr *addr; +socklen_t len; +int fd; + +while (1) { +len = sizeof(saddr); +addr = (struct sockaddr *)saddr; +fd = qemu_accept(listen_fd, addr, len); +if (fd 0 errno != EINTR) { +LOG(accept() failed); +break; +} else if (fd = 0) { +TRACE(accepted connection); +break; +} +} +return fd; +} + +typedef struct RPCFunction { +xmlrpc_value *(*func)(xmlrpc_env *env, xmlrpc_value *param, void *unused); +const char *func_name; +} RPCFunction; + +static RPCFunction guest_functions[] = { +{ NULL, NULL } +}; +static RPCFunction host_functions[] = { +{ NULL, NULL } +}; + +static void va_register_functions(xmlrpc_env *env, xmlrpc_registry *registry, + RPCFunction *list) +{ +int i; +for (i = 0; list[i].func != NULL; ++i) { +TRACE(adding func: %s, list[i].func_name); +xmlrpc_registry_add_method(env, registry, NULL, list[i].func_name, + list[i].func, NULL); +} +} + +typedef struct VARPCServerState { +VPDriver *vp; +int listen_fd; +xmlrpc_env env; +xmlrpc_registry *registry; +} VARPCServerState; + +/* only one virtagent server instance can exist at a time */ +static VARPCServerState *server_state = NULL; + +static void va_accept_handler(void *opaque); + +static void va_rpc_send_cb(void *opaque) +{ +VARPCData *rpc_data = opaque; +VARPCServerState *s = server_state; + +TRACE(called); +if (rpc_data-status != VA_RPC_STATUS_OK) { +LOG(error sending RPC response); +} else { +TRACE(RPC completed); +} + +TRACE(waiting for RPC request...); +vp_set_fd_handler(s-listen_fd, va_accept_handler, NULL, s); +} + +static void va_rpc_read_cb(void *opaque) +{ +VARPCData *rpc_data = opaque; +VARPCServerState *s = server_state; + +TRACE(called); +if (rpc_data-status != VA_RPC_STATUS_OK) { +LOG(error reading RPC request); +goto out_bad; +} + 
+rpc_data-send_resp_xml = +xmlrpc_registry_process_call(s-env, s-registry, NULL, + rpc_data-req_xml, rpc_data-req_xml_len); +if (rpc_data-send_resp_xml == NULL) { +LOG(error handling RPC request); +goto out_bad; +} + +rpc_data-cb = va_rpc_send_cb; +return; + +out_bad: +TRACE(waiting for RPC request...); +vp_set_fd_handler(s-listen_fd, va_accept_handler, NULL, s); +} + +static void va_accept_handler(void *opaque) +{ +VARPCData *rpc_data; +int ret, fd; + +TRACE(called); +fd = va_accept(server_state-listen_fd); +if (fd 0) { +TRACE(connection error: %s, strerror(errno)); +return; +} +ret = fcntl(fd, F_GETFL); +ret = fcntl(fd, F_SETFL, ret | O_NONBLOCK); + +TRACE(RPC client connected, reading RPC request...); +rpc_data = qemu_mallocz(sizeof(VARPCData)); +rpc_data-cb = va_rpc_read_cb; +ret = va_rpc_read_request(rpc_data, fd); +if (ret != 0) { +LOG(error setting up read handler); +qemu_free(rpc_data); +return; +} +vp_set_fd_handler(server_state-listen_fd, NULL, NULL, NULL); +} + +int va_server_init(VPDriver *vp_drv, bool is_host) +{ +RPCFunction *func_list = is_host ? host_functions : guest_functions; +QemuOpts *opts; +int ret, fd; +const char *path, *service_id; + +if (server_state) { +LOG(virtagent server already
[Qemu-devel] [RFC][PATCH v4 07/18] virtagent: add getdmesg RPC
Add RPC to view guest dmesg output. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent-daemon.c | 46 ++ virtagent-daemon.h |1 + 2 files changed, 47 insertions(+), 0 deletions(-) diff --git a/virtagent-daemon.c b/virtagent-daemon.c index 44c0754..0dd72c0 100644 --- a/virtagent-daemon.c +++ b/virtagent-daemon.c @@ -81,6 +81,50 @@ EXIT_CLOSE_BAD: return result; } +/* getdmesg(): return dmesg output + * rpc return values: + * - dmesg output as a string + */ +static xmlrpc_value *getdmesg(xmlrpc_env *env, + xmlrpc_value *param, + void *user_data) +{ +char *dmesg_buf = NULL, cmd[256]; +int ret; +xmlrpc_value *result = NULL; +FILE *pipe; + +SLOG(getdmesg()); + +dmesg_buf = qemu_mallocz(VA_DMESG_LEN + 2048); +sprintf(cmd, dmesg -s %d, VA_DMESG_LEN); + +pipe = popen(cmd, r); +if (pipe == NULL) { +LOG(popen failed: %s, strerror(errno)); +xmlrpc_faultf(env, popen failed: %s, strerror(errno)); +goto EXIT_NOCLOSE; +} + +ret = fread(dmesg_buf, sizeof(char), VA_DMESG_LEN, pipe); +if (!ferror(pipe)) { +dmesg_buf[ret] = '\0'; +TRACE(dmesg:\n%s, dmesg_buf); +result = xmlrpc_build_value(env, s, dmesg_buf); +} else { +LOG(fread failed); +xmlrpc_faultf(env, popen failed: %s, strerror(errno)); +} + +pclose(pipe); +EXIT_NOCLOSE: +if (dmesg_buf) { +qemu_free(dmesg_buf); +} + +return result; +} + static int va_accept(int listen_fd) { struct sockaddr_in saddr; struct sockaddr *addr; @@ -110,6 +154,8 @@ typedef struct RPCFunction { static RPCFunction guest_functions[] = { { .func = getfile, .func_name = getfile }, +{ .func = getdmesg, + .func_name = getdmesg }, { NULL, NULL } }; static RPCFunction host_functions[] = { diff --git a/virtagent-daemon.h b/virtagent-daemon.h index 6c3436a..09b0097 100644 --- a/virtagent-daemon.h +++ b/virtagent-daemon.h @@ -18,5 +18,6 @@ #define HOST_AGENT_PATH /tmp/virtagent-host.sock #define VA_GETFILE_MAX 1 30 #define VA_FILEBUF_LEN 16384 +#define VA_DMESG_LEN 16384 int va_server_init(VPDriver *vp_drv, bool is_host); -- 1.7.0.4
[Qemu-devel] [RFC][PATCH v4 06/18] virtagent: add agent_viewfile command
Utilize the getfile RPC to provide a means to view text files in the guest. Getfile can handle binary files as well but we don't advertise that here due to the special handling requiring to store it and provide it back to the user (base64 encoding it for instance). Hence the potentially confusing viewfile as opposed to getfile. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hmp-commands.hx | 16 + qmp-commands.hx | 33 ++ virtagent.c | 102 +++ virtagent.h |3 ++ 4 files changed, 154 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index e5585ba..423c752 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1212,6 +1212,22 @@ show available trace events and their state ETEXI #endif +{ +.name = agent_viewfile, +.args_type = filepath:s, +.params = filepath, +.help = Echo a file from the guest filesystem, +.user_print = do_agent_viewfile_print, +.mhandler.cmd_async = do_agent_viewfile, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_viewfile @var{filepath} +...@findex agent_viewfile +Echo the file identified by @var{filepath} on the guest filesystem +ETEXI + STEXI @end table ETEXI diff --git a/qmp-commands.hx b/qmp-commands.hx index 793cf1c..efa2137 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -738,6 +738,39 @@ Example: EQMP { +.name = agent_viewfile, +.args_type = filepath:s, +.params = filepath, +.help = Echo a file from the guest filesystem, +.user_print = monitor_user_noop, +.mhandler.cmd_async = do_agent_viewfile, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_viewfile @var{filepath} +...@findex agent_viewfile +Echo the file identified by @var{filepath} on the guest filesystem +ETEXI +SQMP +agent_viewfile + + +Echo the file identified by @var{filepath} from the guest filesystem. 
+ +Arguments: + +- filepath: Full guest path of the desired file + +Example: + +- { execute: agent_viewfile, +arguments: { filepath: /sys/kernel/kexec_loaded } } +- { return: { contents: 0 } } + +EQMP + +{ .name = qmp_capabilities, .args_type = , .params = , diff --git a/virtagent.c b/virtagent.c index 750c167..3fe0a7b 100644 --- a/virtagent.c +++ b/virtagent.c @@ -235,3 +235,105 @@ out_callxml: out: return ret; } + +/* QMP/HMP RPC client functions */ + +void do_agent_viewfile_print(Monitor *mon, const QObject *data) +{ +QDict *qdict; +const char *contents = NULL; +int i; + +qdict = qobject_to_qdict(data); +if (!qdict_haskey(qdict, contents)) { +return; +} + +contents = qdict_get_str(qdict, contents); +if (contents != NULL) { + /* monitor_printf truncates so do it in chunks. also, file_contents + * may not be null-termed at proper location so explicitly calc + * last chunk sizes */ +for (i = 0; i strlen(contents); i += 1024) { +monitor_printf(mon, %.1024s, contents + i); +} +} +monitor_printf(mon, \n); +} + +static void do_agent_viewfile_cb(void *opaque) +{ +VARPCData *rpc_data = opaque; +xmlrpc_value *resp = NULL; +char *file_contents = NULL; +size_t file_size; +int ret; +xmlrpc_env env; +QDict *qdict = qdict_new(); + +if (rpc_data-status != VA_RPC_STATUS_OK) { +LOG(error handling RPC request); +goto out_no_resp; +} + +xmlrpc_env_init(env); +resp = xmlrpc_parse_response(env, rpc_data-resp_xml, + rpc_data-resp_xml_len); +if (rpc_has_error(env)) { +ret = -1; +goto out_no_resp; +} + +xmlrpc_parse_value(env, resp, 6, file_contents, file_size); +if (rpc_has_error(env)) { +ret = -1; +goto out; +} + +if (file_contents != NULL) { +qdict_put(qdict, contents, + qstring_from_substr(file_contents, 0, file_size-1)); +} + +out: +xmlrpc_DECREF(resp); +out_no_resp: +rpc_data-mon_cb(rpc_data-mon_data, QOBJECT(qdict)); +} + +/* + * do_agent_viewfile(): View a text file in the guest + */ +int do_agent_viewfile(Monitor *mon, const QDict *mon_params, + MonitorCompletion cb, void 
*opaque) +{ +xmlrpc_env env; +xmlrpc_value *params; +VARPCData *rpc_data; +const char *filepath; +int ret; + +filepath = qdict_get_str(mon_params, filepath); +xmlrpc_env_init(env); +params = xmlrpc_build_value(env, (s), filepath); +if (rpc_has_error(env)) { +return -1; +} + +rpc_data = qemu_mallocz(sizeof(VARPCData)); +rpc_data-cb = do_agent_viewfile_cb; +rpc_data-mon_cb = cb; +rpc_data-mon_data = opaque; + +ret = rpc_execute(env, getfile, params, rpc_data); +if (ret == -EREMOTE) { +
[Qemu-devel] [RFC][PATCH v4 05/18] virtagent: add getfile RPC
Add RPC to retrieve a guest file. A size limit of some sort will eventually be needed else we can block the monitor for arbitrarily long periods of time. This interface is intended for smaller reads like peeking at logs and /proc and such. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent-daemon.c | 55 1 files changed, 55 insertions(+), 0 deletions(-) diff --git a/virtagent-daemon.c b/virtagent-daemon.c index 78d550f..44c0754 100644 --- a/virtagent-daemon.c +++ b/virtagent-daemon.c @@ -28,6 +28,59 @@ static bool va_enable_syslog = false; /* enable syslog'ing of RPCs */ syslog(LOG_INFO, virtagent, %s, msg_buf); \ } while(0) +/* RPC functions common to guest/host daemons */ + +static xmlrpc_value *getfile(xmlrpc_env *env, +xmlrpc_value *param, +void *user_data) +{ +const char *path; +char *file_contents = NULL; +char buf[VA_FILEBUF_LEN]; +int fd, ret, count = 0; +xmlrpc_value *result = NULL; + +/* parse argument array */ +xmlrpc_decompose_value(env, param, (s), path); +if (env-fault_occurred) { +return NULL; +} + +SLOG(getfile(), path:%s, path); + +fd = open(path, O_RDONLY); +if (fd == -1) { +LOG(open failed: %s, strerror(errno)); +xmlrpc_faultf(env, open failed: %s, strerror(errno)); +return NULL; +} + +while ((ret = read(fd, buf, VA_FILEBUF_LEN)) 0) { +file_contents = qemu_realloc(file_contents, count + VA_FILEBUF_LEN); +memcpy(file_contents + count, buf, ret); +count += ret; +if (count VA_GETFILE_MAX) { +xmlrpc_faultf(env, max file size (%d bytes) exceeded, + VA_GETFILE_MAX); +goto EXIT_CLOSE_BAD; +} +} +if (ret == -1) { +LOG(read failed: %s, strerror(errno)); +xmlrpc_faultf(env, read failed: %s, strerror(errno)); +goto EXIT_CLOSE_BAD; +} + +result = xmlrpc_build_value(env, 6, file_contents, count); + +EXIT_CLOSE_BAD: +if (file_contents) { +qemu_free(file_contents); +} +close(fd); +return result; +} + static int va_accept(int listen_fd) { struct sockaddr_in saddr; struct sockaddr *addr; @@ -55,6 +108,8 @@ typedef struct RPCFunction { } 
RPCFunction; static RPCFunction guest_functions[] = { +{ .func = getfile, + .func_name = getfile }, { NULL, NULL } }; static RPCFunction host_functions[] = { -- 1.7.0.4
[Qemu-devel] [RFC][PATCH v4 08/18] virtagent: add agent_viewdmesg command
Add commands to view guest dmesg output. Currently it is a 16K buffer. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hmp-commands.hx | 16 + qmp-commands.hx | 35 +++ virtagent.c | 100 +++ virtagent.h |3 ++ 4 files changed, 154 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index 423c752..5b9db62 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1228,6 +1228,22 @@ STEXI Echo the file identified by @var{filepath} on the guest filesystem ETEXI +{ +.name = agent_viewdmesg, +.args_type = , +.params = , +.help = View guest dmesg output, +.user_print = do_agent_viewdmesg_print, +.mhandler.cmd_async = do_agent_viewdmesg, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_viewdmesg +...@findex agent_viewdmesg +View guest dmesg output +ETEXI + STEXI @end table ETEXI diff --git a/qmp-commands.hx b/qmp-commands.hx index efa2137..dc319b7 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -771,6 +771,41 @@ Example: EQMP { +.name = agent_viewdmesg, +.args_type = , +.params = , +.help = View guest dmesg output, +.user_print = do_agent_viewdmesg_print, +.mhandler.cmd_async = do_agent_viewdmesg, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_viewdmesg +...@findex agent_viewdmesg +View guest dmesg output +ETEXI +SQMP +agent_viewdmesg + + +View guest dmesg output + +Arguments: + +(none) + +Example: + +- { execute: agent_viewdmesg } +- { return: { + contents: [353487.942215] usb 1-4: USB disconnect, address 9\n... 
+ } + } + +EQMP + +{ .name = qmp_capabilities, .args_type = , .params = , diff --git a/virtagent.c b/virtagent.c index 3fe0a7b..e0298b9 100644 --- a/virtagent.c +++ b/virtagent.c @@ -299,6 +299,7 @@ out: xmlrpc_DECREF(resp); out_no_resp: rpc_data-mon_cb(rpc_data-mon_data, QOBJECT(qdict)); +qobject_decref(QOBJECT(qdict)); } /* @@ -337,3 +338,102 @@ int do_agent_viewfile(Monitor *mon, const QDict *mon_params, return 0; } + +void do_agent_viewdmesg_print(Monitor *mon, const QObject *data) +{ +QDict *qdict; +const char *contents = NULL; +int i; + +qdict = qobject_to_qdict(data); +if (!qdict_haskey(qdict, contents)) { +goto out; +} + +contents = qdict_get_str(qdict, contents); +if (contents != NULL) { + /* monitor_printf truncates so do it in chunks. also, file_contents + * may not be null-termed at proper location so explicitly calc + * last chunk sizes */ +for (i = 0; i strlen(contents); i += 1024) { +monitor_printf(mon, %.1024s, contents + i); +} +} + +out: +monitor_printf(mon, \n); +} + +static void do_agent_viewdmesg_cb(void *opaque) +{ +VARPCData *rpc_data = opaque; +xmlrpc_value *resp = NULL; +char *dmesg = NULL; +int ret; +xmlrpc_env env; +QDict *qdict = qdict_new(); + +if (rpc_data-status != VA_RPC_STATUS_OK) { +LOG(error handling RPC request); +goto out_no_resp; +} + +xmlrpc_env_init(env); +resp = xmlrpc_parse_response(env, rpc_data-resp_xml, + rpc_data-resp_xml_len); +if (rpc_has_error(env)) { +ret = -1; +goto out_no_resp; +} + +xmlrpc_parse_value(env, resp, s, dmesg); +if (rpc_has_error(env)) { +ret = -1; +goto out; +} + +if (dmesg != NULL) { +qdict_put(qdict, contents, qstring_from_str(dmesg)); +} + +out: +xmlrpc_DECREF(resp); +out_no_resp: +rpc_data-mon_cb(rpc_data-mon_data, QOBJECT(qdict)); +} + +/* + * do_agent_viewdmesg(): View guest dmesg output + */ +int do_agent_viewdmesg(Monitor *mon, const QDict *mon_params, + MonitorCompletion cb, void *opaque) +{ +xmlrpc_env env; +xmlrpc_value *params; +VARPCData *rpc_data; +int ret; + +xmlrpc_env_init(env); + 
+params = xmlrpc_build_value(env, (n)); +if (rpc_has_error(env)) { +return -1; +} + +rpc_data = qemu_mallocz(sizeof(VARPCData)); +rpc_data-cb = do_agent_viewdmesg_cb; +rpc_data-mon_cb = cb; +rpc_data-mon_data = opaque; + +ret = rpc_execute(env, getdmesg, params, rpc_data); +if (ret == -EREMOTE) { +monitor_printf(mon, RPC Failed (%i): %s\n, env.fault_code, + env.fault_string); +return -1; +} else if (ret == -1) { +monitor_printf(mon, RPC communication error\n); +return -1; +} + +return 0; +} diff --git a/virtagent.h b/virtagent.h index 63d77c2..c077582 100644 --- a/virtagent.h +++ b/virtagent.h @@ -26,5 +26,8 @@ int va_client_init(VPDriver *vp_drv, bool is_host); void do_agent_viewfile_print(Monitor *mon, const
[Qemu-devel] Re: [PATCH v3 2/3] virtio-pci: Use ioeventfd for virtqueue notify
On Fri, Nov 12, 2010 at 01:24:28PM +, Stefan Hajnoczi wrote: Virtqueue notify is currently handled synchronously in userspace virtio. This prevents the vcpu from executing guest code while hardware emulation code handles the notify. On systems that support KVM, the ioeventfd mechanism can be used to make virtqueue notify a lightweight exit by deferring hardware emulation to the iothread and allowing the VM to continue execution. This model is similar to how vhost receives virtqueue notifies. The result of this change is improved performance for userspace virtio devices. Virtio-blk throughput increases especially for multithreaded scenarios and virtio-net transmit throughput increases substantially. Some virtio devices are known to have guest drivers which expect a notify to be processed synchronously and spin waiting for completion. Only enable ioeventfd for virtio-blk and virtio-net for now. Care must be taken not to interfere with vhost-net, which already uses ioeventfd host notifiers. The following list shows the behavior implemented in this patch and is designed to take vhost-net into account: * VIRTIO_CONFIG_S_DRIVER_OK - assign host notifiers, qemu_set_fd_handler(virtio_pci_host_notifier_read) * !VIRTIO_CONFIG_S_DRIVER_OK - qemu_set_fd_handler(NULL), deassign host notifiers * virtio_pci_set_host_notifier(true) - qemu_set_fd_handler(NULL) * virtio_pci_set_host_notifier(false) - qemu_set_fd_handler(virtio_pci_host_notifier_read) Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com --- hw/virtio-pci.c | 152 ++ hw/virtio.c | 14 - hw/virtio.h | 13 + 3 files changed, 153 insertions(+), 26 deletions(-) Now toggles host notifiers based on VIRTIO_CONFIG_S_DRIVER_OK status changes. The cleanest way I could see was to introduce pre and a post set_status() callbacks. They allow a binding to hook status changes, including the status change from virtio_reset(). 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 549118d..117e855 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -83,6 +83,11 @@ /* Flags track per-device state like workarounds for quirks in older guests. */ #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 0) +/* Performance improves when virtqueue kick processing is decoupled from the + * vcpu thread using ioeventfd for some devices. */ +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1 +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) + /* QEMU doesn't strictly need write barriers since everything runs in * lock-step. We'll leave the calls to wmb() in though to make it obvious for * KVM or if kqemu gets SMP support. @@ -179,12 +184,125 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f) return 0; } +static int virtio_pci_set_host_notifier_ioeventfd(VirtIOPCIProxy *proxy, + int n, bool assign) +{ +VirtQueue *vq = virtio_get_queue(proxy-vdev, n); +EventNotifier *notifier = virtio_queue_get_host_notifier(vq); +int r; +if (assign) { +r = event_notifier_init(notifier, 1); +if (r 0) { +return r; +} +r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); +if (r 0) { +event_notifier_cleanup(notifier); +} +} else { +r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); +if (r 0) { +return r; +} +event_notifier_cleanup(notifier); +} +return r; +} + +static void virtio_pci_host_notifier_read(void *opaque) +{ +VirtQueue *vq = opaque; +EventNotifier *n = virtio_queue_get_host_notifier(vq); +if (event_notifier_test_and_clear(n)) { +virtio_queue_notify_vq(vq); +} +} + +static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy, +int n, bool assign) +{ +VirtQueue *vq = virtio_get_queue(proxy-vdev, n); +EventNotifier *notifier = virtio_queue_get_host_notifier(vq); +if (assign) { +qemu_set_fd_handler(event_notifier_get_fd(notifier), 
+virtio_pci_host_notifier_read, NULL, vq); +} else { +qemu_set_fd_handler(event_notifier_get_fd(notifier), +NULL, NULL, NULL); +} +} + +static int virtio_pci_set_host_notifiers(VirtIOPCIProxy *proxy, bool assign) +{ +int n, r; + +for (n = 0; n VIRTIO_PCI_QUEUE_MAX; n++) { +if (!virtio_queue_get_num(proxy-vdev, n)) { +
[Qemu-devel] [RFC][PATCH v4 11/18] virtagent: add va_ping RPC
Do-nothing RPC that simply replies to the client. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent-daemon.c | 17 + 1 files changed, 17 insertions(+), 0 deletions(-) diff --git a/virtagent-daemon.c b/virtagent-daemon.c index 18992dd..137641d 100644 --- a/virtagent-daemon.c +++ b/virtagent-daemon.c @@ -181,6 +181,19 @@ out_bad: return NULL; } +/* va_ping(): respond to client. response without error in env + * variable indicates successful response + * rpc return values: none + */ +static xmlrpc_value *va_ping(xmlrpc_env *env, + xmlrpc_value *param, + void *user_data) +{ +xmlrpc_value *result = xmlrpc_build_value(env, s, dummy); +SLOG(va_ping()); +return result; +} + static int va_accept(int listen_fd) { struct sockaddr_in saddr; struct sockaddr *addr; @@ -214,9 +227,13 @@ static RPCFunction guest_functions[] = { .func_name = getdmesg }, { .func = va_shutdown, .func_name = va_shutdown }, +{ .func = va_ping, + .func_name = va_ping }, { NULL, NULL } }; static RPCFunction host_functions[] = { +{ .func = va_ping, + .func_name = va_ping }, { NULL, NULL } }; -- 1.7.0.4
[Qemu-devel] [RFC][PATCH v4 09/18] virtagent: add va_shutdown RPC
RPC to initiate guest shutdown/reboot/powerdown Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent-daemon.c | 58 1 files changed, 58 insertions(+), 0 deletions(-) diff --git a/virtagent-daemon.c b/virtagent-daemon.c index 0dd72c0..18992dd 100644 --- a/virtagent-daemon.c +++ b/virtagent-daemon.c @@ -125,6 +125,62 @@ EXIT_NOCLOSE: return result; } +/* va_shutdown(): initiate guest shutdown + * rpc return values: none + */ +static xmlrpc_value *va_shutdown(xmlrpc_env *env, +xmlrpc_value *param, +void *user_data) +{ +int ret; +const char *shutdown_type, *shutdown_flag; +xmlrpc_value *result = xmlrpc_build_value(env, s, dummy); + +TRACE(called); +xmlrpc_decompose_value(env, param, (s), shutdown_type); +if (env-fault_occurred) { +goto out_bad; +} + +if (strcmp(shutdown_type, halt) == 0) { +shutdown_flag = -H; +} else if (strcmp(shutdown_type, powerdown) == 0) { +shutdown_flag = -P; +} else if (strcmp(shutdown_type, reboot) == 0) { +shutdown_flag = -r; +} else { +xmlrpc_faultf(env, invalid shutdown type: %s, shutdown_type); +goto out_bad; +} + +SLOG(va_shutdown(), shutdown_type:%s, shutdown_type); + +ret = fork(); +if (ret == 0) { +/* child, start the shutdown */ +setsid(); +fclose(stdin); +fclose(stdout); +fclose(stderr); + +sleep(5); +ret = execl(/sbin/shutdown, shutdown, shutdown_flag, +0, +hypervisor initiated shutdown, (char*)NULL); +if (ret 0) { +LOG(execl() failed: %s, strerror(errno)); +exit(1); +} +TRACE(shouldn't be here); +exit(0); +} else if (ret 0) { +xmlrpc_faultf(env, fork() failed: %s, strerror(errno)); +} + +return result; +out_bad: +return NULL; +} + static int va_accept(int listen_fd) { struct sockaddr_in saddr; struct sockaddr *addr; @@ -156,6 +212,8 @@ static RPCFunction guest_functions[] = { .func_name = getfile }, { .func = getdmesg, .func_name = getdmesg }, +{ .func = va_shutdown, + .func_name = va_shutdown }, { NULL, NULL } }; static RPCFunction host_functions[] = { -- 1.7.0.4
[Qemu-devel] [RFC][PATCH v4 12/18] virtagent: add agent_ping monitor command
Monitor command to ping the RPC server. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hmp-commands.hx | 16 ++ qmp-commands.hx | 32 + virtagent.c | 84 +++ virtagent.h |3 ++ 4 files changed, 135 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index 3250e41..d3f642f 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1260,6 +1260,22 @@ STEXI Shutdown/reboot a guest locally ETEXI +{ +.name = agent_ping, +.args_type = , +.params = , +.help = Ping a guest, +.user_print = do_agent_ping_print, +.mhandler.cmd_async = do_agent_ping, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_ping +...@findex agent_ping +Ping a guest +ETEXI + STEXI @end table ETEXI diff --git a/qmp-commands.hx b/qmp-commands.hx index 0f983cc..1e798f5 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -838,6 +838,38 @@ Example: EQMP { +.name = agent_ping, +.args_type = , +.params = , +.help = Ping a guest, +.user_print = do_agent_ping_print, +.mhandler.cmd_async = do_agent_ping, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_ping +...@findex agent_ping +Ping a guest +ETEXI +SQMP +agent_ping + + +Ping a guest + +Arguments: + +(none) + +Example: + +- { execute: agent_ping } +- { return: { response:ok } } + +EQMP + +{ .name = qmp_capabilities, .args_type = , .params = , diff --git a/virtagent.c b/virtagent.c index a56aeac..9071131 100644 --- a/virtagent.c +++ b/virtagent.c @@ -503,3 +503,87 @@ int do_agent_shutdown(Monitor *mon, const QDict *mon_params, return 0; } + +void do_agent_ping_print(Monitor *mon, const QObject *data) +{ +QDict *qdict; +const char *response; + +TRACE(called); + +qdict = qobject_to_qdict(data); +response = qdict_get_str(qdict, response); +if (qdict_haskey(qdict, response)) { +monitor_printf(mon, %s, response); +} + +monitor_printf(mon, \n); +} + +static void do_agent_ping_cb(void *opaque) +{ +VARPCData *rpc_data = opaque; +xmlrpc_value *resp = NULL; +xmlrpc_env env; +QDict *qdict = qdict_new(); + +TRACE(called); + 
+if (rpc_data-status != VA_RPC_STATUS_OK) { +LOG(error handling RPC request); +qdict_put(qdict, response, qstring_from_str(error)); +goto out_no_resp; +} + +xmlrpc_env_init(env); +resp = xmlrpc_parse_response(env, rpc_data-resp_xml, + rpc_data-resp_xml_len); +if (rpc_has_error(env)) { +qdict_put(qdict, response, qstring_from_str(error)); +goto out_no_resp; +} +qdict_put(qdict, response, qstring_from_str(ok)); + +xmlrpc_DECREF(resp); +out_no_resp: +if (rpc_data-mon_cb) { +rpc_data-mon_cb(rpc_data-mon_data, QOBJECT(qdict)); +} +qobject_decref(QOBJECT(qdict)); +} + +/* + * do_agent_ping(): Ping a guest + */ +int do_agent_ping(Monitor *mon, const QDict *mon_params, + MonitorCompletion cb, void *opaque) +{ +xmlrpc_env env; +xmlrpc_value *params; +VARPCData *rpc_data; +int ret; + +xmlrpc_env_init(env); + +params = xmlrpc_build_value(env, (n)); +if (rpc_has_error(env)) { +return -1; +} + +rpc_data = qemu_mallocz(sizeof(VARPCData)); +rpc_data-cb = do_agent_ping_cb; +rpc_data-mon_cb = cb; +rpc_data-mon_data = opaque; + +ret = rpc_execute(env, va_ping, params, rpc_data); +if (ret == -EREMOTE) { +monitor_printf(mon, RPC Failed (%i): %s\n, env.fault_code, + env.fault_string); +return -1; +} else if (ret == -1) { +monitor_printf(mon, RPC communication error\n); +return -1; +} + +return 0; +} diff --git a/virtagent.h b/virtagent.h index 96c6260..071530c 100644 --- a/virtagent.h +++ b/virtagent.h @@ -31,5 +31,8 @@ int do_agent_viewdmesg(Monitor *mon, const QDict *mon_params, MonitorCompletion cb, void *opaque); int do_agent_shutdown(Monitor *mon, const QDict *mon_params, MonitorCompletion cb, void *opaque); +void do_agent_ping_print(Monitor *mon, const QObject *qobject); +int do_agent_ping(Monitor *mon, const QDict *mon_params, + MonitorCompletion cb, void *opaque); #endif /* VIRTAGENT_H */ -- 1.7.0.4
[Qemu-devel] [RFC][PATCH v4 16/18] virtagent: add va_send_hello() client function
This tells the host RPC server (QEMU) that we're up and running Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent.c | 55 +++ virtagent.h |1 + 2 files changed, 56 insertions(+), 0 deletions(-) diff --git a/virtagent.c b/virtagent.c index 4ec1b42..3be9082 100644 --- a/virtagent.c +++ b/virtagent.c @@ -728,3 +728,58 @@ int va_client_init_capabilities(void) return 0; } + +static void va_send_hello_cb(void *opaque) +{ +VARPCData *rpc_data = opaque; +xmlrpc_value *resp = NULL; +xmlrpc_env env; + +TRACE(called); + +if (rpc_data-status != VA_RPC_STATUS_OK) { +LOG(error handling RPC request); +return; +} + +xmlrpc_env_init(env); +resp = xmlrpc_parse_response(env, rpc_data-resp_xml, + rpc_data-resp_xml_len); +if (rpc_has_error(env)) { +LOG(error parsing RPC response); +return; +} + +xmlrpc_DECREF(resp); +} + +int va_send_hello(void) +{ +xmlrpc_env env; +xmlrpc_value *params; +VARPCData *rpc_data; +int ret; + +TRACE(called); + +xmlrpc_env_init(env); +params = xmlrpc_build_value(env, (s), dummy); +if (rpc_has_error(env)) { +return -1; +} + +rpc_data = qemu_mallocz(sizeof(VARPCData)); +rpc_data-cb = va_send_hello_cb; + +ret = rpc_execute(env, va_hello, params, rpc_data); +if (ret == -EREMOTE) { +LOG(RPC Failed (%i): %s, env.fault_code, +env.fault_string); +return -1; +} else if (ret == -1) { +LOG(RPC communication error); +return -1; +} + +return 0; +} diff --git a/virtagent.h b/virtagent.h index da4be60..83033e3 100644 --- a/virtagent.h +++ b/virtagent.h @@ -38,5 +38,6 @@ int do_agent_ping(Monitor *mon, const QDict *mon_params, void do_agent_capabilities_print(Monitor *mon, const QObject *qobject); int do_agent_capabilities(Monitor *mon, const QDict *mon_params, MonitorCompletion cb, void *opaque); +int va_send_hello(void); #endif /* VIRTAGENT_H */ -- 1.7.0.4
[Qemu-devel] [RFC][PATCH v4 13/18] virtagent: add agent_capabilities monitor function
Call guest agent's built-in introspection functions to get a list of supported RPCs, and re-negotiate guest agent capabilities to determine what agent_* commands are supported. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- hmp-commands.hx | 16 qmp-commands.hx | 32 virtagent.c | 107 +++ virtagent.h |3 ++ 4 files changed, 158 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index d3f642f..d68c1ba 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1276,6 +1276,22 @@ STEXI Ping a guest ETEXI +{ +.name = agent_capabilities, +.args_type = , +.params = , +.help = Fetch and re-negotiate guest agent capabilities, +.user_print = do_agent_capabilities_print, +.mhandler.cmd_async = do_agent_capabilities, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_capabilities +...@findex agent_capabilities +Fetch and re-negotiate guest agent capabilties +ETEXI + STEXI @end table ETEXI diff --git a/qmp-commands.hx b/qmp-commands.hx index 1e798f5..4ae0890 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -870,6 +870,38 @@ Example: EQMP { +.name = agent_capabilities, +.args_type = , +.params = , +.help = Fetch and re-negotiate guest agent capabilities, +.user_print = do_agent_capabilities_print, +.mhandler.cmd_async = do_agent_capabilities, +.flags = MONITOR_CMD_ASYNC, +}, + +STEXI +...@item agent_capabilities +...@findex agent_capabilities +Fetch and re-negotiate guest agent capabilties +ETEXI +SQMP +agent_capabilities + + +Fetch and re-negotiate guest agent capabilities + +Arguments: + +(none) + +Example: + +- { execute: agent_capabilities } +- { return:[va.shutdown, va.getdmesg, va.getfile, ... 
] } + +EQMP + +{ .name = qmp_capabilities, .args_type = , .params = , diff --git a/virtagent.c b/virtagent.c index 9071131..e0f7f99 100644 --- a/virtagent.c +++ b/virtagent.c @@ -587,3 +587,110 @@ int do_agent_ping(Monitor *mon, const QDict *mon_params, return 0; } + +static void va_print_capability_iter(QObject *obj, void *opaque) +{ +Monitor *mon = opaque; +QString *method = qobject_to_qstring(obj); +const char *method_str; + +if (method) { +method_str = qstring_get_str(method); +monitor_printf(mon, %s\n, method_str); +} +} + +void do_agent_capabilities_print(Monitor *mon, const QObject *data) +{ +QList *qlist; + +TRACE(called); + +monitor_printf(mon, the following RPC methods are supported by the guest agent:\n); +qlist = qobject_to_qlist(data); +qlist_iter(qlist, va_print_capability_iter, mon); +} + +static void do_agent_capabilities_cb(void *opaque) +{ +VARPCData *rpc_data = opaque; +xmlrpc_value *resp = NULL; +xmlrpc_value *cur_val = NULL; +const char *cur_method = NULL; +xmlrpc_env env; +QList *qlist = qlist_new(); +int i; + +TRACE(called); + +if (rpc_data-status != VA_RPC_STATUS_OK) { +LOG(error handling RPC request); +goto out_no_resp; +} + +TRACE(resp = %s\n, rpc_data-resp_xml); + +xmlrpc_env_init(env); +resp = xmlrpc_parse_response(env, rpc_data-resp_xml, + rpc_data-resp_xml_len); +if (rpc_has_error(env)) { +goto out_no_resp; +} + +/* extract the list of supported RPCs */ +for (i = 0; i xmlrpc_array_size(env, resp); i++) { +xmlrpc_array_read_item(env, resp, i, cur_val); +xmlrpc_read_string(env, cur_val, cur_method); +if (cur_method) { +TRACE(cur_method: %s, cur_method); +qlist_append_obj(qlist, QOBJECT(qstring_from_str(cur_method))); +} +xmlrpc_DECREF(cur_val); +} + +/* set our client capabilities accordingly */ +va_set_capabilities(qlist); + +xmlrpc_DECREF(resp); +out_no_resp: +if (rpc_data-mon_cb) { +rpc_data-mon_cb(rpc_data-mon_data, QOBJECT(qlist)); +} +qobject_decref(QOBJECT(qlist)); +} + +/* + * do_agent_capabilities(): Fetch/re-negotiate guest 
agent capabilities + */ +int do_agent_capabilities(Monitor *mon, const QDict *mon_params, + MonitorCompletion cb, void *opaque) +{ +xmlrpc_env env; +xmlrpc_value *params; +VARPCData *rpc_data; +int ret; + +xmlrpc_env_init(env); + +params = xmlrpc_build_value(env, ()); +if (rpc_has_error(env)) { +return -1; +} + +rpc_data = qemu_mallocz(sizeof(VARPCData)); +rpc_data-cb = do_agent_capabilities_cb; +rpc_data-mon_cb = cb; +rpc_data-mon_data = opaque; + +ret = rpc_execute(env, system.listMethods, params, rpc_data); +if (ret == -EREMOTE) { +monitor_printf(mon, RPC Failed (%i): %s\n, env.fault_code, +
Re: [Qemu-devel] Is there any approach to setup guest only network
Hi 2010/11/16 郭沐錫 maxgreg13...@gmail.com: Dear all I have set up two directories for QEMU. One of my qemu-ifup scripts is as follows (the other one differs only in its IP address) #!/bin/sh sudo modprobe tun sudo /sbin/ifconfig $1 up 192.168.2.52 netmask 255.255.255.0 broadcast 192.168.2.255 # IP masquerade sudo echo 1 > /proc/sys/net/ipv4/ip_forward sudo /sbin/iptables -N nat sudo /sbin/iptables -t nat -F sudo /sbin/iptables -t nat -A POSTROUTING -s 192.168.2.62 -j MASQUERADE sudo /sbin/iptables -t nat -A POSTROUTING -d 192.168.2.62 -o $1 To me, the scripts look good. Regarding the missing eth, are you sure it's not there even if you issue the ifconfig -a command? -- regards, Mulyadi Santosa Freelance Linux trainer and consultant blog: the-hydra.blogspot.com training: mulyaditraining.blogspot.com
[Qemu-devel] [RFC][PATCH v4 14/18] virtagent: add client capabilities init function
Non-monitor version of agent_capabilities monitor function. This is called by the local RPC server when it gets a hello from the guest agent to re-negotiate guest agent capabilities. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent.c | 34 ++ virtagent.h |1 + 2 files changed, 35 insertions(+), 0 deletions(-) diff --git a/virtagent.c b/virtagent.c index e0f7f99..4ec1b42 100644 --- a/virtagent.c +++ b/virtagent.c @@ -694,3 +694,37 @@ int do_agent_capabilities(Monitor *mon, const QDict *mon_params, return 0; } + +/* non-HMP/QMP RPC client functions */ + +int va_client_init_capabilities(void) +{ +xmlrpc_env env; +xmlrpc_value *params; +VARPCData *rpc_data; +int ret; + +xmlrpc_env_init(env); + +params = xmlrpc_build_value(env, ()); +if (rpc_has_error(env)) { +return -1; +} + +rpc_data = qemu_mallocz(sizeof(VARPCData)); +rpc_data-cb = do_agent_capabilities_cb; +rpc_data-mon_cb = NULL; +rpc_data-mon_data = NULL; + +ret = rpc_execute(env, system.listMethods, params, rpc_data); +if (ret == -EREMOTE) { +LOG(RPC Failed (%i): %s\n, env.fault_code, +env.fault_string); +return -1; +} else if (ret == -1) { +LOG(RPC communication error\n); +return -1; +} + +return 0; +} diff --git a/virtagent.h b/virtagent.h index c10ee35..da4be60 100644 --- a/virtagent.h +++ b/virtagent.h @@ -23,6 +23,7 @@ #define VA_MAX_CHUNK_SIZE 4096 /* max bytes at a time for get/send file */ int va_client_init(VPDriver *vp_drv, bool is_host); +int va_client_init_capabilities(void); void do_agent_viewfile_print(Monitor *mon, const QObject *qobject); int do_agent_viewfile(Monitor *mon, const QDict *mon_params, MonitorCompletion cb, void *opaque); -- 1.7.0.4
[Qemu-devel] [RFC][PATCH v4 15/18] virtagent: add va_hello RPC function
This RPC tells us the guest agent is up and ready, and invokes guest agent capability negotiation Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- virtagent-daemon.c | 19 +++ 1 files changed, 19 insertions(+), 0 deletions(-) diff --git a/virtagent-daemon.c b/virtagent-daemon.c index 137641d..ae306b9 100644 --- a/virtagent-daemon.c +++ b/virtagent-daemon.c @@ -194,6 +194,23 @@ static xmlrpc_value *va_ping(xmlrpc_env *env, return result; } +/* va_hello(): handle client startup notification + * rpc return values: none + */ + +static xmlrpc_value *va_hello(xmlrpc_env *env, + xmlrpc_value *param, + void *user_data) +{ +int ret = va_client_init_capabilities(); +TRACE(called); +SLOG(va_hello()); +if (ret 0) { +LOG(error setting initializing client capabilities); +} +return NULL; +} + static int va_accept(int listen_fd) { struct sockaddr_in saddr; struct sockaddr *addr; @@ -234,6 +251,8 @@ static RPCFunction guest_functions[] = { static RPCFunction host_functions[] = { { .func = va_ping, .func_name = va_ping }, +{ .func = va_hello, + .func_name = va_hello }, { NULL, NULL } }; -- 1.7.0.4
[Qemu-devel] Re: [RFC PATCH 8/8] device-assignment: pass through and stub more PCI caps
On Fri, Nov 12, 2010 at 08:42:38AM -0700, Alex Williamson wrote: On Fri, 2010-11-12 at 11:11 +0200, Michael S. Tsirkin wrote: On Thu, Nov 11, 2010 at 11:30:07PM -0700, Alex Williamson wrote: On Fri, 2010-11-12 at 07:36 +0200, Michael S. Tsirkin wrote: On Thu, Nov 11, 2010 at 07:56:46PM -0700, Alex Williamson wrote: Some drivers depend on finding capabilities like power management, PCI express/X, vital product data, or vendor specific fields. Now that we have better capability support, we can pass more of these tables through to the guest. Note that VPD and VNDR are direct pass through capabilies, the rest are mostly empty shells with a few writable bits where necessary. Signed-off-by: Alex Williamson alex.william...@redhat.com --- hw/device-assignment.c | 160 +--- 1 files changed, 149 insertions(+), 11 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 179c7dc..1b228ad 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -366,6 +366,27 @@ static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) return (uint8_t)assigned_dev_pci_read(d, pos, 1); } +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) +{ +AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); +ssize_t ret; +int fd = pci_dev-real_device.config_fd; + +again: +ret = pwrite(fd, val, len, pos); +if (ret != len) { + if ((ret 0) (errno == EINTR || errno == EAGAIN)) + goto again; do {} while() ? Sure, this is just a copy of another place that does something similar. They should either be merged or both converted in a separate patch. 
+ + fprintf(stderr, %s: pwrite failed, ret = %zd errno = %d\n, + __func__, ret, errno); + + exit(1); +} + +return; +} + static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap) { int id; @@ -1244,37 +1265,75 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) #endif #endif +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, +uint8_t cap_id, +uint32_t address, int len) +{ +uint8_t cap; + +switch (cap_id) { + +case PCI_CAP_ID_VPD: +cap = pci_find_capability(pci_dev, cap_id); +if (address - cap = PCI_CAP_FLAGS) { +return assigned_dev_pci_read(pci_dev, address, len); +} +break; + +case PCI_CAP_ID_VNDR: +cap = pci_find_capability(pci_dev, cap_id); +if (address - cap PCI_CAP_FLAGS) { +return assigned_dev_pci_read(pci_dev, address, len); +} +break; +} + +return pci_default_cap_read_config(pci_dev, cap_id, address, len); +} + static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint8_t cap_id, uint32_t address, uint32_t val, int len) { +uint8_t cap; + pci_default_cap_write_config(pci_dev, cap_id, address, val, len); switch (cap_id) { #ifdef KVM_CAP_IRQ_ROUTING case PCI_CAP_ID_MSI: #ifdef KVM_CAP_DEVICE_MSI -{ -uint8_t cap = pci_find_capability(pci_dev, cap_id); -if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { -assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); -} +cap = pci_find_capability(pci_dev, cap_id); +if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { +assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); } #endif break; case PCI_CAP_ID_MSIX: #ifdef KVM_CAP_DEVICE_MSIX -{ -uint8_t cap = pci_find_capability(pci_dev, cap_id); -if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { -assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); -} +cap = pci_find_capability(pci_dev, cap_id); +if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { +assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); } #endif break;
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On Mon, 15 Nov 2010, Anthony Liguori wrote: On 11/15/2010 09:45 AM, anthony.per...@citrix.com wrote: From: Anthony PERARD anthony.per...@citrix.com This option gives the ability to switch one accelerator like kvm, xen or the default one tcg. We can specify more than one accelerator by separating them with a comma. QEMU will try each one and use the first one that works. So, -accel xen,kvm,tcg which would try Xen support first, then KVM and finally tcg if none of the others work. Should use QemuOpts instead of parsing by hand. Ok for that. I'd rather it be presented as a -machine option too with accel=xen:kvm:tcg to specify order. This is not clear to me, did you mean that you prefer to have both -accel accels and -machine accel=accels options? For the -machine options, I can put it in qemu-config.c and it will be saved. Regards, -- Anthony PERARD
[Qemu-devel] [RFC][PATCH v4 18/18] virtagent: Makefile/configure changes to build virtagent bits
Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- Makefile|2 +- Makefile.target |2 +- configure | 25 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b4f93eb..f20032f 100644 --- a/Makefile +++ b/Makefile @@ -137,7 +137,7 @@ qemu-nbd$(EXESUF): qemu-nbd.o qemu-tool.o qemu-error.o $(oslib-obj-y) $(trace-ob qemu-io$(EXESUF): qemu-io.o cmd.o qemu-tool.o qemu-error.o $(oslib-obj-y) $(trace-obj-y) $(block-obj-y) $(qobject-obj-y) $(version-obj-y) qemu-timer-common.o -qemu-vp$(EXESUF): qemu-vp.o virtproxy.o qemu-tool.o qemu-error.o qemu-sockets.c $(oslib-obj-y) $(trace-obj-y) $(block-obj-y) $(qobject-obj-y) $(version-obj-y) qemu-timer-common.o +qemu-vp$(EXESUF): qemu-vp.o virtproxy.o virtagent.o virtagent-daemon.o virtagent-common.o qemu-tool.o qemu-error.o qemu-sockets.c $(oslib-obj-y) $(trace-obj-y) $(block-obj-y) $(qobject-obj-y) $(version-obj-y) qemu-timer-common.o qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(call quiet-command,sh $(SRC_PATH)/hxtool -h $ $@, GEN $@) diff --git a/Makefile.target b/Makefile.target index f08c435..829332c 100644 --- a/Makefile.target +++ b/Makefile.target @@ -164,7 +164,7 @@ endif #CONFIG_BSD_USER # System emulator target ifdef CONFIG_SOFTMMU -obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o virtproxy.o virtproxy-builtin.o +obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o balloon.o virtproxy.o virtproxy-builtin.o virtagent.o virtagent-daemon.o virtagent-common.o # virtio has to be here due to weird dependency between PCI and virtio-net. 
# need to fix this properly obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-serial-bus.o diff --git a/configure b/configure index 01bde83..67f5ed2 100755 --- a/configure +++ b/configure @@ -1264,6 +1264,31 @@ EOF fi ## +# xmlrpc-c probe + +# Look for the xmlrpc-c config program +if test -n $cross_prefix has ${cross_prefix}xmlrpc-c-config; then + xmlrpccconfig=${cross_prefix}xmlrpc-c-config +elif has xmlrpc-c-config; then + xmlrpccconfig=xmlrpc-c-config +else + feature_not_found xmlrpc-c +fi + +cat $TMPC EOF +#include xmlrpc.h +int main(void) { xmlrpc_env env; xmlrpc_env_init(env); return 0; } +EOF +xmlrpc_cflags=`$xmlrpccconfig --cflags 2 /dev/null` +xmlrpc_libs=`$xmlrpccconfig client server-util --libs 2 /dev/null` +if compile_prog $xmlrpc_cflags $xmlrpc_libs; then + libs_softmmu=$xmlrpc_libs $libs_softmmu + libs_tools=$xmlrpc_libs $libs_tools +else + feature_not_found xmlrpc-c +fi + +## # VNC TLS detection if test $vnc_tls != no ; then cat $TMPC EOF -- 1.7.0.4
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 11/16/2010 10:10 AM, Anthony PERARD wrote: On Mon, 15 Nov 2010, Anthony Liguori wrote: On 11/15/2010 09:45 AM, anthony.per...@citrix.com wrote: From: Anthony PERARDanthony.per...@citrix.com This option gives the ability to switch one accelerator like kvm, xen or the default one tcg. We can specify more than one accelerator by separate them by a comma. QEMU will try each one and use the first whose works. So, -accel xen,kvm,tcg which would try Xen support first, then KVM and finaly tcg if none of the other works. Should use QemuOpts instead of parsing by hand. Ok for that. I'd rather it be presented as a -machine option too with accel=xen:kvm:tcg to specify order. This is not clear to me, did you mean that you prefer to have both -accel accels and -machine accel=accels options? Just -machine accel=accels. Part of my rationale is that accelerator is a machine property. If you do -M xenpv it ought to imply -machine accel=xen. Regards, Anthony Liguori For the -machine options, I can put it in qemu-config.c and it will be saved. Regards,
[Qemu-devel] Re: [PATCH comment tweaked] msix: allow byte and word reading from mmio
Am 16.11.2010 14:14, schrieb m...@redhat.com: Although explicitly disallowed by the PCI spec, some guests read a single byte or word from mmio. Likely a guest OS bug, but I have an OS which reads single bytes and it works fine on real hardware. Signed-off-by: Bernhard Kohl bernhard.k...@nsn.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- OK so it could look something like the below. Yes, this looks good to me. However, my question is: do we need to put this in or can the guest simply be fixed? I tried to locate the code where the readw occurs, but was not successful. It only occurs during booting our OS, and the virtio-net driver seems to be OK. With 4 virtio NICs we have the following readw accesses, that's all! 3 accesses per NIC and the first NIC appears twice. MSI-X: msix_mmio_readw dev=0x9767c58 addr=0008 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0018 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0028 MSI-X: msix_mmio_readw dev=0x9772c40 addr=0008 MSI-X: msix_mmio_readw dev=0x9772c40 addr=0018 MSI-X: msix_mmio_readw dev=0x9772c40 addr=0028 MSI-X: msix_mmio_readw dev=0x977dc38 addr=0008 MSI-X: msix_mmio_readw dev=0x977dc38 addr=0018 MSI-X: msix_mmio_readw dev=0x977dc38 addr=0028 MSI-X: msix_mmio_readw dev=0x9788d90 addr=0008 MSI-X: msix_mmio_readw dev=0x9788d90 addr=0018 MSI-X: msix_mmio_readw dev=0x9788d90 addr=0028 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0008 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0018 MSI-X: msix_mmio_readw dev=0x9767c58 addr=0028 Is it possible to add a stack backtrace printing to the readw function? 
hw/msix.c | 31 +++ 1 files changed, 27 insertions(+), 4 deletions(-) diff --git a/hw/msix.c b/hw/msix.c index f66d255..38dff59 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -102,10 +102,28 @@ static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) return pci_get_long(page + offset); } -static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) + /* Note: + * PCI spec requires that all MSI-X table accesses are either DWORD or QWORD, + * size aligned. Some guests seem to violate this rule for read accesses, + * performing single byte reads. Since it's easy to support this, let's do so. + * Also support 16 bit size aligned reads, just in case. + */ +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) { -fprintf(stderr, MSI-X: only dword read is allowed!\n); -return 0; +PCIDevice *dev = opaque; +unsigned int offset = addr (MSIX_PAGE_SIZE - 1) ~0x1; +void *page = dev-msix_table_page; + +return pci_get_word(page + offset); +} + +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) +{ +PCIDevice *dev = opaque; +unsigned int offset = addr (MSIX_PAGE_SIZE - 1); +void *page = dev-msix_table_page; + +return pci_get_byte(page + offset); } static uint8_t msix_pending_mask(int vector) @@ -192,6 +210,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, msix_handle_mask_update(dev, vector); } +/* PCI spec: + * For all accesses to MSI-X Table and MSI-X PBA fields, software must use + * aligned full DWORD or aligned full QWORD transactions; otherwise, the result + * is undefined. + */ static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, uint32_t val) { @@ -203,7 +226,7 @@ static CPUWriteMemoryFunc * const msix_mmio_write[] = { }; static CPUReadMemoryFunc * const msix_mmio_read[] = { -msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl +msix_mmio_readb, msix_mmio_readw, msix_mmio_readl }; /* Should be called from device's map method. */
[Qemu-devel] [RFC][PATCH v4 17/18] virtagent: qemu-vp, va_send_hello() on startup
Make the hello call on guest agent startup so QEMU can do whatever init it needs (currently, capabilities negotiation). Temporarily commented out due to this tending to induce a virtio bug in RHEL 6.0. As a result capabilities negotiation must be invoked manually from QEMU via the agent_capabilities monitor command. Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com --- qemu-vp.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/qemu-vp.c b/qemu-vp.c index 38959e5..b8af513 100644 --- a/qemu-vp.c +++ b/qemu-vp.c @@ -580,6 +580,8 @@ int main(int argc, char **argv) errx(EXIT_FAILURE, error initializing guest agent); } +/* tell the host the agent is running */ +//va_send_hello(); } /* main i/o loop */ -- 1.7.0.4
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On Tue, Nov 16, 2010 at 10:41:25AM -0600, Anthony Liguori wrote: On 11/16/2010 10:10 AM, Anthony PERARD wrote: On Mon, 15 Nov 2010, Anthony Liguori wrote: On 11/15/2010 09:45 AM, anthony.per...@citrix.com wrote: From: Anthony PERARDanthony.per...@citrix.com This option gives the ability to switch one accelerator like kvm, xen or the default one tcg. We can specify more than one accelerator by separate them by a comma. QEMU will try each one and use the first whose works. So, -accel xen,kvm,tcg which would try Xen support first, then KVM and finaly tcg if none of the other works. Should use QemuOpts instead of parsing by hand. Ok for that. I'd rather it be presented as a -machine option too with accel=xen:kvm:tcg to specify order. This is not clear to me, did you mean that you prefer to have both -accel accels and -machine accel=accels options? Just -machine accel=accels. Part of my rational is that accelerator is a machine property. If you do -M xenpv it ought to imply -machine accel=xen. Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Regards, Daniel -- |: Red Hat, Engineering, London-o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
[Qemu-devel] Re: [PATCH v3 2/3] virtio-pci: Use ioeventfd for virtqueue notify
On Tue, Nov 16, 2010 at 4:02 PM, Michael S. Tsirkin m...@redhat.com wrote: On Fri, Nov 12, 2010 at 01:24:28PM +, Stefan Hajnoczi wrote: Virtqueue notify is currently handled synchronously in userspace virtio. This prevents the vcpu from executing guest code while hardware emulation code handles the notify. On systems that support KVM, the ioeventfd mechanism can be used to make virtqueue notify a lightweight exit by deferring hardware emulation to the iothread and allowing the VM to continue execution. This model is similar to how vhost receives virtqueue notifies. The result of this change is improved performance for userspace virtio devices. Virtio-blk throughput increases especially for multithreaded scenarios and virtio-net transmit throughput increases substantially. Some virtio devices are known to have guest drivers which expect a notify to be processed synchronously and spin waiting for completion. Only enable ioeventfd for virtio-blk and virtio-net for now. Care must be taken not to interfere with vhost-net, which already uses ioeventfd host notifiers. The following list shows the behavior implemented in this patch and is designed to take vhost-net into account: * VIRTIO_CONFIG_S_DRIVER_OK - assign host notifiers, qemu_set_fd_handler(virtio_pci_host_notifier_read) * !VIRTIO_CONFIG_S_DRIVER_OK - qemu_set_fd_handler(NULL), deassign host notifiers * virtio_pci_set_host_notifier(true) - qemu_set_fd_handler(NULL) * virtio_pci_set_host_notifier(false) - qemu_set_fd_handler(virtio_pci_host_notifier_read) Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com --- hw/virtio-pci.c | 152 ++ hw/virtio.c | 14 - hw/virtio.h | 13 + 3 files changed, 153 insertions(+), 26 deletions(-) Now toggles host notifiers based on VIRTIO_CONFIG_S_DRIVER_OK status changes. The cleanest way I could see was to introduce pre and a post set_status() callbacks. They allow a binding to hook status changes, including the status change from virtio_reset(). 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 549118d..117e855 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -83,6 +83,11 @@ /* Flags track per-device state like workarounds for quirks in older guests. */ #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG (1 0) +/* Performance improves when virtqueue kick processing is decoupled from the + * vcpu thread using ioeventfd for some devices. */ +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT 1 +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) + /* QEMU doesn't strictly need write barriers since everything runs in * lock-step. We'll leave the calls to wmb() in though to make it obvious for * KVM or if kqemu gets SMP support. @@ -179,12 +184,125 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f) return 0; } +static int virtio_pci_set_host_notifier_ioeventfd(VirtIOPCIProxy *proxy, + int n, bool assign) +{ + VirtQueue *vq = virtio_get_queue(proxy-vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + int r; + if (assign) { + r = event_notifier_init(notifier, 1); + if (r 0) { + return r; + } + r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); + if (r 0) { + event_notifier_cleanup(notifier); + } + } else { + r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); + if (r 0) { + return r; + } + event_notifier_cleanup(notifier); + } + return r; +} + +static void virtio_pci_host_notifier_read(void *opaque) +{ + VirtQueue *vq = opaque; + EventNotifier *n = virtio_queue_get_host_notifier(vq); + if (event_notifier_test_and_clear(n)) { + virtio_queue_notify_vq(vq); + } +} + +static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy, + int n, bool assign) +{ + VirtQueue *vq = virtio_get_queue(proxy-vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + if (assign) { + 
qemu_set_fd_handler(event_notifier_get_fd(notifier), + virtio_pci_host_notifier_read, NULL, vq); + } else { + qemu_set_fd_handler(event_notifier_get_fd(notifier), + NULL, NULL, NULL); + } +} + +static int virtio_pci_set_host_notifiers(VirtIOPCIProxy *proxy, bool assign) +{ + int n, r; + + for (n = 0; n VIRTIO_PCI_QUEUE_MAX;
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 16.11.2010, at 17:55, Daniel P. Berrange wrote: On Tue, Nov 16, 2010 at 10:41:25AM -0600, Anthony Liguori wrote: On 11/16/2010 10:10 AM, Anthony PERARD wrote: On Mon, 15 Nov 2010, Anthony Liguori wrote: On 11/15/2010 09:45 AM, anthony.per...@citrix.com wrote: From: Anthony PERARDanthony.per...@citrix.com This option gives the ability to switch one accelerator like kvm, xen or the default one tcg. We can specify more than one accelerator by separate them by a comma. QEMU will try each one and use the first whose works. So, -accel xen,kvm,tcg which would try Xen support first, then KVM and finaly tcg if none of the other works. Should use QemuOpts instead of parsing by hand. Ok for that. I'd rather it be presented as a -machine option too with accel=xen:kvm:tcg to specify order. This is not clear to me, did you mean that you prefer to have both -accel accels and -machine accel=accels options? Just -machine accel=accels. Part of my rational is that accelerator is a machine property. If you do -M xenpv it ought to imply -machine accel=xen. Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. Alex
Re: [Qemu-devel] [PATCH v2 3/3] trace: enable all events by default
On Thu, Nov 11, 2010 at 9:31 AM, Stefan Hajnoczi stefa...@gmail.com wrote: On Wed, Nov 10, 2010 at 9:20 PM, Lluís xscr...@gmx.net wrote: Blue Swirl writes: On Wed, Nov 10, 2010 at 7:57 PM, Lluís xscr...@gmx.net wrote: Blue Swirl writes: On Wed, Nov 10, 2010 at 5:59 PM, Lluís xscr...@gmx.net wrote: So, my patch is just a matter of having all events available _only_ when you use a backend other than nop. Then the default for simpletrace (and dtrace?) should also be disabled initial state. If you have 1000 tracepoints automatically enabled at start (with simpletrace, not nop), disabling all of them if you are only interested in a few of them is difficult. That's how it is. The patch sets always .state=0 in simple, and dtrace just does not do anything unless you plug something into the probe (similarly with ust). Oh, I missed .state=0 part, it was also not mentioned in the description. Then these changes should be OK. My fault. If these are to be merged I can cook up a new series with a more verbose description. Jan Kiszka suggested a solution for the simple trace backend: have a qemu -trace events=file command-line option that enables the trace events listed in the file on startup. Then we can really get rid of disable. On LTTng UST and SystemTap building with everything in is a good default, the user must explicitly enable probes at runtime with those trace backends anyway. Currently .state = 0 makes the simple backend less usable since it's a drag to manually enable trace events from the monitor every time. Hi Lluis, Just a poke to say I've looked at this patchset. This patchset will make the simple trace backend hard to use. If you implement the -trace events=file command-line option mentioned above then we can consolidate the 'disable' behavior and apply the patchset. If you don't get around to it I'll do it sometime but I'm short on time right now. Wanted to let you know in case you're waiting for more discussion on this thread. Stefan
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 11/16/2010 10:55 AM, Daniel P. Berrange wrote: Just -machine accel=accels. Part of my rational is that accelerator is a machine property. If you do -M xenpv it ought to imply -machine accel=xen. Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Xenner is very different than Xen so I'd rather there be a separate machine type. Too much magic makes debugging difficult because it's wildly different code paths even given the same invocation depending on whether you're on a dom0, a domU, or a KVM-enabled normal Linux. Regards, Anthony Liguori Regards, Daniel
[Qemu-devel] Re: Hitting 29 NIC limit
On Thu, Oct 14, 2010 at 05:17:36PM -0500, Anthony Liguori wrote: On 10/14/2010 05:12 PM, Anjali Kulkarni wrote: Thanks. Does this work for e1000 as well? Haven't tried. I don't know how various e1000 drivers would react. Also, does it support pci hotplug? No, but that's fixable down the road. Regards, Anthony Liguori Probably not.
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 11/16/2010 10:59 AM, Alexander Graf wrote: Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. No, an accelerator is both a CPU selection and a machine characteristic. For KVM, we overload -cpu to modify both the KVM CPU and the TCG CPU but this won't work with accel=xen. We probably shouldn't do this with KVM either because there's a significant difference between trying to do cpuid masking with KVM and modifying the TCG cpu emulation support. Both KVM and Xen have other impacts on the platform devices though. KVM does not support SMM so it disables that in the i440fx. KVM prefers to use its own in-kernel local APIC (and IOAPIC). That makes it a property of the machine. Regards, Anthony Liguori Alex
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 16.11.2010, at 18:20, Anthony Liguori wrote: On 11/16/2010 10:59 AM, Alexander Graf wrote: Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. No, an accelerator is both a CPU selection and a machine characteristic. For KVM, we overload -cpu to modify both the KVM CPU and the TCG CPU both this won't work with accel=xen. We probably shouldn't do this with KVM either because there's a significant different between trying to do cpuid masking with KVM and modifying the TCG cpu emulation support. Both KVM and Xen have other impacts on the platform devices though. KVM does not support SMM so it disables that in the i440fx. KVM prefers to use it's own in-kernel local APIC (and IOAPIC). That makes it a property of the machine. So you're saying the machine should define an accel mask of accels it supports? However all this ends up internally, giving the user an easy option to choose accels would still be nice. Users don't use -device or -machine. They want shortcuts :). Alex
[Qemu-devel] Re: Hitting 29 NIC limit
On Thu, Oct 14, 2010 at 02:07:17PM +0200, Avi Kivity wrote: On 10/14/2010 12:54 AM, Anthony Liguori wrote: On 10/13/2010 05:32 PM, Anjali Kulkarni wrote: Hi, Using the legacy way of starting up NICs, I am hitting a limitation after 29 NICs ie no more than 29 are detected (that's because of the 32 PCI slot limit on a single bus- 3 are already taken up) I had initially increased the MAX_NICS to 48, just on my tree, to get to more, but ofcource that wont work. Is there any way to go beyond 29 NICs the legacy way? What is the maximum that can be supported by the qdev mothod? I got up to 104 without trying very hard using the following script: args= for slot in 5 6 7 8 9 10 11 12 13 14 15 16 17; do for fn in 0 1 2 3 4 5 6 7; do args=$args -netdev user,id=eth${slot}_${fn} args=$args -device virtio-net-pci,addr=${slot}.${fn},netdev=eth${slot}_${fn},multifunction=on,romfile= done done x86_64-softmmu/qemu-system-x86_64 -hda ~/images/linux.img ${args} -enable-kvm The key is to make the virtio-net devices multifunction and to fill out all 8 functions for each slot. This is unlikely to work right wrt pci hotplug. If we want to support a large number of interfaces, we need true multiport cards. Bridge support seems to be working mostly fine here. That will let you go up to 256 devices without multifunction. -- MST
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 11/16/2010 11:24 AM, Alexander Graf wrote: On 16.11.2010, at 18:20, Anthony Liguori wrote: On 11/16/2010 10:59 AM, Alexander Graf wrote: Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. No, an accelerator is both a CPU selection and a machine characteristic. For KVM, we overload -cpu to modify both the KVM CPU and the TCG CPU both this won't work with accel=xen. We probably shouldn't do this with KVM either because there's a significant different between trying to do cpuid masking with KVM and modifying the TCG cpu emulation support. Both KVM and Xen have other impacts on the platform devices though. KVM does not support SMM so it disables that in the i440fx. KVM prefers to use it's own in-kernel local APIC (and IOAPIC). That makes it a property of the machine. So you're saying the machine should define an accel mask of accels it supports? However all this ends up internally, giving the user an easy option to choose accels would still be nice. Users don't use -device or -machine. They want shortcuts :). Users want things to just work. That's why -M xenpv should imply -machine accel=xen. A user should never have to specify an accelerator option IMHO. Regards, Anthony Liguori Alex
[Qemu-devel] Re: [PATCH 0/2] msi support for virtfs
On Thu, Nov 11, 2010 at 12:59:24PM +0100, Gerd Hoffmann wrote: Hi, This tiny patch series adds msi support for virtfs. It's two patches only because we need a compat property to stay compatible with -stable and we don't have a pc-0.14 machine type yet, so this is added first. Scared me ... you really mean msix. Acked-by: Michael S. Tsirkin m...@redhat.com please apply, Gerd Gerd Hoffmann (2): pc: add 0.13 pc machine type virtfs: enable MSI-X hw/pc_piix.c| 18 +- hw/virtio-pci.c |5 - 2 files changed, 21 insertions(+), 2 deletions(-)
Re: [Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability
On 11/16/2010 11:43 AM, Peter Maydell wrote: On 16 November 2010 15:46, Anthony Liguorianth...@codemonkey.ws wrote: On 11/08/2010 01:33 PM, Daniel P. Berrange wrote: This introduces a new tracing backend that targets the SystemTAP implementation of DTrace userspace tracing. Applied both. Thanks. Unfortunately these commits: 2834c3e Add support for generating a systemtap tapset static probes 4addb11 Add a DTrace tracing backend targetted for SystemTAP compatability What's your configure output? I don't have the right environment to build with systemtap support, but --trace-backend=nop should work regardless. Regards, Anthony Liguori seem to have broken building on x86: git clone git://git.qemu.org/qemu.git cd qemu ./configure make fails with LINK i386-softmmu/trace /usr/lib/gcc/x86_64-linux-gnu/4.4.5/../../../../lib/crt1.o: In function `_start': (.text+0x20): undefined reference to `main' collect2: ld returned 1 exit status Incidentally, although trace.c is autogenerated, if you delete it and then type make this does not cause it to be regenerated, which seems wrong to me. -- PMM
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 16.11.2010, at 18:27, Anthony Liguori wrote: On 11/16/2010 11:24 AM, Alexander Graf wrote: On 16.11.2010, at 18:20, Anthony Liguori wrote: On 11/16/2010 10:59 AM, Alexander Graf wrote: Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. No, an accelerator is both a CPU selection and a machine characteristic. For KVM, we overload -cpu to modify both the KVM CPU and the TCG CPU both this won't work with accel=xen. We probably shouldn't do this with KVM either because there's a significant different between trying to do cpuid masking with KVM and modifying the TCG cpu emulation support. Both KVM and Xen have other impacts on the platform devices though. KVM does not support SMM so it disables that in the i440fx. KVM prefers to use it's own in-kernel local APIC (and IOAPIC). That makes it a property of the machine. So you're saying the machine should define an accel mask of accels it supports? However all this ends up internally, giving the user an easy option to choose accels would still be nice. Users don't use -device or -machine. They want shortcuts :). User's want things to just work. That's why -M xenpv should imply -machine accel=xen. No, it should imply -machine accel=xen,kvm,tcg. Which should be the default anyways. For machines that don't work with specific accels, I'd agree that a machine should be able to mask those out. But in general, we'll always get back to defaulting to xen,kvm,tcg. A user should never have to specify and accelerator option IMHO. If things don't work out, he should be able to do things like -no-kvm for bug hunting. In normal use cases, I tend to agree. Things should just work automatically. Alex
[Qemu-devel] [Bug 563582] Re: KVM 9.10 crashes for suse-10 as guest
Please do not file bugs against upstream unless they've been explicitly reproduced against an upstream version. Thanks. ** Changed in: qemu Status: New = Invalid -- KVM 9.10 crashes for suse-10 as guest https://bugs.launchpad.net/bugs/563582 You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. Status in QEMU: Invalid Status in “qemu-kvm” package in Ubuntu: Incomplete Bug description: Binary package hint: qemu-kvm I have tried KVM of Ubuntu 9.10 Karmic,Koala,Version for virtualization; on 64 bit hardware platform I have installed, 4 VM's (Win2k3, TWO WIN-XP's , One Suse-10 Enterprise Edition); I observe that suse-10 virtual machine, crashes after about 2-5 hours; Now after I have installed oracle,in suse-10 PC seems to crash (KVM crash after 1 hour; Anyway to see the logs after the crash. How to analyse such abnormal activities;Pls guide me I read your post on changing the network card interface to e1000,r8139 it does not work for me; Can you please suggest, how to resolve this issue since it is very urgent and important; Many thanks Raghav
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On Tue, 16 Nov 2010, Anthony Liguori wrote: On 11/16/2010 11:24 AM, Alexander Graf wrote: On 16.11.2010, at 18:20, Anthony Liguori wrote: On 11/16/2010 10:59 AM, Alexander Graf wrote: Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. No, an accelerator is both a CPU selection and a machine characteristic. For KVM, we overload -cpu to modify both the KVM CPU and the TCG CPU both this won't work with accel=xen. We probably shouldn't do this with KVM either because there's a significant different between trying to do cpuid masking with KVM and modifying the TCG cpu emulation support. Both KVM and Xen have other impacts on the platform devices though. KVM does not support SMM so it disables that in the i440fx. KVM prefers to use it's own in-kernel local APIC (and IOAPIC). That makes it a property of the machine. So you're saying the machine should define an accel mask of accels it supports? However all this ends up internally, giving the user an easy option to choose accels would still be nice. Users don't use -device or -machine. They want shortcuts :). User's want things to just work. That's why -M xenpv should imply -machine accel=xen. Actually, it works like that for qemu-xen, we just specify -M xenpv, and we don't have any --enable-xen or other -accel xen. A user should never have to specify and accelerator option IMHO. -- Anthony PERARD
[Qemu-devel] Re: [PATCHv4 15/15] Pass boot device list to firmware.
2010/11/16 Gleb Natapov g...@redhat.com: On Mon, Nov 15, 2010 at 08:29:24PM +, Blue Swirl wrote: 2010/11/15 Gleb Natapov g...@redhat.com: On Sun, Nov 14, 2010 at 10:50:13PM +, Blue Swirl wrote: On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov g...@redhat.com wrote: Signed-off-by: Gleb Natapov g...@redhat.com --- hw/fw_cfg.c | 14 ++ hw/fw_cfg.h | 4 +++- sysemu.h | 1 + vl.c | 51 +++ 4 files changed, 69 insertions(+), 1 deletions(-) diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c index 7b9434f..f6a67db 100644 --- a/hw/fw_cfg.c +++ b/hw/fw_cfg.c @@ -53,6 +53,7 @@ struct FWCfgState { FWCfgFiles *files; uint16_t cur_entry; uint32_t cur_offset; + Notifier machine_ready; }; static void fw_cfg_write(FWCfgState *s, uint8_t value) @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s, const char *filename, uint8_t *data, return 1; } +static void fw_cfg_machine_ready(struct Notifier* n) +{ + uint32_t len; + char *bootindex = get_boot_devices_list(len); + + fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready), + FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len); +} + FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port, target_phys_addr_t ctl_addr, target_phys_addr_t data_addr) { @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port, fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu); + + s-machine_ready.notify = fw_cfg_machine_ready; + qemu_add_machine_init_done_notifier(s-machine_ready); + return s; } diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h index 856bf91..4d61410 100644 --- a/hw/fw_cfg.h +++ b/hw/fw_cfg.h @@ -30,7 +30,9 @@ #define FW_CFG_FILE_FIRST 0x20 #define FW_CFG_FILE_SLOTS 0x10 -#define FW_CFG_MAX_ENTRY (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS) +#define FW_CFG_FILE_LAST_SLOT (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS) +#define FW_CFG_BOOTINDEX (FW_CFG_FILE_LAST_SLOT + 1) +#define FW_CFG_MAX_ENTRY FW_CFG_BOOTINDEX This should be #define FW_CFG_MAX_ENTRY (FW_CFG_BOOTINDEX + 1) because the check 
is like this: if ((key FW_CFG_ENTRY_MASK) = FW_CFG_MAX_ENTRY) { s-cur_entry = FW_CFG_INVALID; Yeah, will fix. With that change, I got the bootindex passed to OpenBIOS: OpenBIOS for Sparc64 Configuration device id QEMU version 1 machine id 0 kernel cmdline CPUs: 1 x SUNW,UltraSPARC-IIi UUID: ---- bootindex num_strings 1 bootindex /p...@01fe/i...@5/dr...@1/d...@0 The device path does not match exactly, but it's close: /p...@1fe,0/pci-...@5/i...@600/d...@0 pbm-pci should be solvable by the patch at the end. Were in the spec it is allowed to abbreviate 1fe as 1fe,0? Spec allows to drop starting zeroes but TARGET_FMT_plx definition in targphys.h has 0 after %. I can define another one without leading zeroes. Can you suggest a name? I think OpenBIOS for Sparc64 is not correct here, so it may be a bad reference architecture. OBP on a real Ultra-5 used a path like this: /p...@1f,0/p...@1,1/i...@3/d...@0,0 p...@1f,0 specifies the PCI host bridge at UPA bus port ID of 0x1f. According to device name qemu creates pci controller is memory mapped at address 1fe and by looking at the code I can see that this is indeed the case. How is UPA naming works? No idea. p...@1,1 specifies a PCI-PCI bridge. TARGET_FMT_lx is poisoned. As of ATA there is no open firmware binding spec for ATA, so everyone does what he pleases. I based my implementation on what open firmware showing when running on qemu x86. pci-ata should be ide according to PCI binding spec :) Yes, for example there is no ATA in the Ultra-5 tree but in UltraAX it exists: /p...@1f,4000/i...@3/a...@0,0/c...@0,0 diff --git a/hw/apb_pci.c b/hw/apb_pci.c index c619112..643aa49 100644 --- a/hw/apb_pci.c +++ b/hw/apb_pci.c @@ -453,6 +453,7 @@ static PCIDeviceInfo pbm_pci_host_info = { static SysBusDeviceInfo pbm_host_info = { .qdev.name = pbm, + .qdev.fw_name = pci, Perhaps the FW path should use device class names if no name is specified. What do you mean by device class name. 
We can do something like this: if (dev-child_bus.lh_first) return dev-child_bus.lh_first-info-name; i.e if there is child bus use its bus name as fw name. This will make all pci devices to have pci as fw name automatically. The problem is that theoretically same device can provide different buses. I meant PCI class name, like display for display controllers, network for NICs etc. I'll try Sparc32 to see how this fits
[Qemu-devel] Re: [PATCH] add a command line option to specify the interface to send multicast packets on
On Wed, Nov 10, 2010 at 05:47:35PM -0800, Mike Ryan wrote: Add an option to specify the host interface to send multicast packets on when using a multicast socket for networking. The option takes the name of a host interface (e.g., eth0) and sets the IP_MULTICAST_IF socket option, which causes the packets to use that interface as an egress. This is useful if the host machine has several interfaces with several virtual networks across disparate interfaces. @@ -201,6 +203,23 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr) goto fail; } +/* If an interface name is given, only send packets out that interface */ +if (interface != NULL) { +strncpy(ifr.ifr_name, interface, IFNAMSIZ); +ret = ioctl(fd, SIOCGIFADDR, ifr); +if (ret 0) { +fprintf(stderr, qemu: error: specified interface \%s\ does not exist\n, interface); +goto fail; +} + +maddr = ((struct sockaddr_in *)ifr.ifr_addr)-sin_addr; +ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, maddr, sizeof(maddr)); +if (ret 0) { +perror(setsockopt(IP_MULTICAST_IF)); +goto fail; +} Let's let the user pass in the IP address? That would solve the portability issue. Further, we could be doing IPv6, right? So you'd need IPV6_MULTICAST_IF? Also - you might also want to control IP_MULTICAST_LOOP/IPV6_MULTICAST_LOOP? 
+} + socket_set_nonblock(fd); return fd; fail: @@ -248,7 +267,7 @@ static NetSocketState *net_socket_fd_init_dgram(VLANState *vlan, return NULL; } /* clone dgram socket */ - newfd = net_socket_mcast_create(saddr); + newfd = net_socket_mcast_create(saddr, NULL); if (newfd 0) { /* error already reported by net_socket_mcast_create() */ close(fd); @@ -468,7 +487,8 @@ static int net_socket_connect_init(VLANState *vlan, static int net_socket_mcast_init(VLANState *vlan, const char *model, const char *name, - const char *host_str) + const char *host_str, + const char *interface) { NetSocketState *s; int fd; @@ -478,7 +498,7 @@ static int net_socket_mcast_init(VLANState *vlan, return -1; -fd = net_socket_mcast_create(saddr); +fd = net_socket_mcast_create(saddr, interface); if (fd 0) return -1; @@ -505,8 +525,9 @@ int net_init_socket(QemuOpts *opts, if (qemu_opt_get(opts, listen) || qemu_opt_get(opts, connect) || -qemu_opt_get(opts, mcast)) { -error_report(listen=, connect= and mcast= is invalid with fd=); +qemu_opt_get(opts, mcast) || +qemu_opt_get(opts, interface)) { +error_report(listen=, connect=, mcast= and interface= is invalid with fd=\n); return -1; } @@ -524,8 +545,9 @@ int net_init_socket(QemuOpts *opts, if (qemu_opt_get(opts, fd) || qemu_opt_get(opts, connect) || -qemu_opt_get(opts, mcast)) { -error_report(fd=, connect= and mcast= is invalid with listen=); +qemu_opt_get(opts, mcast) || +qemu_opt_get(opts, interface)) { +error_report(fd=, connect=, mcast= and interface= is invalid with listen=\n); return -1; } @@ -539,8 +561,9 @@ int net_init_socket(QemuOpts *opts, if (qemu_opt_get(opts, fd) || qemu_opt_get(opts, listen) || -qemu_opt_get(opts, mcast)) { -error_report(fd=, listen= and mcast= is invalid with connect=); +qemu_opt_get(opts, mcast) || +qemu_opt_get(opts, interface)) { +error_report(fd=, listen=, mcast= and interface= is invalid with connect=\n); return -1; } @@ -550,7 +573,7 @@ int net_init_socket(QemuOpts *opts, return -1; } } else if 
(qemu_opt_get(opts, mcast)) { -const char *mcast; +const char *mcast, *interface; if (qemu_opt_get(opts, fd) || qemu_opt_get(opts, connect) || @@ -560,8 +583,9 @@ int net_init_socket(QemuOpts *opts, } mcast = qemu_opt_get(opts, mcast); +interface = qemu_opt_get(opts, interface); -if (net_socket_mcast_init(vlan, socket, name, mcast) == -1) { +if (net_socket_mcast_init(vlan, socket, name, mcast, interface) == -1) { return -1; } } else { diff --git a/qemu-common.h b/qemu-common.h index b3957f1..e8bc4af 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -34,6 +34,7 @@ typedef struct DeviceState DeviceState; #include fcntl.h #include sys/stat.h #include assert.h
[Qemu-devel] Re: [PATCH] spice: add qxl device
On Tue, Nov 02, 2010 at 02:34:58PM +0100, Gerd Hoffmann wrote: +if (ram_size 32 * 1024 * 1024) +ram_size = 32 * 1024 * 1024; +vga_common_init(vga, ram_size); +vga_init(vga); +register_ioport_write(0x3c0, 16, 1, qxl_vga_ioport_write, vga); +register_ioport_write(0x3b4, 2, 1, qxl_vga_ioport_write, vga); +register_ioport_write(0x3d4, 2, 1, qxl_vga_ioport_write, vga); +register_ioport_write(0x3ba, 1, 1, qxl_vga_ioport_write, vga); +register_ioport_write(0x3da, 1, 1, qxl_vga_ioport_write, vga); + +vga-ds = graphic_console_init(qxl_hw_update, qxl_hw_invalidate, + qxl_hw_screen_dump, qxl_hw_text_update, qxl); +qxl-ssd.ds = vga-ds; +qxl-ssd.bufsize = (16 * 1024 * 1024); +qxl-ssd.buf = qemu_malloc(qxl-ssd.bufsize); + +qxl0 = qxl; What happens when this device is then removed? +register_displaychangelistener(vga-ds, display_listener); + +if (qxl-pci.romfile == NULL) { +if (pci_device_id == 0x01ff) { +qxl-pci.romfile = qemu_strdup(vgabios-qxldev.bin); +} else { +qxl-pci.romfile = qemu_strdup(vgabios-qxl.bin); +} +} +pci_config_set_class(config, PCI_CLASS_DISPLAY_VGA); +} else { +if (ram_size 16 * 1024 * 1024) +ram_size = 16 * 1024 * 1024; +qxl-vga.vram_size = ram_size; +qxl-vga.vram_offset = qemu_ram_alloc(qxl-pci.qdev, qxl.vgavram, + qxl-vga.vram_size); +qxl-vga.vram_ptr = qemu_get_ram_ptr(qxl-vga.vram_offset); + +pci_config_set_class(config, PCI_CLASS_DISPLAY_OTHER); So 1st device has device id different from the rest? Why? 
+} + +pci_config_set_vendor_id(config, REDHAT_PCI_VENDOR_ID); +pci_config_set_device_id(config, pci_device_id); +pci_set_byte(config[PCI_REVISION_ID], pci_device_rev); +pci_set_byte(config[PCI_INTERRUPT_PIN], 1); + +qxl-rom_size = qxl_rom_size(); +qxl-rom_offset = qemu_ram_alloc(qxl-pci.qdev, qxl.vrom, qxl-rom_size); +init_qxl_rom(qxl); +init_qxl_ram(qxl); + +if (qxl-vram_size 16 * 1024 * 1024) { +qxl-vram_size = 16 * 1024 * 1024; +} +if (qxl-revision == 1) { +qxl-vram_size = 4096; +} +qxl-vram_size = msb_mask(qxl-vram_size * 2 - 1); +qxl-vram_offset = qemu_ram_alloc(qxl-pci.qdev, qxl.vram, qxl-vram_size); + +io_size = msb_mask(QXL_IO_RANGE_SIZE * 2 - 1); +if (qxl-revision == 1) { +io_size = 8; +} + +pci_register_bar(qxl-pci, QXL_IO_RANGE_INDEX, + io_size, PCI_BASE_ADDRESS_SPACE_IO, qxl_map); + +pci_register_bar(qxl-pci, QXL_ROM_RANGE_INDEX, + qxl-rom_size, PCI_BASE_ADDRESS_SPACE_MEMORY, + qxl_map); + +pci_register_bar(qxl-pci, QXL_RAM_RANGE_INDEX, + qxl-vga.vram_size, PCI_BASE_ADDRESS_SPACE_MEMORY, + qxl_map); + +pci_register_bar(qxl-pci, QXL_VRAM_RANGE_INDEX, qxl-vram_size, + PCI_BASE_ADDRESS_SPACE_MEMORY, qxl_map); + +qxl-ssd.qxl.base.sif = qxl_interface.base; +qxl-ssd.qxl.id = qxl-id; +qemu_spice_add_interface(qxl-ssd.qxl.base); +qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); + +init_pipe_signaling(qxl); +qxl_reset_state(qxl); + +device_id++; what happens when this wraps around? Since it's an int probably undefined behaviour ...
Re: [Qemu-devel] Is there any approach to setup guest only network
Dear all Sorry I can't understand what you mean. I know I still have many things to learn. Please can you explain more clearly. Thanks a lot Best Regards, Sn 2010/11/17 Mulyadi Santosa mulyadi.sant...@gmail.com Hi 2010/11/16 郭沐錫 maxgreg13...@gmail.com: Dear all I have set two directory for QEMU. One of my qemu-ifup is as follow(the other ip address is different) #!/bin/sh sudo modprobe tun sudo /sbin/ifconfig $1 up 192.168.2.52 netmask 255.255.255.0 broadcast 192.168.2.255 # IP masquerade sudo echo 1 /proc/sys/net/ipv4/ip_forward sudo /sbin/iptables -N nat sudo /sbin/iptables -t nat -F sudo /sbin/iptables -t nat -A POSTROUTING -s 192.168.2.62 -j MASQUERADE sudo /sbin/iptables -t nat -A POSTROUTING -d 192.168.2.62 -o $1 to me, the scripts look good...regarding the missing eth, are you sure it's not there even if you issue ifconfig -a command? -- regards, Mulyadi Santosa Freelance Linux trainer and consultant blog: the-hydra.blogspot.com training: mulyaditraining.blogspot.com
[Qemu-devel] [PATCH] block: Remove unused s-hd in various drivers
All drivers have used bs->file instead of s->hd for quite a while now, so it's time to remove s->hd. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow.c |1 - block/qcow2.h |1 - block/vdi.c |1 - block/vmdk.c |1 - block/vpc.c |2 -- 5 files changed, 0 insertions(+), 6 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index 9cd547d..f67d3d3 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -54,7 +54,6 @@ typedef struct QCowHeader { #define L2_CACHE_SIZE 16 typedef struct BDRVQcowState { -BlockDriverState *hd; int cluster_bits; int cluster_size; int cluster_sectors; diff --git a/block/qcow2.h b/block/qcow2.h index 2d22e5e..5217bea 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -79,7 +79,6 @@ typedef struct QCowSnapshot { } QCowSnapshot; typedef struct BDRVQcowState { -BlockDriverState *hd; int cluster_bits; int cluster_size; int cluster_sectors; diff --git a/block/vdi.c b/block/vdi.c index 3b51e53..ab8f70f 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -186,7 +186,6 @@ typedef struct { } VdiHeader; typedef struct { -BlockDriverState *hd; /* The block map entries are little endian (even in memory). */ uint32_t *bmap; /* Size of block (bytes). */ diff --git a/block/vmdk.c b/block/vmdk.c index f505303..2298a75 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -61,7 +61,6 @@ typedef struct { #define L2_CACHE_SIZE 16 typedef struct BDRVVmdkState { -BlockDriverState *hd; int64_t l1_table_offset; int64_t l1_backup_table_offset; uint32_t *l1_table; diff --git a/block/vpc.c b/block/vpc.c index 416f489..21e2a68 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -110,8 +110,6 @@ struct vhd_dyndisk_header { }; typedef struct BDRVVPCState { -BlockDriverState *hd; - uint8_t footer_buf[HEADER_SIZE]; uint64_t free_data_block_offset; int max_table_entries; -- 1.7.2.3
[Qemu-devel] [Bug 676190] [NEW] Latest git fails to link - missing main() routine
Public bug reported: I can't get git 2834c3e0140c3b0ed4422909dfa0607b7213d95d to link on my Debian Linux/x86_64 machine with gcc4.4: All goes well until the link stage: /usr/lib/gcc/x86_64-linux-gnu/4.4.5/../../../../lib/crt1.o: In function `_start': (.text+0x20): undefined reference to `main' collect2: ld returned 1 exit status make[1]: *** [trace] Error 1 make: *** [subdir-i386-softmmu] Error 2 I did make distclean ./configure --enable-linux-aio --enable-io-thread --enable-kvm ** Affects: qemu Importance: Undecided Status: New -- Latest git fails to link - missing main() routine https://bugs.launchpad.net/bugs/676190 You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. Status in QEMU: New Bug description: I can't get git 2834c3e0140c3b0ed4422909dfa0607b7213d95d to link on my Debian Linux/x86_64 machine with gcc4.4: All goes well until the link stage: /usr/lib/gcc/x86_64-linux-gnu/4.4.5/../../../../lib/crt1.o: In function `_start': (.text+0x20): undefined reference to `main' collect2: ld returned 1 exit status make[1]: *** [trace] Error 1 make: *** [subdir-i386-softmmu] Error 2 I did make distclean ./configure --enable-linux-aio --enable-io-thread --enable-kvm
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 16.11.2010, at 19:22, Anthony PERARD wrote: On Tue, 16 Nov 2010, Anthony Liguori wrote: On 11/16/2010 11:24 AM, Alexander Graf wrote: On 16.11.2010, at 18:20, Anthony Liguori wrote: On 11/16/2010 10:59 AM, Alexander Graf wrote: Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. No, an accelerator is both a CPU selection and a machine characteristic. For KVM, we overload -cpu to modify both the KVM CPU and the TCG CPU both this won't work with accel=xen. We probably shouldn't do this with KVM either because there's a significant different between trying to do cpuid masking with KVM and modifying the TCG cpu emulation support. Both KVM and Xen have other impacts on the platform devices though. KVM does not support SMM so it disables that in the i440fx. KVM prefers to use it's own in-kernel local APIC (and IOAPIC). That makes it a property of the machine. So you're saying the machine should define an accel mask of accels it supports? However all this ends up internally, giving the user an easy option to choose accels would still be nice. Users don't use -device or -machine. They want shortcuts :). User's want things to just work. That's why -M xenpv should imply -machine accel=xen. Actually, it works like that for qemu-xen, we just specify -M xenpv, and we don't have any --enable-xen or other -accel xen. Yes, and that's exactly the behavior you'd get :). -accel would default to xen,kvm,tcg and thus -M xenpv on xen defaults to real xen pv code, -M xenpv on kvm would default to xenner w/ kvm and -M xenpv without any accelerator available would just fall back to xenner with tcg. Everyone is happy :). Alex
Re: [Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability
On 16 November 2010 18:10, Anthony Liguori anth...@codemonkey.ws wrote: On 11/16/2010 11:43 AM, Peter Maydell wrote: Unfortunately these commits: 2834c3e Add support for generating a systemtap tapset static probes 4addb11 Add a DTrace tracing backend targetted for SystemTAP compatability What's your configure output? I've attached it; the trace related bits are: Trace backend nop Trace output file trace-pid I don't have the right environment to build with systemtap support, but --trace-backend=nop should work regardless. I'm using the nop backend, yes. I think the problem is that commit 2834c3e adds a target 'trace:' to the Makefile.target which looks like it's intended to be a phony target. However it isn't marked as such, so make actually tries to create a binary 'trace' by falling back to its default rules (since there's a trace.c in the root directory): petma...@linaroe102767:~/qemu-test/qemu/i386-softmmu$ make -n trace echo CCtrace.o gcc -I/home/petmay01/qemu-test/qemu/slirp -Werror -m64 -I. -I/home/petmay01/qemu-test/qemu -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wendif-labels -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fstack-protector-all -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -DHAS_AUDIO -DHAS_AUDIO_CHOICE -I/home/petmay01/qemu-test/qemu/fpu -I/home/petmay01/qemu-test/qemu/tcg -I/home/petmay01/qemu-test/qemu/tcg/i386 -DTARGET_PHYS_ADDR_BITS=32 -I.. -I/home/petmay01/qemu-test/qemu/target-i386 -DNEED_CPU_H -MMD -MP -MT trace.o -MF ./trace.d -O2 -g -c -o trace.o /home/petmay01/qemu-test/qemu/trace.c echo LINK trace gcc -I/home/petmay01/qemu-test/qemu/slirp -Werror -m64 -I. 
-I/home/petmay01/qemu-test/qemu -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wendif-labels -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fstack-protector-all -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -DHAS_AUDIO -DHAS_AUDIO_CHOICE -I/home/petmay01/qemu-test/qemu/fpu -I/home/petmay01/qemu-test/qemu/tcg -I/home/petmay01/qemu-test/qemu/tcg/i386 -DTARGET_PHYS_ADDR_BITS=32 -I.. -I/home/petmay01/qemu-test/qemu/target-i386 -DNEED_CPU_H -O2 -g -Wl,--warn-common -m64 -g -o trace trace.o -lrt -lpthread -lutil -lcurl -lncurses -luuid -lpng -lsasl2 -lgnutls -lSDL -lX11 -laio -lm -lz ...and linking only trace.o into a binary 'trace' fails because trace.c doesn't have a main() (or indeed any functions at all). If I add a .PHONY: trace or change the trace target name to tracexyzzy then this fixes the problem. -- PMM configure.out Description: Binary data
Re: [Qemu-devel] [PATCH] Introduce -accel command option.
On 11/16/2010 12:49 PM, Alexander Graf wrote: On 16.11.2010, at 19:22, Anthony PERARD wrote: On Tue, 16 Nov 2010, Anthony Liguori wrote: On 11/16/2010 11:24 AM, Alexander Graf wrote: On 16.11.2010, at 18:20, Anthony Liguori wrote: On 11/16/2010 10:59 AM, Alexander Graf wrote: Surely, only if it is running on a Xen Dom0. If you use -M xenpv on a KVM host, then -M xenpv should imply -machine accel=kvm (ie it would be using xenner) Actually, it should imply -machine accel=kvm,tcg :). Accelerators really are not a machine property. In an ideal world, -M pc would just work with xen hvm if -accel xen is given. No, an accelerator is both a CPU selection and a machine characteristic. For KVM, we overload -cpu to modify both the KVM CPU and the TCG CPU both this won't work with accel=xen. We probably shouldn't do this with KVM either because there's a significant different between trying to do cpuid masking with KVM and modifying the TCG cpu emulation support. Both KVM and Xen have other impacts on the platform devices though. KVM does not support SMM so it disables that in the i440fx. KVM prefers to use it's own in-kernel local APIC (and IOAPIC). That makes it a property of the machine. So you're saying the machine should define an accel mask of accels it supports? However all this ends up internally, giving the user an easy option to choose accels would still be nice. Users don't use -device or -machine. They want shortcuts :). User's want things to just work. That's why -M xenpv should imply -machine accel=xen. Actually, it works like that for qemu-xen, we just specify -M xenpv, and we don't have any --enable-xen or other -accel xen. Yes, and that's exactly the behavior you'd get :). -accel would default to xen,kvm,tcg No, that's the wrong default. The default should be -accel tcg,[xen|kvm] if anything. That said, if tied into QemuOpts, the default can be specified in the global config file which will make everyone happy. Regards, Anthony Liguori
[Qemu-devel] Re: [PATCH v9 8/8] x3130/downstream: support aer.
On Tue, 2010-11-16 at 17:26 +0900, Isaku Yamahata wrote: add aer support. Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp I'm actually working on a scheme to provide support for handling PCI errors related to assigned devices. The goal is to notify the corresponding driver so that all its I/O accesses are stopped quickly, and to give that same driver a chance to get back in sync with the device. I'm just wondering how I can make use of your AER support in Q35. As you already know, the error detection and error recovery phases are driven by a state machine where the next state depends on the value returned from the current callback. Also, only the host can perform the link recovery sequence for assigned devices. Because of this, it seems like the only way to maintain consistency between the assigned device and its corresponding driver is to perform the error detection/recovery phases in lockstep with the host? thanks, -Etienne
[Qemu-devel] Re: Hitting 29 NIC limit
On 11/16/2010 11:23 AM, Michael S. Tsirkin wrote: On Thu, Oct 14, 2010 at 05:17:36PM -0500, Anthony Liguori wrote: On 10/14/2010 05:12 PM, Anjali Kulkarni wrote: Thanks. Does this work for e1000 as well? Haven't tried. I don't know how various e1000 drivers would react. Also, does it support pci hotplug? No, but that's fixable down the road. Regards, Anthony Liguori Probably not. FWIW, by fixable, I meant, we could add the ability to hot plug a multi-port NIC device. For instance, we could have -device virtio-net-pci-x8 which would be an 8-port virtio-net card. Regards, Anthony Liguori
Re: [Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability
On 11/16/2010 12:54 PM, Peter Maydell wrote: On 16 November 2010 18:10, Anthony Liguorianth...@codemonkey.ws wrote: On 11/16/2010 11:43 AM, Peter Maydell wrote: Unfortunately these commits: 2834c3e Add support for generating a systemtap tapset static probes 4addb11 Add a DTrace tracing backend targetted for SystemTAP compatability What's your configure output? I've attached it; the trace related bits are: Trace backend nop Trace output file trace-pid I don't have the right environment to build with systemtap support, but --trace-backend=nop should work regardless. I'm using the nop backend, yes. I think the problem is that commit 2834c3e adds a target 'trace:' to the Makefile.target which looks like it's intended to be a phony target. However it isn't marked as such, so make actually tries to create a binary 'trace' by falling back to its default rules (since there's a trace.c in the root directory): petma...@linaroe102767:~/qemu-test/qemu/i386-softmmu$ make -n trace echo CCtrace.o gcc -I/home/petmay01/qemu-test/qemu/slirp -Werror -m64 -I. -I/home/petmay01/qemu-test/qemu -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wendif-labels -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fstack-protector-all -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -DHAS_AUDIO -DHAS_AUDIO_CHOICE -I/home/petmay01/qemu-test/qemu/fpu -I/home/petmay01/qemu-test/qemu/tcg -I/home/petmay01/qemu-test/qemu/tcg/i386 -DTARGET_PHYS_ADDR_BITS=32 -I.. -I/home/petmay01/qemu-test/qemu/target-i386 -DNEED_CPU_H -MMD -MP -MT trace.o -MF ./trace.d -O2 -g -c -o trace.o /home/petmay01/qemu-test/qemu/trace.c echo LINK trace gcc -I/home/petmay01/qemu-test/qemu/slirp -Werror -m64 -I. 
-I/home/petmay01/qemu-test/qemu -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wendif-labels -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fstack-protector-all -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -DHAS_AUDIO -DHAS_AUDIO_CHOICE -I/home/petmay01/qemu-test/qemu/fpu -I/home/petmay01/qemu-test/qemu/tcg -I/home/petmay01/qemu-test/qemu/tcg/i386 -DTARGET_PHYS_ADDR_BITS=32 -I.. -I/home/petmay01/qemu-test/qemu/target-i386 -DNEED_CPU_H -O2 -g -Wl,--warn-common -m64 -g -o trace trace.o -lrt -lpthread -lutil -lcurl -lncurses -luuid -lpng -lsasl2 -lgnutls -lSDL -lX11 -laio -lm -lz ...and linking only trace.o into a binary 'trace' fails because trace.c doesn't have a main() (or indeed any functions at all). If I add a .PHONY: trace or change the trace target name to tracexyzzy then this fixes the problem. Curious, care to send a patch? I think I'm not seeing this because I build with srcdir != objdir. Regards, Anthony Liguori -- PMM
[Qemu-devel] Re: [PATCHv4 15/15] Pass boot device list to firmware.
On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote: Perhaps the FW path should use device class names if no name is specified. What do you mean by device class name. We can do something like this: if (dev-child_bus.lh_first) return dev-child_bus.lh_first-info-name; i.e if there is child bus use its bus name as fw name. This will make all pci devices to have pci as fw name automatically. The problem is that theoretically same device can provide different buses. I meant PCI class name, like display for display controllers, network for NICs etc. That is what my pci bus related patch is doing already. I'll try Sparc32 to see how this fits there. Except bootindex is not implemented for SCSI. Will look into adding it. -- Gleb.
Re: [Qemu-devel] Re: [PATCH] add a command line option to specify the interface to send multicast packets on
On Tue, Nov 16, 2010 at 07:54:58PM +0200, Michael S. Tsirkin wrote: snip Let's let the user pass in the IP address? That would solve the portability issue. Further, we could be doing IPv6, right? So you'd need IPV6_MULTICAST_IF? Also - you might also want to control IP_MULTICAST_LOOP/IPV6_MULTICAST_LOOP? IP address seems like a good alternative and does indeed get rid of the messy platform-specific bits. The multicast code all assumes IPv4. For instance, it uses v4-only parse_host_port to get the multicast destination address. Given that, I'm not going to bother shoehorning in IPV6_MULTICAST_IF.
[Qemu-devel] [PATCH 1/3] qemu-char: Introduce Memory driver
This driver handles in-memory chardev operations. That is, all writes to this driver are stored in an internal buffer and it doesn't talk to the external world in any way. Right now it's very simple: it supports only writes. But it can be easily extended to support more operations. This is going to be used by the monitor's HMP passthrough via QMP feature, which needs to run monitor handlers without a backing device. Signed-off-by: Luiz Capitulino lcapitul...@redhat.com --- qemu-char.c | 64 +++ qemu-char.h |7 ++ 2 files changed, 71 insertions(+), 0 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index 88997f9..edc9ad6 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2275,6 +2275,70 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts) return NULL; } +/***/ +/* Memory chardev */ +typedef struct { +size_t outbuf_size; +size_t outbuf_capacity; +uint8_t *outbuf; +} MemoryDriver; + +static int mem_chr_write(CharDriverState *chr, const uint8_t *buf, int len) +{ +MemoryDriver *d = chr-opaque; + +/* TODO: the QString implementation has the same code, we should + * introduce a generic way to do this in cutils.c */ +if (d-outbuf_capacity d-outbuf_size + len) { +/* grow outbuf */ +d-outbuf_capacity += len; +d-outbuf_capacity *= 2; +d-outbuf = qemu_realloc(d-outbuf, d-outbuf_capacity); +} + +memcpy(d-outbuf + d-outbuf_size, buf, len); +d-outbuf_size += len; + +return len; +} + +void qemu_chr_init_mem(CharDriverState *chr) +{ +MemoryDriver *d; + +d = qemu_malloc(sizeof(*d)); +d-outbuf_size = 0; +d-outbuf_capacity = 4096; +d-outbuf = qemu_mallocz(d-outbuf_capacity); + +memset(chr, 0, sizeof(*chr)); +chr-opaque = d; +chr-chr_write = mem_chr_write; +} + +QString *qemu_chr_mem_to_qs(CharDriverState *chr) +{ +MemoryDriver *d = chr-opaque; +return qstring_from_substr((char *) d-outbuf, 0, d-outbuf_size - 1); +} + +/* NOTE: this driver can not be closed with qemu_chr_close()! 
*/ +void qemu_chr_close_mem(CharDriverState *chr) +{ +MemoryDriver *d = chr-opaque; + +qemu_free(d-outbuf); +qemu_free(chr-opaque); +chr-opaque = NULL; +chr-chr_write = NULL; +} + +size_t qemu_chr_mem_osize(const CharDriverState *chr) +{ +const MemoryDriver *d = chr-opaque; +return d-outbuf_size; +} + QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename) { char host[65], port[33], width[8], height[8]; diff --git a/qemu-char.h b/qemu-char.h index 18ad12b..e6ee6c4 100644 --- a/qemu-char.h +++ b/qemu-char.h @@ -6,6 +6,7 @@ #include qemu-option.h #include qemu-config.h #include qobject.h +#include qstring.h /* character device */ @@ -100,6 +101,12 @@ CharDriverState *qemu_chr_open_eventfd(int eventfd); extern int term_escape_char; +/* memory chardev */ +void qemu_chr_init_mem(CharDriverState *chr); +void qemu_chr_close_mem(CharDriverState *chr); +QString *qemu_chr_mem_to_qs(CharDriverState *chr); +size_t qemu_chr_mem_osize(const CharDriverState *chr); + /* async I/O support */ int qemu_set_fd_handler2(int fd, -- 1.7.3.2.168.gd6b63
[Qemu-devel] [PATCH v4 0/3]: QMP: Human Monitor passthrough
Simple example: -> { "execute": "human-monitor-command", "arguments": { "command-line": "print /i 10+25" } } <- { "return": "35\r\n" } Please check the individual patches for details. Also note that this series depends on the script improvements series. Also, Markus's suggestion of having an assert() in qemu_chr_close() has not been added to this series because I don't know what to assert(). But that's an incremental change anyway and should not prevent this series from being merged. changelog - v3 -> v4 - Simplify qemu_chr_mem_to_qs() (as per Markus's review) - Fix qmp-shell not to cache bad CPU index values v2 -> v3 - Renamed command name to human-monitor-command - Fixed buggy error reporting when cpu-index is invalid - Make qemu_chr_mem_to_qs() return a string when outbuf is empty - Introduced qemu_chr_mem_osize() along with some cleanups v1 -> v2 - A number of small cleanups and clarifications Thanks.
[Qemu-devel] [PATCH 3/3] QMP/qmp-shell: Introduce HMP mode
In which qmp-shell will exclusively use the HMP passthrough feature, this is useful for testing. Example: # ./qmp-shell -H qmp-sock Welcome to the HMP shell! Connected to QEMU 0.13.50 (QEMU) info network VLAN 0 devices: user.0: net=10.0.2.0, restricted=n e1000.0: model=e1000,macaddr=52:54:00:12:34:56 Devices not on any VLAN: (QEMU) Signed-off-by: Luiz Capitulino lcapitul...@redhat.com --- QMP/qmp-shell | 79 - 1 files changed, 78 insertions(+), 1 deletions(-) diff --git a/QMP/qmp-shell b/QMP/qmp-shell index 1fb7e76..42dabc8 100755 --- a/QMP/qmp-shell +++ b/QMP/qmp-shell @@ -145,6 +145,76 @@ class QMPShell(qmp.QEMUMonitorProtocol): else: return self._execute_cmd(cmdline) +class HMPShell(QMPShell): +def __init__(self, address): +QMPShell.__init__(self, address) +self.__cpu_index = 0 + +def __cmd_completion(self): +for cmd in self.__cmd_passthrough('help')['return'].split('\r\n'): +if cmd and cmd[0] != '[' and cmd[0] != '\t': +name = cmd.split()[0] # drop help text +if name == 'info': +continue +if name.find('|') != -1: +# Command in the form 'foobar|f' or 'f|foobar', take the +# full name +opt = name.split('|') +if len(opt[0]) == 1: +name = opt[1] +else: +name = opt[0] +self._completer.append(name) +self._completer.append('help ' + name) # help completion + +def __info_completion(self): +for cmd in self.__cmd_passthrough('info')['return'].split('\r\n'): +if cmd: +self._completer.append('info ' + cmd.split()[1]) + +def __other_completion(self): +# special cases +self._completer.append('help info') + +def _fill_completion(self): +self.__cmd_completion() +self.__info_completion() +self.__other_completion() + +def __cmd_passthrough(self, cmdline, cpu_index = 0): +return self.cmd_obj({ 'execute': 'human-monitor-command', 'arguments': + { 'command-line': cmdline, +'cpu-index': cpu_index } }) + +def _execute_cmd(self, cmdline): +if cmdline.split()[0] == cpu: +# trap the cpu command, it requires special setting +try: +idx = int(cmdline.split()[1]) +if not 'return' in 
self.__cmd_passthrough('info version', idx): +print 'bad CPU index' +return True +self.__cpu_index = idx +except ValueError: +print 'cpu command takes an integer argument' +return True +resp = self.__cmd_passthrough(cmdline, self.__cpu_index) +if resp is None: +print 'Disconnected' +return False +assert 'return' in resp or 'error' in resp +if 'return' in resp: +# Success +if len(resp['return']) 0: +print resp['return'], +else: +# Error +print '%s: %s' % (resp['error']['class'], resp['error']['desc']) +return True + +def show_banner(self): +QMPShell.show_banner(self, msg='Welcome to the HMP shell!') + def die(msg): sys.stderr.write('ERROR: %s\n' % msg) sys.exit(1) @@ -156,9 +226,16 @@ def fail_cmdline(option=None): sys.exit(1) def main(): +addr = '' try: if len(sys.argv) == 2: qemu = QMPShell(sys.argv[1]) +addr = sys.argv[1] +elif len(sys.argv) == 3: +if sys.argv[1] != '-H': +fail_cmdline(sys.argv[1]) +qemu = HMPShell(sys.argv[2]) +addr = sys.argv[2] else: fail_cmdline() except QMPShellBadPort: @@ -171,7 +248,7 @@ def main(): except qmp.QMPCapabilitiesError: die('Could not negotiate capabilities') except qemu.error: -die('Could not connect to %s' % sys.argv[1]) +die('Could not connect to %s' % addr) qemu.show_banner() while qemu.read_exec_command('(QEMU) '): -- 1.7.3.2.168.gd6b63
[Qemu-devel] [PATCH 2/3] QMP: Introduce Human Monitor passthrough command
This command allows QMP clients to execute HMP commands. Please, check the documentation added to the qmp-commands.hx file for additional details about the interface and its limitations. Signed-off-by: Luiz Capitulino lcapitul...@redhat.com --- monitor.c | 38 ++ qmp-commands.hx | 45 + 2 files changed, 83 insertions(+), 0 deletions(-) diff --git a/monitor.c b/monitor.c index 8cee35d..ec31eac 100644 --- a/monitor.c +++ b/monitor.c @@ -491,6 +491,44 @@ static int do_qmp_capabilities(Monitor *mon, const QDict *params, return 0; } +static int mon_set_cpu(int cpu_index); +static void handle_user_command(Monitor *mon, const char *cmdline); + +static int do_hmp_passthrough(Monitor *mon, const QDict *params, + QObject **ret_data) +{ +int ret = 0; +Monitor *old_mon, hmp; +CharDriverState mchar; + +memset(hmp, 0, sizeof(hmp)); +qemu_chr_init_mem(mchar); +hmp.chr = mchar; + +old_mon = cur_mon; +cur_mon = hmp; + +if (qdict_haskey(params, cpu-index)) { +ret = mon_set_cpu(qdict_get_int(params, cpu-index)); +if (ret 0) { +cur_mon = old_mon; +qerror_report(QERR_INVALID_PARAMETER_VALUE, cpu-index, a CPU number); +goto out; +} +} + +handle_user_command(hmp, qdict_get_str(params, command-line)); +cur_mon = old_mon; + +if (qemu_chr_mem_osize(hmp.chr) 0) { +*ret_data = QOBJECT(qemu_chr_mem_to_qs(hmp.chr)); +} + +out: +qemu_chr_close_mem(hmp.chr); +return ret; +} + static int compare_cmd(const char *name, const char *list) { const char *p, *pstart; diff --git a/qmp-commands.hx b/qmp-commands.hx index 793cf1c..e5f157f 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -761,6 +761,51 @@ Example: Note: This command must be issued before issuing any other command. +EQMP + +{ +.name = human-monitor-command, +.args_type = command-line:s,cpu-index:i?, +.params = , +.help = , +.user_print = monitor_user_noop, +.mhandler.cmd_new = do_hmp_passthrough, +}, + +SQMP +human-monitor-command +- + +Execute a Human Monitor command. 
+ +Arguments: + +- command-line: the command name and its arguments, just like the +Human Monitor's shell (json-string) +- cpu-index: select the CPU number to be used by commands which access CPU + data, like 'info registers'. The Monitor selects CPU 0 if this + argument is not provided (json-int, optional) + +Example: + +- { execute: human-monitor-command, arguments: { command-line: info kvm } } +- { return: kvm support: enabled\r\n } + +Notes: + +(1) The Human Monitor is NOT an stable interface, this means that command +names, arguments and responses can change or be removed at ANY time. +Applications that rely on long term stability guarantees should NOT +use this command + +(2) Limitations: + +o This command is stateless, this means that commands that depend + on state information (such as getfd) might not work + +o Commands that prompt the user for data (eg. 'cont' when the block + device is encrypted) don't currently work + 3. Query Commands = -- 1.7.3.2.168.gd6b63
Re: [Qemu-devel] Re: [Try2][PATCH] Initial implementation of a mpeg1 layer2 streaming audio driver.
Am 15.11.2010 um 22:53 schrieb François Revol: I'm still having a hard time getting sound to work reliably in Haiku guests regardless of the output or emulated card. OTOH it works perfectly in VirtualBox with almost no cpu overhead. If anyone has a clue... It might be related to the use of RDTSC as primary time source, still it's the same in vbox. Did you check the time drift command line options? IIRC Gleb needed some hacks for timer-related issues on Windows. (does [KVM] even work on OSX ?) No. Andreas
Re: [Qemu-devel] Re: [Try2][PATCH] Initial implementation of a mpeg1 layer2 streaming audio driver.
On 11/16/2010 01:22 PM, Andreas Färber wrote: Am 15.11.2010 um 22:53 schrieb François Revol: I'm still having a hard time getting sound to work reliably in Haiku guests regardless of the output or emulated card. OTOH it works perfectly in VirtualBox with almost no cpu overhead. If anyone has a clue... It might be related to the use of RDTSC as primary time source, still it's the same in vbox. Did you check the time drift command line options? IIRC Gleb needed some hacks for timer-related issues on Windows. Time drift correction is only applicable to the RTC and PIT and deals with catching up the periodic timers. The issue with the TSC as a time source is that it skips around during CPU migration. The easiest way to work around this is to simply pin your guest to a single physical CPU (assuming you're using KVM). With TCG, it depends, but you can actually end up accessing the host TSC, in which case you're SOL because the code in QEMU that uses the host TSC assumes a UP host, which isn't all that common anymore :-) Regards, Anthony Liguori (does [KVM] even work on OSX ?) No. Andreas
Re: [Qemu-devel] Fwd: [PATCH v2] Guest OS hangs on usb_add
On 11/16/2010 10:00 AM, Anthony Liguori wrote: On 11/02/2010 09:51 AM, TJ wrote: Doesn't look like this has ever been committed. qemu-kvm-0.13 has just arrived to the portage tree, but I am still having problems with it. I checked the git log and it's not there! Please commit. One off device hacks are concerning because it's basically impossible to review. Why does this work on bare metal? Regards, Anthony Liguori Probably because bare metal USB 2.0 controllers don't give a damn about the USB 3 spec. :) My guess is that they ignore the device descriptor length and assume that it's always equal to 18, although the USB 2.0 spec doesn't explicitly say anywhere that it can't be more than 18. IIRC USB 3 even adds some extensions to the device descriptor. And since I wanted my code to be portable and USB 3 ready ;) I rely on the value in dev_descr_len. BTW, this patch is more than just a hack for the device in question. Without this patch qemu simply locks up when I attach the remote and spins in an endless loop, because USB parsing is so very primitive. With this patch, USB parsing is done more intelligently and devices with whacky USB descriptors are simply rejected. The hack part is really just 3 lines: +if (dev_descr_len == 0x18 && dev->descr[ 8] == 0x47 && dev->descr[ 9] == 0x46 && + dev->descr[10] == 0x00 && dev->descr[11] == 0x30) +dev_descr_len = USB_DT_DEVICE_LEN; /* for buggy MX-950 remote reporting len in hex */ And it is very harmless, as all it does is overwrite the device descriptor length with the correct one. If you don't like the hack, you can just remove the 3 lines above and use the rest of the patch. I will just have to remember to manually patch mine every time I upgrade. Your thoughts? -TJ
Re: [Qemu-devel] Fwd: [PATCH v2] Guest OS hangs on usb_add
On 11/16/2010 01:36 PM, TJ wrote: On 11/16/2010 10:00 AM, Anthony Liguori wrote: On 11/02/2010 09:51 AM, TJ wrote: Doesn't look like this has ever been committed. qemu-kvm-0.13 has just arrived to the portage tree, but I am still having problems with it. I checked the git log and it's not there! Please commit. One off device hacks are concerning because it's basically impossible to review. Why does this work on bare metal? Regards, Anthony Liguori Probably because bare metal USB 2.0 controllers don't give a damn about the USB 3 spec. :) My guess is that they ignore the device descriptor length and assume that it's always equal to 18, although the USB 2.0 spec doesn't explicitly say anywhere that it can't be more than 18. IIRC USB 3 even adds some extensions to the device descriptor. And since I wanted my code to be portable and USB 3 ready ;) I rely on the value in dev_descr_len. BTW, this patch is more than just a hack for the device in question. Without this patch qemu simply locks up when I attach the remote and spins in an endless loop, because USB parsing is so very primitive. With this patch, USB parsing is done more intelligently and devices with whacky USB descriptors are simply rejected. The hack part is really just 3 lines: +if (dev_descr_len == 0x18 && dev->descr[ 8] == 0x47 && dev->descr[ 9] == 0x46 && + dev->descr[10] == 0x00 && dev->descr[11] == 0x30) +dev_descr_len = USB_DT_DEVICE_LEN; /* for buggy MX-950 remote reporting len in hex */ And it is very harmless, as all it does is overwrite the device descriptor length with the correct one. If you don't like the hack, you can just remove the 3 lines above and use the rest of the patch. I will just have to remember to manually patch mine every time I upgrade. Your thoughts? Yeah, that bit is a bit too gnarly for my tastes, but if you can resend the rest of it with a Signed-off-by, I'd appreciate it. Regards, Anthony Liguori -TJ
Re: [Qemu-devel] [PATCH v2 1/2] rtl8139: add vlan tag insertion
On 11/08/2010 07:46 PM, Benjamin Poirier wrote: Add support to the emulated hardware to add vlan tags in packets going from the guest to the network. Signed-off-by: Benjamin Poirierbenjamin.poir...@polymtl.ca Cc: Igor V. Kovalenkoigor.v.kovale...@gmail.com --- Changes since v1: * moved the debug print statement inside the if block and reworded accordingly. (as suggested by Igor) hw/rtl8139.c | 45 ++--- 1 files changed, 34 insertions(+), 11 deletions(-) diff --git a/hw/rtl8139.c b/hw/rtl8139.c index d92981d..b599945 100644 --- a/hw/rtl8139.c +++ b/hw/rtl8139.c @@ -47,6 +47,8 @@ * Darwin) */ +#includenet/ethernet.h + #include hw.h #include pci.h #include qemu-timer.h @@ -58,6 +60,10 @@ #define PCI_FREQUENCY 3300L +/* bytes in VLAN tag */ +#define VLAN_TCI_LEN 2 +#define VLAN_HDR_LEN (ETHER_TYPE_LEN + VLAN_TCI_LEN) + /* debug RTL8139 card C+ mode only */ //#define DEBUG_RTL8139CP 1 @@ -1913,7 +1919,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) cpu_physical_memory_read(cplus_tx_ring_desc,(uint8_t *)val, 4); txdw0 = le32_to_cpu(val); -/* TODO: implement VLAN tagging support, VLAN tag data is read to txdw1 */ cpu_physical_memory_read(cplus_tx_ring_desc+4, (uint8_t *)val, 4); txdw1 = le32_to_cpu(val); cpu_physical_memory_read(cplus_tx_ring_desc+8, (uint8_t *)val, 4); @@ -1925,9 +1930,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) descriptor, txdw0, txdw1, txbufLO, txbufHI)); -/* TODO: the following discard cast should clean clang analyzer output */ -(void)txdw1; - /* w0 ownership flag */ #define CP_TX_OWN (131) /* w0 end of ring flag */ @@ -1951,8 +1953,8 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) /* w0 bits 0...15 : buffer size */ #define CP_TX_BUFFER_SIZE (116) #define CP_TX_BUFFER_SIZE_MASK (CP_TX_BUFFER_SIZE - 1) -/* w1 tag available flag */ -#define CP_RX_TAGC (117) +/* w1 add tag flag */ +#define CP_TX_TAGC (117) /* w1 bits 0...15 : VLAN tag */ #define CP_TX_VLAN_TAG_MASK ((116) - 1) /* w2 low 32bit of Rx buffer ptr */ @@ 
-1978,12 +1980,21 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) DEBUG_PRINT((RTL8139: +++ C+ Tx mode : transmitting from descriptor %d\n, descriptor)); +int vlan_extra_size = 0; if (txdw0 CP_TX_FS) { DEBUG_PRINT((RTL8139: +++ C+ Tx mode : descriptor %d is first segment descriptor\n, descriptor)); /* reset internal buffer offset */ s-cplus_txbuffer_offset = 0; + +if (txdw1 CP_TX_TAGC) +{ +vlan_extra_size = VLAN_HDR_LEN; + +DEBUG_PRINT((RTL8139: +++ C+ Tx mode : inserting vlan tag with +tci: %u\n, bswap16(txdw1 CP_TX_VLAN_TAG_MASK))); +} } int txsize = txdw0 CP_TX_BUFFER_SIZE_MASK; @@ -1992,14 +2003,15 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) /* make sure we have enough space to assemble the packet */ if (!s-cplus_txbuffer) { -s-cplus_txbuffer_len = CP_TX_BUFFER_SIZE; +s-cplus_txbuffer_len = CP_TX_BUFFER_SIZE + VLAN_HDR_LEN; s-cplus_txbuffer = qemu_malloc(s-cplus_txbuffer_len); s-cplus_txbuffer_offset = 0; DEBUG_PRINT((RTL8139: +++ C+ mode transmission buffer allocated space %d\n, s-cplus_txbuffer_len)); } -while (s-cplus_txbuffer s-cplus_txbuffer_offset + txsize= s-cplus_txbuffer_len) +while (s-cplus_txbuffer s-cplus_txbuffer_offset + txsize + +vlan_extra_size= s-cplus_txbuffer_len) { s-cplus_txbuffer_len += CP_TX_BUFFER_SIZE; s-cplus_txbuffer = qemu_realloc(s-cplus_txbuffer, s-cplus_txbuffer_len); @@ -2025,6 +2037,20 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) DEBUG_PRINT((RTL8139: +++ C+ mode transmit reading %d bytes from host memory at %016 PRIx64 to offset %d\n, txsize, (uint64_t)tx_addr, s-cplus_txbuffer_offset)); +if (vlan_extra_size txsize= 2 * ETHER_ADDR_LEN) +{ +/* copy addresses */ +cpu_physical_memory_read(tx_addr, s-cplus_txbuffer, 2 * +ETHER_ADDR_LEN); +tx_addr += 2 * ETHER_ADDR_LEN; +txsize -= 2 * ETHER_ADDR_LEN; +/* insert vlan tag */ +*(uint16_t *)(s-cplus_txbuffer + 2 * ETHER_ADDR_LEN) = +cpu_to_be16(ETHERTYPE_VLAN); +*(uint16_t *)(s-cplus_txbuffer + 2 * ETHER_ADDR_LEN + ETHER_TYPE_LEN) += 
cpu_to_le16(txdw1 CP_TX_VLAN_TAG_MASK); This looks wrong. You check for txsize = 2 * ETHER_ADDR_LEN but then you assign at 2 * ETHER_ADDR_LEN. This is a potential overflow, no? Regards, Anthony Liguori +s-cplus_txbuffer_offset += 2 * ETHER_ADDR_LEN + VLAN_HDR_LEN; +} cpu_physical_memory_read(tx_addr, s-cplus_txbuffer + s-cplus_txbuffer_offset, txsize); s-cplus_txbuffer_offset
[Qemu-devel] [PATCH] Fix compilation failure with simple trace when srcdir==objdir
Fix a makefile error that meant that qemu would not compile if the source and object directories were the same. Signed-off-by: Peter Maydell peter.mayd...@linaro.org --- Makefile.target | 11 +-- 1 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Makefile.target b/Makefile.target index a5e6410..652c7d2 100644 --- a/Makefile.target +++ b/Makefile.target @@ -30,6 +30,7 @@ endif endif PROGS=$(QEMU_PROG) +STPFILES= ifndef CONFIG_HAIKU LIBS+=-lm @@ -41,19 +42,17 @@ config-target.h: config-target.h-timestamp config-target.h-timestamp: config-target.mak ifdef CONFIG_SYSTEMTAP_TRACE -trace: $(QEMU_PROG).stp +STPFILES+=$(QEMU_PROG).stp $(QEMU_PROG).stp: $(call quiet-command,sh $(SRC_PATH)/tracetool \ --$(TRACE_BACKEND) \ --bindir $(bindir) \ --target $(TARGET_ARCH) \ - -s $(SRC_PATH)/trace-events $(QEMU_PROG).stp, GEN $(QEMU_PROG).stp) -else -trace: + -s $(SRC_PATH)/trace-events $@, GEN $@) endif -all: $(PROGS) trace +all: $(PROGS) $(STPFILES) # Dummy command so that make thinks it has done something @true @@ -363,7 +362,7 @@ endif endif ifdef CONFIG_SYSTEMTAP_TRACE $(INSTALL_DIR) $(DESTDIR)$(datadir)/../systemtap/tapset - $(INSTALL_DATA) $(QEMU_PROG).stp $(DESTDIR)$(datadir)/../systemtap/tapset + $(INSTALL_DATA) $(STPFILES) $(DESTDIR)$(datadir)/../systemtap/tapset endif # Include automatically generated dependency files -- 1.7.1
Re: [Qemu-devel] [PATCH] Out off array access in usb-net
On 11/09/2010 04:51 AM, Markus Armbruster wrote: Gleb Natapov g...@redhat.com writes: On Tue, Nov 09, 2010 at 11:16:43AM +0100, Markus Armbruster wrote: Gleb Natapov g...@redhat.com writes: On Tue, Nov 09, 2010 at 10:30:54AM +0100, Markus Armbruster wrote: Gleb Natapov g...@redhat.com writes: Properly check array bounds before accessing array element. Impact? Gaping security hole for those unfortunate enough to use usb-net? Doesn't that bit of information belong in the commit message? Some people prefer not to put such information into the commit message. Correct, but does "some people" include the QEMU maintainers? Anthony? I don't have a strong opinion either way. If there's a CVE, I'd prefer the CVE number was prominent in the commit log but other than that, I'd leave it to the author's discretion. Regards, Anthony Liguori [...]
Re: [Qemu-devel] [PATCH] multiboot: Prevent loading of x86_64 images
On 11/04/2010 05:22 PM, Adam Lackorzynski wrote: A via -kernel supplied x86_64 ELF image is being started in 32bit mode. Detect and exit if a 64bit image has been supplied. Signed-off-by: Adam Lackorzynskia...@os.inf.tu-dresden.de Acked-by: Alexander Grafag...@suse.de Applied. Thanks. Regards, Anthony Liguori --- hw/multiboot.c |6 ++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/hw/multiboot.c b/hw/multiboot.c index f9097a2..e710bbb 100644 --- a/hw/multiboot.c +++ b/hw/multiboot.c @@ -171,6 +171,12 @@ int load_multiboot(void *fw_cfg, uint64_t elf_low, elf_high; int kernel_size; fclose(f); + +if (((struct elf64_hdr*)header)-e_machine == EM_X86_64) { +fprintf(stderr, Cannot load x86-64 image, give a 32bit one.\n); +exit(1); +} + kernel_size = load_elf(kernel_filename, NULL, NULL,elf_entry, elf_low,elf_high, 0, ELF_MACHINE, 0); if (kernel_size 0) {