[PATCH v2 1/1] virtio: Add support for the virtio suspend feature
Add support for the VIRTIO_F_SUSPEND feature. When this feature is negotiated, power management can use it to suspend virtio devices instead of resorting to resetting the devices entirely. Signed-off-by: David Stevens --- drivers/virtio/virtio.c| 60 ++ drivers/virtio/virtio_pci_common.c | 34 - drivers/virtio/virtio_pci_modern.c | 19 ++ include/linux/virtio.h | 8 include/uapi/linux/virtio_config.h | 10 - 5 files changed, 112 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index f4080692b351..c7685a0dc995 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include @@ -498,6 +499,13 @@ void unregister_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(unregister_virtio_device); +void virtio_device_mark_removed(struct virtio_device *dev) +{ + /* Pairs with READ_ONCE() in virtio_device_set_suspend_bit(). */ + WRITE_ONCE(dev->removed, true); +} +EXPORT_SYMBOL_GPL(virtio_device_mark_removed); + #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev) { @@ -580,6 +588,58 @@ int virtio_device_restore(struct virtio_device *dev) return ret; } EXPORT_SYMBOL_GPL(virtio_device_restore); + +static int virtio_device_set_suspend_bit(struct virtio_device *dev, bool enabled) +{ + u8 status, target; + + status = dev->config->get_status(dev); + if (enabled) + target = status | VIRTIO_CONFIG_S_SUSPEND; + else + target = status & ~VIRTIO_CONFIG_S_SUSPEND; + + if (target == status) + return 0; + + dev->config->set_status(dev, target); + + while ((status = dev->config->get_status(dev)) != target) { + if (status & VIRTIO_CONFIG_S_NEEDS_RESET) + return -EIO; + /* Pairs with WRITE_ONCE() in virtio_device_mark_removed(). 
*/ + if (READ_ONCE(dev->removed)) + return -EIO; + msleep(10); + } + return 0; +} + +bool virtio_device_can_suspend(struct virtio_device *dev) +{ + return virtio_has_feature(dev, VIRTIO_F_SUSPEND) && + (dev->config->get_status(dev) & VIRTIO_CONFIG_S_FEATURES_OK); +} +EXPORT_SYMBOL_GPL(virtio_device_can_suspend); + +int virtio_device_suspend(struct virtio_device *dev) +{ + return virtio_device_set_suspend_bit(dev, true); +} +EXPORT_SYMBOL_GPL(virtio_device_suspend); + +bool virtio_device_can_resume(struct virtio_device *dev) +{ + return virtio_has_feature(dev, VIRTIO_F_SUSPEND) && + (dev->config->get_status(dev) & VIRTIO_CONFIG_S_SUSPEND); +} +EXPORT_SYMBOL_GPL(virtio_device_can_resume); + +int virtio_device_resume(struct virtio_device *dev) +{ + return virtio_device_set_suspend_bit(dev, false); +} +EXPORT_SYMBOL_GPL(virtio_device_resume); #endif static int virtio_init(void) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b655fccaf773..a6ca7718b5dc 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -495,31 +495,26 @@ static int virtio_pci_restore(struct device *dev) return virtio_device_restore(_dev->vdev); } -static bool vp_supports_pm_no_reset(struct device *dev) +static int virtio_pci_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - u16 pmcsr; - - if (!pci_dev->pm_cap) - return false; - - pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, ); - if (PCI_POSSIBLE_ERROR(pmcsr)) { - dev_err(dev, "Unable to query pmcsr"); - return false; - } + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); - return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; -} + if (virtio_device_can_suspend(_dev->vdev)) + return virtio_device_suspend(_dev->vdev); -static int virtio_pci_suspend(struct device *dev) -{ - return vp_supports_pm_no_reset(dev) ? 
0 : virtio_pci_freeze(dev); + return virtio_pci_freeze(dev); } static int virtio_pci_resume(struct device *dev) { - return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + + if (virtio_device_can_resume(_dev->vdev)) + return virtio_device_resume(_dev->vdev); + + return virtio_pci_restore(dev); } static const struct dev_pm_ops virtio_pci_pm_ops = { @@ -623,9 +618,12 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) * Device is marked broken on surprise removal so that virtio upper
[PATCH v2 0/1] virtio: Add suspend support
This series implements support for the virtio device suspend feature that is under discussion. Unfortunately, the virtio mailing list is currently being migrated, so recent discussion of the proposal is not archived anywhere. The current version of the proposal is a combination of [1] and [2]. [1] https://lore.kernel.org/all/20230906081637.32185-3-lingshan@intel.com/ [2] https://lists.oasis-open.org/archives/virtio-comment/202402/msg00088.html v1 -> v2: - Check for device removal while waiting for suspend bit. - Don't try to suspend uninitialized devices. - Use msleep instead of mdelay. David Stevens (1): virtio: Add support for the virtio suspend feature drivers/virtio/virtio.c| 60 ++ drivers/virtio/virtio_pci_common.c | 34 - drivers/virtio/virtio_pci_modern.c | 19 ++ include/linux/virtio.h | 8 include/uapi/linux/virtio_config.h | 10 - 5 files changed, 112 insertions(+), 19 deletions(-) base-commit: e8f897f4afef0031fe618a8e94127a0934896aba -- 2.44.0.769.g3c40516874-goog
[PATCH 1/1] virtio: Add support for the virtio suspend feature
Add support for the VIRTIO_F_SUSPEND feature. When this feature is negotiated, power management can use it to suspend virtio devices instead of resorting to resetting the devices entirely. Signed-off-by: David Stevens --- drivers/virtio/virtio.c| 32 ++ drivers/virtio/virtio_pci_common.c | 29 +++ drivers/virtio/virtio_pci_modern.c | 19 ++ include/linux/virtio.h | 2 ++ include/uapi/linux/virtio_config.h | 10 +- 5 files changed, 74 insertions(+), 18 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index f4080692b351..cd11495a5098 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include @@ -580,6 +581,37 @@ int virtio_device_restore(struct virtio_device *dev) return ret; } EXPORT_SYMBOL_GPL(virtio_device_restore); + +static int virtio_device_set_suspend_bit(struct virtio_device *dev, bool enabled) +{ + u8 status, target; + + status = dev->config->get_status(dev); + if (enabled) + target = status | VIRTIO_CONFIG_S_SUSPEND; + else + target = status & ~VIRTIO_CONFIG_S_SUSPEND; + dev->config->set_status(dev, target); + + while ((status = dev->config->get_status(dev)) != target) { + if (status & VIRTIO_CONFIG_S_NEEDS_RESET) + return -EIO; + mdelay(10); + } + return 0; +} + +int virtio_device_suspend(struct virtio_device *dev) +{ + return virtio_device_set_suspend_bit(dev, true); +} +EXPORT_SYMBOL_GPL(virtio_device_suspend); + +int virtio_device_resume(struct virtio_device *dev) +{ + return virtio_device_set_suspend_bit(dev, false); +} +EXPORT_SYMBOL_GPL(virtio_device_resume); #endif static int virtio_init(void) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b655fccaf773..4d542de05970 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -495,31 +495,26 @@ static int virtio_pci_restore(struct device *dev) return virtio_device_restore(_dev->vdev); } -static 
bool vp_supports_pm_no_reset(struct device *dev) +static int virtio_pci_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - u16 pmcsr; - - if (!pci_dev->pm_cap) - return false; - - pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, ); - if (PCI_POSSIBLE_ERROR(pmcsr)) { - dev_err(dev, "Unable to query pmcsr"); - return false; - } + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); - return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; -} + if (virtio_has_feature(_dev->vdev, VIRTIO_F_SUSPEND)) + return virtio_device_suspend(_dev->vdev); -static int virtio_pci_suspend(struct device *dev) -{ - return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); + return virtio_pci_freeze(dev); } static int virtio_pci_resume(struct device *dev) { - return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + + if (virtio_has_feature(_dev->vdev, VIRTIO_F_SUSPEND)) + return virtio_device_resume(_dev->vdev); + + return virtio_pci_restore(dev); } static const struct dev_pm_ops virtio_pci_pm_ops = { diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index f62b530aa3b5..ac8734526b8d 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -209,6 +209,22 @@ static void vp_modern_avq_deactivate(struct virtio_device *vdev) __virtqueue_break(admin_vq->info.vq); } +static bool vp_supports_pm_no_reset(struct pci_dev *pci_dev) +{ + u16 pmcsr; + + if (!pci_dev->pm_cap) + return false; + + pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, ); + if (PCI_POSSIBLE_ERROR(pmcsr)) { + dev_err(_dev->dev, "Unable to query pmcsr"); + return false; + } + + return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; +} + static void vp_transport_features(struct virtio_device *vdev, u64 features) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -223,6 +239,9 @@ static void 
vp_transport_features(struct virtio_device *vdev, u64 features) if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ)) __virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ); + + if (features & BIT_ULL(VIRTIO_F_SUSPEND) && vp_supports_pm_no_reset(pci_dev)) + __virtio_set_bit(vdev, VIRTIO_F_SUSPEND); } static i
[PATCH 0/1] virtio: Add suspend support
This series implements support for the virtio device suspend feature that is under discussion. Unfortunately, the virtio mailing list is currently being migrated, so recent discussion of the proposal is not archived anywhere. The current version of the proposal is a combination of [1] and [2]. [1] https://lore.kernel.org/all/20230906081637.32185-3-lingshan@intel.com/ [2] https://lists.oasis-open.org/archives/virtio-comment/202402/msg00088.html David Stevens (1): virtio: Add support for the virtio suspend feature drivers/virtio/virtio.c| 32 ++ drivers/virtio/virtio_pci_common.c | 29 +++ drivers/virtio/virtio_pci_modern.c | 19 ++ include/linux/virtio.h | 2 ++ include/uapi/linux/virtio_config.h | 10 +- 5 files changed, 74 insertions(+), 18 deletions(-) base-commit: e8f897f4afef0031fe618a8e94127a0934896aba -- 2.44.0.683.g7961c838ac-goog
[PATCH v2 0/2] Improvements to virtio_balloon pm
From: David Stevens The virtio_balloon driver uses wakeup sources to allow the guest to enter system power management sleep states (e.g. s2idle) without running the risk of becoming unresponsive to cooperative memory management requests from the host. This series fixes an issue where wakeup sources for inflate/deflate were improperly shared between drivers. It also closes a race where stats requests that come in immediately before a sleep state transition could fail to be handled in a timely manner. v1: https://lore.kernel.org/lkml/20240318091034.535573-1-steve...@google.com/ v1 -> v2: - Add comment about virtio-balloon's wakeup source - Rename virtio-balloon wakeup event macros David Stevens (2): virtio_balloon: Give the balloon its own wakeup source virtio_balloon: Treat stats requests as wakeup events drivers/virtio/virtio_balloon.c | 84 + 1 file changed, 55 insertions(+), 29 deletions(-) base-commit: e8f897f4afef0031fe618a8e94127a0934896aba -- 2.44.0.291.gc1ea87d7ee-goog
[PATCH v2 2/2] virtio_balloon: Treat stats requests as wakeup events
From: David Stevens Treat stats requests as wakeup events to ensure that the driver responds to device requests in a timely manner. Signed-off-by: David Stevens Acked-by: David Hildenbrand --- drivers/virtio/virtio_balloon.c | 75 - 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 89bc8da80519..b09e8e3c62e5 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -121,11 +121,14 @@ struct virtio_balloon { struct page_reporting_dev_info pr_dev_info; /* State for keeping the wakeup_source active while adjusting the balloon */ - spinlock_t adjustment_lock; - bool adjustment_signal_pending; - bool adjustment_in_progress; + spinlock_t wakeup_lock; + bool processing_wakeup_event; + u32 wakeup_signal_mask; }; +#define VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST (1 << 0) +#define VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS (1 << 1) + static const struct virtio_device_id id_table[] = { { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -140,6 +143,36 @@ static u32 page_to_balloon_pfn(struct page *page) return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; } +static void start_wakeup_event(struct virtio_balloon *vb, u32 mask) +{ + unsigned long flags; + + spin_lock_irqsave(>wakeup_lock, flags); + vb->wakeup_signal_mask |= mask; + if (!vb->processing_wakeup_event) { + vb->processing_wakeup_event = true; + pm_stay_awake(>vdev->dev); + } + spin_unlock_irqrestore(>wakeup_lock, flags); +} + +static void process_wakeup_event(struct virtio_balloon *vb, u32 mask) +{ + spin_lock_irq(>wakeup_lock); + vb->wakeup_signal_mask &= ~mask; + spin_unlock_irq(>wakeup_lock); +} + +static void finish_wakeup_event(struct virtio_balloon *vb) +{ + spin_lock_irq(>wakeup_lock); + if (!vb->wakeup_signal_mask && vb->processing_wakeup_event) { + vb->processing_wakeup_event = false; + pm_relax(>vdev->dev); + } + spin_unlock_irq(>wakeup_lock); +} + static void balloon_ack(struct virtqueue *vq) { struct virtio_balloon 
*vb = vq->vdev->priv; @@ -370,8 +403,10 @@ static void stats_request(struct virtqueue *vq) struct virtio_balloon *vb = vq->vdev->priv; spin_lock(>stop_update_lock); - if (!vb->stop_update) + if (!vb->stop_update) { + start_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS); queue_work(system_freezable_wq, >update_balloon_stats_work); + } spin_unlock(>stop_update_lock); } @@ -444,29 +479,10 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) static void start_update_balloon_size(struct virtio_balloon *vb) { - unsigned long flags; - - spin_lock_irqsave(>adjustment_lock, flags); - vb->adjustment_signal_pending = true; - if (!vb->adjustment_in_progress) { - vb->adjustment_in_progress = true; - pm_stay_awake(>vdev->dev); - } - spin_unlock_irqrestore(>adjustment_lock, flags); - + start_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST); queue_work(system_freezable_wq, >update_balloon_size_work); } -static void end_update_balloon_size(struct virtio_balloon *vb) -{ - spin_lock_irq(>adjustment_lock); - if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { - vb->adjustment_in_progress = false; - pm_relax(>vdev->dev); - } - spin_unlock_irq(>adjustment_lock); -} - static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -495,7 +511,10 @@ static void update_balloon_stats_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_stats_work); + + process_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS); stats_handle_request(vb); + finish_wakeup_event(vb); } static void update_balloon_size_func(struct work_struct *work) @@ -506,9 +525,7 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - spin_lock_irq(>adjustment_lock); - vb->adjustment_signal_pending = false; - spin_unlock_irq(>adjustment_lock); + process_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST); 
diff = towards_target(vb); @@ -523,7 +540,7 @@ static void update_balloon_size_func(struct work_struct *work) if (diff) queue_work(system_freezable_wq, work); else -
[PATCH v2 1/2] virtio_balloon: Give the balloon its own wakeup source
From: David Stevens Wakeup sources don't support nesting multiple events, so sharing a single object between multiple drivers can result in one driver overriding the wakeup event processing period specified by another driver. Have the virtio balloon driver use the wakeup source of the device it is bound to rather than the wakeup source of the parent device, to avoid conflicts with the transport layer. Note that although the virtio balloon's virtio_device itself isn't what actually wakes up the device, it is responsible for processing wakeup events. In the same way that EPOLLWAKEUP uses a dedicated wakeup_source to prevent suspend when userspace is processing wakeup events, a dedicated wakeup_source is necessary when processing wakeup events in a higher layer in the kernel. Fixes: b12fbc3f787e ("virtio_balloon: stay awake while adjusting balloon") Signed-off-by: David Stevens Acked-by: David Hildenbrand --- drivers/virtio/virtio_balloon.c | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 1f5b3dd31fcf..89bc8da80519 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -450,7 +450,7 @@ static void start_update_balloon_size(struct virtio_balloon *vb) vb->adjustment_signal_pending = true; if (!vb->adjustment_in_progress) { vb->adjustment_in_progress = true; - pm_stay_awake(vb->vdev->dev.parent); + pm_stay_awake(>vdev->dev); } spin_unlock_irqrestore(>adjustment_lock, flags); @@ -462,7 +462,7 @@ static void end_update_balloon_size(struct virtio_balloon *vb) spin_lock_irq(>adjustment_lock); if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { vb->adjustment_in_progress = false; - pm_relax(vb->vdev->dev.parent); + pm_relax(>vdev->dev); } spin_unlock_irq(>adjustment_lock); } @@ -1029,6 +1029,15 @@ static int virtballoon_probe(struct virtio_device *vdev) spin_lock_init(>adjustment_lock); + /* +* The virtio balloon itself can't wake up 
the device, but it is +* responsible for processing wakeup events passed up from the transport +* layer. Wakeup sources don't support nesting/chaining calls, so we use +* our own wakeup source to ensure wakeup events are properly handled +* without trampling on the transport layer's wakeup source. +*/ + device_set_wakeup_capable(>vdev->dev, true); + virtio_device_ready(vdev); if (towards_target(vb)) -- 2.44.0.291.gc1ea87d7ee-goog
[PATCH 2/2] virtio_balloon: Treat stats requests as wakeup events
From: David Stevens Treat stats requests as wakeup events to ensure that the driver responds to device requests in a timely manner. Signed-off-by: David Stevens --- drivers/virtio/virtio_balloon.c | 75 - 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7fe7ef5f1c77..402dec98e08c 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -121,11 +121,14 @@ struct virtio_balloon { struct page_reporting_dev_info pr_dev_info; /* State for keeping the wakeup_source active while adjusting the balloon */ - spinlock_t adjustment_lock; - bool adjustment_signal_pending; - bool adjustment_in_progress; + spinlock_t wakeup_lock; + bool processing_wakeup_event; + u32 wakeup_signal_mask; }; +#define ADJUSTMENT_WAKEUP_SIGNAL (1 << 0) +#define STATS_WAKEUP_SIGNAL (1 << 1) + static const struct virtio_device_id id_table[] = { { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -140,6 +143,36 @@ static u32 page_to_balloon_pfn(struct page *page) return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; } +static void start_wakeup_event(struct virtio_balloon *vb, u32 mask) +{ + unsigned long flags; + + spin_lock_irqsave(>wakeup_lock, flags); + vb->wakeup_signal_mask |= mask; + if (!vb->processing_wakeup_event) { + vb->processing_wakeup_event = true; + pm_stay_awake(>vdev->dev); + } + spin_unlock_irqrestore(>wakeup_lock, flags); +} + +static void process_wakeup_event(struct virtio_balloon *vb, u32 mask) +{ + spin_lock_irq(>wakeup_lock); + vb->wakeup_signal_mask &= ~mask; + spin_unlock_irq(>wakeup_lock); +} + +static void finish_wakeup_event(struct virtio_balloon *vb) +{ + spin_lock_irq(>wakeup_lock); + if (!vb->wakeup_signal_mask && vb->processing_wakeup_event) { + vb->processing_wakeup_event = false; + pm_relax(>vdev->dev); + } + spin_unlock_irq(>wakeup_lock); +} + static void balloon_ack(struct virtqueue *vq) { struct virtio_balloon *vb = vq->vdev->priv; @@ -370,8 +403,10 @@ static void 
stats_request(struct virtqueue *vq) struct virtio_balloon *vb = vq->vdev->priv; spin_lock(>stop_update_lock); - if (!vb->stop_update) + if (!vb->stop_update) { + start_wakeup_event(vb, STATS_WAKEUP_SIGNAL); queue_work(system_freezable_wq, >update_balloon_stats_work); + } spin_unlock(>stop_update_lock); } @@ -444,29 +479,10 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) static void start_update_balloon_size(struct virtio_balloon *vb) { - unsigned long flags; - - spin_lock_irqsave(>adjustment_lock, flags); - vb->adjustment_signal_pending = true; - if (!vb->adjustment_in_progress) { - vb->adjustment_in_progress = true; - pm_stay_awake(>vdev->dev); - } - spin_unlock_irqrestore(>adjustment_lock, flags); - + start_wakeup_event(vb, ADJUSTMENT_WAKEUP_SIGNAL); queue_work(system_freezable_wq, >update_balloon_size_work); } -static void end_update_balloon_size(struct virtio_balloon *vb) -{ - spin_lock_irq(>adjustment_lock); - if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { - vb->adjustment_in_progress = false; - pm_relax(>vdev->dev); - } - spin_unlock_irq(>adjustment_lock); -} - static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -495,7 +511,10 @@ static void update_balloon_stats_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_stats_work); + + process_wakeup_event(vb, STATS_WAKEUP_SIGNAL); stats_handle_request(vb); + finish_wakeup_event(vb); } static void update_balloon_size_func(struct work_struct *work) @@ -506,9 +525,7 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - spin_lock_irq(>adjustment_lock); - vb->adjustment_signal_pending = false; - spin_unlock_irq(>adjustment_lock); + process_wakeup_event(vb, ADJUSTMENT_WAKEUP_SIGNAL); diff = towards_target(vb); @@ -523,7 +540,7 @@ static void update_balloon_size_func(struct work_struct *work) 
if (diff) queue_work(system_freezable_wq, work); else - end_update_balloon_size(vb); + finish_wakeup_event(vb); } static
[PATCH 1/2] virtio_balloon: Give the balloon its own wakeup source
From: David Stevens Wakeup sources don't support nesting multiple events, so sharing a single object between multiple drivers can result in one driver overriding the wakeup event processing period specified by another driver. Have the virtio balloon driver use the wakeup source of the device it is bound to rather than the wakeup source of the parent device, to avoid conflicts with the transport layer. Note that although the virtio balloon's virtio_device itself isn't what actually wakes up the device, it is responsible for processing wakeup events. In the same way that EPOLLWAKEUP uses a dedicated wakeup_source to prevent suspend when userspace is processing wakeup events, a dedicated wakeup_source is necessary when processing wakeup events in a higher layer in the kernel. Fixes: b12fbc3f787e ("virtio_balloon: stay awake while adjusting balloon") Signed-off-by: David Stevens --- drivers/virtio/virtio_balloon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 1f5b3dd31fcf..7fe7ef5f1c77 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -450,7 +450,7 @@ static void start_update_balloon_size(struct virtio_balloon *vb) vb->adjustment_signal_pending = true; if (!vb->adjustment_in_progress) { vb->adjustment_in_progress = true; - pm_stay_awake(vb->vdev->dev.parent); + pm_stay_awake(>vdev->dev); } spin_unlock_irqrestore(>adjustment_lock, flags); @@ -462,7 +462,7 @@ static void end_update_balloon_size(struct virtio_balloon *vb) spin_lock_irq(>adjustment_lock); if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { vb->adjustment_in_progress = false; - pm_relax(vb->vdev->dev.parent); + pm_relax(>vdev->dev); } spin_unlock_irq(>adjustment_lock); } @@ -1028,6 +1028,7 @@ static int virtballoon_probe(struct virtio_device *vdev) } spin_lock_init(>adjustment_lock); + device_set_wakeup_capable(>vdev->dev, true); virtio_device_ready(vdev); -- 
2.44.0.291.gc1ea87d7ee-goog
[PATCH 0/2] Improvements to virtio_balloon pm
From: David Stevens The virtio_balloon driver uses wakeup sources to allow the guest to enter system power management sleep states (e.g. s2idle) without running the risk of becoming unresponsive to cooperative memory management requests from the host. This series fixes an issue where wakeup sources for inflate/deflate were improperly shared between drivers. It also closes a race where stats requests that come in immediately before a sleep state transition could fail to be handled in a timely manner. David Stevens (2): virtio_balloon: Give the balloon its own wakeup source virtio_balloon: Treat stats requests as wakeup events drivers/virtio/virtio_balloon.c | 76 - 1 file changed, 47 insertions(+), 29 deletions(-) base-commit: e8f897f4afef0031fe618a8e94127a0934896aba -- 2.44.0.291.gc1ea87d7ee-goog
[PATCH v3] virtio_balloon: stay awake while adjusting balloon
From: David Stevens A virtio_balloon's parent device may be configured so that a configuration change interrupt is a wakeup event. Extend the processing of such a wakeup event until the balloon finishes inflating or deflating by calling pm_stay_awake/pm_relax in the virtio_balloon driver. Note that these calls are no-ops if the parent device doesn't support wakeup events or if the wakeup events are not enabled. This change allows the guest to use system power states such as s2idle without running the risk the virtio_balloon's cooperative memory management becoming unresponsive to the host's requests. Signed-off-by: David Stevens --- v2->v3: - Use _irq spinlock functions with adjustment_lock in workqueue, since the lock is accessed in an interrupt context. v1 -> v2: - Use adjustment_signal_pending flag instead of a sequence number - Call pm_stay_awake/pm_relax on parent device instead of adding a wake event to the virtio balloon device drivers/virtio/virtio_balloon.c | 57 +++-- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 1fe93e93f5bc..fa710e6c505a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -119,6 +119,11 @@ struct virtio_balloon { /* Free page reporting device */ struct virtqueue *reporting_vq; struct page_reporting_dev_info pr_dev_info; + + /* State for keeping the wakeup_source active while adjusting the balloon */ + spinlock_t adjustment_lock; + bool adjustment_signal_pending; + bool adjustment_in_progress; }; static const struct virtio_device_id id_table[] = { @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) queue_work(vb->balloon_wq, >report_free_page_work); } +static void start_update_balloon_size(struct virtio_balloon *vb) +{ + unsigned long flags; + + spin_lock_irqsave(>adjustment_lock, flags); + vb->adjustment_signal_pending = true; + if (!vb->adjustment_in_progress) { + 
vb->adjustment_in_progress = true; + pm_stay_awake(vb->vdev->dev.parent); + } + spin_unlock_irqrestore(>adjustment_lock, flags); + + queue_work(system_freezable_wq, >update_balloon_size_work); +} + +static void end_update_balloon_size(struct virtio_balloon *vb) +{ + spin_lock_irq(>adjustment_lock); + if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { + vb->adjustment_in_progress = false; + pm_relax(vb->vdev->dev.parent); + } + spin_unlock_irq(>adjustment_lock); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev) spin_lock_irqsave(>stop_update_lock, flags); if (!vb->stop_update) { - queue_work(system_freezable_wq, - >update_balloon_size_work); + start_update_balloon_size(vb); virtio_balloon_queue_free_page_work(vb); } spin_unlock_irqrestore(>stop_update_lock, flags); @@ -476,19 +505,25 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - diff = towards_target(vb); - if (!diff) - return; + spin_lock_irq(>adjustment_lock); + vb->adjustment_signal_pending = false; + spin_unlock_irq(>adjustment_lock); - if (diff > 0) - diff -= fill_balloon(vb, diff); - else - diff += leak_balloon(vb, -diff); - update_balloon_size(vb); + diff = towards_target(vb); + + if (diff) { + if (diff > 0) + diff -= fill_balloon(vb, diff); + else + diff += leak_balloon(vb, -diff); + update_balloon_size(vb); + } if (diff) queue_work(system_freezable_wq, work); + else + end_update_balloon_size(vb); } static int init_vqs(struct virtio_balloon *vb) @@ -992,6 +1027,8 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_unregister_oom; } + spin_lock_init(>adjustment_lock); + virtio_device_ready(vdev); if (towards_target(vb)) -- 2.43.0.472.g3155946c3a-goog
Re: REGRESSION: lockdep warning triggered by 15b9ce7ecd: virtio_balloon: stay awake while adjusting balloon
3605] Oh, that's embarrassing, I completely whiffed on interactions with interrupts. The following patch fixes it, and I've locally repro'ed the issue and verified the fix. What's the process for getting this fix merged? Does it get merged as a seperatch patch, or squashed into the original commit? >From a99a1efa6a2b470a98ea2c87e58bebe90ce329a1 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 9 Jan 2024 14:41:21 +0900 Subject: [PATCH] virtio_balloon: Fix interrupt context deadlock Use _irq spinlock functions with the adjustment_lock, since start_update_balloon_size needs to acquire it in an interrupt context. Fixes: 5b9ce7ecd715 ("virtio_balloon: stay awake while adjusting balloon") Reported-by: Theodore Ts'o Signed-off-by: David Stevens --- drivers/virtio/virtio_balloon.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index aa6a1a649ad6..1f5b3dd31fcf 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -459,12 +459,12 @@ static void start_update_balloon_size(struct virtio_balloon *vb) static void end_update_balloon_size(struct virtio_balloon *vb) { - spin_lock(>adjustment_lock); + spin_lock_irq(>adjustment_lock); if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { vb->adjustment_in_progress = false; pm_relax(vb->vdev->dev.parent); } - spin_unlock(>adjustment_lock); + spin_unlock_irq(>adjustment_lock); } static void virtballoon_changed(struct virtio_device *vdev) @@ -506,9 +506,9 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - spin_lock(>adjustment_lock); + spin_lock_irq(>adjustment_lock); vb->adjustment_signal_pending = false; - spin_unlock(>adjustment_lock); + spin_unlock_irq(>adjustment_lock); diff = towards_target(vb); -- 2.43.0.472.g3155946c3a-goog
Re: [PATCH v2] virtio_balloon: stay awake while adjusting balloon
On Mon, Dec 18, 2023 at 12:33 PM David Hildenbrand wrote: > > On 18.12.23 16:18, David Stevens wrote: > > From: David Stevens > > > > A virtio_balloon's parent device may be configured so that a > > configuration change interrupt is a wakeup event. Extend the processing > > of such a wakeup event until the balloon finishes inflating or deflating > > by calling pm_stay_awake/pm_relax in the virtio_balloon driver. Note > > that these calls are no-ops if the parent device doesn't support wakeup > > events or if the wakeup events are not enabled. > > > > This change allows the guest to use system power states such as s2idle > > without running the risk the virtio_balloon's cooperative memory > > management becoming unresponsive to the host's requests. > > > > Signed-off-by: David Stevens > > --- > > v1 -> v2: > > - Use adjustment_signal_pending flag instead of a sequence number > > - Call pm_stay_awake/pm_relax on parent device instead of adding a wake > > event to the virtio balloon device > > > > drivers/virtio/virtio_balloon.c | 57 +++-- > > 1 file changed, 47 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/virtio/virtio_balloon.c > > b/drivers/virtio/virtio_balloon.c > > index 1fe93e93f5bc..a3c11159cbe0 100644 > > --- a/drivers/virtio/virtio_balloon.c > > +++ b/drivers/virtio/virtio_balloon.c > > @@ -119,6 +119,11 @@ struct virtio_balloon { > > /* Free page reporting device */ > > struct virtqueue *reporting_vq; > > struct page_reporting_dev_info pr_dev_info; > > + > > + /* State for keeping the wakeup_source active while adjusting the > > balloon */ > > + spinlock_t adjustment_lock; > > + bool adjustment_signal_pending; > > + bool adjustment_in_progress; > > }; > > > > static const struct virtio_device_id id_table[] = { > > @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct > > virtio_balloon *vb) > > queue_work(vb->balloon_wq, >report_free_page_work); > > } > > > > +static void start_update_balloon_size(struct virtio_balloon 
*vb) > > +{ > > + unsigned long flags; > > + > > + spin_lock_irqsave(>adjustment_lock, flags); > > + vb->adjustment_signal_pending = true; > > + if (!vb->adjustment_in_progress) { > > + vb->adjustment_in_progress = true; > > + pm_stay_awake(vb->vdev->dev.parent); > > + } > > + spin_unlock_irqrestore(>adjustment_lock, flags); > > + > > + queue_work(system_freezable_wq, >update_balloon_size_work); > > +} > > + > > +static void end_update_balloon_size(struct virtio_balloon *vb) > > +{ > > + spin_lock(>adjustment_lock); > > + if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { > > How could vb->adjustment_in_progress ever not be set at this point? > > > + vb->adjustment_in_progress = false; > > + pm_relax(vb->vdev->dev.parent); > > + } > > + spin_unlock(>adjustment_lock); > > +} > > + > > LGTM, although I wonder what happens when calling pm_stay_awake() etc. > on a parent device that is not wakeup-even-capable? If the parent device is not wakeup capable or if wakeup isn't enabled, then the vb->vdev->dev.parent->power.wakeup pointer will be NULL, so the NULL checks in __pm_relax/__pm_stay_awake will immediately return. -David
Re: [PATCH] virtio_balloon: stay awake while adjusting balloon
On Mon, Dec 18, 2023 at 10:18 AM David Hildenbrand wrote: > > On 18.12.23 16:16, David Stevens wrote: > > On Mon, Dec 18, 2023 at 6:37 AM David Hildenbrand wrote: > >> > >> On 14.12.23 05:13, David Stevens wrote: > >>> On Wed, Dec 13, 2023 at 5:44 PM David Hildenbrand > >>> wrote: > >>>> > >>>> On 11.12.23 12:43, David Stevens wrote: > >>>>> From: David Stevens > >>>>> > >>>> > >>>> Hi David, > >>>> > >>>>> Add a wakeup event for when the balloon is inflating or deflating. > >>>>> Userspace can enable this wakeup event to prevent the system from > >>>>> suspending while the balloon is being adjusted. This allows > >>>>> /sys/power/wakeup_count to be used without breaking virtio_balloon's > >>>>> cooperative memory management. > >>>> > >>>> Can you add/share some more details > >>> > >>> I'm working on enabling support for Linux s2Idle in our Android > >>> virtual machine, to restrict apps from running in the background > >>> without holding an Android partial wakelock. With the patch I recently > >>> sent out [1], since crosvm advertises native PCI power management for > >>> virtio devices, the Android guest can properly enter s2idle, and it > >>> can be woken up by incoming IO. However, one of the remaining problems > >>> is that when the host needs to reclaim memory from the guest via the > >>> virtio-balloon, there is nothing preventing the guest from entering > >>> s2idle before the balloon driver finishes returning memory to the > >>> host. > >> > >> Thanks for the information. So you also want to wakeup the VM when > >> wanting to get more memory from the VM? > >> > >> Using which mechanism would that wakeup happen? Likely not the device > >> itself? > > > > The wakeup would happen via the parent device's interrupt. I've sent a > > new version of this patch that uses the parent device's wakeup event > > instead of adding a new one. 
> > > >>> > >>> One alternative to this approach would be to add a virtballoon_suspend > >>> callback to abort suspend if the balloon is inflating/adjusting. > >>> However, it seems cleaner to just prevent suspend in the first place. > >> > >> Also, the PM notifier could also be used with very high priority, so the > >> device would respond early to PM_SUSPEND_PREPARE. > > > > One drawback of blocking suspend via a PM notifier is that the > > behavior isn't configurable by userspace, whereas wakeup events can be > > enabled/disabled by userspace. > > The question is if that behavior for the balloon is really worth it > being configured by user space? It seems to me that the current behavior of completely resetting the virtio balloon in virtballoon_freeze is basically antithetical to the power management integration I'm adding, where power management doesn't interrupt the virtio balloon's operation at all. So if there are any systems that are actually happy with the current power management behavior, they probably don't want suspend to be blocked by inflation/deflation. -David
[PATCH v2] virtio_balloon: stay awake while adjusting balloon
From: David Stevens A virtio_balloon's parent device may be configured so that a configuration change interrupt is a wakeup event. Extend the processing of such a wakeup event until the balloon finishes inflating or deflating by calling pm_stay_awake/pm_relax in the virtio_balloon driver. Note that these calls are no-ops if the parent device doesn't support wakeup events or if the wakeup events are not enabled. This change allows the guest to use system power states such as s2idle without running the risk the virtio_balloon's cooperative memory management becoming unresponsive to the host's requests. Signed-off-by: David Stevens --- v1 -> v2: - Use adjustment_signal_pending flag instead of a sequence number - Call pm_stay_awake/pm_relax on parent device instead of adding a wake event to the virtio balloon device drivers/virtio/virtio_balloon.c | 57 +++-- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 1fe93e93f5bc..a3c11159cbe0 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -119,6 +119,11 @@ struct virtio_balloon { /* Free page reporting device */ struct virtqueue *reporting_vq; struct page_reporting_dev_info pr_dev_info; + + /* State for keeping the wakeup_source active while adjusting the balloon */ + spinlock_t adjustment_lock; + bool adjustment_signal_pending; + bool adjustment_in_progress; }; static const struct virtio_device_id id_table[] = { @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) queue_work(vb->balloon_wq, >report_free_page_work); } +static void start_update_balloon_size(struct virtio_balloon *vb) +{ + unsigned long flags; + + spin_lock_irqsave(>adjustment_lock, flags); + vb->adjustment_signal_pending = true; + if (!vb->adjustment_in_progress) { + vb->adjustment_in_progress = true; + pm_stay_awake(vb->vdev->dev.parent); + } + spin_unlock_irqrestore(>adjustment_lock, flags); + + 
queue_work(system_freezable_wq, &vb->update_balloon_size_work); +} + +static void end_update_balloon_size(struct virtio_balloon *vb) +{ + spin_lock(&vb->adjustment_lock); + if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) { + vb->adjustment_in_progress = false; + pm_relax(vb->vdev->dev.parent); + } + spin_unlock(&vb->adjustment_lock); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev) spin_lock_irqsave(&vb->stop_update_lock, flags); if (!vb->stop_update) { - queue_work(system_freezable_wq, - &vb->update_balloon_size_work); + start_update_balloon_size(vb); virtio_balloon_queue_free_page_work(vb); } spin_unlock_irqrestore(&vb->stop_update_lock, flags); @@ -476,19 +505,25 @@ static void update_balloon_size_func(struct work_struct *work) vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - diff = towards_target(vb); - if (!diff) - return; + spin_lock(&vb->adjustment_lock); + vb->adjustment_signal_pending = false; + spin_unlock(&vb->adjustment_lock); - if (diff > 0) - diff -= fill_balloon(vb, diff); - else - diff += leak_balloon(vb, -diff); - update_balloon_size(vb); + diff = towards_target(vb); + + if (diff) { + if (diff > 0) + diff -= fill_balloon(vb, diff); + else + diff += leak_balloon(vb, -diff); + update_balloon_size(vb); + } if (diff) queue_work(system_freezable_wq, work); + else + end_update_balloon_size(vb); } static int init_vqs(struct virtio_balloon *vb) @@ -992,6 +1027,8 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_unregister_oom; } + spin_lock_init(&vb->adjustment_lock); + virtio_device_ready(vdev); if (towards_target(vb)) -- 2.43.0.472.g3155946c3a-goog
Re: [PATCH] virtio_balloon: stay awake while adjusting balloon
On Mon, Dec 18, 2023 at 6:37 AM David Hildenbrand wrote: > > On 14.12.23 05:13, David Stevens wrote: > > On Wed, Dec 13, 2023 at 5:44 PM David Hildenbrand wrote: > >> > >> On 11.12.23 12:43, David Stevens wrote: > >>> From: David Stevens > >>> > >> > >> Hi David, > >> > >>> Add a wakeup event for when the balloon is inflating or deflating. > >>> Userspace can enable this wakeup event to prevent the system from > >>> suspending while the balloon is being adjusted. This allows > >>> /sys/power/wakeup_count to be used without breaking virtio_balloon's > >>> cooperative memory management. > >> > >> Can you add/share some more details > > > > I'm working on enabling support for Linux s2Idle in our Android > > virtual machine, to restrict apps from running in the background > > without holding an Android partial wakelock. With the patch I recently > > sent out [1], since crosvm advertises native PCI power management for > > virtio devices, the Android guest can properly enter s2idle, and it > > can be woken up by incoming IO. However, one of the remaining problems > > is that when the host needs to reclaim memory from the guest via the > > virtio-balloon, there is nothing preventing the guest from entering > > s2idle before the balloon driver finishes returning memory to the > > host. > > Thanks for the information. So you also want to wakeup the VM when > wanting to get more memory from the VM? > > Using which mechanism would that wakeup happen? Likely not the device > itself? The wakeup would happen via the parent device's interrupt. I've sent a new version of this patch that uses the parent device's wakeup event instead of adding a new one. > > > > One alternative to this approach would be to add a virtballoon_suspend > > callback to abort suspend if the balloon is inflating/adjusting. > > However, it seems cleaner to just prevent suspend in the first place. 
> > Also, the PM notifier could also be used with very high priority, so the > device would respond early to PM_SUSPEND_PREPARE. One drawback of blocking suspend via a PM notifier is that the behavior isn't configurable by userspace, whereas wakeup events can be enabled/disabled by userspace. -David
Re: [PATCH v2] virtio: Add support for no-reset virtio PCI PM
On Thu, Dec 14, 2023 at 6:47 PM David Hildenbrand wrote: > > On 08.12.23 08:07, David Stevens wrote: > > If a virtio_pci_device supports native PCI power management and has the > > No_Soft_Reset bit set, then skip resetting and reinitializing the device > > when suspending and restoring the device. This allows system-wide low > > power states like s2idle to be used in systems with stateful virtio > > devices that can't simply be re-initialized (e.g. virtio-fs). > > > > Signed-off-by: David Stevens > > --- > > v1 -> v2: > > - Check the No_Soft_Reset bit > > > > drivers/virtio/virtio_pci_common.c | 34 +- > > 1 file changed, 33 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/virtio/virtio_pci_common.c > > b/drivers/virtio/virtio_pci_common.c > > index c2524a7207cf..3a95ecaf12dc 100644 > > --- a/drivers/virtio/virtio_pci_common.c > > +++ b/drivers/virtio/virtio_pci_common.c > > @@ -492,8 +492,40 @@ static int virtio_pci_restore(struct device *dev) > > return virtio_device_restore(_dev->vdev); > > } > > > > +static bool vp_supports_pm_no_reset(struct device *dev) > > +{ > > + struct pci_dev *pci_dev = to_pci_dev(dev); > > + u16 pmcsr; > > + > > + if (!pci_dev->pm_cap) > > + return false; > > + > > + pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, ); > > + if (PCI_POSSIBLE_ERROR(pmcsr)) { > > + dev_err(dev, "Unable to query pmcsr"); > > + return false; > > + } > > + > > + return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; > > +} > > + > > +static int virtio_pci_suspend(struct device *dev) > > +{ > > + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); > > +} > > + > > +static int virtio_pci_resume(struct device *dev) > > +{ > > + return vp_supports_pm_no_reset(dev) ? 
0 : virtio_pci_restore(dev); > > +} > > + > > static const struct dev_pm_ops virtio_pci_pm_ops = { > > - SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) > > + .suspend = virtio_pci_suspend, > > + .resume = virtio_pci_resume, > > + .freeze = virtio_pci_freeze, > > + .thaw = virtio_pci_restore, > > + .poweroff = virtio_pci_freeze, > > + .restore = virtio_pci_restore, > > }; > > #endif > > > > Am I correct with my assumption that this will make s2idle work with > virtio-mem-pci as well? > > Right now, all suspending is disabled, but really only s4/hibernate is > problematic. > > [root@vm-0 ~]# echo "s2idle" > /sys/power/mem_sleep > [root@vm-0 ~]# echo "mem" > /sys/power/state > [ 101.822991] PM: suspend entry (s2idle) > [ 101.828978] Filesystems sync: 0.004 seconds > [ 101.831618] Freezing user space processes > [ 101.834569] Freezing user space processes completed (elapsed 0.001 seconds) > [ 101.836915] OOM killer disabled. > [ 101.838072] Freezing remaining freezable tasks > [ 101.841054] Freezing remaining freezable tasks completed (elapsed 0.001 > seconds) > [ 101.843538] printk: Suspending console(s) (use no_console_suspend to debug) > [ 101.957676] virtio_mem virtio0: save/restore not supported. > [ 101.957683] virtio-pci :00:02.0: PM: pci_pm_suspend(): > virtio_pci_freeze+0x0/0x50 returns -1 > [ 101.957702] virtio-pci :00:02.0: PM: dpm_run_callback(): > pci_pm_suspend+0x0/0x170 returns -1 > [ 101.957718] virtio-pci :00:02.0: PM: failed to suspend async: error -1 QEMU's virtio-pci devices don't advertise no_soft_reset, so this patch won't affect vanilla QEMU. But if you add PCI_PM_CTRL_NO_SOFT_RESET to the capability, then it should work. I'm working with crosvm, which doesn't have virtio-mem implemented, but this patch makes virtio-fs work with no extra kernel changes. -David
Re: [PATCH] virtio_balloon: stay awake while adjusting balloon
On Wed, Dec 13, 2023 at 5:44 PM David Hildenbrand wrote: > > On 11.12.23 12:43, David Stevens wrote: > > From: David Stevens > > > > Hi David, > > > Add a wakeup event for when the balloon is inflating or deflating. > > Userspace can enable this wakeup event to prevent the system from > > suspending while the balloon is being adjusted. This allows > > /sys/power/wakeup_count to be used without breaking virtio_balloon's > > cooperative memory management. > > Can you add/share some more details I'm working on enabling support for Linux s2Idle in our Android virtual machine, to restrict apps from running in the background without holding an Android partial wakelock. With the patch I recently sent out [1], since crosvm advertises native PCI power management for virtio devices, the Android guest can properly enter s2idle, and it can be woken up by incoming IO. However, one of the remaining problems is that when the host needs to reclaim memory from the guest via the virtio-balloon, there is nothing preventing the guest from entering s2idle before the balloon driver finishes returning memory to the host. One alternative to this approach would be to add a virtballoon_suspend callback to abort suspend if the balloon is inflating/adjusting. However, it seems cleaner to just prevent suspend in the first place. 
[1] https://lore.kernel.org/lkml/20231208070754.3132339-1-steve...@chromium.org/ > > > > Signed-off-by: David Stevens > > --- > > drivers/virtio/virtio_balloon.c | 59 +++-- > > 1 file changed, 49 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/virtio/virtio_balloon.c > > b/drivers/virtio/virtio_balloon.c > > index 1fe93e93f5bc..811d8937246a 100644 > > --- a/drivers/virtio/virtio_balloon.c > > +++ b/drivers/virtio/virtio_balloon.c > > @@ -119,6 +119,11 @@ struct virtio_balloon { > > /* Free page reporting device */ > > struct virtqueue *reporting_vq; > > struct page_reporting_dev_info pr_dev_info; > > + > > + /* State for keeping the wakeup_source active while adjusting the > > balloon */ > > + spinlock_t adjustment_lock; > > + u32 adjustment_seqno; > > Using a simple flag that gets set when updating the balloon size and > test-and-clear when testing for changes should be easier to get. > > bool adjustment_balloon_size_changed; > > or sth like that. That's a good simplification, thanks. 
> > + bool adjustment_in_progress; > > }; > > > > static const struct virtio_device_id id_table[] = { > > @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct > > virtio_balloon *vb) > > queue_work(vb->balloon_wq, >report_free_page_work); > > } > > > > +static void start_update_balloon_size(struct virtio_balloon *vb) > > +{ > > + unsigned long flags; > > + > > + spin_lock_irqsave(>adjustment_lock, flags); > > + vb->adjustment_seqno++; > > + if (!vb->adjustment_in_progress) { > > + vb->adjustment_in_progress = true; > > + pm_stay_awake(>vdev->dev); > > + } > > + spin_unlock_irqrestore(>adjustment_lock, flags); > > + > > + queue_work(system_freezable_wq, >update_balloon_size_work); > > +} > > + > > +static void end_update_balloon_size(struct virtio_balloon *vb, u32 seqno) > > +{ > > + spin_lock(>adjustment_lock); > > + if (vb->adjustment_seqno == seqno && vb->adjustment_in_progress) { > > + vb->adjustment_in_progress = false; > > + pm_relax(>vdev->dev); > > + } > > + spin_unlock(>adjustment_lock); > > +} > > + > > static void virtballoon_changed(struct virtio_device *vdev) > > { > > struct virtio_balloon *vb = vdev->priv; > > @@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device > > *vdev) > > > > spin_lock_irqsave(>stop_update_lock, flags); > > if (!vb->stop_update) { > > - queue_work(system_freezable_wq, > > ->update_balloon_size_work); > > + start_update_balloon_size(vb); > > virtio_balloon_queue_free_page_work(vb); > > } > > spin_unlock_irqrestore(>stop_update_lock, flags); > > @@ -473,22 +502,29 @@ static void update_balloon_size_func(struct > > work_struct *work) > > { > > struct virtio_balloon *vb; > > s64 diff; > > + u32 seqno; > > > > vb = container_of(work, str
[PATCH] virtio_balloon: stay awake while adjusting balloon
From: David Stevens Add a wakeup event for when the balloon is inflating or deflating. Userspace can enable this wakeup event to prevent the system from suspending while the balloon is being adjusted. This allows /sys/power/wakeup_count to be used without breaking virtio_balloon's cooperative memory management. Signed-off-by: David Stevens --- drivers/virtio/virtio_balloon.c | 59 +++-- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 1fe93e93f5bc..811d8937246a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -119,6 +119,11 @@ struct virtio_balloon { /* Free page reporting device */ struct virtqueue *reporting_vq; struct page_reporting_dev_info pr_dev_info; + + /* State for keeping the wakeup_source active while adjusting the balloon */ + spinlock_t adjustment_lock; + u32 adjustment_seqno; + bool adjustment_in_progress; }; static const struct virtio_device_id id_table[] = { @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) queue_work(vb->balloon_wq, >report_free_page_work); } +static void start_update_balloon_size(struct virtio_balloon *vb) +{ + unsigned long flags; + + spin_lock_irqsave(>adjustment_lock, flags); + vb->adjustment_seqno++; + if (!vb->adjustment_in_progress) { + vb->adjustment_in_progress = true; + pm_stay_awake(>vdev->dev); + } + spin_unlock_irqrestore(>adjustment_lock, flags); + + queue_work(system_freezable_wq, >update_balloon_size_work); +} + +static void end_update_balloon_size(struct virtio_balloon *vb, u32 seqno) +{ + spin_lock(>adjustment_lock); + if (vb->adjustment_seqno == seqno && vb->adjustment_in_progress) { + vb->adjustment_in_progress = false; + pm_relax(>vdev->dev); + } + spin_unlock(>adjustment_lock); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; @@ -444,8 +474,7 @@ static void virtballoon_changed(struct 
virtio_device *vdev) spin_lock_irqsave(>stop_update_lock, flags); if (!vb->stop_update) { - queue_work(system_freezable_wq, - >update_balloon_size_work); + start_update_balloon_size(vb); virtio_balloon_queue_free_page_work(vb); } spin_unlock_irqrestore(>stop_update_lock, flags); @@ -473,22 +502,29 @@ static void update_balloon_size_func(struct work_struct *work) { struct virtio_balloon *vb; s64 diff; + u32 seqno; vb = container_of(work, struct virtio_balloon, update_balloon_size_work); - diff = towards_target(vb); - if (!diff) - return; + spin_lock(>adjustment_lock); + seqno = vb->adjustment_seqno; + spin_unlock(>adjustment_lock); - if (diff > 0) - diff -= fill_balloon(vb, diff); - else - diff += leak_balloon(vb, -diff); - update_balloon_size(vb); + diff = towards_target(vb); + + if (diff) { + if (diff > 0) + diff -= fill_balloon(vb, diff); + else + diff += leak_balloon(vb, -diff); + update_balloon_size(vb); + } if (diff) queue_work(system_freezable_wq, work); + else + end_update_balloon_size(vb, seqno); } static int init_vqs(struct virtio_balloon *vb) @@ -992,6 +1028,9 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_unregister_oom; } + spin_lock_init(>adjustment_lock); + device_set_wakeup_capable(>vdev->dev, true); + virtio_device_ready(vdev); if (towards_target(vb)) -- 2.43.0.472.g3155946c3a-goog
[PATCH v2] virtio: Add support for no-reset virtio PCI PM
If a virtio_pci_device supports native PCI power management and has the No_Soft_Reset bit set, then skip resetting and reinitializing the device when suspending and restoring the device. This allows system-wide low power states like s2idle to be used in systems with stateful virtio devices that can't simply be re-initialized (e.g. virtio-fs). Signed-off-by: David Stevens --- v1 -> v2: - Check the No_Soft_Reset bit drivers/virtio/virtio_pci_common.c | 34 +- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index c2524a7207cf..3a95ecaf12dc 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -492,8 +492,40 @@ static int virtio_pci_restore(struct device *dev) return virtio_device_restore(&vp_dev->vdev); } +static bool vp_supports_pm_no_reset(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + u16 pmcsr; + + if (!pci_dev->pm_cap) + return false; + + pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr); + if (PCI_POSSIBLE_ERROR(pmcsr)) { + dev_err(dev, "Unable to query pmcsr"); + return false; + } + + return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; +} + +static int virtio_pci_suspend(struct device *dev) +{ + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); +} + +static int virtio_pci_resume(struct device *dev) +{ + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); +} + static const struct dev_pm_ops virtio_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) + .suspend = virtio_pci_suspend, + .resume = virtio_pci_resume, + .freeze = virtio_pci_freeze, + .thaw = virtio_pci_restore, + .poweroff = virtio_pci_freeze, + .restore = virtio_pci_restore, }; #endif -- 2.43.0.472.g3155946c3a-goog
[PATCH] virtio: Add virtio-device power management
Virtio power management for virtio pci devices is defined on top of PCI power management, so it mostly works out of the box. All that's needed is to add suspend/resume callbacks to the virtio core, instead of reusing the freeze/restore. Signed-off-by: David Stevens --- drivers/virtio/virtio_pci_common.c | 21 - 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index c2524a7207cf..0cad900eaf4f 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -492,8 +492,27 @@ static int virtio_pci_restore(struct device *dev) return virtio_device_restore(&vp_dev->vdev); } +static int virtio_pci_suspend(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + + return pci_dev->pm_cap ? 0 : virtio_pci_freeze(dev); +} + +static int virtio_pci_resume(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + + return pci_dev->pm_cap ? 0 : virtio_pci_restore(dev); +} + static const struct dev_pm_ops virtio_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) + .suspend = virtio_pci_suspend, + .resume = virtio_pci_resume, + .freeze = virtio_pci_freeze, + .thaw = virtio_pci_restore, + .poweroff = virtio_pci_freeze, + .restore = virtio_pci_restore, }; #endif -- 2.42.0.869.gea05f2083d-goog
Re: [PATCH v3] drm/syncobj: use newly allocated stub fences
Pushing to drm-misc-next works for me. Thanks for the quick responses. -David On Thu, Apr 8, 2021 at 6:56 PM Christian König wrote: > > Am 08.04.21 um 11:54 schrieb David Stevens: > > From: David Stevens > > > > Allocate a new private stub fence in drm_syncobj_assign_null_handle, > > instead of using a static stub fence. > > > > When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when > > userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp > > obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO > > should match when the fence's status was changed from the perspective of > > userspace, which is during the respective ioctl. > > > > When a static stub fence started being used in by these ioctls, this > > behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO > > became the first time anything used the static stub fence, which has no > > meaning to userspace. > > > > Signed-off-by: David Stevens > > Reviewed-by: Christian König > > Should I push this to drm-misc-next or how do you want to upstream it? > > Thanks, > Christian. > > > --- > > v2 -> v3: > > - reuse the static stub spinlock > > v1 -> v2: > > - checkpatch style fixes > > > > drivers/dma-buf/dma-fence.c | 27 ++- > > drivers/gpu/drm/drm_syncobj.c | 25 +++-- > > include/linux/dma-fence.h | 1 + > > 3 files changed, 46 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c > > index d64fc03929be..ce0f5eff575d 100644 > > --- a/drivers/dma-buf/dma-fence.c > > +++ b/drivers/dma-buf/dma-fence.c > > @@ -123,7 +123,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = { > > /** > >* dma_fence_get_stub - return a signaled fence > >* > > - * Return a stub fence which is already signaled. > > + * Return a stub fence which is already signaled. The fence's > > + * timestamp corresponds to the first time after boot this > > + * function is called. 
> >*/ > > struct dma_fence *dma_fence_get_stub(void) > > { > > @@ -141,6 +143,29 @@ struct dma_fence *dma_fence_get_stub(void) > > } > > EXPORT_SYMBOL(dma_fence_get_stub); > > > > +/** > > + * dma_fence_allocate_private_stub - return a private, signaled fence > > + * > > + * Return a newly allocated and signaled stub fence. > > + */ > > +struct dma_fence *dma_fence_allocate_private_stub(void) > > +{ > > + struct dma_fence *fence; > > + > > + fence = kzalloc(sizeof(*fence), GFP_KERNEL); > > + if (fence == NULL) > > + return ERR_PTR(-ENOMEM); > > + > > + dma_fence_init(fence, > > +_fence_stub_ops, > > +_fence_stub_lock, > > +0, 0); > > + dma_fence_signal(fence); > > + > > + return fence; > > +} > > +EXPORT_SYMBOL(dma_fence_allocate_private_stub); > > + > > /** > >* dma_fence_context_alloc - allocate an array of fence contexts > >* @num: amount of contexts to allocate > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > > index 349146049849..a54aa850d143 100644 > > --- a/drivers/gpu/drm/drm_syncobj.c > > +++ b/drivers/gpu/drm/drm_syncobj.c > > @@ -350,12 +350,16 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence); > >* > >* Assign a already signaled stub fence to the sync object. 
> >*/ > > -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > > +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > > { > > - struct dma_fence *fence = dma_fence_get_stub(); > > + struct dma_fence *fence = dma_fence_allocate_private_stub(); > > + > > + if (IS_ERR(fence)) > > + return PTR_ERR(fence); > > > > drm_syncobj_replace_fence(syncobj, fence); > > dma_fence_put(fence); > > + return 0; > > } > > > > /* 5s default for wait submission */ > > @@ -469,6 +473,7 @@ EXPORT_SYMBOL(drm_syncobj_free); > > int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, > > struct dma_fence *fence) > > { > > + int ret; > > struct drm_syncobj *syncobj; > > > > syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); > > @@ -479,8 +484,13 @@ int drm_syncobj_create(struct drm_syncobj > > **out_syncobj,
[PATCH v3] drm/syncobj: use newly allocated stub fences
From: David Stevens Allocate a new private stub fence in drm_syncobj_assign_null_handle, instead of using a static stub fence. When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO should match when the fence's status was changed from the perspective of userspace, which is during the respective ioctl. When a static stub fence started being used in by these ioctls, this behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO became the first time anything used the static stub fence, which has no meaning to userspace. Signed-off-by: David Stevens --- v2 -> v3: - reuse the static stub spinlock v1 -> v2: - checkpatch style fixes drivers/dma-buf/dma-fence.c | 27 ++- drivers/gpu/drm/drm_syncobj.c | 25 +++-- include/linux/dma-fence.h | 1 + 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index d64fc03929be..ce0f5eff575d 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -123,7 +123,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = { /** * dma_fence_get_stub - return a signaled fence * - * Return a stub fence which is already signaled. + * Return a stub fence which is already signaled. The fence's + * timestamp corresponds to the first time after boot this + * function is called. */ struct dma_fence *dma_fence_get_stub(void) { @@ -141,6 +143,29 @@ struct dma_fence *dma_fence_get_stub(void) } EXPORT_SYMBOL(dma_fence_get_stub); +/** + * dma_fence_allocate_private_stub - return a private, signaled fence + * + * Return a newly allocated and signaled stub fence. 
+ */ +struct dma_fence *dma_fence_allocate_private_stub(void) +{ + struct dma_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return ERR_PTR(-ENOMEM); + + dma_fence_init(fence, + _fence_stub_ops, + _fence_stub_lock, + 0, 0); + dma_fence_signal(fence); + + return fence; +} +EXPORT_SYMBOL(dma_fence_allocate_private_stub); + /** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 349146049849..a54aa850d143 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -350,12 +350,16 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence); * * Assign a already signaled stub fence to the sync object. */ -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) { - struct dma_fence *fence = dma_fence_get_stub(); + struct dma_fence *fence = dma_fence_allocate_private_stub(); + + if (IS_ERR(fence)) + return PTR_ERR(fence); drm_syncobj_replace_fence(syncobj, fence); dma_fence_put(fence); + return 0; } /* 5s default for wait submission */ @@ -469,6 +473,7 @@ EXPORT_SYMBOL(drm_syncobj_free); int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, struct dma_fence *fence) { + int ret; struct drm_syncobj *syncobj; syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); @@ -479,8 +484,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, INIT_LIST_HEAD(>cb_list); spin_lock_init(>lock); - if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) - drm_syncobj_assign_null_handle(syncobj); + if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { + ret = drm_syncobj_assign_null_handle(syncobj); + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + } if (fence) drm_syncobj_replace_fence(syncobj, fence); @@ -1322,8 +1332,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, if (ret < 
0) return ret; - for (i = 0; i < args->count_handles; i++) - drm_syncobj_assign_null_handle(syncobjs[i]); + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_assign_null_handle(syncobjs[i]); + if (ret < 0) + break; + } drm_syncobj_array_free(syncobjs, args->count_handles); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 9f12efaaa93a..6ffb4b2c6371 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -587,6 +587,7 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr) } struct dma_fence
[PATCH v2] drm/syncobj: use newly allocated stub fences
From: David Stevens Allocate a new private stub fence in drm_syncobj_assign_null_handle, instead of using a static stub fence. When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO should match when the fence's status was changed from the perspective of userspace, which is during the respective ioctl. When a static stub fence started being used in by these ioctls, this behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO became the first time anything used the static stub fence, which has no meaning to userspace. Signed-off-by: David Stevens --- v1 -> v2: - checkpatch style fixes drivers/dma-buf/dma-fence.c | 33 - drivers/gpu/drm/drm_syncobj.c | 25 +++-- include/linux/dma-fence.h | 1 + 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index d64fc03929be..6081eb962490 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -26,6 +26,11 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); static DEFINE_SPINLOCK(dma_fence_stub_lock); static struct dma_fence dma_fence_stub; +struct drm_fence_private_stub { + struct dma_fence base; + spinlock_t lock; +}; + /* * fence context counter: each execution context should have its own * fence context, this allows checking if fences belong to the same @@ -123,7 +128,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = { /** * dma_fence_get_stub - return a signaled fence * - * Return a stub fence which is already signaled. + * Return a stub fence which is already signaled. The fence's + * timestamp corresponds to the first time after boot this + * function is called. 
*/ struct dma_fence *dma_fence_get_stub(void) { @@ -141,6 +148,30 @@ struct dma_fence *dma_fence_get_stub(void) } EXPORT_SYMBOL(dma_fence_get_stub); +/** + * dma_fence_allocate_private_stub - return a private, signaled fence + * + * Return a newly allocated and signaled stub fence. + */ +struct dma_fence *dma_fence_allocate_private_stub(void) +{ + struct drm_fence_private_stub *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return ERR_PTR(-ENOMEM); + + spin_lock_init(>lock); + dma_fence_init(>base, + _fence_stub_ops, + >lock, + 0, 0); + dma_fence_signal(>base); + + return >base; +} +EXPORT_SYMBOL(dma_fence_allocate_private_stub); + /** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 349146049849..a54aa850d143 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -350,12 +350,16 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence); * * Assign a already signaled stub fence to the sync object. 
*/ -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) { - struct dma_fence *fence = dma_fence_get_stub(); + struct dma_fence *fence = dma_fence_allocate_private_stub(); + + if (IS_ERR(fence)) + return PTR_ERR(fence); drm_syncobj_replace_fence(syncobj, fence); dma_fence_put(fence); + return 0; } /* 5s default for wait submission */ @@ -469,6 +473,7 @@ EXPORT_SYMBOL(drm_syncobj_free); int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, struct dma_fence *fence) { + int ret; struct drm_syncobj *syncobj; syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); @@ -479,8 +484,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, INIT_LIST_HEAD(>cb_list); spin_lock_init(>lock); - if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) - drm_syncobj_assign_null_handle(syncobj); + if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { + ret = drm_syncobj_assign_null_handle(syncobj); + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + } if (fence) drm_syncobj_replace_fence(syncobj, fence); @@ -1322,8 +1332,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, if (ret < 0) return ret; - for (i = 0; i < args->count_handles; i++) - drm_syncobj_assign_null_handle(syncobjs[i]); + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_assign_null_handle(syncobjs[i]); + if (ret < 0) +
Re: [PATCH] drm/syncobj: use newly allocated stub fences
Sorry, I forgot to checkpatch this, I'll resend it momentarily. -David On Thu, Apr 8, 2021 at 6:34 PM David Stevens wrote: > > From: David Stevens > > Allocate a new private stub fence in drm_syncobj_assign_null_handle, > instead of using a static stub fence. > > When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when > userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp > obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO > should match when the fence's status was changed from the perspective of > userspace, which is during the respective ioctl. > > When a static stub fence started being used in by these ioctls, this > behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO > became the first time anything used the static stub fence, which has no > meaning to userspace. > > Signed-off-by: David Stevens > --- > drivers/dma-buf/dma-fence.c | 33 - > drivers/gpu/drm/drm_syncobj.c | 28 > include/linux/dma-fence.h | 1 + > 3 files changed, 53 insertions(+), 9 deletions(-) > > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c > index d64fc03929be..6081eb962490 100644 > --- a/drivers/dma-buf/dma-fence.c > +++ b/drivers/dma-buf/dma-fence.c > @@ -26,6 +26,11 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); > static DEFINE_SPINLOCK(dma_fence_stub_lock); > static struct dma_fence dma_fence_stub; > > +struct drm_fence_private_stub { > + struct dma_fence base; > + spinlock_t lock; > +}; > + > /* > * fence context counter: each execution context should have its own > * fence context, this allows checking if fences belong to the same > @@ -123,7 +128,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = { > /** > * dma_fence_get_stub - return a signaled fence > * > - * Return a stub fence which is already signaled. > + * Return a stub fence which is already signaled. The fence's > + * timestamp corresponds to the first time after boot this > + * function is called. 
> */ > struct dma_fence *dma_fence_get_stub(void) > { > @@ -141,6 +148,30 @@ struct dma_fence *dma_fence_get_stub(void) > } > EXPORT_SYMBOL(dma_fence_get_stub); > > +/** > + * dma_fence_allocate_private_stub - return a private, signaled fence > + * > + * Return a newly allocated and signaled stub fence. > + */ > +struct dma_fence *dma_fence_allocate_private_stub(void) > +{ > + struct drm_fence_private_stub *fence; > + > + fence = kzalloc(sizeof(*fence), GFP_KERNEL); > + if (fence == NULL) > + return ERR_PTR(-ENOMEM); > + > + spin_lock_init(>lock); > + dma_fence_init(>base, > + _fence_stub_ops, > + >lock, > + 0, 0); > + dma_fence_signal(>base); > + > + return >base; > +} > +EXPORT_SYMBOL(dma_fence_allocate_private_stub); > + > /** > * dma_fence_context_alloc - allocate an array of fence contexts > * @num: amount of contexts to allocate > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 349146049849..c6125e57ae37 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -350,12 +350,15 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence); > * > * Assign a already signaled stub fence to the sync object. 
> */ > -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > { > - struct dma_fence *fence = dma_fence_get_stub(); > + struct dma_fence *fence = dma_fence_allocate_private_stub(); > + if (IS_ERR(fence)) > + return PTR_ERR(fence); > > - drm_syncobj_replace_fence(syncobj, fence); > - dma_fence_put(fence); > + drm_syncobj_replace_fence(syncobj, fence); > + dma_fence_put(fence); > + return 0; > } > > /* 5s default for wait submission */ > @@ -469,6 +472,7 @@ EXPORT_SYMBOL(drm_syncobj_free); > int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, >struct dma_fence *fence) > { > + int ret; > struct drm_syncobj *syncobj; > > syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); > @@ -479,8 +483,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, > uint32_t flags, > INIT_LIST_HEAD(>cb_list); > spin_lock_init(>lock); > > - if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) > - drm_syncobj_assign_null_handle(syncobj); > + if (flags &a
[PATCH] drm/syncobj: use newly allocated stub fences
From: David Stevens Allocate a new private stub fence in drm_syncobj_assign_null_handle, instead of using a static stub fence. When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO should match when the fence's status was changed from the perspective of userspace, which is during the respective ioctl. When a static stub fence started being used in by these ioctls, this behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO became the first time anything used the static stub fence, which has no meaning to userspace. Signed-off-by: David Stevens --- drivers/dma-buf/dma-fence.c | 33 - drivers/gpu/drm/drm_syncobj.c | 28 include/linux/dma-fence.h | 1 + 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index d64fc03929be..6081eb962490 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -26,6 +26,11 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); static DEFINE_SPINLOCK(dma_fence_stub_lock); static struct dma_fence dma_fence_stub; +struct drm_fence_private_stub { + struct dma_fence base; + spinlock_t lock; +}; + /* * fence context counter: each execution context should have its own * fence context, this allows checking if fences belong to the same @@ -123,7 +128,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = { /** * dma_fence_get_stub - return a signaled fence * - * Return a stub fence which is already signaled. + * Return a stub fence which is already signaled. The fence's + * timestamp corresponds to the first time after boot this + * function is called. 
*/ struct dma_fence *dma_fence_get_stub(void) { @@ -141,6 +148,30 @@ struct dma_fence *dma_fence_get_stub(void) } EXPORT_SYMBOL(dma_fence_get_stub); +/** + * dma_fence_allocate_private_stub - return a private, signaled fence + * + * Return a newly allocated and signaled stub fence. + */ +struct dma_fence *dma_fence_allocate_private_stub(void) +{ + struct drm_fence_private_stub *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return ERR_PTR(-ENOMEM); + + spin_lock_init(>lock); + dma_fence_init(>base, + _fence_stub_ops, + >lock, + 0, 0); + dma_fence_signal(>base); + + return >base; +} +EXPORT_SYMBOL(dma_fence_allocate_private_stub); + /** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 349146049849..c6125e57ae37 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -350,12 +350,15 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence); * * Assign a already signaled stub fence to the sync object. 
*/ -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) { - struct dma_fence *fence = dma_fence_get_stub(); + struct dma_fence *fence = dma_fence_allocate_private_stub(); + if (IS_ERR(fence)) + return PTR_ERR(fence); - drm_syncobj_replace_fence(syncobj, fence); - dma_fence_put(fence); + drm_syncobj_replace_fence(syncobj, fence); + dma_fence_put(fence); + return 0; } /* 5s default for wait submission */ @@ -469,6 +472,7 @@ EXPORT_SYMBOL(drm_syncobj_free); int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, struct dma_fence *fence) { + int ret; struct drm_syncobj *syncobj; syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); @@ -479,8 +483,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, INIT_LIST_HEAD(>cb_list); spin_lock_init(>lock); - if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) - drm_syncobj_assign_null_handle(syncobj); + if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { + ret = drm_syncobj_assign_null_handle(syncobj); + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + } if (fence) drm_syncobj_replace_fence(syncobj, fence); @@ -1322,8 +1331,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, if (ret < 0) return ret; - for (i = 0; i < args->count_handles; i++) - drm_syncobj_assign_null_handle(syncobjs[i]); + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_assign_null_handle(syncobjs[i]); +
Re: [PATCH] Revert "drm/syncobj: use dma_fence_get_stub"
On Thu, Apr 8, 2021 at 4:03 PM Christian König wrote: > > Am 08.04.21 um 06:59 schrieb David Stevens: > > From: David Stevens > > > > This reverts commit 86bbd89d5da66fe760049ad3f04adc407ec0c4d6. > > > > Using the singleton stub fence in drm_syncobj_assign_null_handle means > > that all syncobjs created in an already signaled state or any syncobjs > > signaled by userspace will reference the singleton fence when exported > > to a sync_file. If those sync_files are queried with SYNC_IOC_FILE_INFO, > > then the timestamp_ns value returned will correspond to whenever the > > singleton stub fence was first initialized. This can break the ability > > of userspace to use timestamps of these fences, as the singleton stub > > fence's timestamp bears no relationship to any meaningful event. > > And why exactly is having the timestamp of the call to > drm_syncobj_assign_null_handle() better? The timestamp returned by SYNC_IOC_FILE_INFO is the "timestamp of status change in nanoseconds". If userspace signals the fence with DRM_IOCTL_SYNCOBJ_SIGNAL, then a timestamp from drm_syncobj_assign_null_handle corresponds to the status change. If userspace sets DRM_SYNCOBJ_CREATE_SIGNALED when creating a fence, then the status change happens immediately upon creation, which again corresponds to when drm_syncobj_assign_null_handle gets called. > Additional if you really need that please don't revert the patch. > Instead provide a function which returns a newly initialized stub fence > in the dma_fence.c code. Ack. -David > Regards, > Christian. 
> > > > > Signed-off-by: David Stevens > > --- > > drivers/gpu/drm/drm_syncobj.c | 58 ++- > > 1 file changed, 44 insertions(+), 14 deletions(-) > > > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > > index 349146049849..7cc11f1a83f4 100644 > > --- a/drivers/gpu/drm/drm_syncobj.c > > +++ b/drivers/gpu/drm/drm_syncobj.c > > @@ -211,6 +211,21 @@ struct syncobj_wait_entry { > > static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, > > struct syncobj_wait_entry *wait); > > > > +struct drm_syncobj_stub_fence { > > + struct dma_fence base; > > + spinlock_t lock; > > +}; > > + > > +static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence) > > +{ > > + return "syncobjstub"; > > +} > > + > > +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = { > > + .get_driver_name = drm_syncobj_stub_fence_get_name, > > + .get_timeline_name = drm_syncobj_stub_fence_get_name, > > +}; > > + > > /** > >* drm_syncobj_find - lookup and reference a sync object. > >* @file_private: drm file private pointer > > @@ -344,18 +359,24 @@ void drm_syncobj_replace_fence(struct drm_syncobj > > *syncobj, > > } > > EXPORT_SYMBOL(drm_syncobj_replace_fence); > > > > -/** > > - * drm_syncobj_assign_null_handle - assign a stub fence to the sync object > > - * @syncobj: sync object to assign the fence on > > - * > > - * Assign a already signaled stub fence to the sync object. 
> > - */ > > -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > > +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > > { > > - struct dma_fence *fence = dma_fence_get_stub(); > > + struct drm_syncobj_stub_fence *fence; > > > > - drm_syncobj_replace_fence(syncobj, fence); > > - dma_fence_put(fence); > > + fence = kzalloc(sizeof(*fence), GFP_KERNEL); > > + if (fence == NULL) > > + return -ENOMEM; > > + > > + spin_lock_init(>lock); > > + dma_fence_init(>base, _syncobj_stub_fence_ops, > > +>lock, 0, 0); > > + dma_fence_signal(>base); > > + > > + drm_syncobj_replace_fence(syncobj, >base); > > + > > + dma_fence_put(>base); > > + > > + return 0; > > } > > > > /* 5s default for wait submission */ > > @@ -469,6 +490,7 @@ EXPORT_SYMBOL(drm_syncobj_free); > > int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, > > struct dma_fence *fence) > > { > > + int ret; > > struct drm_syncobj *syncobj; > > > > syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); > > @@ -479,8 +501,13 @@ int drm_syncobj_create(struct drm_syncobj > > **out_syncobj, uint32_t flags, > > INIT_L
[PATCH] Revert "drm/syncobj: use dma_fence_get_stub"
From: David Stevens This reverts commit 86bbd89d5da66fe760049ad3f04adc407ec0c4d6. Using the singleton stub fence in drm_syncobj_assign_null_handle means that all syncobjs created in an already signaled state or any syncobjs signaled by userspace will reference the singleton fence when exported to a sync_file. If those sync_files are queried with SYNC_IOC_FILE_INFO, then the timestamp_ns value returned will correspond to whenever the singleton stub fence was first initialized. This can break the ability of userspace to use timestamps of these fences, as the singleton stub fence's timestamp bears no relationship to any meaningful event. Signed-off-by: David Stevens --- drivers/gpu/drm/drm_syncobj.c | 58 ++- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 349146049849..7cc11f1a83f4 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -211,6 +211,21 @@ struct syncobj_wait_entry { static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, struct syncobj_wait_entry *wait); +struct drm_syncobj_stub_fence { + struct dma_fence base; + spinlock_t lock; +}; + +static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence) +{ + return "syncobjstub"; +} + +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = { + .get_driver_name = drm_syncobj_stub_fence_get_name, + .get_timeline_name = drm_syncobj_stub_fence_get_name, +}; + /** * drm_syncobj_find - lookup and reference a sync object. * @file_private: drm file private pointer @@ -344,18 +359,24 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, } EXPORT_SYMBOL(drm_syncobj_replace_fence); -/** - * drm_syncobj_assign_null_handle - assign a stub fence to the sync object - * @syncobj: sync object to assign the fence on - * - * Assign a already signaled stub fence to the sync object. 
- */ -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) { - struct dma_fence *fence = dma_fence_get_stub(); + struct drm_syncobj_stub_fence *fence; - drm_syncobj_replace_fence(syncobj, fence); - dma_fence_put(fence); + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return -ENOMEM; + + spin_lock_init(>lock); + dma_fence_init(>base, _syncobj_stub_fence_ops, + >lock, 0, 0); + dma_fence_signal(>base); + + drm_syncobj_replace_fence(syncobj, >base); + + dma_fence_put(>base); + + return 0; } /* 5s default for wait submission */ @@ -469,6 +490,7 @@ EXPORT_SYMBOL(drm_syncobj_free); int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, struct dma_fence *fence) { + int ret; struct drm_syncobj *syncobj; syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); @@ -479,8 +501,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, INIT_LIST_HEAD(>cb_list); spin_lock_init(>lock); - if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) - drm_syncobj_assign_null_handle(syncobj); + if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { + ret = drm_syncobj_assign_null_handle(syncobj); + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + } if (fence) drm_syncobj_replace_fence(syncobj, fence); @@ -1322,8 +1349,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, if (ret < 0) return ret; - for (i = 0; i < args->count_handles; i++) - drm_syncobj_assign_null_handle(syncobjs[i]); + for (i = 0; i < args->count_handles; i++) { + ret = drm_syncobj_assign_null_handle(syncobjs[i]); + if (ret < 0) + break; + } drm_syncobj_array_free(syncobjs, args->count_handles); -- 2.31.0.208.g409f899ff0-goog
[PATCH v4 2/2] KVM: x86/mmu: Consider the hva in mmu_notifier retry
From: David Stevens Track the range being invalidated by mmu_notifier and skip page fault retries if the fault address is not affected by the in-progress invalidation. Handle concurrent invalidations by finding the minimal range which includes all ranges being invalidated. Although the combined range may include unrelated addresses and cannot be shrunk as individual invalidation operations complete, it is unlikely the marginal gains of proper range tracking are worth the additional complexity. The primary benefit of this change is the reduction in the likelihood of extreme latency when handing a page fault due to another thread having been preempted while modifying host virtual addresses. Signed-off-by: David Stevens --- v3 -> v4: - Skip prefetch while invalidations are in progress v2 -> v3: - Removed unnecessary ifdef - Style changes v1 -> v2: - Improve handling of concurrent invalidation requests by unioning ranges, instead of just giving up and using [0, ULONG_MAX). - Add lockdep check - Code comments and formatting arch/powerpc/kvm/book3s_64_mmu_hv.c| 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/x86/kvm/mmu/mmu.c | 23 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 7 --- include/linux/kvm_host.h | 25 +- virt/kvm/kvm_main.c| 29 ++ 6 files changed, 72 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 38ea396a23d6..8e06cd3f759c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, } else { /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, _ok); + writing, _ok, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index bb35490400e9..e603de7ade52 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, upgrade_p); + writing, upgrade_p, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9ac0a727015d..f6aaac729667 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2758,6 +2758,13 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) if (sp->role.level > PG_LEVEL_4K) return; + /* +* If addresses are being invalidated, skip prefetching to avoid +* accidentally prefetching those addresses. +*/ + if (unlikely(vcpu->kvm->mmu_notifier_count)) + return; + __direct_pte_prefetch(vcpu, sp, sptep); } @@ -3658,8 +3665,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, -gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, -bool *writable) +gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, +bool write, bool *writable) { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; @@ -3672,7 +3679,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, } async = false; - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , + write, writable, hva); if (!async) return false; /* *pfn has correct page already */ @@ -3686,7 +3694,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return true; } - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, + write, writable, hva); return false; } @@ -3699,6 +3708,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn = gpa >> PAGE_SHIFT; unsigned long mmu_seq; 
kvm_pfn_t pfn; + hva_t hva; int r; if (p
[PATCH v4 1/2] KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault
From: Sean Christopherson Don't retry a page fault due to an mmu_notifier invalidation when handling a page fault for a GPA that did not resolve to a memslot, i.e. an MMIO page fault. Invalidations from the mmu_notifier signal a change in a host virtual address (HVA) mapping; without a memslot, there is no HVA and thus no possibility that the invalidation is relevant to the page fault being handled. Note, the MMIO vs. memslot generation checks handle the case where a pending memslot will create a memslot overlapping the faulting GPA. The mmu_notifier checks are orthogonal to memslot updates. Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/paging_tmpl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6d16481aa29d..9ac0a727015d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3725,7 +3725,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, r = RET_PF_RETRY; spin_lock(>kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 50e268eb8e1a..ab54263d857c 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -869,7 +869,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, r = RET_PF_RETRY; spin_lock(>kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); -- 2.30.0.617.g56c4b15f3c-goog
[PATCH v4 0/2] KVM: x86/mmu: Skip mmu_notifier changes when possible
These patches reduce how often mmu_notifier updates block guest page faults. The primary benefit of this is the reduction in the likelihood of extreme latency when handling a page fault due to another thread having been preempted while modifying host virtual addresses. v3 -> v4: - Fix bug by skipping prefetch during invalidation v2 -> v3: - Added patch to skip check for MMIO page faults - Style changes David Stevens (1): KVM: x86/mmu: Consider the hva in mmu_notifier retry Sean Christopherson (1): KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault arch/powerpc/kvm/book3s_64_mmu_hv.c| 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/x86/kvm/mmu/mmu.c | 23 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 7 --- include/linux/kvm_host.h | 25 +- virt/kvm/kvm_main.c| 29 ++ 6 files changed, 72 insertions(+), 16 deletions(-) -- 2.30.0.617.g56c4b15f3c-goog
Re: [PATCH v3 0/2] KVM: x86/mmu: Skip mmu_notifier changes when possible
These patches might be responsible for some instability in one of our stress tests. I'll send an update once I figure out what's going on. Thanks, David On Thu, Jan 28, 2021 at 9:48 PM Paolo Bonzini wrote: > > On 28/01/21 07:05, David Stevens wrote: > > These patches reduce how often mmu_notifier updates block guest page > > faults. The primary benefit of this is the reduction in the likelihood > > of extreme latency when handling a page fault due to another thread > > having been preempted while modifying host virtual addresses. > > > > v2 -> v3: > > - Added patch to skip check for MMIO page faults > > - Style changes > > > > David Stevens (1): > >KVM: x86/mmu: Consider the hva in mmu_notifier retry > > > > Sean Christopherson (1): > >KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault > > > > arch/powerpc/kvm/book3s_64_mmu_hv.c| 2 +- > > arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- > > arch/x86/kvm/mmu/mmu.c | 16 -- > > arch/x86/kvm/mmu/paging_tmpl.h | 7 --- > > include/linux/kvm_host.h | 25 +- > > virt/kvm/kvm_main.c| 29 ++ > > 6 files changed, 65 insertions(+), 16 deletions(-) > > > > Queued, thanks. > > Paolo >
[PATCH v3 0/2] KVM: x86/mmu: Skip mmu_notifier changes when possible
These patches reduce how often mmu_notifier updates block guest page faults. The primary benefit of this is the reduction in the likelihood of extreme latency when handling a page fault due to another thread having been preempted while modifying host virtual addresses. v2 -> v3: - Added patch to skip check for MMIO page faults - Style changes David Stevens (1): KVM: x86/mmu: Consider the hva in mmu_notifier retry Sean Christopherson (1): KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault arch/powerpc/kvm/book3s_64_mmu_hv.c| 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/x86/kvm/mmu/mmu.c | 16 -- arch/x86/kvm/mmu/paging_tmpl.h | 7 --- include/linux/kvm_host.h | 25 +- virt/kvm/kvm_main.c| 29 ++ 6 files changed, 65 insertions(+), 16 deletions(-) -- 2.30.0.280.ga3ce27912f-goog
[PATCH v3 1/2] KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault
From: Sean Christopherson Don't retry a page fault due to an mmu_notifier invalidation when handling a page fault for a GPA that did not resolve to a memslot, i.e. an MMIO page fault. Invalidations from the mmu_notifier signal a change in a host virtual address (HVA) mapping; without a memslot, there is no HVA and thus no possibility that the invalidation is relevant to the page fault being handled. Note, the MMIO vs. memslot generation checks handle the case where a pending memslot will create a memslot overlapping the faulting GPA. The mmu_notifier checks are orthogonal to memslot updates. Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/paging_tmpl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6d16481aa29d..9ac0a727015d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3725,7 +3725,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, r = RET_PF_RETRY; spin_lock(>kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 50e268eb8e1a..ab54263d857c 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -869,7 +869,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, r = RET_PF_RETRY; spin_lock(>kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); -- 2.30.0.280.ga3ce27912f-goog
[PATCH v3 2/2] KVM: x86/mmu: Consider the hva in mmu_notifier retry
From: David Stevens Track the range being invalidated by mmu_notifier and skip page fault retries if the fault address is not affected by the in-progress invalidation. Handle concurrent invalidations by finding the minimal range which includes all ranges being invalidated. Although the combined range may include unrelated addresses and cannot be shrunk as individual invalidation operations complete, it is unlikely the marginal gains of proper range tracking are worth the additional complexity. The primary benefit of this change is the reduction in the likelihood of extreme latency when handing a page fault due to another thread having been preempted while modifying host virtual addresses. Signed-off-by: David Stevens --- v2 -> v3: - Removed unnecessary ifdef - Style changes v1 -> v2: - Improve handling of concurrent invalidation requests by unioning ranges, instead of just giving up and using [0, ULONG_MAX). - Add lockdep check - Code comments and formatting arch/powerpc/kvm/book3s_64_mmu_hv.c| 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/x86/kvm/mmu/mmu.c | 16 -- arch/x86/kvm/mmu/paging_tmpl.h | 7 --- include/linux/kvm_host.h | 25 +- virt/kvm/kvm_main.c| 29 ++ 6 files changed, 65 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 38ea396a23d6..8e06cd3f759c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, } else { /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, _ok); + writing, _ok, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index bb35490400e9..e603de7ade52 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -822,7 +822,7 @@ int 
kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, upgrade_p); + writing, upgrade_p, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9ac0a727015d..8d08e97b2487 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3658,8 +3658,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, -gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, -bool *writable) +gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, +bool write, bool *writable) { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; @@ -3672,7 +3672,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, } async = false; - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , + write, writable, hva); if (!async) return false; /* *pfn has correct page already */ @@ -3686,7 +3687,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return true; } - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, + write, writable, hva); return false; } @@ -3699,6 +3701,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn = gpa >> PAGE_SHIFT; unsigned long mmu_seq; kvm_pfn_t pfn; + hva_t hva; int r; if (page_fault_handle_page_track(vcpu, error_code, gfn)) @@ -3717,7 +3720,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, gpa, , write, _writable)) + if (try_async_pf(vcpu, prefault, gfn, gpa, , , +write, _writable)) return RET_PF_RETRY; if 
(handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, ))
[PATCH v2] KVM: x86/mmu: consider the hva in mmu_notifier retry
From: David Stevens Track the range being invalidated by mmu_notifier and skip page fault retries if the fault address is not affected by the in-progress invalidation. Handle concurrent invalidations by finding the minimal range which includes all ranges being invalidated. Although the combined range may include unrelated addresses and cannot be shrunk as individual invalidation operations complete, it is unlikely the marginal gains of proper range tracking are worth the additional complexity. The primary benefit of this change is the reduction in the likelihood of extreme latency when handing a page fault due to another thread having been preempted while modifying host virtual addresses. Signed-off-by: David Stevens --- v1 -> v2: - improve handling of concurrent invalidation requests by unioning ranges, instead of just giving up and using [0, ULONG_MAX). - add lockdep check - code comments and formatting arch/powerpc/kvm/book3s_64_mmu_hv.c| 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/x86/kvm/mmu/mmu.c | 16 -- arch/x86/kvm/mmu/paging_tmpl.h | 7 --- include/linux/kvm_host.h | 27 +++- virt/kvm/kvm_main.c| 29 ++ 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 38ea396a23d6..8e06cd3f759c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, } else { /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, _ok); + writing, _ok, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index bb35490400e9..e603de7ade52 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, /* 
Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, upgrade_p); + writing, upgrade_p, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6d16481aa29d..79166288ed03 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3658,8 +3658,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, -gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, -bool *writable) +gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, +bool write, bool *writable) { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; @@ -3672,7 +3672,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, } async = false; - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , + write, writable, hva); if (!async) return false; /* *pfn has correct page already */ @@ -3686,7 +3687,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return true; } - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, + write, writable, hva); return false; } @@ -3699,6 +3701,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn = gpa >> PAGE_SHIFT; unsigned long mmu_seq; kvm_pfn_t pfn; + hva_t hva; int r; if (page_fault_handle_page_track(vcpu, error_code, gfn)) @@ -3717,7 +3720,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, gpa, , write, _writable)) + if (try_async_pf(vcpu, prefault, gfn, gpa, , , +write, _writable)) return RET_PF_RETRY; if (handle_abnormal_pfn(vcpu, is_tdp ? 
0 : gpa, gfn, pfn, ACC_ALL, )) @@ -3725,7 +3729,7 @@ static int direct_page_fault(struct kvm_vc
Re: [PATCH] KVM: x86/mmu: consider the hva in mmu_notifier retry
> > This has the secondary effect of greatly reducing the likelihood of extreme > > Out of curiosity, is this really the _secondary_ effect? I would expect this > change to primarily benefit scenarios where the invalidation has gotten > waylaid for whatever reason. Yeah, this is the primary benefit. I was thinking about it as the reduction in page fault retries is the direct effect, and that in turn leads to a secondary effect of a reduction in the chance of extreme latency. But I guess that's not a particularly important distinction to make. I'll reword this. > > This needs a comment to explicitly state that 'count > 1' cannot be done at > this time. My initial thought is that it would be more intuitive to check for > 'count > 1' here, but that would potentially check the wrong range when count > goes from 2->1. The comment about persistence in invalidate_range_start() is > a > good hint, but I think it's worth being explicit to avoid bad "cleanup" in the > future. > > > + if (unlikely(kvm->mmu_notifier_count)) { > > + if (kvm->mmu_notifier_range_start <= hva && > > + hva < kvm->mmu_notifier_range_end) I'm not sure I understand what you're suggesting here. How exactly would 'count > 1' be used incorrectly here? I'm fine with adding a comment, but I'm not sure what the comment needs to clarify. -David
[PATCH] KVM: x86/mmu: consider the hva in mmu_notifier retry
From: David Stevens Use the range passed to mmu_notifer's invalidate_range_start to prevent spurious page fault retries due to changes in unrelated host virtual addresses. This has the secondary effect of greatly reducing the likelihood of extreme latency when handing a page fault due to another thread having been preempted while modifying host virtual addresses. Signed-off-by: David Stevens --- arch/powerpc/kvm/book3s_64_mmu_hv.c| 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/x86/kvm/mmu/mmu.c | 16 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 7 --- include/linux/kvm_host.h | 22 +- virt/kvm/kvm_main.c| 22 ++ 6 files changed, 55 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 38ea396a23d6..8e06cd3f759c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, } else { /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, _ok); + writing, _ok, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index bb35490400e9..e603de7ade52 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, - writing, upgrade_p); + writing, upgrade_p, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; page = NULL; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6d16481aa29d..79166288ed03 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3658,8 +3658,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } static bool try_async_pf(struct kvm_vcpu *vcpu, 
bool prefault, gfn_t gfn, -gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, -bool *writable) +gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, +bool write, bool *writable) { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; @@ -3672,7 +3672,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, } async = false; - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, , + write, writable, hva); if (!async) return false; /* *pfn has correct page already */ @@ -3686,7 +3687,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return true; } - *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable); + *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, + write, writable, hva); return false; } @@ -3699,6 +3701,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn = gpa >> PAGE_SHIFT; unsigned long mmu_seq; kvm_pfn_t pfn; + hva_t hva; int r; if (page_fault_handle_page_track(vcpu, error_code, gfn)) @@ -3717,7 +3720,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, gpa, , write, _writable)) + if (try_async_pf(vcpu, prefault, gfn, gpa, , , +write, _writable)) return RET_PF_RETRY; if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, )) @@ -3725,7 +3729,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, r = RET_PF_RETRY; spin_lock(>kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + if (mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva)) goto out_unlock; r = make_mmu_pages_available(vcpu); if (r) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 50e268eb8e1a..3171784139a4 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -790,6 +790,7 @@ sta
[PATCH v7 0/3] Support virtio cross-device resources
This patchset implements the current proposal for virtio cross-device resource sharing [1]. It will be used to import virtio resources into the virtio-video driver currently under discussion [2]. The patch under consideration to add support in the virtio-video driver is [3]. It uses the APIs from v3 of this series, but the changes to update it are relatively minor. This patchset adds a new flavor of dma-bufs that supports querying the underlying virtio object UUID, as well as adding support for exporting resources from virtgpu. [1] https://markmail.org/thread/2ypjt5cfeu3m6lxu [2] https://markmail.org/thread/p5d3k566srtdtute [3] https://markmail.org/thread/j4xlqaaim266qpks v6 -> v7 changes: - Fix most strict checkpatch comments David Stevens (3): virtio: add dma-buf support for exported objects virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature drm/virtio: Support virtgpu exported resources drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 21 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ drivers/virtio/Makefile| 2 +- drivers/virtio/virtio.c| 6 ++ drivers/virtio/virtio_dma_buf.c| 85 +++ include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 37 ++ include/uapi/linux/virtio_gpu.h| 19 + 11 files changed, 325 insertions(+), 4 deletions(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h -- 2.28.0.220.ged08abb693-goog
[PATCH v7 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
This feature allows the guest to request a UUID from the host for a particular virtio_gpu resource. The UUID can then be shared with other virtio devices, to allow the other host devices to access the virtio_gpu's corresponding host resource. Signed-off-by: David Stevens --- include/uapi/linux/virtio_gpu.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..9721d58b4d58 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -50,6 +50,10 @@ * VIRTIO_GPU_CMD_GET_EDID */ #define VIRTIO_GPU_F_EDID1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -340,4 +346,17 @@ enum virtio_gpu_formats { VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, }; +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __u8 uuid[16]; +}; + #endif -- 2.28.0.220.ged08abb693-goog
[PATCH v7 1/3] virtio: add dma-buf support for exported objects
This change adds a new flavor of dma-bufs that can be used by virtio drivers to share exported objects. A virtio dma-buf can be queried by virtio drivers to obtain the UUID which identifies the underlying exported object. Signed-off-by: David Stevens --- drivers/virtio/Makefile | 2 +- drivers/virtio/virtio.c | 6 +++ drivers/virtio/virtio_dma_buf.c | 85 + include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 37 ++ 5 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 29a1386ecc03..ecdae5b596de 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a977e32a88f2..5d46f0ded92d 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(register_virtio_device); +bool is_virtio_device(struct device *dev) +{ + return dev->bus == _bus; +} +EXPORT_SYMBOL_GPL(is_virtio_device); + void unregister_virtio_device(struct virtio_device *dev) { int index = dev->index; /* save for after device release */ diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c new file mode 100644 index ..45d6e8647dcf --- /dev/null +++ b/drivers/virtio/virtio_dma_buf.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dma-bufs for virtio exported objects + * + * Copyright (C) 2020 Google, Inc. 
+ */ + +#include + +/** + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object + * @exp_info: [in] see dma_buf_export(). ops MUST refer to a dma_buf_ops + * struct embedded in a virtio_dma_buf_ops. + * + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf + * for an virtio exported object that can be queried by other virtio drivers + * for the object's UUID. + */ +struct dma_buf *virtio_dma_buf_export + (const struct dma_buf_export_info *exp_info) +{ + const struct virtio_dma_buf_ops *virtio_ops = + container_of(exp_info->ops, +const struct virtio_dma_buf_ops, ops); + + if (!exp_info->ops || + exp_info->ops->attach != _dma_buf_attach || + !virtio_ops->get_uuid) { + return ERR_PTR(-EINVAL); + } + + return dma_buf_export(exp_info); +} +EXPORT_SYMBOL(virtio_dma_buf_export); + +/** + * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs + */ +int virtio_dma_buf_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + int ret; + const struct virtio_dma_buf_ops *ops = + container_of(dma_buf->ops, +const struct virtio_dma_buf_ops, ops); + + if (ops->device_attach) { + ret = ops->device_attach(dma_buf, attach); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(virtio_dma_buf_attach); + +/** + * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf + * @dma_buf: buffer to query + */ +bool is_virtio_dma_buf(struct dma_buf *dma_buf) +{ + return dma_buf->ops->attach == _dma_buf_attach; +} +EXPORT_SYMBOL(is_virtio_dma_buf); + +/** + * virtio_dma_buf_get_uuid - gets a virtio dma-buf's exported object's uuid + * @dma_buf: [in] buffer to query + * @uuid: [out] the uuid + * + * Returns: 0 on success, negative on failure. 
+ */ +int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf, + uuid_t *uuid) +{ + const struct virtio_dma_buf_ops *ops = + container_of(dma_buf->ops, +const struct virtio_dma_buf_ops, ops); + + if (!is_virtio_dma_buf(dma_buf)) + return -EINVAL; + + return ops->get_uuid(dma_buf, uuid); +} +EXPORT_SYMBOL(virtio_dma_buf_get_uuid); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 15f906e4a748..9397e25616c4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev) void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); +bool is_virtio_device(struct device *dev); void virtio_break_device(struct v
[PATCH v7 3/3] drm/virtio: Support virtgpu exported resources
Add support for UUID-based resource sharing mechanism to virtgpu. This implements the new virtgpu commands and hooks them up to dma-buf's get_uuid callback. Signed-off-by: David Stevens --- drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 21 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ 5 files changed, 176 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index ab4bed78e656..b039f493bda9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -165,6 +165,7 @@ static unsigned int features[] = { VIRTIO_GPU_F_VIRGL, #endif VIRTIO_GPU_F_EDID, + VIRTIO_GPU_F_RESOURCE_UUID, }; static struct virtio_driver virtio_gpu_driver = { .feature_table = features, @@ -202,6 +203,8 @@ static struct drm_driver driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_mmap = drm_gem_prime_mmap, + .gem_prime_export = virtgpu_gem_prime_export, + .gem_prime_import = virtgpu_gem_prime_import, .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table, .gem_create_object = virtio_gpu_create_object, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 49bebdee6d91..cf54b89d9ab1 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -49,6 +49,10 @@ #define DRIVER_MINOR 1 #define DRIVER_PATCHLEVEL 0 +#define UUID_INITIALIZING 0 +#define UUID_INITIALIZED 1 +#define UUID_INITIALIZATION_FAILED 2 + struct virtio_gpu_object_params { uint32_t format; uint32_t width; @@ -71,6 +75,9 @@ struct virtio_gpu_object { uint32_t hw_res_handle; bool dumb; bool created; + + int uuid_state; + uuid_t uuid; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, base.base) @@ -200,6 +207,7 @@ struct 
virtio_gpu_device { bool has_virgl_3d; bool has_edid; bool has_indirect; + bool has_resource_assign_uuid; struct work_struct config_changed_work; @@ -210,6 +218,9 @@ struct virtio_gpu_device { struct virtio_gpu_drv_capset *capsets; uint32_t num_capsets; struct list_head cap_cache; + + /* protects resource state when exporting */ + spinlock_t resource_export_lock; }; struct virtio_gpu_fpriv { @@ -335,6 +346,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct *work); void virtio_gpu_notify(struct virtio_gpu_device *vgdev); +int +virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object_array *objs); + /* virtgpu_display.c */ void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); @@ -366,6 +381,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo); /* virtgpu_prime.c */ +struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj, +int flags); +struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *buf); +int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj, + uuid_t *uuid); struct drm_gem_object *virtgpu_gem_prime_import_sg_table( struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 023a030ca7b9..7bcd0c75effa 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev) vgdev->dev = dev->dev; spin_lock_init(>display_info_lock); + spin_lock_init(>resource_export_lock); ida_init(>ctx_id_ida); ida_init(>resource_ida); init_waitqueue_head(>resp_wq); @@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev) if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) { vgdev->has_indirect = true; } + if 
(virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) { + vgdev->has_resource_assign_uuid = true; + } DRM_INFO("features: %cvirgl %cedid\n", vgdev->has_virgl_3d ? '+' : '-', diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 050d24c39a8f..acd14ef73d56 100644 --- a/drivers/gpu/drm/virti
Re: [PATCH v6 0/3] Support virtio cross-device resources
> Hmm, checkpatch still complains, full log below. > > IIRC "dim checkpatch" runs scripts/checkpatch.pl with --strict > so it is a bit more picky ... Ah, I didn't know --strict was being used. I'll send an update momentarily. Sorry for the churn. > -:250: CHECK:PREFER_KERNEL_TYPES: Prefer kernel type 'u32' over 'uint32_t' > #250: FILE: drivers/gpu/drm/virtio/virtgpu_vq.c:1118: > + uint32_t resp_type = le32_to_cpu(resp->hdr.type); > For consistency with the rest of the virtgpu code, I'll leave uint32_t. Cheers, David
Re: [virtio-dev] Re: [PATCH v5 0/3] Support virtio cross-device resources
On Mon, Aug 17, 2020 at 4:12 AM Gerd Hoffmann wrote: > > On Mon, Aug 17, 2020 at 12:50:08PM +0200, Gerd Hoffmann wrote: > > On Tue, Jun 23, 2020 at 10:31:28AM +0900, David Stevens wrote: > > > Unless there are any remaining objections to these patches, what are > > > the next steps towards getting these merged? Sorry, I'm not familiar > > > with the workflow for contributing patches to Linux. > > > > Sorry, just have been busy and not paying as much attention to drm > > patches as usual. Playing catch-up now. Queued for drm-misc-next, > > unless something goes wrong in my testing the patches should land > > in linux-next soon and be merged upstream in the next merge window. > > Oh, spoke too soon. scripts/checkpatch.pl has a bunch of codestyle > warnings. Can you fix them and resend? Sent a new version to fix the line length warning. There's still a MAINTAINER warning, but I think the new virtio_dma_buf file can fall under virtio core (and the existing wildcards do match it), rather than get its own MAINTAINER entry. I can break it out into its own thing if that's better, though. -David
[PATCH v6 0/3] Support virtio cross-device resources
This patchset implements the current proposal for virtio cross-device resource sharing [1]. It will be used to import virtio resources into the virtio-video driver currently under discussion [2]. The patch under consideration to add support in the virtio-video driver is [3]. It uses the APIs from v3 of this series, but the changes to update it are relatively minor. This patchset adds a new flavor of dma-bufs that supports querying the underlying virtio object UUID, as well as adding support for exporting resources from virtgpu. [1] https://markmail.org/thread/2ypjt5cfeu3m6lxu [2] https://markmail.org/thread/p5d3k566srtdtute [3] https://markmail.org/thread/j4xlqaaim266qpks v5 -> v6 changes: - Fix >80 character comment David Stevens (3): virtio: add dma-buf support for exported objects virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature drm/virtio: Support virtgpu exported resources drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ drivers/virtio/Makefile| 2 +- drivers/virtio/virtio.c| 6 ++ drivers/virtio/virtio_dma_buf.c| 82 ++ include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 37 ++ include/uapi/linux/virtio_gpu.h| 19 + 11 files changed, 321 insertions(+), 4 deletions(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h -- 2.28.0.220.ged08abb693-goog
[PATCH v6 1/3] virtio: add dma-buf support for exported objects
This change adds a new flavor of dma-bufs that can be used by virtio drivers to share exported objects. A virtio dma-buf can be queried by virtio drivers to obtain the UUID which identifies the underlying exported object. Signed-off-by: David Stevens --- drivers/virtio/Makefile | 2 +- drivers/virtio/virtio.c | 6 +++ drivers/virtio/virtio_dma_buf.c | 82 + include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 37 +++ 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 29a1386ecc03..ecdae5b596de 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a977e32a88f2..5d46f0ded92d 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(register_virtio_device); +bool is_virtio_device(struct device *dev) +{ + return dev->bus == _bus; +} +EXPORT_SYMBOL_GPL(is_virtio_device); + void unregister_virtio_device(struct virtio_device *dev) { int index = dev->index; /* save for after device release */ diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c new file mode 100644 index ..63e6b1908397 --- /dev/null +++ b/drivers/virtio/virtio_dma_buf.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dma-bufs for virtio exported objects + * + * Copyright (C) 2020 Google, Inc. 
+ */ + +#include + +/** + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object + * @exp_info: [in] see dma_buf_export(). ops MUST refer to a dma_buf_ops + * struct embedded in a virtio_dma_buf_ops. + * + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf + * for an virtio exported object that can be queried by other virtio drivers + * for the object's UUID. + */ +struct dma_buf *virtio_dma_buf_export( + const struct dma_buf_export_info *exp_info) +{ + const struct virtio_dma_buf_ops *virtio_ops = container_of( + exp_info->ops, const struct virtio_dma_buf_ops, ops); + + if (!exp_info->ops + || exp_info->ops->attach != _dma_buf_attach + || !virtio_ops->get_uuid) { + return ERR_PTR(-EINVAL); + } + + return dma_buf_export(exp_info); +} +EXPORT_SYMBOL(virtio_dma_buf_export); + +/** + * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs + */ +int virtio_dma_buf_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + int ret; + const struct virtio_dma_buf_ops *ops = container_of( + dma_buf->ops, const struct virtio_dma_buf_ops, ops); + + if (ops->device_attach) { + ret = ops->device_attach(dma_buf, attach); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(virtio_dma_buf_attach); + +/** + * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf + * @dma_buf: buffer to query + */ +bool is_virtio_dma_buf(struct dma_buf *dma_buf) +{ + return dma_buf->ops->attach == _dma_buf_attach; +} +EXPORT_SYMBOL(is_virtio_dma_buf); + +/** + * virtio_dma_buf_get_uuid - gets a virtio dma-buf's exported object's uuid + * @dma_buf: [in] buffer to query + * @uuid: [out] the uuid + * + * Returns: 0 on success, negative on failure. 
+ */ +int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf, + uuid_t *uuid) +{ + const struct virtio_dma_buf_ops *ops = container_of( + dma_buf->ops, const struct virtio_dma_buf_ops, ops); + + if (!is_virtio_dma_buf(dma_buf)) + return -EINVAL; + + return ops->get_uuid(dma_buf, uuid); +} +EXPORT_SYMBOL(virtio_dma_buf_get_uuid); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 15f906e4a748..9397e25616c4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev) void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); +bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); diff --git a/incl
[PATCH v6 3/3] drm/virtio: Support virtgpu exported resources
Add support for UUID-based resource sharing mechanism to virtgpu. This implements the new virtgpu commands and hooks them up to dma-buf's get_uuid callback. Signed-off-by: David Stevens --- drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ 5 files changed, 175 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index ab4bed78e656..b039f493bda9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -165,6 +165,7 @@ static unsigned int features[] = { VIRTIO_GPU_F_VIRGL, #endif VIRTIO_GPU_F_EDID, + VIRTIO_GPU_F_RESOURCE_UUID, }; static struct virtio_driver virtio_gpu_driver = { .feature_table = features, @@ -202,6 +203,8 @@ static struct drm_driver driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_mmap = drm_gem_prime_mmap, + .gem_prime_export = virtgpu_gem_prime_export, + .gem_prime_import = virtgpu_gem_prime_import, .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table, .gem_create_object = virtio_gpu_create_object, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 49bebdee6d91..39dc907aa805 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -49,6 +49,10 @@ #define DRIVER_MINOR 1 #define DRIVER_PATCHLEVEL 0 +#define UUID_INITIALIZING 0 +#define UUID_INITIALIZED 1 +#define UUID_INITIALIZATION_FAILED 2 + struct virtio_gpu_object_params { uint32_t format; uint32_t width; @@ -71,6 +75,9 @@ struct virtio_gpu_object { uint32_t hw_res_handle; bool dumb; bool created; + + int uuid_state; + uuid_t uuid; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, base.base) @@ -200,6 +207,7 @@ struct 
virtio_gpu_device { bool has_virgl_3d; bool has_edid; bool has_indirect; + bool has_resource_assign_uuid; struct work_struct config_changed_work; @@ -210,6 +218,8 @@ struct virtio_gpu_device { struct virtio_gpu_drv_capset *capsets; uint32_t num_capsets; struct list_head cap_cache; + + spinlock_t resource_export_lock; }; struct virtio_gpu_fpriv { @@ -335,6 +345,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct *work); void virtio_gpu_notify(struct virtio_gpu_device *vgdev); +int +virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object_array *objs); + /* virtgpu_display.c */ void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); @@ -366,6 +380,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo); /* virtgpu_prime.c */ +struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj, +int flags); +struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *buf); +int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj, + uuid_t *uuid); struct drm_gem_object *virtgpu_gem_prime_import_sg_table( struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 023a030ca7b9..7bcd0c75effa 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev) vgdev->dev = dev->dev; spin_lock_init(>display_info_lock); + spin_lock_init(>resource_export_lock); ida_init(>ctx_id_ida); ida_init(>resource_ida); init_waitqueue_head(>resp_wq); @@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev) if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) { vgdev->has_indirect = true; } + if (virtio_has_feature(vgdev->vdev, 
VIRTIO_GPU_F_RESOURCE_UUID)) { + vgdev->has_resource_assign_uuid = true; + } DRM_INFO("features: %cvirgl %cedid\n", vgdev->has_virgl_3d ? '+' : '-', diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 050d24c39a8f..acd14ef73d56 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@
[PATCH v6 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
This feature allows the guest to request a UUID from the host for a particular virtio_gpu resource. The UUID can then be shared with other virtio devices, to allow the other host devices to access the virtio_gpu's corresponding host resource. Signed-off-by: David Stevens --- include/uapi/linux/virtio_gpu.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..9721d58b4d58 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -50,6 +50,10 @@ * VIRTIO_GPU_CMD_GET_EDID */ #define VIRTIO_GPU_F_EDID1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -340,4 +346,17 @@ enum virtio_gpu_formats { VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, }; +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __u8 uuid[16]; +}; + #endif -- 2.28.0.220.ged08abb693-goog
Re: [virtio-dev] Re: [PATCH v5 0/3] Support virtio cross-device resources
Unless there are any remaining objections to these patches, what are the next steps towards getting these merged? Sorry, I'm not familiar with the workflow for contributing patches to Linux. Thanks, David On Tue, Jun 9, 2020 at 6:53 PM Michael S. Tsirkin wrote: > > On Tue, Jun 09, 2020 at 10:25:15AM +0900, David Stevens wrote: > > This patchset implements the current proposal for virtio cross-device > > resource sharing [1]. It will be used to import virtio resources into > > the virtio-video driver currently under discussion [2]. The patch > > under consideration to add support in the virtio-video driver is [3]. > > It uses the APIs from v3 of this series, but the changes to update it > > are relatively minor. > > > > This patchset adds a new flavor of dma-bufs that supports querying the > > underlying virtio object UUID, as well as adding support for exporting > > resources from virtgpu. > > Gerd, David, if possible, please test this in configuration with > virtual VTD enabled but with iommu_platform=off > to make sure we didn't break this config. > > > Besides that, for virtio parts: > > Acked-by: Michael S. Tsirkin > > > > [1] https://markmail.org/thread/2ypjt5cfeu3m6lxu > > [2] https://markmail.org/thread/p5d3k566srtdtute > > [3] https://markmail.org/thread/j4xlqaaim266qpks > > > > v4 -> v5 changes: > > - Remove virtio_dma_buf_export_info. 
> > > > David Stevens (3): > > virtio: add dma-buf support for exported objects > > virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature > > drm/virtio: Support virtgpu exported resources > > > > drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + > > drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++ > > drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ > > drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- > > drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ > > drivers/virtio/Makefile| 2 +- > > drivers/virtio/virtio.c| 6 ++ > > drivers/virtio/virtio_dma_buf.c| 82 ++ > > include/linux/virtio.h | 1 + > > include/linux/virtio_dma_buf.h | 37 ++ > > include/uapi/linux/virtio_gpu.h| 19 + > > 11 files changed, 321 insertions(+), 4 deletions(-) > > create mode 100644 drivers/virtio/virtio_dma_buf.c > > create mode 100644 include/linux/virtio_dma_buf.h > > > > -- > > 2.27.0.278.ge193c7cf3a9-goog > > > - > To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org > For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org >
Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects
On Thu, Jun 18, 2020 at 9:29 PM Guennadi Liakhovetski wrote: > > Hi Michael, > > On Thu, Jun 04, 2020 at 03:05:23PM -0400, Michael S. Tsirkin wrote: > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote: > > > This change adds a new flavor of dma-bufs that can be used by virtio > > > drivers to share exported objects. A virtio dma-buf can be queried by > > > virtio drivers to obtain the UUID which identifies the underlying > > > exported object. > > > > > > Signed-off-by: David Stevens > > > > Is this just for graphics? If yes I'd rather we put it in the graphics > > driver. We can always move it later ... > > Wouldn't this be the API that audio virtualisation will have to use to share > buffers between the host and any guests? The new flavor of dma-buf isn't directly related to sharing buffers between the host and the guest. The purpose of this API is to help share buffers between multiple virtio devices - e.g. share buffers created by a virito-gpu device with a virito-video device. In particular, the new flavor of dma-buf provides a mechanism to attach identifying metadata to a dma-buf object that is shared between different virtio drivers in a single guest. This identifying metadata can then be passed to the importing device and used to fetch some resource from the exporting device. But the new flavor of dma-buf is an abstraction within the guest kernel, independent of the host-guest boundary, and it's definitely not necessary if we're only talking about a single virtio subsystem. 
-David > Thanks > Guennadi > > > > --- > > > drivers/virtio/Makefile | 2 +- > > > drivers/virtio/virtio.c | 6 +++ > > > drivers/virtio/virtio_dma_buf.c | 89 + > > > include/linux/virtio.h | 1 + > > > include/linux/virtio_dma_buf.h | 58 + > > > 5 files changed, 155 insertions(+), 1 deletion(-) > > > create mode 100644 drivers/virtio/virtio_dma_buf.c > > > create mode 100644 include/linux/virtio_dma_buf.h > > > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile > > > index 29a1386ecc03..ecdae5b596de 100644 > > > --- a/drivers/virtio/Makefile > > > +++ b/drivers/virtio/Makefile > > > @@ -1,5 +1,5 @@ > > > # SPDX-License-Identifier: GPL-2.0 > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o > > > obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o > > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > > > virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c > > > index a977e32a88f2..5d46f0ded92d 100644 > > > --- a/drivers/virtio/virtio.c > > > +++ b/drivers/virtio/virtio.c > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev) > > > } > > > EXPORT_SYMBOL_GPL(register_virtio_device); > > > > > > +bool is_virtio_device(struct device *dev) > > > +{ > > > + return dev->bus == _bus; > > > +} > > > +EXPORT_SYMBOL_GPL(is_virtio_device); > > > + > > > void unregister_virtio_device(struct virtio_device *dev) > > > { > > > int index = dev->index; /* save for after device release */ > > > diff --git a/drivers/virtio/virtio_dma_buf.c > > > b/drivers/virtio/virtio_dma_buf.c > > > new file mode 100644 > > > index ..23e3399b11ed > > > --- /dev/null > > > +++ b/drivers/virtio/virtio_dma_buf.c > > > @@ -0,0 +1,89 @@ > > > +// SPDX-License-Identifier: GPL-2.0-or-later > > > +/* > > > + * dma-bufs for virtio exported objects > > > + * > > > + * Copyright (C) 2020 Google, Inc. 
> > > + */ > > > + > > > +#include > > > + > > > +/** > > > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported > > > object > > > + * > > > + * This wraps dma_buf_export() to allow virtio drivers to create a > > > dma-buf > > > + * for an virtio exported object that can be queried by other virtio > > > drivers > > > + * for the object's UUID. > > > + */ > > > +struct dma_buf *virtio_dma_buf_export( > > > + const struct virtio_dma_buf_export_info *virtio_exp_info) > > > +{ > > > + struct dma_buf_export_info exp_info; > > > + > > > + if (!virtio_exp_info->ops > > > + || virt
[PATCH v5 1/3] virtio: add dma-buf support for exported objects
This change adds a new flavor of dma-bufs that can be used by virtio drivers to share exported objects. A virtio dma-buf can be queried by virtio drivers to obtain the UUID which identifies the underlying exported object. Signed-off-by: David Stevens --- drivers/virtio/Makefile | 2 +- drivers/virtio/virtio.c | 6 +++ drivers/virtio/virtio_dma_buf.c | 82 + include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 37 +++ 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 29a1386ecc03..ecdae5b596de 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a977e32a88f2..5d46f0ded92d 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(register_virtio_device); +bool is_virtio_device(struct device *dev) +{ + return dev->bus == _bus; +} +EXPORT_SYMBOL_GPL(is_virtio_device); + void unregister_virtio_device(struct virtio_device *dev) { int index = dev->index; /* save for after device release */ diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c new file mode 100644 index ..fa0d3a668f53 --- /dev/null +++ b/drivers/virtio/virtio_dma_buf.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dma-bufs for virtio exported objects + * + * Copyright (C) 2020 Google, Inc. 
+ */ + +#include + +/** + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object + * @exp_info: [in] see dma_buf_export(). ops MUST refer to a dma_buf_ops + * struct embedded in a virtio_dma_buf_ops. + * + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf + * for an virtio exported object that can be queried by other virtio drivers + * for the object's UUID. + */ +struct dma_buf *virtio_dma_buf_export( + const struct dma_buf_export_info *exp_info) +{ + const struct virtio_dma_buf_ops *virtio_ops = container_of( + exp_info->ops, const struct virtio_dma_buf_ops, ops); + + if (!exp_info->ops + || exp_info->ops->attach != _dma_buf_attach + || !virtio_ops->get_uuid) { + return ERR_PTR(-EINVAL); + } + + return dma_buf_export(exp_info); +} +EXPORT_SYMBOL(virtio_dma_buf_export); + +/** + * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs + */ +int virtio_dma_buf_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + int ret; + const struct virtio_dma_buf_ops *ops = container_of( + dma_buf->ops, const struct virtio_dma_buf_ops, ops); + + if (ops->device_attach) { + ret = ops->device_attach(dma_buf, attach); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(virtio_dma_buf_attach); + +/** + * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf + * @dma_buf: buffer to query + */ +bool is_virtio_dma_buf(struct dma_buf *dma_buf) +{ + return dma_buf->ops->attach == _dma_buf_attach; +} +EXPORT_SYMBOL(is_virtio_dma_buf); + +/** + * virtio_dma_buf_get_uuid - gets the uuid of the virtio dma-buf's exported object + * @dma_buf: [in] buffer to query + * @uuid: [out] the uuid + * + * Returns: 0 on success, negative on failure. 
+ */ +int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf, + uuid_t *uuid) +{ + const struct virtio_dma_buf_ops *ops = container_of( + dma_buf->ops, const struct virtio_dma_buf_ops, ops); + + if (!is_virtio_dma_buf(dma_buf)) + return -EINVAL; + + return ops->get_uuid(dma_buf, uuid); +} +EXPORT_SYMBOL(virtio_dma_buf_get_uuid); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 15f906e4a748..9397e25616c4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev) void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); +bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); diff --git
[PATCH v5 3/3] drm/virtio: Support virtgpu exported resources
Add support for UUID-based resource sharing mechanism to virtgpu. This implements the new virtgpu commands and hooks them up to dma-buf's get_uuid callback. Signed-off-by: David Stevens --- drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ 5 files changed, 175 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index ab4bed78e656..b039f493bda9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -165,6 +165,7 @@ static unsigned int features[] = { VIRTIO_GPU_F_VIRGL, #endif VIRTIO_GPU_F_EDID, + VIRTIO_GPU_F_RESOURCE_UUID, }; static struct virtio_driver virtio_gpu_driver = { .feature_table = features, @@ -202,6 +203,8 @@ static struct drm_driver driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_mmap = drm_gem_prime_mmap, + .gem_prime_export = virtgpu_gem_prime_export, + .gem_prime_import = virtgpu_gem_prime_import, .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table, .gem_create_object = virtio_gpu_create_object, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 49bebdee6d91..39dc907aa805 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -49,6 +49,10 @@ #define DRIVER_MINOR 1 #define DRIVER_PATCHLEVEL 0 +#define UUID_INITIALIZING 0 +#define UUID_INITIALIZED 1 +#define UUID_INITIALIZATION_FAILED 2 + struct virtio_gpu_object_params { uint32_t format; uint32_t width; @@ -71,6 +75,9 @@ struct virtio_gpu_object { uint32_t hw_res_handle; bool dumb; bool created; + + int uuid_state; + uuid_t uuid; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, base.base) @@ -200,6 +207,7 @@ struct 
virtio_gpu_device { bool has_virgl_3d; bool has_edid; bool has_indirect; + bool has_resource_assign_uuid; struct work_struct config_changed_work; @@ -210,6 +218,8 @@ struct virtio_gpu_device { struct virtio_gpu_drv_capset *capsets; uint32_t num_capsets; struct list_head cap_cache; + + spinlock_t resource_export_lock; }; struct virtio_gpu_fpriv { @@ -335,6 +345,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct *work); void virtio_gpu_notify(struct virtio_gpu_device *vgdev); +int +virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object_array *objs); + /* virtgpu_display.c */ void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); @@ -366,6 +380,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo); /* virtgpu_prime.c */ +struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj, +int flags); +struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *buf); +int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj, + uuid_t *uuid); struct drm_gem_object *virtgpu_gem_prime_import_sg_table( struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 023a030ca7b9..7bcd0c75effa 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev) vgdev->dev = dev->dev; spin_lock_init(>display_info_lock); + spin_lock_init(>resource_export_lock); ida_init(>ctx_id_ida); ida_init(>resource_ida); init_waitqueue_head(>resp_wq); @@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev) if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) { vgdev->has_indirect = true; } + if (virtio_has_feature(vgdev->vdev, 
VIRTIO_GPU_F_RESOURCE_UUID)) { + vgdev->has_resource_assign_uuid = true; + } DRM_INFO("features: %cvirgl %cedid\n", vgdev->has_virgl_3d ? '+' : '-', diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 050d24c39a8f..acd14ef73d56 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@
[PATCH v5 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
This feature allows the guest to request a UUID from the host for a particular virtio_gpu resource. The UUID can then be shared with other virtio devices, to allow the other host devices to access the virtio_gpu's corresponding host resource. Signed-off-by: David Stevens --- include/uapi/linux/virtio_gpu.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..9721d58b4d58 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -50,6 +50,10 @@ * VIRTIO_GPU_CMD_GET_EDID */ #define VIRTIO_GPU_F_EDID1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -340,4 +346,17 @@ enum virtio_gpu_formats { VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, }; +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __u8 uuid[16]; +}; + #endif -- 2.27.0.278.ge193c7cf3a9-goog
[PATCH v5 0/3] Support virtio cross-device resources
This patchset implements the current proposal for virtio cross-device resource sharing [1]. It will be used to import virtio resources into the virtio-video driver currently under discussion [2]. The patch under consideration to add support in the virtio-video driver is [3]. It uses the APIs from v3 of this series, but the changes to update it are relatively minor. This patchset adds a new flavor of dma-bufs that supports querying the underlying virtio object UUID, as well as adding support for exporting resources from virtgpu. [1] https://markmail.org/thread/2ypjt5cfeu3m6lxu [2] https://markmail.org/thread/p5d3k566srtdtute [3] https://markmail.org/thread/j4xlqaaim266qpks v4 -> v5 changes: - Remove virtio_dma_buf_export_info. David Stevens (3): virtio: add dma-buf support for exported objects virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature drm/virtio: Support virtgpu exported resources drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ drivers/virtio/Makefile| 2 +- drivers/virtio/virtio.c| 6 ++ drivers/virtio/virtio_dma_buf.c| 82 ++ include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 37 ++ include/uapi/linux/virtio_gpu.h| 19 + 11 files changed, 321 insertions(+), 4 deletions(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h -- 2.27.0.278.ge193c7cf3a9-goog
Re: [PATCH v3 4/4] drm/virtio: Support virtgpu exported resources
On Mon, Jun 8, 2020 at 6:43 PM Michael S. Tsirkin wrote: > > On Fri, May 15, 2020 at 04:26:15PM +0900, David Stevens wrote: > > > > + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) { > > > > + vgdev->has_resource_assign_uuid = true; > > > > + } > > > > > > > > > Just a question: this relies on DMA bufs so I assume it is > > > not really assumed to work when DMA API is bypassed, right? > > > Rather than worry what does it mean, how about just > > > disabling this feature without PLATFORM_DMA for now? > > > > By PLATFORM_DMA, do you mean CONFIG_DMA_SHARED_BUFFER? > > Sorry, no. I mean VIRTIO_F_IOMMU_PLATFORM which in the > future will be renamed to VIRTIO_F_PLATFORM_ACCESS. Shouldn't things work independent of whether or not that feature is set? If a virtio driver properly uses the dma_buf APIs (which virtgpu seems to), then that should take care of any mapping/synchronization related to VIRTIO_F_IOMMU_PLATFORM. If anything, the case where VIRTIO_F_IOMMU_PLATFORM isn't set is easier, since then we know that the "the device has same access [sic] to memory addresses supplied to it as the driver has", according to the specification. -David
Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects
On Mon, Jun 8, 2020 at 6:05 PM Michael S. Tsirkin wrote: > > On Mon, Jun 08, 2020 at 05:32:26PM +0900, David Stevens wrote: > > On Mon, Jun 8, 2020 at 3:00 PM Michael S. Tsirkin wrote: > > > > > > On Mon, Jun 08, 2020 at 10:33:09AM +0900, David Stevens wrote: > > > > On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin > > > > wrote: > > > > > > > > > > On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote: > > > > > > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin > > > > > > wrote: > > > > > > > > > > > > > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote: > > > > > > > > This change adds a new flavor of dma-bufs that can be used by > > > > > > > > virtio > > > > > > > > drivers to share exported objects. A virtio dma-buf can be > > > > > > > > queried by > > > > > > > > virtio drivers to obtain the UUID which identifies the > > > > > > > > underlying > > > > > > > > exported object. > > > > > > > > > > > > > > > > Signed-off-by: David Stevens > > > > > > > > > > > > > > Is this just for graphics? If yes I'd rather we put it in the > > > > > > > graphics > > > > > > > driver. We can always move it later ... > > > > > > > > > > > > As stated in the cover letter, this will be used by virtio-video. 
> > > > > > > > > > > > The proposed virtio-video patches: > > > > > > https://markmail.org/thread/p5d3k566srtdtute > > > > > > The patch which imports these dma-bufs (slightly out of data, uses > > > > > > v3 > > > > > > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks > > > > > > > > > > > > > > --- > > > > > > > > drivers/virtio/Makefile | 2 +- > > > > > > > > drivers/virtio/virtio.c | 6 +++ > > > > > > > > drivers/virtio/virtio_dma_buf.c | 89 > > > > > > > > + > > > > > > > > include/linux/virtio.h | 1 + > > > > > > > > include/linux/virtio_dma_buf.h | 58 + > > > > > > > > 5 files changed, 155 insertions(+), 1 deletion(-) > > > > > > > > create mode 100644 drivers/virtio/virtio_dma_buf.c > > > > > > > > create mode 100644 include/linux/virtio_dma_buf.h > > > > > > > > > > > > > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile > > > > > > > > index 29a1386ecc03..ecdae5b596de 100644 > > > > > > > > --- a/drivers/virtio/Makefile > > > > > > > > +++ b/drivers/virtio/Makefile > > > > > > > > @@ -1,5 +1,5 @@ > > > > > > > > # SPDX-License-Identifier: GPL-2.0 > > > > > > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o > > > > > > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o > > > > > > > > obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o > > > > > > > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > > > > > > > > virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o > > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c > > > > > > > > index a977e32a88f2..5d46f0ded92d 100644 > > > > > > > > --- a/drivers/virtio/virtio.c > > > > > > > > +++ b/drivers/virtio/virtio.c > > > > > > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct > > > > > > > > virtio_device *dev) > > > > > > > > } > > > > > > > > EXPORT_SYMBOL_GPL(register_virtio_device); > > > > > > > > > > > > > > > > +bool is_virtio_device(struct device *dev) > > > > > > > > +{ > > > > > > > > + return dev->bus == _bus; > > > > > 
> > > +} > > > > > > > > +EXPORT_SYMBOL_GPL(is_virtio_device); > > > > > > > > + > > > > > >
Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects
On Mon, Jun 8, 2020 at 3:00 PM Michael S. Tsirkin wrote: > > On Mon, Jun 08, 2020 at 10:33:09AM +0900, David Stevens wrote: > > On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin wrote: > > > > > > On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote: > > > > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin > > > > wrote: > > > > > > > > > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote: > > > > > > This change adds a new flavor of dma-bufs that can be used by virtio > > > > > > drivers to share exported objects. A virtio dma-buf can be queried > > > > > > by > > > > > > virtio drivers to obtain the UUID which identifies the underlying > > > > > > exported object. > > > > > > > > > > > > Signed-off-by: David Stevens > > > > > > > > > > Is this just for graphics? If yes I'd rather we put it in the graphics > > > > > driver. We can always move it later ... > > > > > > > > As stated in the cover letter, this will be used by virtio-video. > > > > > > > > The proposed virtio-video patches: > > > > https://markmail.org/thread/p5d3k566srtdtute > > > > The patch which imports these dma-bufs (slightly out of data, uses v3 > > > > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks > > > > > > > > > > --- > > > > > > drivers/virtio/Makefile | 2 +- > > > > > > drivers/virtio/virtio.c | 6 +++ > > > > > > drivers/virtio/virtio_dma_buf.c | 89 > > > > > > + > > > > > > include/linux/virtio.h | 1 + > > > > > > include/linux/virtio_dma_buf.h | 58 + > > > > > > 5 files changed, 155 insertions(+), 1 deletion(-) > > > > > > create mode 100644 drivers/virtio/virtio_dma_buf.c > > > > > > create mode 100644 include/linux/virtio_dma_buf.h > > > > > > > > > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile > > > > > > index 29a1386ecc03..ecdae5b596de 100644 > > > > > > --- a/drivers/virtio/Makefile > > > > > > +++ b/drivers/virtio/Makefile > > > > > > @@ -1,5 +1,5 @@ > > > > > > # SPDX-License-Identifier: GPL-2.0 > > > > > > 
-obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o > > > > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o > > > > > > obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o > > > > > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > > > > > > virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c > > > > > > index a977e32a88f2..5d46f0ded92d 100644 > > > > > > --- a/drivers/virtio/virtio.c > > > > > > +++ b/drivers/virtio/virtio.c > > > > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct > > > > > > virtio_device *dev) > > > > > > } > > > > > > EXPORT_SYMBOL_GPL(register_virtio_device); > > > > > > > > > > > > +bool is_virtio_device(struct device *dev) > > > > > > +{ > > > > > > + return dev->bus == _bus; > > > > > > +} > > > > > > +EXPORT_SYMBOL_GPL(is_virtio_device); > > > > > > + > > > > > > void unregister_virtio_device(struct virtio_device *dev) > > > > > > { > > > > > > int index = dev->index; /* save for after device release */ > > > > > > diff --git a/drivers/virtio/virtio_dma_buf.c > > > > > > b/drivers/virtio/virtio_dma_buf.c > > > > > > new file mode 100644 > > > > > > index ..23e3399b11ed > > > > > > --- /dev/null > > > > > > +++ b/drivers/virtio/virtio_dma_buf.c > > > > > > @@ -0,0 +1,89 @@ > > > > > > +// SPDX-License-Identifier: GPL-2.0-or-later > > > > > > +/* > > > > > > + * dma-bufs for virtio exported objects > > > > > > + * > > > > > > + * Copyright (C) 2020 Google, Inc. > > > > > > + */ > > > > > > + > &g
Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects
On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin wrote: > > On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote: > > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin wrote: > > > > > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote: > > > > This change adds a new flavor of dma-bufs that can be used by virtio > > > > drivers to share exported objects. A virtio dma-buf can be queried by > > > > virtio drivers to obtain the UUID which identifies the underlying > > > > exported object. > > > > > > > > Signed-off-by: David Stevens > > > > > > Is this just for graphics? If yes I'd rather we put it in the graphics > > > driver. We can always move it later ... > > > > As stated in the cover letter, this will be used by virtio-video. > > > > The proposed virtio-video patches: > > https://markmail.org/thread/p5d3k566srtdtute > > The patch which imports these dma-bufs (slightly out of data, uses v3 > > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks > > > > > > --- > > > > drivers/virtio/Makefile | 2 +- > > > > drivers/virtio/virtio.c | 6 +++ > > > > drivers/virtio/virtio_dma_buf.c | 89 + > > > > include/linux/virtio.h | 1 + > > > > include/linux/virtio_dma_buf.h | 58 + > > > > 5 files changed, 155 insertions(+), 1 deletion(-) > > > > create mode 100644 drivers/virtio/virtio_dma_buf.c > > > > create mode 100644 include/linux/virtio_dma_buf.h > > > > > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile > > > > index 29a1386ecc03..ecdae5b596de 100644 > > > > --- a/drivers/virtio/Makefile > > > > +++ b/drivers/virtio/Makefile > > > > @@ -1,5 +1,5 @@ > > > > # SPDX-License-Identifier: GPL-2.0 > > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o > > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o > > > > obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o > > > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > > > > virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o > > > > diff --git 
a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c > > > > index a977e32a88f2..5d46f0ded92d 100644 > > > > --- a/drivers/virtio/virtio.c > > > > +++ b/drivers/virtio/virtio.c > > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device > > > > *dev) > > > > } > > > > EXPORT_SYMBOL_GPL(register_virtio_device); > > > > > > > > +bool is_virtio_device(struct device *dev) > > > > +{ > > > > + return dev->bus == _bus; > > > > +} > > > > +EXPORT_SYMBOL_GPL(is_virtio_device); > > > > + > > > > void unregister_virtio_device(struct virtio_device *dev) > > > > { > > > > int index = dev->index; /* save for after device release */ > > > > diff --git a/drivers/virtio/virtio_dma_buf.c > > > > b/drivers/virtio/virtio_dma_buf.c > > > > new file mode 100644 > > > > index ..23e3399b11ed > > > > --- /dev/null > > > > +++ b/drivers/virtio/virtio_dma_buf.c > > > > @@ -0,0 +1,89 @@ > > > > +// SPDX-License-Identifier: GPL-2.0-or-later > > > > +/* > > > > + * dma-bufs for virtio exported objects > > > > + * > > > > + * Copyright (C) 2020 Google, Inc. > > > > + */ > > > > + > > > > +#include > > > > + > > > > +/** > > > > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported > > > > object > > > > + * > > > > + * This wraps dma_buf_export() to allow virtio drivers to create a > > > > dma-buf > > > > + * for an virtio exported object that can be queried by other virtio > > > > drivers > > > > + * for the object's UUID. > > > > + */ > > > > +struct dma_buf *virtio_dma_buf_export( > > > > + const struct virtio_dma_buf_export_info *virtio_exp_info) > > > > +{ > > > > + struct dma_buf_export_info exp_info; > > > > + > > > > + if (!virtio_exp_info->ops > > > > + || virtio_exp_info->ops->ops.attach != > > > > _dma_buf_attach > > > > +
Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects
On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin wrote: > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote: > > This change adds a new flavor of dma-bufs that can be used by virtio > > drivers to share exported objects. A virtio dma-buf can be queried by > > virtio drivers to obtain the UUID which identifies the underlying > > exported object. > > > > Signed-off-by: David Stevens > > Is this just for graphics? If yes I'd rather we put it in the graphics > driver. We can always move it later ... As stated in the cover letter, this will be used by virtio-video. The proposed virtio-video patches: https://markmail.org/thread/p5d3k566srtdtute The patch which imports these dma-bufs (slightly out of data, uses v3 of this patch set): https://markmail.org/thread/j4xlqaaim266qpks > > --- > > drivers/virtio/Makefile | 2 +- > > drivers/virtio/virtio.c | 6 +++ > > drivers/virtio/virtio_dma_buf.c | 89 + > > include/linux/virtio.h | 1 + > > include/linux/virtio_dma_buf.h | 58 + > > 5 files changed, 155 insertions(+), 1 deletion(-) > > create mode 100644 drivers/virtio/virtio_dma_buf.c > > create mode 100644 include/linux/virtio_dma_buf.h > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile > > index 29a1386ecc03..ecdae5b596de 100644 > > --- a/drivers/virtio/Makefile > > +++ b/drivers/virtio/Makefile > > @@ -1,5 +1,5 @@ > > # SPDX-License-Identifier: GPL-2.0 > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o > > obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > > virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c > > index a977e32a88f2..5d46f0ded92d 100644 > > --- a/drivers/virtio/virtio.c > > +++ b/drivers/virtio/virtio.c > > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev) > > } > > EXPORT_SYMBOL_GPL(register_virtio_device); > > > > +bool 
is_virtio_device(struct device *dev) > > +{ > > + return dev->bus == _bus; > > +} > > +EXPORT_SYMBOL_GPL(is_virtio_device); > > + > > void unregister_virtio_device(struct virtio_device *dev) > > { > > int index = dev->index; /* save for after device release */ > > diff --git a/drivers/virtio/virtio_dma_buf.c > > b/drivers/virtio/virtio_dma_buf.c > > new file mode 100644 > > index ..23e3399b11ed > > --- /dev/null > > +++ b/drivers/virtio/virtio_dma_buf.c > > @@ -0,0 +1,89 @@ > > +// SPDX-License-Identifier: GPL-2.0-or-later > > +/* > > + * dma-bufs for virtio exported objects > > + * > > + * Copyright (C) 2020 Google, Inc. > > + */ > > + > > +#include > > + > > +/** > > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported > > object > > + * > > + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf > > + * for an virtio exported object that can be queried by other virtio > > drivers > > + * for the object's UUID. > > + */ > > +struct dma_buf *virtio_dma_buf_export( > > + const struct virtio_dma_buf_export_info *virtio_exp_info) > > +{ > > + struct dma_buf_export_info exp_info; > > + > > + if (!virtio_exp_info->ops > > + || virtio_exp_info->ops->ops.attach != _dma_buf_attach > > + || !virtio_exp_info->ops->get_uuid) { > > + return ERR_PTR(-EINVAL); > > + } > > + > > + exp_info.exp_name = virtio_exp_info->exp_name; > > + exp_info.owner = virtio_exp_info->owner; > > + exp_info.ops = _exp_info->ops->ops; > > + exp_info.size = virtio_exp_info->size; > > + exp_info.flags = virtio_exp_info->flags; > > + exp_info.resv = virtio_exp_info->resv; > > + exp_info.priv = virtio_exp_info->priv; > > + BUILD_BUG_ON(sizeof(struct virtio_dma_buf_export_info) > > + != sizeof(struct dma_buf_export_info)); > > This is the only part that gives me pause. Why do we need this hack? > What's wrong with just using dma_buf_export_info directly, > and if you want the virtio ops, just using container_off? 
This approach provides a more explicit type signature and a little more type safety, I think. If others don't think it's a worthwhile tradeoff, I can remove it. -David
[PATCH v4 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
This feature allows the guest to request a UUID from the host for a particular virtio_gpu resource. The UUID can then be shared with other virtio devices, to allow the other host devices to access the virtio_gpu's corresponding host resource. Signed-off-by: David Stevens --- include/uapi/linux/virtio_gpu.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..9721d58b4d58 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -50,6 +50,10 @@ * VIRTIO_GPU_CMD_GET_EDID */ #define VIRTIO_GPU_F_EDID1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -340,4 +346,17 @@ enum virtio_gpu_formats { VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, }; +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __u8 uuid[16]; +}; + #endif -- 2.27.0.rc0.183.gde8f92d652-goog
[PATCH v4 3/3] drm/virtio: Support virtgpu exported resources
Add support for UUID-based resource sharing mechanism to virtgpu. This implements the new virtgpu commands and hooks them up to dma-buf's get_uuid callback. Signed-off-by: David Stevens --- drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ 5 files changed, 175 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index ab4bed78e656..b039f493bda9 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -165,6 +165,7 @@ static unsigned int features[] = { VIRTIO_GPU_F_VIRGL, #endif VIRTIO_GPU_F_EDID, + VIRTIO_GPU_F_RESOURCE_UUID, }; static struct virtio_driver virtio_gpu_driver = { .feature_table = features, @@ -202,6 +203,8 @@ static struct drm_driver driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_mmap = drm_gem_prime_mmap, + .gem_prime_export = virtgpu_gem_prime_export, + .gem_prime_import = virtgpu_gem_prime_import, .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table, .gem_create_object = virtio_gpu_create_object, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 49bebdee6d91..39dc907aa805 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -49,6 +49,10 @@ #define DRIVER_MINOR 1 #define DRIVER_PATCHLEVEL 0 +#define UUID_INITIALIZING 0 +#define UUID_INITIALIZED 1 +#define UUID_INITIALIZATION_FAILED 2 + struct virtio_gpu_object_params { uint32_t format; uint32_t width; @@ -71,6 +75,9 @@ struct virtio_gpu_object { uint32_t hw_res_handle; bool dumb; bool created; + + int uuid_state; + uuid_t uuid; }; #define gem_to_virtio_gpu_obj(gobj) \ container_of((gobj), struct virtio_gpu_object, base.base) @@ -200,6 +207,7 @@ struct 
virtio_gpu_device { bool has_virgl_3d; bool has_edid; bool has_indirect; + bool has_resource_assign_uuid; struct work_struct config_changed_work; @@ -210,6 +218,8 @@ struct virtio_gpu_device { struct virtio_gpu_drv_capset *capsets; uint32_t num_capsets; struct list_head cap_cache; + + spinlock_t resource_export_lock; }; struct virtio_gpu_fpriv { @@ -335,6 +345,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct *work); void virtio_gpu_notify(struct virtio_gpu_device *vgdev); +int +virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object_array *objs); + /* virtgpu_display.c */ void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); @@ -366,6 +380,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo); /* virtgpu_prime.c */ +struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj, +int flags); +struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *buf); +int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj, + uuid_t *uuid); struct drm_gem_object *virtgpu_gem_prime_import_sg_table( struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 023a030ca7b9..7bcd0c75effa 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev) vgdev->dev = dev->dev; spin_lock_init(>display_info_lock); + spin_lock_init(>resource_export_lock); ida_init(>ctx_id_ida); ida_init(>resource_ida); init_waitqueue_head(>resp_wq); @@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev) if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) { vgdev->has_indirect = true; } + if (virtio_has_feature(vgdev->vdev, 
VIRTIO_GPU_F_RESOURCE_UUID)) { + vgdev->has_resource_assign_uuid = true; + } DRM_INFO("features: %cvirgl %cedid\n", vgdev->has_virgl_3d ? '+' : '-', diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 050d24c39a8f..a753bb70fcf1 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@
[PATCH v4 0/3] Support virtio cross-device resources
This patchset implements the current proposal for virtio cross-device resource sharing [1]. It will be used to import virtio resources into the virtio-video driver currently under discussion [2]. The patch under consideration to add support in the virtio-video driver is [3]. It uses the APIs from v3 of this series, but the changes to update it are relatively minor. This patchset adds a new flavor of dma-bufs that supports querying the underlying virtio object UUID, as well as adding support for exporting resources from virtgpu. [1] https://markmail.org/thread/2ypjt5cfeu3m6lxu [2] https://markmail.org/thread/p5d3k566srtdtute [3] https://markmail.org/thread/j4xlqaaim266qpks v3 -> v4 changes: - Replace dma-buf hooks with virtio dma-buf from v1. - Remove virtio_attach callback, as the work that had been done in that callback is now done on dma-buf export. The documented requirement that get_uuid only be called on attached virtio dma-bufs is also removed. - Rebase and add call to virtio_gpu_notify for ASSIGN_UUID. David Stevens (3): virtio: add dma-buf support for exported objects virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature drm/virtio: Support virtgpu exported resources drivers/gpu/drm/virtio/virtgpu_drv.c | 3 + drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++ drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++ drivers/gpu/drm/virtio/virtgpu_prime.c | 98 +- drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++ drivers/virtio/Makefile| 2 +- drivers/virtio/virtio.c| 6 ++ drivers/virtio/virtio_dma_buf.c| 91 include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 58 +++ include/uapi/linux/virtio_gpu.h| 19 + 11 files changed, 353 insertions(+), 4 deletions(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h -- 2.27.0.rc0.183.gde8f92d652-goog
[PATCH v4 1/3] virtio: add dma-buf support for exported objects
This change adds a new flavor of dma-bufs that can be used by virtio drivers to share exported objects. A virtio dma-buf can be queried by virtio drivers to obtain the UUID which identifies the underlying exported object. Signed-off-by: David Stevens --- drivers/virtio/Makefile | 2 +- drivers/virtio/virtio.c | 6 +++ drivers/virtio/virtio_dma_buf.c | 89 + include/linux/virtio.h | 1 + include/linux/virtio_dma_buf.h | 58 + 5 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 drivers/virtio/virtio_dma_buf.c create mode 100644 include/linux/virtio_dma_buf.h diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 29a1386ecc03..ecdae5b596de 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a977e32a88f2..5d46f0ded92d 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(register_virtio_device); +bool is_virtio_device(struct device *dev) +{ + return dev->bus == _bus; +} +EXPORT_SYMBOL_GPL(is_virtio_device); + void unregister_virtio_device(struct virtio_device *dev) { int index = dev->index; /* save for after device release */ diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c new file mode 100644 index ..23e3399b11ed --- /dev/null +++ b/drivers/virtio/virtio_dma_buf.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dma-bufs for virtio exported objects + * + * Copyright (C) 2020 Google, Inc. 
+ */ + +#include + +/** + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object + * + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf + * for an virtio exported object that can be queried by other virtio drivers + * for the object's UUID. + */ +struct dma_buf *virtio_dma_buf_export( + const struct virtio_dma_buf_export_info *virtio_exp_info) +{ + struct dma_buf_export_info exp_info; + + if (!virtio_exp_info->ops + || virtio_exp_info->ops->ops.attach != _dma_buf_attach + || !virtio_exp_info->ops->get_uuid) { + return ERR_PTR(-EINVAL); + } + + exp_info.exp_name = virtio_exp_info->exp_name; + exp_info.owner = virtio_exp_info->owner; + exp_info.ops = _exp_info->ops->ops; + exp_info.size = virtio_exp_info->size; + exp_info.flags = virtio_exp_info->flags; + exp_info.resv = virtio_exp_info->resv; + exp_info.priv = virtio_exp_info->priv; + BUILD_BUG_ON(sizeof(struct virtio_dma_buf_export_info) +!= sizeof(struct dma_buf_export_info)); + + return dma_buf_export(_info); +} +EXPORT_SYMBOL(virtio_dma_buf_export); + +/** + * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs + */ +int virtio_dma_buf_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + int ret; + const struct virtio_dma_buf_ops *ops = container_of( + dma_buf->ops, const struct virtio_dma_buf_ops, ops); + + if (ops->device_attach) { + ret = ops->device_attach(dma_buf, attach); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(virtio_dma_buf_attach); + +/** + * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf + * @dma_buf: buffer to query + */ +bool is_virtio_dma_buf(struct dma_buf *dma_buf) +{ + return dma_buf->ops->attach == _dma_buf_attach; +} +EXPORT_SYMBOL(is_virtio_dma_buf); + +/** + * virtio_dma_buf_get_uuid - gets the uuid of the virtio dma-buf's exported object + * @dma_buf: [in] buffer to query + * @uuid: [out] the uuid + * + * Returns: 0 on success, negative on failure. 
+ */ +int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf, + uuid_t *uuid) +{ + const struct virtio_dma_buf_ops *ops = container_of( + dma_buf->ops, const struct virtio_dma_buf_ops, ops); + + if (!is_virtio_dma_buf(dma_buf)) + return -EINVAL; + + return ops->get_uuid(dma_buf, uuid); +} +EXPORT_SYMBOL(virtio_dma_buf_get_uuid); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 15f906e4a748..9397e25616c4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct device
Re: [PATCH v3 4/4] drm/virtio: Support virtgpu exported resources
> > + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) { > > + vgdev->has_resource_assign_uuid = true; > > + } > > > Just a question: this relies on DMA bufs so I assume it is > not really assumed to work when DMA API is bypassed, right? > Rather than worry what does it mean, how about just > disabling this feature without PLATFORM_DMA for now? By PLATFORM_DMA, do you mean CONFIG_DMA_SHARED_BUFFER? Virtio-gpu depends on DRM, which selects that feature. So I think DMA bufs should always be available when virtio-gpu is present. -David
Re: [PATCH v3 1/4] dma-buf: add support for virtio exported objects
On Thu, May 14, 2020 at 9:30 PM Daniel Vetter wrote: > On Thu, May 14, 2020 at 05:19:40PM +0900, David Stevens wrote: > > Sorry for the duplicate reply, didn't notice this until now. > > > > > Just storing > > > the uuid should be doable (assuming this doesn't change during the > > > lifetime of the buffer), so no need for a callback. > > > > Directly storing the uuid doesn't work that well because of > > synchronization issues. The uuid needs to be shared between multiple > > virtio devices with independent command streams, so to prevent races > > between importing and exporting, the exporting driver can't share the > > uuid with other drivers until it knows that the device has finished > > registering the uuid. That requires a round trip to and then back from > > the device. Using a callback allows the latency from that round trip > > registration to be hidden. > > Uh, that means you actually do something and there's locking involved. > Makes stuff more complicated, invariant attributes are a lot easier > generally. Registering that uuid just always doesn't work, and blocking > when you're exporting? Registering the id at creation and blocking in gem export is feasible, but it doesn't work well for systems with a centralized buffer allocator that doesn't support batch allocations (e.g. gralloc). In such a system, the round trip latency would almost certainly be included in the buffer allocation time. At least on the system I'm working on, I suspect that would add 10s of milliseconds of startup latency to video pipelines (although I haven't benchmarked the difference). Doing the blocking as late as possible means most or all of the latency can be hidden behind other pipeline setup work. In terms of complexity, I think the synchronization would be basically the same in either approach, just in different locations. All it would do is alleviate the need for a callback to fetch the UUID. -David
Re: [PATCH v3 1/4] dma-buf: add support for virtio exported objects
Sorry for the duplicate reply, didn't notice this until now. > Just storing > the uuid should be doable (assuming this doesn't change during the > lifetime of the buffer), so no need for a callback. Directly storing the uuid doesn't work that well because of synchronization issues. The uuid needs to be shared between multiple virtio devices with independent command streams, so to prevent races between importing and exporting, the exporting driver can't share the uuid with other drivers until it knows that the device has finished registering the uuid. That requires a round trip to and then back from the device. Using a callback allows the latency from that round trip registration to be hidden. -David
Re: [PATCH v3 1/4] dma-buf: add support for virtio exported objects
On Thu, May 14, 2020 at 12:45 AM Daniel Vetter wrote: > On Wed, Mar 11, 2020 at 12:20 PM David Stevens wrote: > > > > This change adds a new dma-buf operation that allows dma-bufs to be used > > by virtio drivers to share exported objects. The new operation allows > > the importing driver to query the exporting driver for the UUID which > > identifies the underlying exported object. > > > > Signed-off-by: David Stevens > > Adding Tomasz Figa, I've discussed this with him at elce last year I > think. Just to make sure. > > Bunch of things: > - obviously we need the users of this in a few drivers, can't really > review anything stand-alone Here is a link to the usage of this feature by the currently under development virtio-video driver: https://markmail.org/thread/j4xlqaaim266qpks > - adding very specific ops to the generic interface is rather awkward, > eventually everyone wants that and we end up in a mess. I think the > best solution here would be if we create a struct virtio_dma_buf which > subclasses dma-buf, add a (hopefully safe) runtime upcasting > functions, and then a virtio_dma_buf_get_uuid() function. Just storing > the uuid should be doable (assuming this doesn't change during the > lifetime of the buffer), so no need for a callback. So you would prefer a solution similar to the original version of this patchset? https://markmail.org/message/z7if4u56q5fmaok4
Re: [2.6 patch] unexport icmpmsg_statistics
My bad -- I see what it's doing, and it looks ok after all. I thought I saw an INMSGS (but didn't). These are ICMP errors that went through icmp_rcv() and were counted correctly before getting to the protocol error handlers. These are failures due mostly to not having enough, or the right protocol info in the error packet being handled. I'm not sure I'd count those as ICMP errors, since the ICMP header itself is correct, but ok... SCTP doesn't look so bad, though I think the references are still questionable (but debatable) as ICMP errors. sctp_v4_err is incrementing ICMP_MIB_INERRORS if there isn't enough IP header to find the ports, I see. I'm not sure that counts as an ICMP error, but it's not so terrible. It's doing the same thing if a lookup fails to match "vtag" from the encapsulated error packet. Again, I don't know that those are ICMP errors (which normally are something wrong with the ICMP header). So, I stand corrected, and sorry about the histrionics. Since these are arguably ICMP errors, and since errors is the only thing being MIB-counted in DCCP and SCTP, then it now looks ok to me as-is, and also ok to remove icmpmsg_statistics from exporting. +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6 patch] unexport icmpmsg_statistics
I took a look at the DCCP references, and I think they're just incrementing the wrong MIB variable -- e.g., it's incrementing ICMP_MIB_INERRORS when the skb length is less than the header indicates. That's not an ICMP_MIB_INERRORS error, that's an IPSTATS_MIB_INHDRERRORS error. ICMP_MIB_INERRORS is when you receive an ICMP error packet; an IP header error is something else entirely. That's followed by a failed lookup incrementing ICMP_MIB_INERRORS which should be an unknown port error in the transport MIB (assuming it has one-- it's not an ICMP error; could be an IP error, if the address isn't local, rather than unknown port). In SCTP, it appears to have similar problems. SCTP errors are not ICMP errors, though it perhaps should be calling icmp_send() to send one to the offending host for some of the cases. I haven't seen any ICMP-relevant stats correctly referenced in these yet. I don't want to patch them directly, since I can't easily test them; if someone who works with DCCP and SCTP would like to, I'd be happy to review. Any volunteers? +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6 patch] unexport icmpmsg_statistics
[EMAIL PROTECTED] wrote on 10/24/2007 12:14:37 PM: > On Wed, Oct 24, 2007 at 12:07:45PM -0700, David Stevens wrote: > > [EMAIL PROTECTED] wrote on 10/24/2007 09:24:10 AM: > > > > > This patch removes the unused EXPORT_SYMBOL(icmpmsg_statistics). > > > > > > Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]> > > > > > > --- > > > 4ce74657ac0b1bdcb4c7bc359d05643f8cc4a08b > > > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c > > > index 272c69e..233de06 100644 > > > --- a/net/ipv4/icmp.c > > > +++ b/net/ipv4/icmp.c > > > @@ -1104,5 +1104,4 @@ void __init icmp_init(struct net_proto_family > > *ops) > > > EXPORT_SYMBOL(icmp_err_convert); > > > EXPORT_SYMBOL(icmp_send); > > > EXPORT_SYMBOL(icmp_statistics); > > > -EXPORT_SYMBOL(icmpmsg_statistics); > > > EXPORT_SYMBOL(xrlim_allow); > > > > "icmpmsg_statistics" belongs with (and replaces some of the > > old...) > > "icmp_statistics". I'm not sure that any modules use it, but I think you > > should remove both or neither. > > icmp_statistics is used by the dccp_ipv4 and sctp modules. The only items left in icmp_statistics are "InMsgs, InErrs, OutMsgs, OutErrs", so if dccp and sctp are sending or receiving any in or out ICMP messages, they should be using the new macros (which reference icmpmsg_statistics, not icmp_statistics) to count them. I took a quick look at SCTP. I don't know if that's going through icmp_rcv() or not; if so, I think it's double-counting; if not, then it isn't counting the individual types (as it should), and it should have ICMPMSG macros doing that. So, again, icmpmsg_statistics either should stay exported, or neither icmpmsg_statistics nor icmp_statistics should be exported (depending on how SCTP and DCCP code is resolved). It's incorrect in the current code to increment ICMP_MIB_INMSGS without incrementing one of the types too. 
+-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6 patch] unexport icmpmsg_statistics
[EMAIL PROTECTED] wrote on 10/24/2007 09:24:10 AM: > This patch removes the unused EXPORT_SYMBOL(icmpmsg_statistics). > > Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]> > > --- > 4ce74657ac0b1bdcb4c7bc359d05643f8cc4a08b > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c > index 272c69e..233de06 100644 > --- a/net/ipv4/icmp.c > +++ b/net/ipv4/icmp.c > @@ -1104,5 +1104,4 @@ void __init icmp_init(struct net_proto_family *ops) > EXPORT_SYMBOL(icmp_err_convert); > EXPORT_SYMBOL(icmp_send); > EXPORT_SYMBOL(icmp_statistics); > -EXPORT_SYMBOL(icmpmsg_statistics); > EXPORT_SYMBOL(xrlim_allow); "icmpmsg_statistics" belongs with (and replaces some of the old...) "icmp_statistics". I'm not sure that any modules use it, but I think you should remove both or neither. +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6 patch] unexport icmpmsg_statistics
[EMAIL PROTECTED] wrote on 10/24/2007 09:24:10 AM: This patch removes the unused EXPORT_SYMBOL(icmpmsg_statistics). Signed-off-by: Adrian Bunk [EMAIL PROTECTED] --- 4ce74657ac0b1bdcb4c7bc359d05643f8cc4a08b diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 272c69e..233de06 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1104,5 +1104,4 @@ void __init icmp_init(struct net_proto_family *ops) EXPORT_SYMBOL(icmp_err_convert); EXPORT_SYMBOL(icmp_send); EXPORT_SYMBOL(icmp_statistics); -EXPORT_SYMBOL(icmpmsg_statistics); EXPORT_SYMBOL(xrlim_allow); icmpmsg_statistics belongs with (and replaces some of the old...) icmp_statistics. I'm not sure that any modules use it, but I think you should remove both or neither. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6 patch] unexport icmpmsg_statistics
[EMAIL PROTECTED] wrote on 10/24/2007 12:14:37 PM: On Wed, Oct 24, 2007 at 12:07:45PM -0700, David Stevens wrote: [EMAIL PROTECTED] wrote on 10/24/2007 09:24:10 AM: This patch removes the unused EXPORT_SYMBOL(icmpmsg_statistics). Signed-off-by: Adrian Bunk [EMAIL PROTECTED] --- 4ce74657ac0b1bdcb4c7bc359d05643f8cc4a08b diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 272c69e..233de06 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1104,5 +1104,4 @@ void __init icmp_init(struct net_proto_family *ops) EXPORT_SYMBOL(icmp_err_convert); EXPORT_SYMBOL(icmp_send); EXPORT_SYMBOL(icmp_statistics); -EXPORT_SYMBOL(icmpmsg_statistics); EXPORT_SYMBOL(xrlim_allow); icmpmsg_statistics belongs with (and replaces some of the old...) icmp_statistics. I'm not sure that any modules use it, but I think you should remove both or neither. icmp_statistics is used by the dccp_ipv4 and sctp modules. The only items left in icmp_statistics are InMsgs, InErrs, OutMsgs, OutErrs, so if dccp and sctp are sending or receiving any in or out ICMP messages, they should be using the new macros (which reference icmpmsg_statistics, not icmp_statistics) to count them. I took a quick look at SCTP. I don't know if that's going through icmp_rcv() or not; if so, I think it's double-counting; if not, then it isn't counting the individual types (as it should), and it should have ICMPMSG macros doing that. So, again, icmpmsg_statistics either should stay exported, or neither icmpmsg_statistics nor icmp_statistics should be exported (depending on how SCTP and DCCP code is resolved). It's incorrect in the current code to increment ICMP_MIB_INMSGS without incrementing one of the types too. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6 patch] unexport icmpmsg_statistics
I took a look at the DCCP references, and I think they're just incrementing the wrong MIB variable -- e.g., it's incrementing ICMP_MIB_INERRORS when the skb length is less than the header indicates. That's not an ICMP_MIB_INERRORS error, that's an IPSTATS_MIB_INHDRERRORS error. ICMP_MIB_INERRORS is when you receive an ICMP error packet; an IP header error is something else entirely. That's followed by a failed lookup incrementing ICMP_MIB_INERRORS which should be an unknown port error in the transport MIB (assuming it has one-- it's not an ICMP error; could be an IP error, if the address isn't local, rather than unknown port). In SCTP, it appears to have similar problems. SCTP errors are not ICMP errors, though it perhaps should be calling icmp_send() to send one to the offending host for some of the cases. I haven't seen any ICMP-relevant stats correctly referenced in these yet. I don't want to patch them directly, since I can't easily test them; if someone who works with DCCP and SCTP would like to, I'd be happy to review. Any volunteers? +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [2.6 patch] unexport icmpmsg_statistics
My bad -- I see what it's doing, and it looks ok after all. I thought I saw an INMSGS (but didn't). These are ICMP errors that went through icmp_rcv() and were counted correctly before getting to the protocol error handlers. These are failures due mostly to not having enough, or the right protocol info in the error packet being handled. I'm not sure I'd count those as ICMP errors, since the ICMP header itself is correct, but ok... SCTP doesn't look so bad, though I think the references are still questionable (but debatable) as ICMP errors. sctp_v4_err is incrementing ICMP_MIB_INERRORS if there isn't enough IP header to find the ports, I see. I'm not sure that counts as an ICMP error, but it's not so terrible. It's doing the same thing if a lookup fails to match vtag from the encapsulated error packet. Again, I don't know that those are ICMP errors (which normally are something wrong with the ICMP header). So, I stand corrected, and sorry about the histrionics. Since these are arguably ICMP errors, and since errors is the only thing being MIB-counted in DCCP and SCTP, then it now looks ok to me as-is, and also ok to remove icmpmsg_statistics from exporting. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Fallback to ipv4 if we try to add join IPv4 multicast group via ipv4-mapped address.
Dmitry, Good catch; a couple comments: > struct ipv6_pinfo *np = inet6_sk(sk); > int err; > + int addr_type = ipv6_addr_type(addr); > + > + if (addr_type == IPV6_ADDR_MAPPED) { > + __be32 v4addr = addr->s6_addr32[3]; > + struct ip_mreqn mreq; > + mreq.imr_multiaddr.s_addr = v4addr; > + mreq.imr_address.s_addr = INADDR_ANY; > + mreq.imr_ifindex = ifindex; > + > + return ip_mc_join_group(sk, ); > + } ipv6_addr_type() returns a bitmask, so you should use: if (addr_type & IPV6_ADDR_MAPPED) { Also, you should have a blank line after the "mreq" declaration. Ditto for both in ipv6_mc_sock_drop(). I don't expect the multicast source filtering interface will behave well for mapped addresses, either. The mapped multicast address won't appear to be a multicast address (and return error there), and all the source filters would have to be v4mapped addresses and modify the v4 source filters for this to do as you expect. So, there's more to it (and it may be a bit messy) to support mapped multicast addresses fully. I'll think about that part some more. +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Fallback to ipv4 if we try to add join IPv4 multicast group via ipv4-mapped address.
Dmitry, Good catch; a couple comments: struct ipv6_pinfo *np = inet6_sk(sk); int err; + int addr_type = ipv6_addr_type(addr); + + if (addr_type == IPV6_ADDR_MAPPED) { + __be32 v4addr = addr-s6_addr32[3]; + struct ip_mreqn mreq; + mreq.imr_multiaddr.s_addr = v4addr; + mreq.imr_address.s_addr = INADDR_ANY; + mreq.imr_ifindex = ifindex; + + return ip_mc_join_group(sk, mreq); + } ipv6_addr_type() returns a bitmask, so you should use: if (addr_type IPV6_ADDR_MAPPED) { Also, you should have a blank line after the mreq declaration. Ditto for both in ipv6_mc_sock_drop(). I don't expect the multicast source filtering interface will behave well for mapped addresses, either. The mapped multicast address won't appear to be a multicast address (and return error there), and all the source filters would have to be v4mapped addresses and modify the v4 source filters for this to do as you expect. So, there's more to it (and it may be a bit messy) to support mapped multicast addresses fully. I'll think about that part some more. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RESEND] 2.6.22.6 networking [ipv4]: fix wrong destination when reply packetes
I'm not sure why it's using rt_src here, but there are relevant cases that your description doesn't cover. For example, what happens if the source is not set in the original packet? Does NAT affect this? You quote RFC text for ICMP echo and the case where the receiving machine is the final destination, but you're modifying code that is used for all ICMP types and used for ICMP errors generated when acting as an intermediate router. In ordinary cases, and certainly with ICMP echo when the source is set in the original packet and no rewriting is going on (and the address is not spoofed), using the original source as the destination is fine. But have you tested or considered the other cases? +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RESEND] 2.6.22.6 networking [ipv4]: fix wrong destination when reply packetes
I'm not sure why it's using rt_src here, but there are relevant cases that your description doesn't cover. For example, what happens if the source is not set in the original packet? Does NAT affect this? You quote RFC text for ICMP echo and the case where the receiving machine is the final destination, but you're modifying code that is used for all ICMP types and used for ICMP errors generated when acting as an intermediate router. In ordinary cases, and certainly with ICMP echo when the source is set in the original packet and no rewriting is going on (and the address is not spoofed), using the original source as the destination is fine. But have you tested or considered the other cases? +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: RFC: issues concerning the next NAPI interface
Stephen Hemminger <[EMAIL PROTECTED]> wrote on 08/24/2007 08:52:03 AM: > > You need hardware support for deferred interrupts. Most devices have it > (e1000, sky2, tg3) > and it interacts well with NAPI. It is not a generic thing you want done by the stack, > you want the hardware to hold off interrupts until X packets or Y usecs have expired. For generic hardware that doesn't support it, couldn't you use an estimator and adjust the timer dynamically in software based on sampled values? Switch to per-packet interrupts when the receive rate is low... Actually, that's how I thought NAPI worked before I found out otherwise (ie, before I looked :-)). The hardware-accelerated one is essentially siloing as done by ancient serial devices on UNIX systems. If you had a tunable for a target count, and an estimator for the time interval, then switch to per-packet when the estimator exceeds a tunable max threshold (and also, I suppose, if you near overflowing the ring on the min timer granularity), you get almost all of it, right? Problem is if it increases rapidly, you may drop packets before you notice that the ring is full in the current estimated interval. +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: RFC: issues concerning the next NAPI interface
Stephen Hemminger [EMAIL PROTECTED] wrote on 08/24/2007 08:52:03 AM: You need hardware support for deferred interrupts. Most devices have it (e1000, sky2, tg3) and it interacts well with NAPI. It is not a generic thing you want done by the stack, you want the hardware to hold off interrupts until X packets or Y usecs have expired. For generic hardware that doesn't support it, couldn't you use an estimator and adjust the timer dynamically in software based on sampled values? Switch to per-packet interrupts when the receive rate is low... Actually, that's how I thought NAPI worked before I found out otherwise (ie, before I looked :-)). The hardware-accelerated one is essentially siloing as done by ancient serial devices on UNIX systems. If you had a tunable for a target count, and an estimator for the time interval, then switch to per-packet when the estimator exceeds a tunable max threshold (and also, I suppose, if you near overflowing the ring on the min timer granularity), you get almost all of it, right? Problem is if it increases rapidly, you may drop packets before you notice that the ring is full in the current estimated interval. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: kernel module to list sockets with their multicast group joins and respective processes
> sockets that join different groups receive messages from the respective > other group (if they are only bound to the wildcard address). Obviously > this is handled differently in Linux for IPv4, where the socket matching > for incoming message is done solely on the 4-tuple of addresses and ports, > and IPv6, where the explicit socket joins are taken into account [1,2]. These are not different in v4 vs v6. Group membership is per-interface, delivery depends on the binding (for both v4 and v6), and this is the same as BSD behavior since IP multicasting was invented. There are two levels of checks, and I think you're confusing them. inet6_mc_check() is checking that somebody (*anybody*) has joined the group on the receiving interface, before delivering the packet to IP. Delivery to a particular socket depends on source filter settings, if they are in use, and bindings (only). The socket has a list of joined addresses to track source filters and to check for correct join/leave (a socket can't leave a group it hasn't joined), but that list does not determine delivery of a multicast packet to the socket when source filtering is not in use. +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: kernel module to list sockets with their multicast group joins and respective processes
sockets that join different groups receive messages from the respective other group (if they are only bound to the wildcard address). Obviously this is handled differently in Linux for IPv4, where the socket matching for incoming message is done solely on the 4-tuple of addresses and ports, and IPv6, where the explicit socket joins are taken into account [1,2]. These are not different in v4 vs v6. Group membership is per-interface, delivery depends on the binding (for both v4 and v6), and this is the same as BSD behavior since IP multicasting was invented. There are two levels of checks, and I think you're confusing them. inet6_mc_check() is checking that somebody (*anybody*) has joined the group on the receiving interface, before delivering the packet to IP. Delivery to a particular socket depends on source filter settings, if they are in use, and bindings (only). The socket has a list of joined addresses to track source filters and to check for correct join/leave (a socket can't leave a group it hasn't joined), but that list does not determine delivery of a multicast packet to the socket when source filtering is not in use. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Scaling Max IP address limitation
> > I keep having hopeful dreams that one day netfilter will grow support > for cross-protocol NAT (IE: NAT a TCPv4 connection over TCPv6 to the > IPv6-only local web server, or vice versa). It would seem that would > require a merged "xtables" program. You don't actually need it (at least for easy cases like that), because IPv6 defines IPv4 mapped IPv6 addresses of the form :::a.b.c.d. These will generate IPv4 packets for a.b.c.d, from a v6 socket. Unless you're using v6only binding (a sysctl option), you can connect to v6-only servers using a v4 network and a v4 address of the server. The peer address on those connections will show up as a v4 mapped address, and all the traffic will be v4, but the socket layer is all v6. +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Scaling Max IP address limitation
Unrelated wishful thinking I keep having hopeful dreams that one day netfilter will grow support for cross-protocol NAT (IE: NAT a TCPv4 connection over TCPv6 to the IPv6-only local web server, or vice versa). It would seem that would require a merged xtables program. You don't actually need it (at least for easy cases like that), because IPv6 defines IPv4 mapped IPv6 addresses of the form :::a.b.c.d. These will generate IPv4 packets for a.b.c.d, from a v6 socket. Unless you're using v6only binding (a sysctl option), you can connect to v6-only servers using a v4 network and a v4 address of the server. The peer address on those connections will show up as a v4 mapped address, and all the traffic will be v4, but the socket layer is all v6. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.19.2 regression introduced by "IPV4/IPV6: Fix inet{,6} device initialization order."
I expect this is the failure to join the all-nodes multicast group, in which case the fix has already been posted to netdev. I believe the router advertisements are sent to that, and if the join failed, it wouldn't receive any of them. I think it's better to add the fix than withdraw this patch, since the original bug is a crash. Details: The IPv6 code passes the "dev" entry to the multicast group incrementer and uses it to dereference to get the in6_dev. IPv4, by contrast, passes the in_dev directly to its equivalent functions. IPv6 joins the required "all-nodes" multicast group in the multicast device initialization function, which due to the fix won't have a dev entry at that time. The patch posted by Yoshifuji Hideaki moves the all-nodes join until after the ip6_ptr is added to the dev. +-DLS - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.19.2 regression introduced by IPV4/IPV6: Fix inet{,6} device initialization order.
I expect this is the failure to join the all-nodes multicast group, in which case the fix has already been posted to netdev. I believe the router advertisements are sent to that, and if the join failed, it wouldn't receive any of them. I think it's better to add the fix than withdraw this patch, since the original bug is a crash. Details: The IPv6 code passes the dev entry to the multicast group incrementer and uses it to dereference to get the in6_dev. IPv4, by contrast, passes the in_dev directly to its equivalent functions. IPv6 joins the required all-nodes multicast group in the multicast device initialization function, which due to the fix won't have a dev entry at that time. The patch posted by Yoshifuji Hideaki moves the all-nodes join until after the ip6_ptr is added to the dev. +-DLS - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/