[PATCH v2 1/1] virtio: Add support for the virtio suspend feature

2024-04-22 Thread David Stevens
Add support for the VIRTIO_F_SUSPEND feature. When this feature is
negotiated, power management can use it to suspend virtio devices
instead of resorting to resetting the devices entirely.

Signed-off-by: David Stevens 
---
 drivers/virtio/virtio.c| 60 ++
 drivers/virtio/virtio_pci_common.c | 34 -
 drivers/virtio/virtio_pci_modern.c | 19 ++
 include/linux/virtio.h |  8 
 include/uapi/linux/virtio_config.h | 10 -
 5 files changed, 112 insertions(+), 19 deletions(-)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f4080692b351..c7685a0dc995 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include 
+#include <linux/delay.h>
 #include 
 #include 
 #include 
@@ -498,6 +499,13 @@ void unregister_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(unregister_virtio_device);
 
+void virtio_device_mark_removed(struct virtio_device *dev)
+{
+   /* Pairs with READ_ONCE() in virtio_device_set_suspend_bit(). */
+   WRITE_ONCE(dev->removed, true);
+}
+EXPORT_SYMBOL_GPL(virtio_device_mark_removed);
+
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev)
 {
@@ -580,6 +588,58 @@ int virtio_device_restore(struct virtio_device *dev)
return ret;
 }
 EXPORT_SYMBOL_GPL(virtio_device_restore);
+
+static int virtio_device_set_suspend_bit(struct virtio_device *dev, bool enabled)
+{
+   u8 status, target;
+
+   status = dev->config->get_status(dev);
+   if (enabled)
+   target = status | VIRTIO_CONFIG_S_SUSPEND;
+   else
+   target = status & ~VIRTIO_CONFIG_S_SUSPEND;
+
+   if (target == status)
+   return 0;
+
+   dev->config->set_status(dev, target);
+
+   while ((status = dev->config->get_status(dev)) != target) {
+   if (status & VIRTIO_CONFIG_S_NEEDS_RESET)
+   return -EIO;
+   /* Pairs with WRITE_ONCE() in virtio_device_mark_removed(). */
+   if (READ_ONCE(dev->removed))
+   return -EIO;
+   msleep(10);
+   }
+   return 0;
+}
+
+bool virtio_device_can_suspend(struct virtio_device *dev)
+{
+   return virtio_has_feature(dev, VIRTIO_F_SUSPEND) &&
+  (dev->config->get_status(dev) & VIRTIO_CONFIG_S_FEATURES_OK);
+}
+EXPORT_SYMBOL_GPL(virtio_device_can_suspend);
+
+int virtio_device_suspend(struct virtio_device *dev)
+{
+   return virtio_device_set_suspend_bit(dev, true);
+}
+EXPORT_SYMBOL_GPL(virtio_device_suspend);
+
+bool virtio_device_can_resume(struct virtio_device *dev)
+{
+   return virtio_has_feature(dev, VIRTIO_F_SUSPEND) &&
+  (dev->config->get_status(dev) & VIRTIO_CONFIG_S_SUSPEND);
+}
+EXPORT_SYMBOL_GPL(virtio_device_can_resume);
+
+int virtio_device_resume(struct virtio_device *dev)
+{
+   return virtio_device_set_suspend_bit(dev, false);
+}
+EXPORT_SYMBOL_GPL(virtio_device_resume);
 #endif
 
 static int virtio_init(void)
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index b655fccaf773..a6ca7718b5dc 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -495,31 +495,26 @@ static int virtio_pci_restore(struct device *dev)
return virtio_device_restore(&vp_dev->vdev);
 }
 
-static bool vp_supports_pm_no_reset(struct device *dev)
+static int virtio_pci_suspend(struct device *dev)
 {
struct pci_dev *pci_dev = to_pci_dev(dev);
-   u16 pmcsr;
-
-   if (!pci_dev->pm_cap)
-   return false;
-
-   pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr);
-   if (PCI_POSSIBLE_ERROR(pmcsr)) {
-   dev_err(dev, "Unable to query pmcsr");
-   return false;
-   }
+   struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 
-   return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET;
-}
+   if (virtio_device_can_suspend(&vp_dev->vdev))
+   return virtio_device_suspend(&vp_dev->vdev);
 
-static int virtio_pci_suspend(struct device *dev)
-{
-   return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev);
+   return virtio_pci_freeze(dev);
 }
 
 static int virtio_pci_resume(struct device *dev)
 {
-   return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev);
+   struct pci_dev *pci_dev = to_pci_dev(dev);
+   struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+
+   if (virtio_device_can_resume(&vp_dev->vdev))
+   return virtio_device_resume(&vp_dev->vdev);
+
+   return virtio_pci_restore(dev);
 }
 
 static const struct dev_pm_ops virtio_pci_pm_ops = {
@@ -623,9 +618,12 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 * Device is marked broken on surprise removal so that virtio upper
  

[PATCH v2 0/1] virtio: Add suspend support

2024-04-22 Thread David Stevens
This series implements support for the virtio device suspend feature
that is under discussion. Unfortunately, the virtio mailing list is
currently being migrated, so recent discussion of the proposal is not
archived anywhere. The current version of the proposal is a
combination of [1] and [2].

[1] https://lore.kernel.org/all/20230906081637.32185-3-lingshan@intel.com/
[2] https://lists.oasis-open.org/archives/virtio-comment/202402/msg00088.html

v1 -> v2:
 - Check for device removal while waiting for suspend bit.
 - Don't try to suspend uninitialized devices.
 - Use msleep instead of mdelay.
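
The heart of patch 1 is a polling handshake on the device status byte.
A simplified sketch of that loop, annotated with the v2 changelog items
(the FEATURES_OK check from item 2 lives in a separate
virtio_device_can_suspend() helper and is elided here):

	status = dev->config->get_status(dev);
	dev->config->set_status(dev, status | VIRTIO_CONFIG_S_SUSPEND);

	while (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_SUSPEND)) {
		if (dev->config->get_status(dev) & VIRTIO_CONFIG_S_NEEDS_RESET)
			return -EIO;
		if (READ_ONCE(dev->removed))	/* item 1: surprise removal */
			return -EIO;
		msleep(10);			/* item 3: sleep, don't busy-wait */
	}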

David Stevens (1):
  virtio: Add support for the virtio suspend feature

 drivers/virtio/virtio.c| 60 ++
 drivers/virtio/virtio_pci_common.c | 34 -
 drivers/virtio/virtio_pci_modern.c | 19 ++
 include/linux/virtio.h |  8 
 include/uapi/linux/virtio_config.h | 10 -
 5 files changed, 112 insertions(+), 19 deletions(-)


base-commit: e8f897f4afef0031fe618a8e94127a0934896aba
-- 
2.44.0.769.g3c40516874-goog




[PATCH 1/1] virtio: Add support for the virtio suspend feature

2024-04-17 Thread David Stevens
Add support for the VIRTIO_F_SUSPEND feature. When this feature is
negotiated, power management can use it to suspend virtio devices
instead of resorting to resetting the devices entirely.

Signed-off-by: David Stevens 
---
 drivers/virtio/virtio.c| 32 ++
 drivers/virtio/virtio_pci_common.c | 29 +++
 drivers/virtio/virtio_pci_modern.c | 19 ++
 include/linux/virtio.h |  2 ++
 include/uapi/linux/virtio_config.h | 10 +-
 5 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f4080692b351..cd11495a5098 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include 
+#include <linux/delay.h>
 #include 
 #include 
 #include 
@@ -580,6 +581,37 @@ int virtio_device_restore(struct virtio_device *dev)
return ret;
 }
 EXPORT_SYMBOL_GPL(virtio_device_restore);
+
+static int virtio_device_set_suspend_bit(struct virtio_device *dev, bool enabled)
+{
+   u8 status, target;
+
+   status = dev->config->get_status(dev);
+   if (enabled)
+   target = status | VIRTIO_CONFIG_S_SUSPEND;
+   else
+   target = status & ~VIRTIO_CONFIG_S_SUSPEND;
+   dev->config->set_status(dev, target);
+
+   while ((status = dev->config->get_status(dev)) != target) {
+   if (status & VIRTIO_CONFIG_S_NEEDS_RESET)
+   return -EIO;
+   mdelay(10);
+   }
+   return 0;
+}
+
+int virtio_device_suspend(struct virtio_device *dev)
+{
+   return virtio_device_set_suspend_bit(dev, true);
+}
+EXPORT_SYMBOL_GPL(virtio_device_suspend);
+
+int virtio_device_resume(struct virtio_device *dev)
+{
+   return virtio_device_set_suspend_bit(dev, false);
+}
+EXPORT_SYMBOL_GPL(virtio_device_resume);
 #endif
 
 static int virtio_init(void)
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index b655fccaf773..4d542de05970 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -495,31 +495,26 @@ static int virtio_pci_restore(struct device *dev)
return virtio_device_restore(&vp_dev->vdev);
 }
 
-static bool vp_supports_pm_no_reset(struct device *dev)
+static int virtio_pci_suspend(struct device *dev)
 {
struct pci_dev *pci_dev = to_pci_dev(dev);
-   u16 pmcsr;
-
-   if (!pci_dev->pm_cap)
-   return false;
-
-   pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr);
-   if (PCI_POSSIBLE_ERROR(pmcsr)) {
-   dev_err(dev, "Unable to query pmcsr");
-   return false;
-   }
+   struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 
-   return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET;
-}
+   if (virtio_has_feature(&vp_dev->vdev, VIRTIO_F_SUSPEND))
+   return virtio_device_suspend(&vp_dev->vdev);
 
-static int virtio_pci_suspend(struct device *dev)
-{
-   return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev);
+   return virtio_pci_freeze(dev);
 }
 
 static int virtio_pci_resume(struct device *dev)
 {
-   return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev);
+   struct pci_dev *pci_dev = to_pci_dev(dev);
+   struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+
+   if (virtio_has_feature(&vp_dev->vdev, VIRTIO_F_SUSPEND))
+   return virtio_device_resume(&vp_dev->vdev);
+
+   return virtio_pci_restore(dev);
 }
 
 static const struct dev_pm_ops virtio_pci_pm_ops = {
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index f62b530aa3b5..ac8734526b8d 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -209,6 +209,22 @@ static void vp_modern_avq_deactivate(struct virtio_device *vdev)
__virtqueue_break(admin_vq->info.vq);
 }
 
+static bool vp_supports_pm_no_reset(struct pci_dev *pci_dev)
+{
+   u16 pmcsr;
+
+   if (!pci_dev->pm_cap)
+   return false;
+
+   pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+   if (PCI_POSSIBLE_ERROR(pmcsr)) {
+   dev_err(&pci_dev->dev, "Unable to query pmcsr");
+   return false;
+   }
+
+   return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET;
+}
+
 static void vp_transport_features(struct virtio_device *vdev, u64 features)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -223,6 +239,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features)
 
if (features & BIT_ULL(VIRTIO_F_ADMIN_VQ))
__virtio_set_bit(vdev, VIRTIO_F_ADMIN_VQ);
+
+   if (features & BIT_ULL(VIRTIO_F_SUSPEND) && vp_supports_pm_no_reset(pci_dev))
+   __virtio_set_bit(vdev, VIRTIO_F_SUSPEND);
 }
 
 static i

[PATCH 0/1] virtio: Add suspend support

2024-04-17 Thread David Stevens
This series implements support for the virtio device suspend feature
that is under discussion. Unfortunately, the virtio mailing list is
currently being migrated, so recent discussion of the proposal is not
archived anywhere. The current version of the proposal is a
combination of [1] and [2].

[1] https://lore.kernel.org/all/20230906081637.32185-3-lingshan@intel.com/
[2] https://lists.oasis-open.org/archives/virtio-comment/202402/msg00088.html

David Stevens (1):
  virtio: Add support for the virtio suspend feature

 drivers/virtio/virtio.c| 32 ++
 drivers/virtio/virtio_pci_common.c | 29 +++
 drivers/virtio/virtio_pci_modern.c | 19 ++
 include/linux/virtio.h |  2 ++
 include/uapi/linux/virtio_config.h | 10 +-
 5 files changed, 74 insertions(+), 18 deletions(-)


base-commit: e8f897f4afef0031fe618a8e94127a0934896aba
-- 
2.44.0.683.g7961c838ac-goog




[PATCH v2 0/2] Improvements to virtio_balloon pm

2024-03-20 Thread David Stevens
From: David Stevens 

The virtio_balloon driver uses wakeup sources to allow the guest to
enter system power management sleep states (e.g. s2idle) without running
the risk of becoming unresponsive to cooperative memory management
requests from the host. This series fixes an issue where wakeup sources
for inflate/deflate were improperly shared between drivers. It also
closes a race where stats requests that come in immediately before a
sleep state transition could fail to be handled in a timely manner.

v1: https://lore.kernel.org/lkml/20240318091034.535573-1-steve...@google.com/

v1 -> v2:
 - Add comment about virtio-balloon's wakeup source
 - Rename virtio-balloon wakeup event macros

David Stevens (2):
  virtio_balloon: Give the balloon its own wakeup source
  virtio_balloon: Treat stats requests as wakeup events

 drivers/virtio/virtio_balloon.c | 84 +
 1 file changed, 55 insertions(+), 29 deletions(-)


base-commit: e8f897f4afef0031fe618a8e94127a0934896aba
-- 
2.44.0.291.gc1ea87d7ee-goog




[PATCH v2 2/2] virtio_balloon: Treat stats requests as wakeup events

2024-03-20 Thread David Stevens
From: David Stevens 

Treat stats requests as wakeup events to ensure that the driver responds
to device requests in a timely manner.

Signed-off-by: David Stevens 
Acked-by: David Hildenbrand 
---
 drivers/virtio/virtio_balloon.c | 75 -
 1 file changed, 46 insertions(+), 29 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 89bc8da80519..b09e8e3c62e5 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -121,11 +121,14 @@ struct virtio_balloon {
struct page_reporting_dev_info pr_dev_info;
 
/* State for keeping the wakeup_source active while adjusting the balloon */
-   spinlock_t adjustment_lock;
-   bool adjustment_signal_pending;
-   bool adjustment_in_progress;
+   spinlock_t wakeup_lock;
+   bool processing_wakeup_event;
+   u32 wakeup_signal_mask;
 };
 
+#define VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST (1 << 0)
+#define VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS (1 << 1)
+
 static const struct virtio_device_id id_table[] = {
{ VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -140,6 +143,36 @@ static u32 page_to_balloon_pfn(struct page *page)
return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
 }
 
+static void start_wakeup_event(struct virtio_balloon *vb, u32 mask)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(&vb->wakeup_lock, flags);
+   vb->wakeup_signal_mask |= mask;
+   if (!vb->processing_wakeup_event) {
+   vb->processing_wakeup_event = true;
+   pm_stay_awake(&vb->vdev->dev);
+   }
+   spin_unlock_irqrestore(&vb->wakeup_lock, flags);
+}
+
+static void process_wakeup_event(struct virtio_balloon *vb, u32 mask)
+{
+   spin_lock_irq(&vb->wakeup_lock);
+   vb->wakeup_signal_mask &= ~mask;
+   spin_unlock_irq(&vb->wakeup_lock);
+}
+
+static void finish_wakeup_event(struct virtio_balloon *vb)
+{
+   spin_lock_irq(&vb->wakeup_lock);
+   if (!vb->wakeup_signal_mask && vb->processing_wakeup_event) {
+   vb->processing_wakeup_event = false;
+   pm_relax(&vb->vdev->dev);
+   }
+   spin_unlock_irq(&vb->wakeup_lock);
+}
+
 static void balloon_ack(struct virtqueue *vq)
 {
struct virtio_balloon *vb = vq->vdev->priv;
@@ -370,8 +403,10 @@ static void stats_request(struct virtqueue *vq)
struct virtio_balloon *vb = vq->vdev->priv;
 
spin_lock(&vb->stop_update_lock);
-   if (!vb->stop_update)
+   if (!vb->stop_update) {
+   start_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS);
queue_work(system_freezable_wq, &vb->update_balloon_stats_work);
+   }
spin_unlock(&vb->stop_update_lock);
 }
 
@@ -444,29 +479,10 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
 
 static void start_update_balloon_size(struct virtio_balloon *vb)
 {
-   unsigned long flags;
-
-   spin_lock_irqsave(&vb->adjustment_lock, flags);
-   vb->adjustment_signal_pending = true;
-   if (!vb->adjustment_in_progress) {
-   vb->adjustment_in_progress = true;
-   pm_stay_awake(&vb->vdev->dev);
-   }
-   spin_unlock_irqrestore(&vb->adjustment_lock, flags);
-
+   start_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST);
queue_work(system_freezable_wq, &vb->update_balloon_size_work);
 }
 
-static void end_update_balloon_size(struct virtio_balloon *vb)
-{
-   spin_lock_irq(&vb->adjustment_lock);
-   if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
-   vb->adjustment_in_progress = false;
-   pm_relax(&vb->vdev->dev);
-   }
-   spin_unlock_irq(&vb->adjustment_lock);
-}
-
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev->priv;
@@ -495,7 +511,10 @@ static void update_balloon_stats_func(struct work_struct *work)
 
vb = container_of(work, struct virtio_balloon,
  update_balloon_stats_work);
+
+   process_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_STATS);
stats_handle_request(vb);
+   finish_wakeup_event(vb);
 }
 
 static void update_balloon_size_func(struct work_struct *work)
@@ -506,9 +525,7 @@ static void update_balloon_size_func(struct work_struct *work)
vb = container_of(work, struct virtio_balloon,
  update_balloon_size_work);
 
-   spin_lock_irq(&vb->adjustment_lock);
-   vb->adjustment_signal_pending = false;
-   spin_unlock_irq(&vb->adjustment_lock);
+   process_wakeup_event(vb, VIRTIO_BALLOON_WAKEUP_SIGNAL_ADJUST);
 
diff = towards_target(vb);
 
@@ -523,7 +540,7 @@ static void update_balloon_size_func(struct work_struct *work)
if (diff)
queue_work(system_freezable_wq, work);
else
-   

[PATCH v2 1/2] virtio_balloon: Give the balloon its own wakeup source

2024-03-20 Thread David Stevens
From: David Stevens 

Wakeup sources don't support nesting multiple events, so sharing a
single object between multiple drivers can result in one driver
overriding the wakeup event processing period specified by another
driver. Have the virtio balloon driver use the wakeup source of the
device it is bound to rather than the wakeup source of the parent
device, to avoid conflicts with the transport layer.

Note that although the virtio balloon's virtio_device itself isn't what
actually wakes up the device, it is responsible for processing wakeup
events. In the same way that EPOLLWAKEUP uses a dedicated wakeup_source
to prevent suspend when userspace is processing wakeup events, a
dedicated wakeup_source is necessary when processing wakeup events in a
higher layer in the kernel.
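
A minimal illustration of the non-nesting behavior (hypothetical
drivers A and B sharing one wakeup source; pm_stay_awake()/pm_relax()
toggle a single active flag rather than maintaining a count):

	pm_stay_awake(parent);	/* A: starts handling a wakeup event */
	pm_stay_awake(parent);	/* B: source already active, no-op */
	pm_relax(parent);	/* B: deactivates the source, even though
				 * A is still processing its event */
	pm_relax(parent);	/* A: already inactive, no-op */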

Fixes: b12fbc3f787e ("virtio_balloon: stay awake while adjusting balloon")
Signed-off-by: David Stevens 
Acked-by: David Hildenbrand 
---
 drivers/virtio/virtio_balloon.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1f5b3dd31fcf..89bc8da80519 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -450,7 +450,7 @@ static void start_update_balloon_size(struct virtio_balloon *vb)
vb->adjustment_signal_pending = true;
if (!vb->adjustment_in_progress) {
vb->adjustment_in_progress = true;
-   pm_stay_awake(vb->vdev->dev.parent);
+   pm_stay_awake(&vb->vdev->dev);
}
spin_unlock_irqrestore(&vb->adjustment_lock, flags);
 
@@ -462,7 +462,7 @@ static void end_update_balloon_size(struct virtio_balloon *vb)
spin_lock_irq(&vb->adjustment_lock);
if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
vb->adjustment_in_progress = false;
-   pm_relax(vb->vdev->dev.parent);
+   pm_relax(&vb->vdev->dev);
}
spin_unlock_irq(&vb->adjustment_lock);
 }
@@ -1029,6 +1029,15 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
spin_lock_init(&vb->adjustment_lock);
 
+   /*
+* The virtio balloon itself can't wake up the device, but it is
+* responsible for processing wakeup events passed up from the transport
+* layer. Wakeup sources don't support nesting/chaining calls, so we use
+* our own wakeup source to ensure wakeup events are properly handled
+* without trampling on the transport layer's wakeup source.
+*/
+   device_set_wakeup_capable(&vb->vdev->dev, true);
+
virtio_device_ready(vdev);
 
if (towards_target(vb))
-- 
2.44.0.291.gc1ea87d7ee-goog




[PATCH 2/2] virtio_balloon: Treat stats requests as wakeup events

2024-03-18 Thread David Stevens
From: David Stevens 

Treat stats requests as wakeup events to ensure that the driver responds
to device requests in a timely manner.

Signed-off-by: David Stevens 
---
 drivers/virtio/virtio_balloon.c | 75 -
 1 file changed, 46 insertions(+), 29 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7fe7ef5f1c77..402dec98e08c 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -121,11 +121,14 @@ struct virtio_balloon {
struct page_reporting_dev_info pr_dev_info;
 
/* State for keeping the wakeup_source active while adjusting the balloon */
-   spinlock_t adjustment_lock;
-   bool adjustment_signal_pending;
-   bool adjustment_in_progress;
+   spinlock_t wakeup_lock;
+   bool processing_wakeup_event;
+   u32 wakeup_signal_mask;
 };
 
+#define ADJUSTMENT_WAKEUP_SIGNAL (1 << 0)
+#define STATS_WAKEUP_SIGNAL (1 << 1)
+
 static const struct virtio_device_id id_table[] = {
{ VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -140,6 +143,36 @@ static u32 page_to_balloon_pfn(struct page *page)
return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
 }
 
+static void start_wakeup_event(struct virtio_balloon *vb, u32 mask)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(&vb->wakeup_lock, flags);
+   vb->wakeup_signal_mask |= mask;
+   if (!vb->processing_wakeup_event) {
+   vb->processing_wakeup_event = true;
+   pm_stay_awake(&vb->vdev->dev);
+   }
+   spin_unlock_irqrestore(&vb->wakeup_lock, flags);
+}
+
+static void process_wakeup_event(struct virtio_balloon *vb, u32 mask)
+{
+   spin_lock_irq(&vb->wakeup_lock);
+   vb->wakeup_signal_mask &= ~mask;
+   spin_unlock_irq(&vb->wakeup_lock);
+}
+
+static void finish_wakeup_event(struct virtio_balloon *vb)
+{
+   spin_lock_irq(&vb->wakeup_lock);
+   if (!vb->wakeup_signal_mask && vb->processing_wakeup_event) {
+   vb->processing_wakeup_event = false;
+   pm_relax(&vb->vdev->dev);
+   }
+   spin_unlock_irq(&vb->wakeup_lock);
+}
+
 static void balloon_ack(struct virtqueue *vq)
 {
struct virtio_balloon *vb = vq->vdev->priv;
@@ -370,8 +403,10 @@ static void stats_request(struct virtqueue *vq)
struct virtio_balloon *vb = vq->vdev->priv;
 
spin_lock(&vb->stop_update_lock);
-   if (!vb->stop_update)
+   if (!vb->stop_update) {
+   start_wakeup_event(vb, STATS_WAKEUP_SIGNAL);
queue_work(system_freezable_wq, &vb->update_balloon_stats_work);
+   }
spin_unlock(&vb->stop_update_lock);
 }
 
@@ -444,29 +479,10 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
 
 static void start_update_balloon_size(struct virtio_balloon *vb)
 {
-   unsigned long flags;
-
-   spin_lock_irqsave(&vb->adjustment_lock, flags);
-   vb->adjustment_signal_pending = true;
-   if (!vb->adjustment_in_progress) {
-   vb->adjustment_in_progress = true;
-   pm_stay_awake(&vb->vdev->dev);
-   }
-   spin_unlock_irqrestore(&vb->adjustment_lock, flags);
-
+   start_wakeup_event(vb, ADJUSTMENT_WAKEUP_SIGNAL);
queue_work(system_freezable_wq, &vb->update_balloon_size_work);
 }
 
-static void end_update_balloon_size(struct virtio_balloon *vb)
-{
-   spin_lock_irq(&vb->adjustment_lock);
-   if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
-   vb->adjustment_in_progress = false;
-   pm_relax(&vb->vdev->dev);
-   }
-   spin_unlock_irq(&vb->adjustment_lock);
-}
-
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev->priv;
@@ -495,7 +511,10 @@ static void update_balloon_stats_func(struct work_struct *work)
 
vb = container_of(work, struct virtio_balloon,
  update_balloon_stats_work);
+
+   process_wakeup_event(vb, STATS_WAKEUP_SIGNAL);
stats_handle_request(vb);
+   finish_wakeup_event(vb);
 }
 
 static void update_balloon_size_func(struct work_struct *work)
@@ -506,9 +525,7 @@ static void update_balloon_size_func(struct work_struct *work)
vb = container_of(work, struct virtio_balloon,
  update_balloon_size_work);
 
-   spin_lock_irq(&vb->adjustment_lock);
-   vb->adjustment_signal_pending = false;
-   spin_unlock_irq(&vb->adjustment_lock);
+   process_wakeup_event(vb, ADJUSTMENT_WAKEUP_SIGNAL);
 
diff = towards_target(vb);
 
@@ -523,7 +540,7 @@ static void update_balloon_size_func(struct work_struct *work)
if (diff)
queue_work(system_freezable_wq, work);
else
-   end_update_balloon_size(vb);
+   finish_wakeup_event(vb);
 }
 
 static

[PATCH 1/2] virtio_balloon: Give the balloon its own wakeup source

2024-03-18 Thread David Stevens
From: David Stevens 

Wakeup sources don't support nesting multiple events, so sharing a
single object between multiple drivers can result in one driver
overriding the wakeup event processing period specified by another
driver. Have the virtio balloon driver use the wakeup source of the
device it is bound to rather than the wakeup source of the parent
device, to avoid conflicts with the transport layer.

Note that although the virtio balloon's virtio_device itself isn't what
actually wakes up the device, it is responsible for processing wakeup
events. In the same way that EPOLLWAKEUP uses a dedicated wakeup_source
to prevent suspend when userspace is processing wakeup events, a
dedicated wakeup_source is necessary when processing wakeup events in a
higher layer in the kernel.

Fixes: b12fbc3f787e ("virtio_balloon: stay awake while adjusting balloon")
Signed-off-by: David Stevens 
---
 drivers/virtio/virtio_balloon.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1f5b3dd31fcf..7fe7ef5f1c77 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -450,7 +450,7 @@ static void start_update_balloon_size(struct virtio_balloon *vb)
vb->adjustment_signal_pending = true;
if (!vb->adjustment_in_progress) {
vb->adjustment_in_progress = true;
-   pm_stay_awake(vb->vdev->dev.parent);
+   pm_stay_awake(&vb->vdev->dev);
}
spin_unlock_irqrestore(&vb->adjustment_lock, flags);
 
@@ -462,7 +462,7 @@ static void end_update_balloon_size(struct virtio_balloon *vb)
spin_lock_irq(&vb->adjustment_lock);
if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
vb->adjustment_in_progress = false;
-   pm_relax(vb->vdev->dev.parent);
+   pm_relax(&vb->vdev->dev);
}
spin_unlock_irq(&vb->adjustment_lock);
 }
@@ -1028,6 +1028,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
}
 
spin_lock_init(&vb->adjustment_lock);
+   device_set_wakeup_capable(&vb->vdev->dev, true);
 
virtio_device_ready(vdev);
 
-- 
2.44.0.291.gc1ea87d7ee-goog




[PATCH 0/2] Improvements to virtio_balloon pm

2024-03-18 Thread David Stevens
From: David Stevens 

The virtio_balloon driver uses wakeup sources to allow the guest to
enter system power management sleep states (e.g. s2idle) without running
the risk of becoming unresponsive to cooperative memory management
requests from the host. This series fixes an issue where wakeup sources
for inflate/deflate were improperly shared between drivers. It also
closes a race where stats requests that come in immediately before a
sleep state transition could fail to be handled in a timely manner.

David Stevens (2):
  virtio_balloon: Give the balloon its own wakeup source
  virtio_balloon: Treat stats requests as wakeup events

 drivers/virtio/virtio_balloon.c | 76 -
 1 file changed, 47 insertions(+), 29 deletions(-)


base-commit: e8f897f4afef0031fe618a8e94127a0934896aba
-- 
2.44.0.291.gc1ea87d7ee-goog




[PATCH v3] virtio_balloon: stay awake while adjusting balloon

2024-01-09 Thread David Stevens
From: David Stevens 

A virtio_balloon's parent device may be configured so that a
configuration change interrupt is a wakeup event. Extend the processing
of such a wakeup event until the balloon finishes inflating or deflating
by calling pm_stay_awake/pm_relax in the virtio_balloon driver. Note
that these calls are no-ops if the parent device doesn't support wakeup
events or if the wakeup events are not enabled.

This change allows the guest to use system power states such as s2idle
without running the risk of the virtio_balloon's cooperative memory
management becoming unresponsive to the host's requests.

Signed-off-by: David Stevens 
---
v2->v3:
 - Use _irq spinlock functions with adjustment_lock in workqueue, since
   the lock is accessed in an interrupt context.
v1 -> v2:
 - Use adjustment_signal_pending flag instead of a sequence number
 - Call pm_stay_awake/pm_relax on parent device instead of adding a wake
   event to the virtio balloon device

 drivers/virtio/virtio_balloon.c | 57 +++--
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1fe93e93f5bc..fa710e6c505a 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -119,6 +119,11 @@ struct virtio_balloon {
/* Free page reporting device */
struct virtqueue *reporting_vq;
struct page_reporting_dev_info pr_dev_info;
+
+   /* State for keeping the wakeup_source active while adjusting the balloon */
+   spinlock_t adjustment_lock;
+   bool adjustment_signal_pending;
+   bool adjustment_in_progress;
 };
 
 static const struct virtio_device_id id_table[] = {
@@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
queue_work(vb->balloon_wq, &vb->report_free_page_work);
 }
 
+static void start_update_balloon_size(struct virtio_balloon *vb)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(&vb->adjustment_lock, flags);
+   vb->adjustment_signal_pending = true;
+   if (!vb->adjustment_in_progress) {
+   vb->adjustment_in_progress = true;
+   pm_stay_awake(vb->vdev->dev.parent);
+   }
+   spin_unlock_irqrestore(&vb->adjustment_lock, flags);
+
+   queue_work(system_freezable_wq, &vb->update_balloon_size_work);
+}
+
+static void end_update_balloon_size(struct virtio_balloon *vb)
+{
+   spin_lock_irq(&vb->adjustment_lock);
+   if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
+   vb->adjustment_in_progress = false;
+   pm_relax(vb->vdev->dev.parent);
+   }
+   spin_unlock_irq(&vb->adjustment_lock);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev->priv;
@@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev)
 
spin_lock_irqsave(&vb->stop_update_lock, flags);
if (!vb->stop_update) {
-   queue_work(system_freezable_wq,
-  &vb->update_balloon_size_work);
+   start_update_balloon_size(vb);
virtio_balloon_queue_free_page_work(vb);
}
spin_unlock_irqrestore(&vb->stop_update_lock, flags);
@@ -476,19 +505,25 @@ static void update_balloon_size_func(struct work_struct *work)
 
vb = container_of(work, struct virtio_balloon,
  update_balloon_size_work);
-   diff = towards_target(vb);
 
-   if (!diff)
-   return;
+   spin_lock_irq(&vb->adjustment_lock);
+   vb->adjustment_signal_pending = false;
+   spin_unlock_irq(&vb->adjustment_lock);
 
-   if (diff > 0)
-   diff -= fill_balloon(vb, diff);
-   else
-   diff += leak_balloon(vb, -diff);
-   update_balloon_size(vb);
+   diff = towards_target(vb);
+
+   if (diff) {
+   if (diff > 0)
+   diff -= fill_balloon(vb, diff);
+   else
+   diff += leak_balloon(vb, -diff);
+   update_balloon_size(vb);
+   }
 
if (diff)
queue_work(system_freezable_wq, work);
+   else
+   end_update_balloon_size(vb);
 }
 
 static int init_vqs(struct virtio_balloon *vb)
@@ -992,6 +1027,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_unregister_oom;
}
 
+   spin_lock_init(&vb->adjustment_lock);
+
virtio_device_ready(vdev);
 
if (towards_target(vb))
-- 
2.43.0.472.g3155946c3a-goog




Re: REGRESSION: lockdep warning triggered by 15b9ce7ecd: virtio_balloon: stay awake while adjusting balloon

2024-01-08 Thread David Stevens

Oh, that's embarrassing, I completely whiffed on interactions with
interrupts. The following patch fixes it, and I've locally repro'ed
the issue and verified the fix. What's the process for getting this
fix merged? Does it get merged as a separate patch, or squashed into
the original commit?

From a99a1efa6a2b470a98ea2c87e58bebe90ce329a1 Mon Sep 17 00:00:00 2001
From: David Stevens 
Date: Tue, 9 Jan 2024 14:41:21 +0900
Subject: [PATCH] virtio_balloon: Fix interrupt context deadlock

Use _irq spinlock functions with the adjustment_lock, since
start_update_balloon_size needs to acquire it in an interrupt context.

Fixes: 15b9ce7ecd715 ("virtio_balloon: stay awake while adjusting balloon")
Reported-by: Theodore Ts'o 
Signed-off-by: David Stevens 
---
 drivers/virtio/virtio_balloon.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index aa6a1a649ad6..1f5b3dd31fcf 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -459,12 +459,12 @@ static void end_update_balloon_size(struct virtio_balloon *vb)

 static void end_update_balloon_size(struct virtio_balloon *vb)
 {
-   spin_lock(&vb->adjustment_lock);
+   spin_lock_irq(&vb->adjustment_lock);
if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
vb->adjustment_in_progress = false;
pm_relax(vb->vdev->dev.parent);
}
-   spin_unlock(&vb->adjustment_lock);
+   spin_unlock_irq(&vb->adjustment_lock);
 }

 static void virtballoon_changed(struct virtio_device *vdev)
@@ -506,9 +506,9 @@ static void update_balloon_size_func(struct work_struct *work)
vb = container_of(work, struct virtio_balloon,
  update_balloon_size_work);

-   spin_lock(&vb->adjustment_lock);
+   spin_lock_irq(&vb->adjustment_lock);
vb->adjustment_signal_pending = false;
-   spin_unlock(&vb->adjustment_lock);
+   spin_unlock_irq(&vb->adjustment_lock);

diff = towards_target(vb);

-- 
2.43.0.472.g3155946c3a-goog



Re: [PATCH v2] virtio_balloon: stay awake while adjusting balloon

2023-12-19 Thread David Stevens
On Mon, Dec 18, 2023 at 12:33 PM David Hildenbrand  wrote:
>
> On 18.12.23 16:18, David Stevens wrote:
> > From: David Stevens 
> >
> > A virtio_balloon's parent device may be configured so that a
> > configuration change interrupt is a wakeup event. Extend the processing
> > of such a wakeup event until the balloon finishes inflating or deflating
> > by calling pm_stay_awake/pm_relax in the virtio_balloon driver. Note
> > that these calls are no-ops if the parent device doesn't support wakeup
> > events or if the wakeup events are not enabled.
> >
> > This change allows the guest to use system power states such as s2idle
> > without running the risk of the virtio_balloon's cooperative memory
> > management becoming unresponsive to the host's requests.
> >
> > Signed-off-by: David Stevens 
> > ---
> > v1 -> v2:
> >   - Use adjustment_signal_pending flag instead of a sequence number
> >   - Call pm_stay_awake/pm_relax on parent device instead of adding a wake
> > event to the virtio balloon device
> >
> >   drivers/virtio/virtio_balloon.c | 57 +++--
> >   1 file changed, 47 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_balloon.c 
> > b/drivers/virtio/virtio_balloon.c
> > index 1fe93e93f5bc..a3c11159cbe0 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -119,6 +119,11 @@ struct virtio_balloon {
> >   /* Free page reporting device */
> >   struct virtqueue *reporting_vq;
> >   struct page_reporting_dev_info pr_dev_info;
> > +
> > + /* State for keeping the wakeup_source active while adjusting the 
> > balloon */
> > + spinlock_t adjustment_lock;
> > + bool adjustment_signal_pending;
> > + bool adjustment_in_progress;
> >   };
> >
> >   static const struct virtio_device_id id_table[] = {
> > @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct 
> > virtio_balloon *vb)
> >   queue_work(vb->balloon_wq, >report_free_page_work);
> >   }
> >
> > +static void start_update_balloon_size(struct virtio_balloon *vb)
> > +{
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&vb->adjustment_lock, flags);
> > + vb->adjustment_signal_pending = true;
> > + if (!vb->adjustment_in_progress) {
> > + vb->adjustment_in_progress = true;
> > + pm_stay_awake(vb->vdev->dev.parent);
> > + }
> > + spin_unlock_irqrestore(&vb->adjustment_lock, flags);
> > +
> > + queue_work(system_freezable_wq, &vb->update_balloon_size_work);
> > +}
> > +
> > +static void end_update_balloon_size(struct virtio_balloon *vb)
> > +{
> > + spin_lock(&vb->adjustment_lock);
> > + if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
>
> How could vb->adjustment_in_progress ever not be set at this point?
>
> > + vb->adjustment_in_progress = false;
> > + pm_relax(vb->vdev->dev.parent);
> > + }
> > + spin_unlock(&vb->adjustment_lock);
> > +}
> > +
>
> LGTM, although I wonder what happens when calling pm_stay_awake() etc.
> on a parent device that is not wakeup-even-capable?

If the parent device is not wakeup capable or if wakeup isn't enabled,
then the vb->vdev->dev.parent->power.wakeup pointer will be NULL, so
the NULL checks in __pm_relax/__pm_stay_awake will immediately return.
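
Roughly, paraphrasing the guards in drivers/base/power/wakeup.c
(locking elided; exact details vary by kernel version):

	void pm_stay_awake(struct device *dev)
	{
		if (!dev)
			return;
		/* dev->power.wakeup is NULL when the device is not
		 * wakeup-capable or wakeup is disabled. */
		__pm_stay_awake(dev->power.wakeup);
	}

	void __pm_stay_awake(struct wakeup_source *ws)
	{
		if (!ws)
			return;
		/* ... activate the wakeup source ... */
	}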

-David



Re: [PATCH] virtio_balloon: stay awake while adjusting balloon

2023-12-18 Thread David Stevens
On Mon, Dec 18, 2023 at 10:18 AM David Hildenbrand  wrote:
>
> On 18.12.23 16:16, David Stevens wrote:
> > On Mon, Dec 18, 2023 at 6:37 AM David Hildenbrand  wrote:
> >>
> >> On 14.12.23 05:13, David Stevens wrote:
> >>> On Wed, Dec 13, 2023 at 5:44 PM David Hildenbrand  
> >>> wrote:
> >>>>
> >>>> On 11.12.23 12:43, David Stevens wrote:
> >>>>> From: David Stevens 
> >>>>>
> >>>>
> >>>> Hi David,
> >>>>
> >>>>> Add a wakeup event for when the balloon is inflating or deflating.
> >>>>> Userspace can enable this wakeup event to prevent the system from
> >>>>> suspending while the balloon is being adjusted. This allows
> >>>>> /sys/power/wakeup_count to be used without breaking virtio_balloon's
> >>>>> cooperative memory management.
> >>>>
> >>>> Can you add/share some more details
> >>>
> >>> I'm working on enabling support for Linux s2Idle in our Android
> >>> virtual machine, to restrict apps from running in the background
> >>> without holding an Android partial wakelock. With the patch I recently
> >>> sent out [1], since crosvm advertises native PCI power management for
> >>> virtio devices, the Android guest can properly enter s2idle, and it
> >>> can be woken up by incoming IO. However, one of the remaining problems
> >>> is that when the host needs to reclaim memory from the guest via the
> >>> virtio-balloon, there is nothing preventing the guest from entering
> >>> s2idle before the balloon driver finishes returning memory to the
> >>> host.
> >>
> >> Thanks for the information. So you also want to wakeup the VM when
> >> wanting to get more memory from the VM?
> >>
> >> Using which mechanism would that wakeup happen? Likely not the device
> >> itself?
> >
> > The wakeup would happen via the parent device's interrupt. I've sent a
> > new version of this patch that uses the parent device's wakeup event
> > instead of adding a new one.
> >
> >>>
> >>> One alternative to this approach would be to add a virtballoon_suspend
> >>> callback to abort suspend if the balloon is inflating/adjusting.
> >>> However, it seems cleaner to just prevent suspend in the first place.
> >>
> >> Also, the PM notifier could also be used with very high priority, so the
> >> device would respond early to PM_SUSPEND_PREPARE.
> >
> > One drawback of blocking suspend via a PM notifier is that the
> > behavior isn't configurable by userspace, whereas wakeup events can be
> > enabled/disabled by userspace.
>
> The question is if that behavior for the balloon is really worth it
> being configured by user space?

It seems to me that the current behavior of completely resetting the
virtio balloon in virtballoon_freeze is basically antithetical to the
power management integration I'm adding, where power management
doesn't interrupt the virtio balloon's operation at all. So if there
are any systems that are actually happy with the current power
management behavior, they probably don't want suspend to be blocked by
inflation/deflation.

-David



[PATCH v2] virtio_balloon: stay awake while adjusting balloon

2023-12-18 Thread David Stevens
From: David Stevens 

A virtio_balloon's parent device may be configured so that a
configuration change interrupt is a wakeup event. Extend the processing
of such a wakeup event until the balloon finishes inflating or deflating
by calling pm_stay_awake/pm_relax in the virtio_balloon driver. Note
that these calls are no-ops if the parent device doesn't support wakeup
events or if the wakeup events are not enabled.

This change allows the guest to use system power states such as s2idle
without running the risk of the virtio_balloon's cooperative memory
management becoming unresponsive to the host's requests.

Signed-off-by: David Stevens 
---
v1 -> v2:
 - Use adjustment_signal_pending flag instead of a sequence number
 - Call pm_stay_awake/pm_relax on parent device instead of adding a wake
   event to the virtio balloon device

 drivers/virtio/virtio_balloon.c | 57 +++--
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1fe93e93f5bc..a3c11159cbe0 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -119,6 +119,11 @@ struct virtio_balloon {
/* Free page reporting device */
struct virtqueue *reporting_vq;
struct page_reporting_dev_info pr_dev_info;
+
+   /* State for keeping the wakeup_source active while adjusting the balloon */
+   spinlock_t adjustment_lock;
+   bool adjustment_signal_pending;
+   bool adjustment_in_progress;
 };
 
 static const struct virtio_device_id id_table[] = {
@@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
queue_work(vb->balloon_wq, &vb->report_free_page_work);
 }
 
+static void start_update_balloon_size(struct virtio_balloon *vb)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(&vb->adjustment_lock, flags);
+   vb->adjustment_signal_pending = true;
+   if (!vb->adjustment_in_progress) {
+   vb->adjustment_in_progress = true;
+   pm_stay_awake(vb->vdev->dev.parent);
+   }
+   spin_unlock_irqrestore(&vb->adjustment_lock, flags);
+
+   queue_work(system_freezable_wq, &vb->update_balloon_size_work);
+}
+
+static void end_update_balloon_size(struct virtio_balloon *vb)
+{
+   spin_lock(&vb->adjustment_lock);
+   if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
+   vb->adjustment_in_progress = false;
+   pm_relax(vb->vdev->dev.parent);
+   }
+   spin_unlock(&vb->adjustment_lock);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev->priv;
@@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev)
 
spin_lock_irqsave(&vb->stop_update_lock, flags);
if (!vb->stop_update) {
-   queue_work(system_freezable_wq,
-  &vb->update_balloon_size_work);
+   start_update_balloon_size(vb);
virtio_balloon_queue_free_page_work(vb);
}
spin_unlock_irqrestore(&vb->stop_update_lock, flags);
@@ -476,19 +505,25 @@ static void update_balloon_size_func(struct work_struct *work)
 
vb = container_of(work, struct virtio_balloon,
  update_balloon_size_work);
-   diff = towards_target(vb);
 
-   if (!diff)
-   return;
+   spin_lock(&vb->adjustment_lock);
+   vb->adjustment_signal_pending = false;
+   spin_unlock(&vb->adjustment_lock);
 
-   if (diff > 0)
-   diff -= fill_balloon(vb, diff);
-   else
-   diff += leak_balloon(vb, -diff);
-   update_balloon_size(vb);
+   diff = towards_target(vb);
+
+   if (diff) {
+   if (diff > 0)
+   diff -= fill_balloon(vb, diff);
+   else
+   diff += leak_balloon(vb, -diff);
+   update_balloon_size(vb);
+   }
 
if (diff)
queue_work(system_freezable_wq, work);
+   else
+   end_update_balloon_size(vb);
 }
 
 static int init_vqs(struct virtio_balloon *vb)
@@ -992,6 +1027,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_unregister_oom;
}
 
+   spin_lock_init(&vb->adjustment_lock);
+
virtio_device_ready(vdev);
 
if (towards_target(vb))
-- 
2.43.0.472.g3155946c3a-goog




Re: [PATCH] virtio_balloon: stay awake while adjusting balloon

2023-12-18 Thread David Stevens
On Mon, Dec 18, 2023 at 6:37 AM David Hildenbrand  wrote:
>
> On 14.12.23 05:13, David Stevens wrote:
> > On Wed, Dec 13, 2023 at 5:44 PM David Hildenbrand  wrote:
> >>
> >> On 11.12.23 12:43, David Stevens wrote:
> >>> From: David Stevens 
> >>>
> >>
> >> Hi David,
> >>
> >>> Add a wakeup event for when the balloon is inflating or deflating.
> >>> Userspace can enable this wakeup event to prevent the system from
> >>> suspending while the balloon is being adjusted. This allows
> >>> /sys/power/wakeup_count to be used without breaking virtio_balloon's
> >>> cooperative memory management.
> >>
> >> Can you add/share some more details
> >
> > I'm working on enabling support for Linux s2Idle in our Android
> > virtual machine, to restrict apps from running in the background
> > without holding an Android partial wakelock. With the patch I recently
> > sent out [1], since crosvm advertises native PCI power management for
> > virtio devices, the Android guest can properly enter s2idle, and it
> > can be woken up by incoming IO. However, one of the remaining problems
> > is that when the host needs to reclaim memory from the guest via the
> > virtio-balloon, there is nothing preventing the guest from entering
> > s2idle before the balloon driver finishes returning memory to the
> > host.
>
> Thanks for the information. So you also want to wakeup the VM when
> wanting to get more memory from the VM?
>
> Using which mechanism would that wakeup happen? Likely not the device
> itself?

The wakeup would happen via the parent device's interrupt. I've sent a
new version of this patch that uses the parent device's wakeup event
instead of adding a new one.

> >
> > One alternative to this approach would be to add a virtballoon_suspend
> > callback to abort suspend if the balloon is inflating/adjusting.
> > However, it seems cleaner to just prevent suspend in the first place.
>
> Also, the PM notifier could also be used with very high priority, so the
> device would respond early to PM_SUSPEND_PREPARE.

One drawback of blocking suspend via a PM notifier is that the
behavior isn't configurable by userspace, whereas wakeup events can be
enabled/disabled by userspace.

-David



Re: [PATCH v2] virtio: Add support for no-reset virtio PCI PM

2023-12-14 Thread David Stevens
On Thu, Dec 14, 2023 at 6:47 PM David Hildenbrand  wrote:
>
> On 08.12.23 08:07, David Stevens wrote:
> > If a virtio_pci_device supports native PCI power management and has the
> > No_Soft_Reset bit set, then skip resetting and reinitializing the device
> > when suspending and restoring the device. This allows system-wide low
> > power states like s2idle to be used in systems with stateful virtio
> > devices that can't simply be re-initialized (e.g. virtio-fs).
> >
> > Signed-off-by: David Stevens 
> > ---
> > v1 -> v2:
> >   - Check the No_Soft_Reset bit
> >
> >   drivers/virtio/virtio_pci_common.c | 34 +-
> >   1 file changed, 33 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/virtio/virtio_pci_common.c 
> > b/drivers/virtio/virtio_pci_common.c
> > index c2524a7207cf..3a95ecaf12dc 100644
> > --- a/drivers/virtio/virtio_pci_common.c
> > +++ b/drivers/virtio/virtio_pci_common.c
> > @@ -492,8 +492,40 @@ static int virtio_pci_restore(struct device *dev)
> >   return virtio_device_restore(&vp_dev->vdev);
> >   }
> >
> > +static bool vp_supports_pm_no_reset(struct device *dev)
> > +{
> > + struct pci_dev *pci_dev = to_pci_dev(dev);
> > + u16 pmcsr;
> > +
> > + if (!pci_dev->pm_cap)
> > + return false;
> > +
> > + pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr);
> > + if (PCI_POSSIBLE_ERROR(pmcsr)) {
> > + dev_err(dev, "Unable to query pmcsr");
> > + return false;
> > + }
> > +
> > + return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET;
> > +}
> > +
> > +static int virtio_pci_suspend(struct device *dev)
> > +{
> > + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev);
> > +}
> > +
> > +static int virtio_pci_resume(struct device *dev)
> > +{
> > + return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev);
> > +}
> > +
> >   static const struct dev_pm_ops virtio_pci_pm_ops = {
> > - SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
> > + .suspend = virtio_pci_suspend,
> > + .resume = virtio_pci_resume,
> > + .freeze = virtio_pci_freeze,
> > + .thaw = virtio_pci_restore,
> > + .poweroff = virtio_pci_freeze,
> > + .restore = virtio_pci_restore,
> >   };
> >   #endif
> >
>
> Am I correct with my assumption that this will make s2idle work with 
> virtio-mem-pci as well?
>
> Right now, all suspending is disabled, but really only s4/hibernate is 
> problematic.
>
> [root@vm-0 ~]# echo "s2idle" > /sys/power/mem_sleep
> [root@vm-0 ~]# echo "mem" > /sys/power/state
> [  101.822991] PM: suspend entry (s2idle)
> [  101.828978] Filesystems sync: 0.004 seconds
> [  101.831618] Freezing user space processes
> [  101.834569] Freezing user space processes completed (elapsed 0.001 seconds)
> [  101.836915] OOM killer disabled.
> [  101.838072] Freezing remaining freezable tasks
> [  101.841054] Freezing remaining freezable tasks completed (elapsed 0.001 
> seconds)
> [  101.843538] printk: Suspending console(s) (use no_console_suspend to debug)
> [  101.957676] virtio_mem virtio0: save/restore not supported.
> [  101.957683] virtio-pci :00:02.0: PM: pci_pm_suspend(): 
> virtio_pci_freeze+0x0/0x50 returns -1
> [  101.957702] virtio-pci :00:02.0: PM: dpm_run_callback(): 
> pci_pm_suspend+0x0/0x170 returns -1
> [  101.957718] virtio-pci :00:02.0: PM: failed to suspend async: error -1

QEMU's virtio-pci devices don't advertise no_soft_reset, so this patch
won't affect vanilla QEMU. But if you add PCI_PM_CTRL_NO_SOFT_RESET to
the capability, then it should work. I'm working with crosvm, which
doesn't have virtio-mem implemented, but this patch makes virtio-fs
work with no extra kernel changes.
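
For reference, No_Soft_Reset is bit 3 (0x0008) of the PMCSR register in
the PCI power management capability, so advertising it from a VMM is a
one-bit change to the emulated register. A hypothetical sketch (struct
and handler names invented for illustration; not crosvm or QEMU code):

	#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008

	/* Hypothetical config-space read handler for the PM capability. */
	static u16 pmcsr_read(const struct emulated_pci_dev *d)
	{
		/* Report the current power state plus No_Soft_Reset. */
		return d->pm_state | PCI_PM_CTRL_NO_SOFT_RESET;
	}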

-David



Re: [PATCH] virtio_balloon: stay awake while adjusting balloon

2023-12-13 Thread David Stevens
On Wed, Dec 13, 2023 at 5:44 PM David Hildenbrand  wrote:
>
> On 11.12.23 12:43, David Stevens wrote:
> > From: David Stevens 
> >
>
> Hi David,
>
> > Add a wakeup event for when the balloon is inflating or deflating.
> > Userspace can enable this wakeup event to prevent the system from
> > suspending while the balloon is being adjusted. This allows
> > /sys/power/wakeup_count to be used without breaking virtio_balloon's
> > cooperative memory management.
>
> Can you add/share some more details

I'm working on enabling support for Linux s2Idle in our Android
virtual machine, to restrict apps from running in the background
without holding an Android partial wakelock. With the patch I recently
sent out [1], since crosvm advertises native PCI power management for
virtio devices, the Android guest can properly enter s2idle, and it
can be woken up by incoming IO. However, one of the remaining problems
is that when the host needs to reclaim memory from the guest via the
virtio-balloon, there is nothing preventing the guest from entering
s2idle before the balloon driver finishes returning memory to the
host.

One alternative to this approach would be to add a virtballoon_suspend
callback to abort suspend if the balloon is inflating/adjusting.
However, it seems cleaner to just prevent suspend in the first place.

[1] https://lore.kernel.org/lkml/20231208070754.3132339-1-steve...@chromium.org/

> >
> > Signed-off-by: David Stevens 
> > ---
> >   drivers/virtio/virtio_balloon.c | 59 +++--
> >   1 file changed, 49 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_balloon.c 
> > b/drivers/virtio/virtio_balloon.c
> > index 1fe93e93f5bc..811d8937246a 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -119,6 +119,11 @@ struct virtio_balloon {
> >   /* Free page reporting device */
> >   struct virtqueue *reporting_vq;
> >   struct page_reporting_dev_info pr_dev_info;
> > +
> > + /* State for keeping the wakeup_source active while adjusting the 
> > balloon */
> > + spinlock_t adjustment_lock;
> > + u32 adjustment_seqno;
>
> Using a simple flag that gets set when updating the balloon size and
> test-and-clear when testing for changes should be easier to get.
>
> bool adjustment_balloon_size_changed;
>
> or sth like that.

That's a good simplification, thanks.

> > + bool adjustment_in_progress;
> >   };
> >
> >   static const struct virtio_device_id id_table[] = {
> > @@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct 
> > virtio_balloon *vb)
> >   queue_work(vb->balloon_wq, &vb->report_free_page_work);
> >   }
> >
> > +static void start_update_balloon_size(struct virtio_balloon *vb)
> > +{
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&vb->adjustment_lock, flags);
> > + vb->adjustment_seqno++;
> > + if (!vb->adjustment_in_progress) {
> > + vb->adjustment_in_progress = true;
> > + pm_stay_awake(&vb->vdev->dev);
> > + }
> > + spin_unlock_irqrestore(&vb->adjustment_lock, flags);
> > +
> > + queue_work(system_freezable_wq, &vb->update_balloon_size_work);
> > +}
> > +
> > +static void end_update_balloon_size(struct virtio_balloon *vb, u32 seqno)
> > +{
> > + spin_lock(&vb->adjustment_lock);
> > + if (vb->adjustment_seqno == seqno && vb->adjustment_in_progress) {
> > + vb->adjustment_in_progress = false;
> > + pm_relax(&vb->vdev->dev);
> > + }
> > + spin_unlock(&vb->adjustment_lock);
> > +}
> > +
> >   static void virtballoon_changed(struct virtio_device *vdev)
> >   {
> >   struct virtio_balloon *vb = vdev->priv;
> > @@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device 
> > *vdev)
> >
> >   spin_lock_irqsave(&vb->stop_update_lock, flags);
> >   if (!vb->stop_update) {
> > - queue_work(system_freezable_wq,
> > -  &vb->update_balloon_size_work);
> > + start_update_balloon_size(vb);
> >   virtio_balloon_queue_free_page_work(vb);
> >   }
> >   spin_unlock_irqrestore(&vb->stop_update_lock, flags);
> > @@ -473,22 +502,29 @@ static void update_balloon_size_func(struct 
> > work_struct *work)
> >   {
> >   struct virtio_balloon *vb;
> >   s64 diff;
> > + u32 seqno;
> >
> >   vb = container_of(work, str

[PATCH] virtio_balloon: stay awake while adjusting balloon

2023-12-11 Thread David Stevens
From: David Stevens 

Add a wakeup event for when the balloon is inflating or deflating.
Userspace can enable this wakeup event to prevent the system from
suspending while the balloon is being adjusted. This allows
/sys/power/wakeup_count to be used without breaking virtio_balloon's
cooperative memory management.

Signed-off-by: David Stevens 
---
 drivers/virtio/virtio_balloon.c | 59 +++--
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1fe93e93f5bc..811d8937246a 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -119,6 +119,11 @@ struct virtio_balloon {
/* Free page reporting device */
struct virtqueue *reporting_vq;
struct page_reporting_dev_info pr_dev_info;
+
+   /* State for keeping the wakeup_source active while adjusting the balloon */
+   spinlock_t adjustment_lock;
+   u32 adjustment_seqno;
+   bool adjustment_in_progress;
 };
 
 static const struct virtio_device_id id_table[] = {
@@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
queue_work(vb->balloon_wq, &vb->report_free_page_work);
 }
 
+static void start_update_balloon_size(struct virtio_balloon *vb)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(&vb->adjustment_lock, flags);
+   vb->adjustment_seqno++;
+   if (!vb->adjustment_in_progress) {
+   vb->adjustment_in_progress = true;
+   pm_stay_awake(&vb->vdev->dev);
+   }
+   spin_unlock_irqrestore(&vb->adjustment_lock, flags);
+
+   queue_work(system_freezable_wq, &vb->update_balloon_size_work);
+}
+
+static void end_update_balloon_size(struct virtio_balloon *vb, u32 seqno)
+{
+   spin_lock(&vb->adjustment_lock);
+   if (vb->adjustment_seqno == seqno && vb->adjustment_in_progress) {
+   vb->adjustment_in_progress = false;
+   pm_relax(&vb->vdev->dev);
+   }
+   spin_unlock(&vb->adjustment_lock);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev->priv;
@@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev)
 
spin_lock_irqsave(&vb->stop_update_lock, flags);
if (!vb->stop_update) {
-   queue_work(system_freezable_wq,
-  &vb->update_balloon_size_work);
+   start_update_balloon_size(vb);
virtio_balloon_queue_free_page_work(vb);
}
spin_unlock_irqrestore(&vb->stop_update_lock, flags);
@@ -473,22 +502,29 @@ static void update_balloon_size_func(struct work_struct *work)
 {
struct virtio_balloon *vb;
s64 diff;
+   u32 seqno;
 
vb = container_of(work, struct virtio_balloon,
  update_balloon_size_work);
-   diff = towards_target(vb);
 
-   if (!diff)
-   return;
+   spin_lock(&vb->adjustment_lock);
+   seqno = vb->adjustment_seqno;
+   spin_unlock(&vb->adjustment_lock);
 
-   if (diff > 0)
-   diff -= fill_balloon(vb, diff);
-   else
-   diff += leak_balloon(vb, -diff);
-   update_balloon_size(vb);
+   diff = towards_target(vb);
+
+   if (diff) {
+   if (diff > 0)
+   diff -= fill_balloon(vb, diff);
+   else
+   diff += leak_balloon(vb, -diff);
+   update_balloon_size(vb);
+   }
 
if (diff)
queue_work(system_freezable_wq, work);
+   else
+   end_update_balloon_size(vb, seqno);
 }
 
 static int init_vqs(struct virtio_balloon *vb)
@@ -992,6 +1028,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_unregister_oom;
}
 
+   spin_lock_init(&vb->adjustment_lock);
+   device_set_wakeup_capable(&vb->vdev->dev, true);
+
virtio_device_ready(vdev);
 
if (towards_target(vb))
-- 
2.43.0.472.g3155946c3a-goog




[PATCH v2] virtio: Add support for no-reset virtio PCI PM

2023-12-07 Thread David Stevens
If a virtio_pci_device supports native PCI power management and has the
No_Soft_Reset bit set, then skip resetting and reinitializing the device
on suspend and resume. This allows system-wide low
power states like s2idle to be used in systems with stateful virtio
devices that can't simply be re-initialized (e.g. virtio-fs).

Signed-off-by: David Stevens 
---
v1 -> v2:
 - Check the No_Soft_Reset bit

 drivers/virtio/virtio_pci_common.c | 34 +-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_pci_common.c 
b/drivers/virtio/virtio_pci_common.c
index c2524a7207cf..3a95ecaf12dc 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -492,8 +492,40 @@ static int virtio_pci_restore(struct device *dev)
return virtio_device_restore(&vp_dev->vdev);
 }
 
+static bool vp_supports_pm_no_reset(struct device *dev)
+{
+   struct pci_dev *pci_dev = to_pci_dev(dev);
+   u16 pmcsr;
+
+   if (!pci_dev->pm_cap)
+   return false;
+
+   pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+   if (PCI_POSSIBLE_ERROR(pmcsr)) {
+   dev_err(dev, "Unable to query pmcsr");
+   return false;
+   }
+
+   return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET;
+}
+
+static int virtio_pci_suspend(struct device *dev)
+{
+   return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev);
+}
+
+static int virtio_pci_resume(struct device *dev)
+{
+   return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev);
+}
+
 static const struct dev_pm_ops virtio_pci_pm_ops = {
-   SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
+   .suspend = virtio_pci_suspend,
+   .resume = virtio_pci_resume,
+   .freeze = virtio_pci_freeze,
+   .thaw = virtio_pci_restore,
+   .poweroff = virtio_pci_freeze,
+   .restore = virtio_pci_restore,
 };
 #endif
 
-- 
2.43.0.472.g3155946c3a-goog




[PATCH] virtio: Add virtio-device power management

2023-11-12 Thread David Stevens
Virtio power management for virtio PCI devices is defined on top of PCI
power management, so it mostly works out of the box. All that's needed
is to add suspend/resume callbacks to the virtio core, instead of
reusing the freeze/restore callbacks.

Signed-off-by: David Stevens 
---
 drivers/virtio/virtio_pci_common.c | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_pci_common.c 
b/drivers/virtio/virtio_pci_common.c
index c2524a7207cf..0cad900eaf4f 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -492,8 +492,27 @@ static int virtio_pci_restore(struct device *dev)
return virtio_device_restore(&vp_dev->vdev);
 }
 
+static int virtio_pci_suspend(struct device *dev)
+{
+   struct pci_dev *pci_dev = to_pci_dev(dev);
+
+   return pci_dev->pm_cap ? 0 : virtio_pci_freeze(dev);
+}
+
+static int virtio_pci_resume(struct device *dev)
+{
+   struct pci_dev *pci_dev = to_pci_dev(dev);
+
+   return pci_dev->pm_cap ? 0 : virtio_pci_restore(dev);
+}
+
 static const struct dev_pm_ops virtio_pci_pm_ops = {
-   SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
+   .suspend = virtio_pci_suspend,
+   .resume = virtio_pci_resume,
+   .freeze = virtio_pci_freeze,
+   .thaw = virtio_pci_restore,
+   .poweroff = virtio_pci_freeze,
+   .restore = virtio_pci_restore,
 };
 #endif
 
-- 
2.42.0.869.gea05f2083d-goog




Re: [PATCH v3] drm/syncobj: use newly allocated stub fences

2021-04-08 Thread David Stevens
Pushing to drm-misc-next works for me. Thanks for the quick responses.

-David

On Thu, Apr 8, 2021 at 6:56 PM Christian König  wrote:
>
> Am 08.04.21 um 11:54 schrieb David Stevens:
> > From: David Stevens 
> >
> > Allocate a new private stub fence in drm_syncobj_assign_null_handle,
> > instead of using a static stub fence.
> >
> > When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when
> > userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp
> > obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO
> > should match when the fence's status was changed from the perspective of
> > userspace, which is during the respective ioctl.
> >
> > When a static stub fence started being used by these ioctls, this
> > behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO
> > became the first time anything used the static stub fence, which has no
> > meaning to userspace.
> >
> > Signed-off-by: David Stevens 
>
> Reviewed-by: Christian König 
>
> Should I push this to drm-misc-next or how do you want to upstream it?
>
> Thanks,
> Christian.
>
> > ---
> > v2 -> v3:
> >   - reuse the static stub spinlock
> > v1 -> v2:
> >   - checkpatch style fixes
> >
> >   drivers/dma-buf/dma-fence.c   | 27 ++-
> >   drivers/gpu/drm/drm_syncobj.c | 25 +++--
> >   include/linux/dma-fence.h |  1 +
> >   3 files changed, 46 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > index d64fc03929be..ce0f5eff575d 100644
> > --- a/drivers/dma-buf/dma-fence.c
> > +++ b/drivers/dma-buf/dma-fence.c
> > @@ -123,7 +123,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = {
> >   /**
> >* dma_fence_get_stub - return a signaled fence
> >*
> > - * Return a stub fence which is already signaled.
> > + * Return a stub fence which is already signaled. The fence's
> > + * timestamp corresponds to the first time after boot this
> > + * function is called.
> >*/
> >   struct dma_fence *dma_fence_get_stub(void)
> >   {
> > @@ -141,6 +143,29 @@ struct dma_fence *dma_fence_get_stub(void)
> >   }
> >   EXPORT_SYMBOL(dma_fence_get_stub);
> >
> > +/**
> > + * dma_fence_allocate_private_stub - return a private, signaled fence
> > + *
> > + * Return a newly allocated and signaled stub fence.
> > + */
> > +struct dma_fence *dma_fence_allocate_private_stub(void)
> > +{
> > + struct dma_fence *fence;
> > +
> > + fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> > + if (fence == NULL)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + dma_fence_init(fence,
> > +&dma_fence_stub_ops,
> > +&dma_fence_stub_lock,
> > +0, 0);
> > + dma_fence_signal(fence);
> > +
> > + return fence;
> > +}
> > +EXPORT_SYMBOL(dma_fence_allocate_private_stub);
> > +
> >   /**
> >* dma_fence_context_alloc - allocate an array of fence contexts
> >* @num: amount of contexts to allocate
> > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> > index 349146049849..a54aa850d143 100644
> > --- a/drivers/gpu/drm/drm_syncobj.c
> > +++ b/drivers/gpu/drm/drm_syncobj.c
> > @@ -350,12 +350,16 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence);
> >*
> >* Assign a already signaled stub fence to the sync object.
> >*/
> > -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> > +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> >   {
> > - struct dma_fence *fence = dma_fence_get_stub();
> > + struct dma_fence *fence = dma_fence_allocate_private_stub();
> > +
> > + if (IS_ERR(fence))
> > + return PTR_ERR(fence);
> >
> >   drm_syncobj_replace_fence(syncobj, fence);
> >   dma_fence_put(fence);
> > + return 0;
> >   }
> >
> >   /* 5s default for wait submission */
> > @@ -469,6 +473,7 @@ EXPORT_SYMBOL(drm_syncobj_free);
> >   int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
> >  struct dma_fence *fence)
> >   {
> > + int ret;
> >   struct drm_syncobj *syncobj;
> >
> >   syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
> > @@ -479,8 +484,13 @@ int drm_syncobj_create(struct drm_syncobj 
> > **out_syncobj, 

[PATCH v3] drm/syncobj: use newly allocated stub fences

2021-04-08 Thread David Stevens
From: David Stevens 

Allocate a new private stub fence in drm_syncobj_assign_null_handle,
instead of using a static stub fence.

When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when
userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp
obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO
should match when the fence's status was changed from the perspective of
userspace, which is during the respective ioctl.

When a static stub fence started being used by these ioctls, this
behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO
became the first time anything used the static stub fence, which has no
meaning to userspace.
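
As a reference point, here is a hedged userspace sketch of how the
timestamp in question is observed through the sync_file uapi
(<linux/sync_file.h>; assumes 'fd' is a sync_file exported from the
syncobj and containing a single fence; error handling elided):

#include <linux/sync_file.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

static void print_fence_timestamp(int fd)
{
	struct sync_fence_info fence = { 0 };
	struct sync_file_info info = {
		/* One fence expected; kernel fills the array we point at. */
		.num_fences = 1,
		.sync_fence_info = (uint64_t)(uintptr_t)&fence,
	};

	if (ioctl(fd, SYNC_IOC_FILE_INFO, &info) == 0)
		printf("status %d changed at %llu ns\n", fence.status,
		       (unsigned long long)fence.timestamp_ns);
}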

Signed-off-by: David Stevens 
---
v2 -> v3:
 - reuse the static stub spinlock
v1 -> v2:
 - checkpatch style fixes

 drivers/dma-buf/dma-fence.c   | 27 ++-
 drivers/gpu/drm/drm_syncobj.c | 25 +++--
 include/linux/dma-fence.h |  1 +
 3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index d64fc03929be..ce0f5eff575d 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -123,7 +123,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = {
 /**
  * dma_fence_get_stub - return a signaled fence
  *
- * Return a stub fence which is already signaled.
+ * Return a stub fence which is already signaled. The fence's
+ * timestamp corresponds to the first time after boot this
+ * function is called.
  */
 struct dma_fence *dma_fence_get_stub(void)
 {
@@ -141,6 +143,29 @@ struct dma_fence *dma_fence_get_stub(void)
 }
 EXPORT_SYMBOL(dma_fence_get_stub);
 
+/**
+ * dma_fence_allocate_private_stub - return a private, signaled fence
+ *
+ * Return a newly allocated and signaled stub fence.
+ */
+struct dma_fence *dma_fence_allocate_private_stub(void)
+{
+   struct dma_fence *fence;
+
+   fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+   if (fence == NULL)
+   return ERR_PTR(-ENOMEM);
+
+   dma_fence_init(fence,
+  &dma_fence_stub_ops,
+  &dma_fence_stub_lock,
+  0, 0);
+   dma_fence_signal(fence);
+
+   return fence;
+}
+EXPORT_SYMBOL(dma_fence_allocate_private_stub);
+
 /**
  * dma_fence_context_alloc - allocate an array of fence contexts
  * @num: amount of contexts to allocate
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 349146049849..a54aa850d143 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -350,12 +350,16 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence);
  *
  * Assign a already signaled stub fence to the sync object.
  */
-static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
+static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 {
-   struct dma_fence *fence = dma_fence_get_stub();
+   struct dma_fence *fence = dma_fence_allocate_private_stub();
+
+   if (IS_ERR(fence))
+   return PTR_ERR(fence);
 
drm_syncobj_replace_fence(syncobj, fence);
dma_fence_put(fence);
+   return 0;
 }
 
 /* 5s default for wait submission */
@@ -469,6 +473,7 @@ EXPORT_SYMBOL(drm_syncobj_free);
 int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
   struct dma_fence *fence)
 {
+   int ret;
struct drm_syncobj *syncobj;
 
syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
@@ -479,8 +484,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, 
uint32_t flags,
INIT_LIST_HEAD(&syncobj->cb_list);
spin_lock_init(&syncobj->lock);
 
-   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED)
-   drm_syncobj_assign_null_handle(syncobj);
+   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
+   ret = drm_syncobj_assign_null_handle(syncobj);
+   if (ret < 0) {
+   drm_syncobj_put(syncobj);
+   return ret;
+   }
+   }
 
if (fence)
drm_syncobj_replace_fence(syncobj, fence);
@@ -1322,8 +1332,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
if (ret < 0)
return ret;
 
-   for (i = 0; i < args->count_handles; i++)
-   drm_syncobj_assign_null_handle(syncobjs[i]);
+   for (i = 0; i < args->count_handles; i++) {
+   ret = drm_syncobj_assign_null_handle(syncobjs[i]);
+   if (ret < 0)
+   break;
+   }
 
drm_syncobj_array_free(syncobjs, args->count_handles);
 
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 9f12efaaa93a..6ffb4b2c6371 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -587,6 +587,7 @@ static inline signed long dma_fence_wait(struct dma_fence 
*fence, bool intr)
 }
 
 struct dma_fence 

[PATCH v2] drm/syncobj: use newly allocated stub fences

2021-04-08 Thread David Stevens
From: David Stevens 

Allocate a new private stub fence in drm_syncobj_assign_null_handle,
instead of using a static stub fence.

When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when
userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp
obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO
should match when the fence's status was changed from the perspective of
userspace, which is during the respective ioctl.

When a static stub fence started being used by these ioctls, this
behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO
became the first time anything used the static stub fence, which has no
meaning to userspace.

Signed-off-by: David Stevens 
---
v1 -> v2:
 - checkpatch style fixes

 drivers/dma-buf/dma-fence.c   | 33 -
 drivers/gpu/drm/drm_syncobj.c | 25 +++--
 include/linux/dma-fence.h |  1 +
 3 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index d64fc03929be..6081eb962490 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -26,6 +26,11 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
 
+struct drm_fence_private_stub {
+   struct dma_fence base;
+   spinlock_t lock;
+};
+
 /*
  * fence context counter: each execution context should have its own
  * fence context, this allows checking if fences belong to the same
@@ -123,7 +128,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = {
 /**
  * dma_fence_get_stub - return a signaled fence
  *
- * Return a stub fence which is already signaled.
+ * Return a stub fence which is already signaled. The fence's
+ * timestamp corresponds to the first time after boot this
+ * function is called.
  */
 struct dma_fence *dma_fence_get_stub(void)
 {
@@ -141,6 +148,30 @@ struct dma_fence *dma_fence_get_stub(void)
 }
 EXPORT_SYMBOL(dma_fence_get_stub);
 
+/**
+ * dma_fence_allocate_private_stub - return a private, signaled fence
+ *
+ * Return a newly allocated and signaled stub fence.
+ */
+struct dma_fence *dma_fence_allocate_private_stub(void)
+{
+   struct drm_fence_private_stub *fence;
+
+   fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+   if (fence == NULL)
+   return ERR_PTR(-ENOMEM);
+
+   spin_lock_init(&fence->lock);
+   dma_fence_init(&fence->base,
+  &dma_fence_stub_ops,
+  &fence->lock,
+  0, 0);
+   dma_fence_signal(&fence->base);
+
+   return &fence->base;
+}
+EXPORT_SYMBOL(dma_fence_allocate_private_stub);
+
 /**
  * dma_fence_context_alloc - allocate an array of fence contexts
  * @num: amount of contexts to allocate
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 349146049849..a54aa850d143 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -350,12 +350,16 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence);
  *
  * Assign a already signaled stub fence to the sync object.
  */
-static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
+static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 {
-   struct dma_fence *fence = dma_fence_get_stub();
+   struct dma_fence *fence = dma_fence_allocate_private_stub();
+
+   if (IS_ERR(fence))
+   return PTR_ERR(fence);
 
drm_syncobj_replace_fence(syncobj, fence);
dma_fence_put(fence);
+   return 0;
 }
 
 /* 5s default for wait submission */
@@ -469,6 +473,7 @@ EXPORT_SYMBOL(drm_syncobj_free);
 int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
   struct dma_fence *fence)
 {
+   int ret;
struct drm_syncobj *syncobj;
 
syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
@@ -479,8 +484,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, 
uint32_t flags,
INIT_LIST_HEAD(&syncobj->cb_list);
spin_lock_init(&syncobj->lock);
 
-   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED)
-   drm_syncobj_assign_null_handle(syncobj);
+   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
+   ret = drm_syncobj_assign_null_handle(syncobj);
+   if (ret < 0) {
+   drm_syncobj_put(syncobj);
+   return ret;
+   }
+   }
 
if (fence)
drm_syncobj_replace_fence(syncobj, fence);
@@ -1322,8 +1332,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
if (ret < 0)
return ret;
 
-   for (i = 0; i < args->count_handles; i++)
-   drm_syncobj_assign_null_handle(syncobjs[i]);
+   for (i = 0; i < args->count_handles; i++) {
+   ret = drm_syncobj_assign_null_handle(syncobjs[i]);
+   if (ret < 0)
+   

Re: [PATCH] drm/syncobj: use newly allocated stub fences

2021-04-08 Thread David Stevens
Sorry, I forgot to checkpatch this, I'll resend it momentarily.

-David

On Thu, Apr 8, 2021 at 6:34 PM David Stevens  wrote:
>
> From: David Stevens 
>
> Allocate a new private stub fence in drm_syncobj_assign_null_handle,
> instead of using a static stub fence.
>
> When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when
> userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp
> obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO
> should match when the fence's status was changed from the perspective of
> userspace, which is during the respective ioctl.
>
> When a static stub fence started being used by these ioctls, this
> behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO
> became the first time anything used the static stub fence, which has no
> meaning to userspace.
>
> Signed-off-by: David Stevens 
> ---
>  drivers/dma-buf/dma-fence.c   | 33 -
>  drivers/gpu/drm/drm_syncobj.c | 28 
>  include/linux/dma-fence.h |  1 +
>  3 files changed, 53 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index d64fc03929be..6081eb962490 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -26,6 +26,11 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
>  static DEFINE_SPINLOCK(dma_fence_stub_lock);
>  static struct dma_fence dma_fence_stub;
>
> +struct drm_fence_private_stub {
> +   struct dma_fence base;
> +   spinlock_t lock;
> +};
> +
>  /*
>   * fence context counter: each execution context should have its own
>   * fence context, this allows checking if fences belong to the same
> @@ -123,7 +128,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = {
>  /**
>   * dma_fence_get_stub - return a signaled fence
>   *
> - * Return a stub fence which is already signaled.
> + * Return a stub fence which is already signaled. The fence's
> + * timestamp corresponds to the first time after boot this
> + * function is called.
>   */
>  struct dma_fence *dma_fence_get_stub(void)
>  {
> @@ -141,6 +148,30 @@ struct dma_fence *dma_fence_get_stub(void)
>  }
>  EXPORT_SYMBOL(dma_fence_get_stub);
>
> +/**
> + * dma_fence_allocate_private_stub - return a private, signaled fence
> + *
> + * Return a newly allocated and signaled stub fence.
> + */
> +struct dma_fence *dma_fence_allocate_private_stub(void)
> +{
> +   struct drm_fence_private_stub *fence;
> +
> +   fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +   if (fence == NULL)
> +   return ERR_PTR(-ENOMEM);
> +
> +   spin_lock_init(&fence->lock);
> +   dma_fence_init(&fence->base,
> +  &dma_fence_stub_ops,
> +  &fence->lock,
> +  0, 0);
> +   dma_fence_signal(&fence->base);
> +
> +   return &fence->base;
> +}
> +EXPORT_SYMBOL(dma_fence_allocate_private_stub);
> +
>  /**
>   * dma_fence_context_alloc - allocate an array of fence contexts
>   * @num: amount of contexts to allocate
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 349146049849..c6125e57ae37 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -350,12 +350,15 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence);
>   *
>   * Assign a already signaled stub fence to the sync object.
>   */
> -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>  {
> -   struct dma_fence *fence = dma_fence_get_stub();
> +   struct dma_fence *fence = dma_fence_allocate_private_stub();
> +   if (IS_ERR(fence))
> +  return PTR_ERR(fence);
>
> -   drm_syncobj_replace_fence(syncobj, fence);
> -   dma_fence_put(fence);
> +   drm_syncobj_replace_fence(syncobj, fence);
> +   dma_fence_put(fence);
> +   return 0;
>  }
>
>  /* 5s default for wait submission */
> @@ -469,6 +472,7 @@ EXPORT_SYMBOL(drm_syncobj_free);
>  int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>struct dma_fence *fence)
>  {
> +   int ret;
> struct drm_syncobj *syncobj;
>
> syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
> @@ -479,8 +483,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, 
> uint32_t flags,
> INIT_LIST_HEAD(&syncobj->cb_list);
> spin_lock_init(&syncobj->lock);
>
> -   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED)
> -   drm_syncobj_assign_null_handle(syncobj);
> +   if (flags &a

[PATCH] drm/syncobj: use newly allocated stub fences

2021-04-08 Thread David Stevens
From: David Stevens 

Allocate a new private stub fence in drm_syncobj_assign_null_handle,
instead of using a static stub fence.

When userspace creates a fence with DRM_SYNCOBJ_CREATE_SIGNALED or when
userspace signals a fence via DRM_IOCTL_SYNCOBJ_SIGNAL, the timestamp
obtained when the fence is exported and queried with SYNC_IOC_FILE_INFO
should match when the fence's status was changed from the perspective of
userspace, which is during the respective ioctl.

When a static stub fence started being used by these ioctls, this
behavior changed. Instead, the timestamp returned by SYNC_IOC_FILE_INFO
became the first time anything used the static stub fence, which has no
meaning to userspace.

Signed-off-by: David Stevens 
---
 drivers/dma-buf/dma-fence.c   | 33 -
 drivers/gpu/drm/drm_syncobj.c | 28 
 include/linux/dma-fence.h |  1 +
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index d64fc03929be..6081eb962490 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -26,6 +26,11 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
 
+struct drm_fence_private_stub {
+   struct dma_fence base;
+   spinlock_t lock;
+};
+
 /*
  * fence context counter: each execution context should have its own
  * fence context, this allows checking if fences belong to the same
@@ -123,7 +128,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = {
 /**
  * dma_fence_get_stub - return a signaled fence
  *
- * Return a stub fence which is already signaled.
+ * Return a stub fence which is already signaled. The fence's
+ * timestamp corresponds to the first time after boot this
+ * function is called.
  */
 struct dma_fence *dma_fence_get_stub(void)
 {
@@ -141,6 +148,30 @@ struct dma_fence *dma_fence_get_stub(void)
 }
 EXPORT_SYMBOL(dma_fence_get_stub);
 
+/**
+ * dma_fence_allocate_private_stub - return a private, signaled fence
+ *
+ * Return a newly allocated and signaled stub fence.
+ */
+struct dma_fence *dma_fence_allocate_private_stub(void)
+{
+   struct drm_fence_private_stub *fence;
+
+   fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+   if (fence == NULL)
+   return ERR_PTR(-ENOMEM);
+
+   spin_lock_init(&fence->lock);
+   dma_fence_init(&fence->base,
+  &dma_fence_stub_ops,
+  &fence->lock,
+  0, 0);
+   dma_fence_signal(&fence->base);
+
+   return &fence->base;
+}
+EXPORT_SYMBOL(dma_fence_allocate_private_stub);
+
 /**
  * dma_fence_context_alloc - allocate an array of fence contexts
  * @num: amount of contexts to allocate
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 349146049849..c6125e57ae37 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -350,12 +350,15 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence);
  *
  * Assign a already signaled stub fence to the sync object.
  */
-static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
+static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 {
-   struct dma_fence *fence = dma_fence_get_stub();
+   struct dma_fence *fence = dma_fence_allocate_private_stub();
+   if (IS_ERR(fence))
+  return PTR_ERR(fence);
 
-   drm_syncobj_replace_fence(syncobj, fence);
-   dma_fence_put(fence);
+   drm_syncobj_replace_fence(syncobj, fence);
+   dma_fence_put(fence);
+   return 0;
 }
 
 /* 5s default for wait submission */
@@ -469,6 +472,7 @@ EXPORT_SYMBOL(drm_syncobj_free);
 int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
   struct dma_fence *fence)
 {
+   int ret;
struct drm_syncobj *syncobj;
 
syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
@@ -479,8 +483,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, 
uint32_t flags,
INIT_LIST_HEAD(&syncobj->cb_list);
spin_lock_init(&syncobj->lock);
 
-   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED)
-   drm_syncobj_assign_null_handle(syncobj);
+   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
+   ret = drm_syncobj_assign_null_handle(syncobj);
+   if (ret < 0) {
+   drm_syncobj_put(syncobj);
+   return ret;
+   }
+   }
 
if (fence)
drm_syncobj_replace_fence(syncobj, fence);
@@ -1322,8 +1331,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
if (ret < 0)
return ret;
 
-   for (i = 0; i < args->count_handles; i++)
-   drm_syncobj_assign_null_handle(syncobjs[i]);
+   for (i = 0; i < args->count_handles; i++) {
+   ret = drm_syncobj_assign_null_handle(syncobjs[i]);
+  

Re: [PATCH] Revert "drm/syncobj: use dma_fence_get_stub"

2021-04-08 Thread David Stevens
On Thu, Apr 8, 2021 at 4:03 PM Christian König  wrote:
>
> Am 08.04.21 um 06:59 schrieb David Stevens:
> > From: David Stevens 
> >
> > This reverts commit 86bbd89d5da66fe760049ad3f04adc407ec0c4d6.
> >
> > Using the singleton stub fence in drm_syncobj_assign_null_handle means
> > that all syncobjs created in an already signaled state or any syncobjs
> > signaled by userspace will reference the singleton fence when exported
> > to a sync_file. If those sync_files are queried with SYNC_IOC_FILE_INFO,
> > then the timestamp_ns value returned will correspond to whenever the
> > singleton stub fence was first initialized. This can break the ability
> > of userspace to use timestamps of these fences, as the singleton stub
> > fence's timestamp bears no relationship to any meaningful event.
>
> And why exactly is having the timestamp of the call to
> drm_syncobj_assign_null_handle() better?

The timestamp returned by SYNC_IOC_FILE_INFO is the "timestamp of
status change in nanoseconds". If userspace signals the fence with
DRM_IOCTL_SYNCOBJ_SIGNAL, then a timestamp from
drm_syncobj_assign_null_handle corresponds to the status change. If
userspace sets DRM_SYNCOBJ_CREATE_SIGNALED when creating a fence, then
the status change happens immediately upon creation, which again
corresponds to when drm_syncobj_assign_null_handle gets called.

> Additional if you really need that please don't revert the patch.
> Instead provide a function which returns a newly initialized stub fence
> in the dma_fence.c code.

Ack.

-David

> Regards,
> Christian.
>
> >
> > Signed-off-by: David Stevens 
> > ---
> >   drivers/gpu/drm/drm_syncobj.c | 58 ++-
> >   1 file changed, 44 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> > index 349146049849..7cc11f1a83f4 100644
> > --- a/drivers/gpu/drm/drm_syncobj.c
> > +++ b/drivers/gpu/drm/drm_syncobj.c
> > @@ -211,6 +211,21 @@ struct syncobj_wait_entry {
> >   static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
> > struct syncobj_wait_entry *wait);
> >
> > +struct drm_syncobj_stub_fence {
> > + struct dma_fence base;
> > + spinlock_t lock;
> > +};
> > +
> > +static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
> > +{
> > + return "syncobjstub";
> > +}
> > +
> > +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
> > + .get_driver_name = drm_syncobj_stub_fence_get_name,
> > + .get_timeline_name = drm_syncobj_stub_fence_get_name,
> > +};
> > +
> >   /**
> >* drm_syncobj_find - lookup and reference a sync object.
> >* @file_private: drm file private pointer
> > @@ -344,18 +359,24 @@ void drm_syncobj_replace_fence(struct drm_syncobj 
> > *syncobj,
> >   }
> >   EXPORT_SYMBOL(drm_syncobj_replace_fence);
> >
> > -/**
> > - * drm_syncobj_assign_null_handle - assign a stub fence to the sync object
> > - * @syncobj: sync object to assign the fence on
> > - *
> > - * Assign a already signaled stub fence to the sync object.
> > - */
> > -static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> > +static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
> >   {
> > - struct dma_fence *fence = dma_fence_get_stub();
> > + struct drm_syncobj_stub_fence *fence;
> >
> > - drm_syncobj_replace_fence(syncobj, fence);
> > - dma_fence_put(fence);
> > + fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> > + if (fence == NULL)
> > + return -ENOMEM;
> > +
> > + spin_lock_init(&fence->lock);
> > + dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
> > +&fence->lock, 0, 0);
> > + dma_fence_signal(&fence->base);
> > +
> > + drm_syncobj_replace_fence(syncobj, &fence->base);
> > +
> > + dma_fence_put(&fence->base);
> > +
> > + return 0;
> >   }
> >
> >   /* 5s default for wait submission */
> > @@ -469,6 +490,7 @@ EXPORT_SYMBOL(drm_syncobj_free);
> >   int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
> >  struct dma_fence *fence)
> >   {
> > + int ret;
> >   struct drm_syncobj *syncobj;
> >
> >   syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
> > @@ -479,8 +501,13 @@ int drm_syncobj_create(struct drm_syncobj 
> > **out_syncobj, uint32_t flags,
> >   INIT_L

[PATCH] Revert "drm/syncobj: use dma_fence_get_stub"

2021-04-07 Thread David Stevens
From: David Stevens 

This reverts commit 86bbd89d5da66fe760049ad3f04adc407ec0c4d6.

Using the singleton stub fence in drm_syncobj_assign_null_handle means
that all syncobjs created in an already signaled state or any syncobjs
signaled by userspace will reference the singleton fence when exported
to a sync_file. If those sync_files are queried with SYNC_IOC_FILE_INFO,
then the timestamp_ns value returned will correspond to whenever the
singleton stub fence was first initialized. This can break the ability
of userspace to use timestamps of these fences, as the singleton stub
fence's timestamp bears no relationship to any meaningful event.
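
For reference, the singleton helper being reverted away from looks like
this (from drivers/dma-buf/dma-fence.c at the time): the stub is
initialized and signaled exactly once, on the first call after boot, so
every later user shares that one fence and its timestamp.

struct dma_fence *dma_fence_get_stub(void)
{
	spin_lock(&dma_fence_stub_lock);
	if (!dma_fence_stub.ops) {
		/* First caller after boot initializes and signals the stub. */
		dma_fence_init(&dma_fence_stub,
			       &dma_fence_stub_ops,
			       &dma_fence_stub_lock,
			       0, 0);
		dma_fence_signal_locked(&dma_fence_stub);
	}
	spin_unlock(&dma_fence_stub_lock);

	return dma_fence_get(&dma_fence_stub);
}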

Signed-off-by: David Stevens 
---
 drivers/gpu/drm/drm_syncobj.c | 58 ++-
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 349146049849..7cc11f1a83f4 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -211,6 +211,21 @@ struct syncobj_wait_entry {
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
  struct syncobj_wait_entry *wait);
 
+struct drm_syncobj_stub_fence {
+   struct dma_fence base;
+   spinlock_t lock;
+};
+
+static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence)
+{
+   return "syncobjstub";
+}
+
+static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
+   .get_driver_name = drm_syncobj_stub_fence_get_name,
+   .get_timeline_name = drm_syncobj_stub_fence_get_name,
+};
+
 /**
  * drm_syncobj_find - lookup and reference a sync object.
  * @file_private: drm file private pointer
@@ -344,18 +359,24 @@ void drm_syncobj_replace_fence(struct drm_syncobj 
*syncobj,
 }
 EXPORT_SYMBOL(drm_syncobj_replace_fence);
 
-/**
- * drm_syncobj_assign_null_handle - assign a stub fence to the sync object
- * @syncobj: sync object to assign the fence on
- *
- * Assign a already signaled stub fence to the sync object.
- */
-static void drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
+static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 {
-   struct dma_fence *fence = dma_fence_get_stub();
+   struct drm_syncobj_stub_fence *fence;
 
-   drm_syncobj_replace_fence(syncobj, fence);
-   dma_fence_put(fence);
+   fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+   if (fence == NULL)
+   return -ENOMEM;
+
+   spin_lock_init(&fence->lock);
+   dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops,
+  &fence->lock, 0, 0);
+   dma_fence_signal(&fence->base);
+
+   drm_syncobj_replace_fence(syncobj, &fence->base);
+
+   dma_fence_put(&fence->base);
+
+   return 0;
 }
 
 /* 5s default for wait submission */
@@ -469,6 +490,7 @@ EXPORT_SYMBOL(drm_syncobj_free);
 int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
   struct dma_fence *fence)
 {
+   int ret;
struct drm_syncobj *syncobj;
 
syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL);
@@ -479,8 +501,13 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, 
uint32_t flags,
INIT_LIST_HEAD(&syncobj->cb_list);
spin_lock_init(&syncobj->lock);
 
-   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED)
-   drm_syncobj_assign_null_handle(syncobj);
+   if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
+   ret = drm_syncobj_assign_null_handle(syncobj);
+   if (ret < 0) {
+   drm_syncobj_put(syncobj);
+   return ret;
+   }
+   }
 
if (fence)
drm_syncobj_replace_fence(syncobj, fence);
@@ -1322,8 +1349,11 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
if (ret < 0)
return ret;
 
-   for (i = 0; i < args->count_handles; i++)
-   drm_syncobj_assign_null_handle(syncobjs[i]);
+   for (i = 0; i < args->count_handles; i++) {
+   ret = drm_syncobj_assign_null_handle(syncobjs[i]);
+   if (ret < 0)
+   break;
+   }
 
drm_syncobj_array_free(syncobjs, args->count_handles);
 
-- 
2.31.0.208.g409f899ff0-goog



[PATCH v4 2/2] KVM: x86/mmu: Consider the hva in mmu_notifier retry

2021-02-21 Thread David Stevens
From: David Stevens 

Track the range being invalidated by mmu_notifier and skip page fault
retries if the fault address is not affected by the in-progress
invalidation. Handle concurrent invalidations by finding the minimal
range which includes all ranges being invalidated. Although the combined
range may include unrelated addresses and cannot be shrunk as individual
invalidation operations complete, it is unlikely the marginal gains of
proper range tracking are worth the additional complexity.

The primary benefit of this change is the reduction in the likelihood of
extreme latency when handling a page fault due to another thread having
been preempted while modifying host virtual addresses.
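
The bookkeeping this amounts to in virt/kvm/kvm_main.c is roughly the
following sketch (kvm_track_invalidation() is a hypothetical name for
the invalidate_range_start() side; field names as introduced by this
patch):

static void kvm_track_invalidation(struct kvm *kvm,
				   unsigned long start, unsigned long end)
{
	spin_lock(&kvm->mmu_lock);
	kvm->mmu_notifier_count++;
	if (likely(kvm->mmu_notifier_count == 1)) {
		kvm->mmu_notifier_range_start = start;
		kvm->mmu_notifier_range_end = end;
	} else {
		/*
		 * Union with all in-progress invalidations; the result
		 * may cover unrelated hvas and is only reset once
		 * mmu_notifier_count drops back to zero.
		 */
		kvm->mmu_notifier_range_start =
			min(kvm->mmu_notifier_range_start, start);
		kvm->mmu_notifier_range_end =
			max(kvm->mmu_notifier_range_end, end);
	}
	spin_unlock(&kvm->mmu_lock);
}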

Signed-off-by: David Stevens 
---
v3 -> v4:
 - Skip prefetch while invalidations are in progress

v2 -> v3:
 - Removed unnecessary ifdef
 - Style changes

v1 -> v2:
 - Improve handling of concurrent invalidation requests by unioning
   ranges, instead of just giving up and using [0, ULONG_MAX).
 - Add lockdep check
 - Code comments and formatting

 arch/powerpc/kvm/book3s_64_mmu_hv.c|  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/x86/kvm/mmu/mmu.c | 23 ++--
 arch/x86/kvm/mmu/paging_tmpl.h |  7 ---
 include/linux/kvm_host.h   | 25 +-
 virt/kvm/kvm_main.c| 29 ++
 6 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 38ea396a23d6..8e06cd3f759c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
} else {
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, &write_ok);
+  writing, &write_ok, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..e603de7ade52 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, upgrade_p);
+  writing, upgrade_p, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9ac0a727015d..f6aaac729667 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2758,6 +2758,13 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, 
u64 *sptep)
if (sp->role.level > PG_LEVEL_4K)
return;
 
+   /*
+* If addresses are being invalidated, skip prefetching to avoid
+* accidentally prefetching those addresses.
+*/
+   if (unlikely(vcpu->kvm->mmu_notifier_count))
+   return;
+
__direct_pte_prefetch(vcpu, sp, sptep);
 }
 
@@ -3658,8 +3665,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu 
*vcpu, gpa_t cr2_or_gpa,
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
-bool *writable)
+gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
+bool write, bool *writable)
 {
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
@@ -3672,7 +3679,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
}
 
async = false;
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async,
+   write, writable, hva);
if (!async)
return false; /* *pfn has correct page already */
 
@@ -3686,7 +3694,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
return true;
}
 
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
+   write, writable, hva);
return false;
 }
 
@@ -3699,6 +3708,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq;
kvm_pfn_t pfn;
+   hva_t hva;
int r;
 
if (p

[PATCH v4 1/2] KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault

2021-02-21 Thread David Stevens
From: Sean Christopherson 

Don't retry a page fault due to an mmu_notifier invalidation when
handling a page fault for a GPA that did not resolve to a memslot, i.e.
an MMIO page fault.  Invalidations from the mmu_notifier signal a change
in a host virtual address (HVA) mapping; without a memslot, there is no
HVA and thus no possibility that the invalidation is relevant to the
page fault being handled.

Note, the MMIO vs. memslot generation checks handle the case where a
pending memslot will create a memslot overlapping the faulting GPA.  The
mmu_notifier checks are orthogonal to memslot updates.
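
For reference, the pfn encodings behind the is_noslot_pfn() check used
below, as defined in include/linux/kvm_host.h (quoted here for context;
a fault on a GPA with no memslot resolves to KVM_PFN_NOSLOT, which is
what makes the mmu_notifier retry check irrelevant):

#define KVM_PFN_ERR_MASK	(0x1ULL << 63)
#define KVM_PFN_ERR_NOSLOT_MASK	(0x3ULL << 62)
#define KVM_PFN_NOSLOT		(0x1ULL << 62)

static inline bool is_error_noslot_pfn(kvm_pfn_t pfn)
{
	/* Error pfn or no-memslot pfn. */
	return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
}

static inline bool is_noslot_pfn(kvm_pfn_t pfn)
{
	/* Valid access, but no memslot backs the GPA (i.e. MMIO). */
	return pfn == KVM_PFN_NOSLOT;
}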

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6d16481aa29d..9ac0a727015d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3725,7 +3725,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
 
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
-   if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+   if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
if (r)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 50e268eb8e1a..ab54263d857c 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -869,7 +869,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t 
addr, u32 error_code,
 
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
-   if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+   if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
 
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
-- 
2.30.0.617.g56c4b15f3c-goog



[PATCH v4 0/2] KVM: x86/mmu: Skip mmu_notifier changes when possible

2021-02-21 Thread David Stevens
These patches reduce how often mmu_notifier updates block guest page
faults. The primary benefit of this is the reduction in the likelihood
of extreme latency when handling a page fault due to another thread
having been preempted while modifying host virtual addresses.

v3 -> v4:
 - Fix bug by skipping prefetch during invalidation

v2 -> v3:
 - Added patch to skip check for MMIO page faults
 - Style changes

David Stevens (1):
  KVM: x86/mmu: Consider the hva in mmu_notifier retry

Sean Christopherson (1):
  KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault

 arch/powerpc/kvm/book3s_64_mmu_hv.c|  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/x86/kvm/mmu/mmu.c | 23 ++--
 arch/x86/kvm/mmu/paging_tmpl.h |  7 ---
 include/linux/kvm_host.h   | 25 +-
 virt/kvm/kvm_main.c| 29 ++
 6 files changed, 72 insertions(+), 16 deletions(-)

-- 
2.30.0.617.g56c4b15f3c-goog



Re: [PATCH v3 0/2] KVM: x86/mmu: Skip mmu_notifier changes when possible

2021-02-04 Thread David Stevens
These patches might be responsible for some instability in one of our
stress tests. I'll send an update once I figure out what's going on.

Thanks,
David

On Thu, Jan 28, 2021 at 9:48 PM Paolo Bonzini  wrote:
>
> On 28/01/21 07:05, David Stevens wrote:
> > These patches reduce how often mmu_notifier updates block guest page
> > faults. The primary benefit of this is the reduction in the likelihood
> > of extreme latency when handling a page fault due to another thread
> > having been preempted while modifying host virtual addresses.
> >
> > v2 -> v3:
> >   - Added patch to skip check for MMIO page faults
> >   - Style changes
> >
> > David Stevens (1):
> >KVM: x86/mmu: Consider the hva in mmu_notifier retry
> >
> > Sean Christopherson (1):
> >KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault
> >
> >   arch/powerpc/kvm/book3s_64_mmu_hv.c|  2 +-
> >   arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
> >   arch/x86/kvm/mmu/mmu.c | 16 --
> >   arch/x86/kvm/mmu/paging_tmpl.h |  7 ---
> >   include/linux/kvm_host.h   | 25 +-
> >   virt/kvm/kvm_main.c| 29 ++
> >   6 files changed, 65 insertions(+), 16 deletions(-)
> >
>
> Queued, thanks.
>
> Paolo
>


[PATCH v3 0/2] KVM: x86/mmu: Skip mmu_notifier changes when possible

2021-01-27 Thread David Stevens
These patches reduce how often mmu_notifier updates block guest page
faults. The primary benefit of this is the reduction in the likelihood
of extreme latency when handling a page fault due to another thread
having been preempted while modifying host virtual addresses.

v2 -> v3:
 - Added patch to skip check for MMIO page faults
 - Style changes

David Stevens (1):
  KVM: x86/mmu: Consider the hva in mmu_notifier retry

Sean Christopherson (1):
  KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault

 arch/powerpc/kvm/book3s_64_mmu_hv.c|  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/x86/kvm/mmu/mmu.c | 16 --
 arch/x86/kvm/mmu/paging_tmpl.h |  7 ---
 include/linux/kvm_host.h   | 25 +-
 virt/kvm/kvm_main.c| 29 ++
 6 files changed, 65 insertions(+), 16 deletions(-)

-- 
2.30.0.280.ga3ce27912f-goog



[PATCH v3 1/2] KVM: x86/mmu: Skip mmu_notifier check when handling MMIO page fault

2021-01-27 Thread David Stevens
From: Sean Christopherson 

Don't retry a page fault due to an mmu_notifier invalidation when
handling a page fault for a GPA that did not resolve to a memslot, i.e.
an MMIO page fault.  Invalidations from the mmu_notifier signal a change
in a host virtual address (HVA) mapping; without a memslot, there is no
HVA and thus no possibility that the invalidation is relevant to the
page fault being handled.

Note, the MMIO vs. memslot generation checks handle the case where a
pending memslot will create a memslot overlapping the faulting GPA.  The
mmu_notifier checks are orthogonal to memslot updates.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6d16481aa29d..9ac0a727015d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3725,7 +3725,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
 
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
-   if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+   if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
if (r)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 50e268eb8e1a..ab54263d857c 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -869,7 +869,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t 
addr, u32 error_code,
 
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
-   if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+   if (!is_noslot_pfn(pfn) && mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
 
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
-- 
2.30.0.280.ga3ce27912f-goog



[PATCH v3 2/2] KVM: x86/mmu: Consider the hva in mmu_notifier retry

2021-01-27 Thread David Stevens
From: David Stevens 

Track the range being invalidated by mmu_notifier and skip page fault
retries if the fault address is not affected by the in-progress
invalidation. Handle concurrent invalidations by finding the minimal
range which includes all ranges being invalidated. Although the combined
range may include unrelated addresses and cannot be shrunk as individual
invalidation operations complete, it is unlikely the marginal gains of
proper range tracking are worth the additional complexity.

The primary benefit of this change is the reduction in the likelihood of
extreme latency when handling a page fault due to another thread having
been preempted while modifying host virtual addresses.

Signed-off-by: David Stevens 
---
v2 -> v3:
 - Removed unnecessary ifdef
 - Style changes

v1 -> v2: 
 - Improve handling of concurrent invalidation requests by unioning
   ranges, instead of just giving up and using [0, ULONG_MAX).
 - Add lockdep check
 - Code comments and formatting

 arch/powerpc/kvm/book3s_64_mmu_hv.c|  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/x86/kvm/mmu/mmu.c | 16 --
 arch/x86/kvm/mmu/paging_tmpl.h |  7 ---
 include/linux/kvm_host.h   | 25 +-
 virt/kvm/kvm_main.c| 29 ++
 6 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 38ea396a23d6..8e06cd3f759c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
} else {
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, &write_ok);
+  writing, &write_ok, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..e603de7ade52 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, upgrade_p);
+  writing, upgrade_p, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9ac0a727015d..8d08e97b2487 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3658,8 +3658,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu 
*vcpu, gpa_t cr2_or_gpa,
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
-bool *writable)
+gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
+bool write, bool *writable)
 {
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
@@ -3672,7 +3672,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
}
 
async = false;
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async,
+   write, writable, hva);
if (!async)
return false; /* *pfn has correct page already */
 
@@ -3686,7 +3687,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
return true;
}
 
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
+   write, writable, hva);
return false;
 }
 
@@ -3699,6 +3701,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq;
kvm_pfn_t pfn;
+   hva_t hva;
int r;
 
if (page_fault_handle_page_track(vcpu, error_code, gfn))
@@ -3717,7 +3720,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
 
-   if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+   if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva,
+write, &map_writable))
return RET_PF_RETRY;
 
if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))

[PATCH v2] KVM: x86/mmu: consider the hva in mmu_notifier retry

2021-01-26 Thread David Stevens
From: David Stevens 

Track the range being invalidated by mmu_notifier and skip page fault
retries if the fault address is not affected by the in-progress
invalidation. Handle concurrent invalidations by finding the minimal
range which includes all ranges being invalidated. Although the combined
range may include unrelated addresses and cannot be shrunk as individual
invalidation operations complete, it is unlikely the marginal gains of
proper range tracking are worth the additional complexity.

The primary benefit of this change is the reduction in the likelihood of
extreme latency when handling a page fault due to another thread having
been preempted while modifying host virtual addresses.

Signed-off-by: David Stevens 
---
v1 -> v2:
 - improve handling of concurrent invalidation requests by unioning
   ranges, instead of just giving up and using [0, ULONG_MAX).
 - add lockdep check
 - code comments and formatting

 arch/powerpc/kvm/book3s_64_mmu_hv.c|  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/x86/kvm/mmu/mmu.c | 16 --
 arch/x86/kvm/mmu/paging_tmpl.h |  7 ---
 include/linux/kvm_host.h   | 27 +++-
 virt/kvm/kvm_main.c| 29 ++
 6 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 38ea396a23d6..8e06cd3f759c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
} else {
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, &write_ok);
+  writing, &write_ok, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..e603de7ade52 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, upgrade_p);
+  writing, upgrade_p, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6d16481aa29d..79166288ed03 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3658,8 +3658,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu 
*vcpu, gpa_t cr2_or_gpa,
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
-bool *writable)
+gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
+bool write, bool *writable)
 {
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
@@ -3672,7 +3672,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
}
 
async = false;
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async,
+   write, writable, hva);
if (!async)
return false; /* *pfn has correct page already */
 
@@ -3686,7 +3687,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
return true;
}
 
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
+   write, writable, hva);
return false;
 }
 
@@ -3699,6 +3701,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq;
kvm_pfn_t pfn;
+   hva_t hva;
int r;
 
if (page_fault_handle_page_track(vcpu, error_code, gfn))
@@ -3717,7 +3720,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
 
-   if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+   if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva,
+write, &map_writable))
return RET_PF_RETRY;
 
if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
@@ -3725,7 +3729,7 @@ static int direct_page_fault(struct kvm_vc

Re: [PATCH] KVM: x86/mmu: consider the hva in mmu_notifier retry

2021-01-26 Thread David Stevens
> > This has the secondary effect of greatly reducing the likelihood of extreme
>
> Out of curiosity, is this really the _secondary_ effect?  I would expect this
> change to primarily benefit scenarios where the invalidation has gotten
> waylaid for whatever reason.

Yeah, this is the primary benefit. I was thinking about it as the
reduction in page fault retries is the direct effect, and that in turn
leads to a secondary effect of a reduction in the chance of extreme
latency. But I guess that's not a particularly important distinction
to make. I'll reword this.

>
> This needs a comment to explicitly state that 'count > 1' cannot be done at
> this time.  My initial thought is that it would be more intuitive to check for
> 'count > 1' here, but that would potentially check the wrong range when count
> goes from 2->1.  The comment about persistence in invalidate_range_start() is 
> a
> good hint, but I think it's worth being explicit to avoid bad "cleanup" in the
> future.
>
> > + if (unlikely(kvm->mmu_notifier_count)) {
> > + if (kvm->mmu_notifier_range_start <= hva &&
> > + hva < kvm->mmu_notifier_range_end)

I'm not sure I understand what you're suggesting here. How exactly
would 'count > 1' be used incorrectly here? I'm fine with adding a
comment, but I'm not sure what the comment needs to clarify.
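
For concreteness, the check in question ends up looking roughly like
this in v2 (a sketch of the mmu_notifier_retry_hva() helper; see the
patch itself for the authoritative version):

static inline int mmu_notifier_retry_hva(struct kvm *kvm,
					 unsigned long mmu_seq,
					 unsigned long hva)
{
	lockdep_assert_held(&kvm->mmu_lock);
	/*
	 * Retry if the fault address falls inside the union of all
	 * in-progress invalidation ranges. Checking 'count > 1' cannot
	 * narrow this further, since the union is not shrunk as
	 * individual invalidations complete.
	 */
	if (unlikely(kvm->mmu_notifier_count) &&
	    kvm->mmu_notifier_range_start <= hva &&
	    hva < kvm->mmu_notifier_range_end)
		return 1;
	if (kvm->mmu_notifier_seq != mmu_seq)
		return 1;
	return 0;
}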

-David


[PATCH] KVM: x86/mmu: consider the hva in mmu_notifier retry

2021-01-24 Thread David Stevens
From: David Stevens 

Use the range passed to mmu_notifier's invalidate_range_start to prevent
spurious page fault retries due to changes in unrelated host virtual
addresses. This has the secondary effect of greatly reducing the
likelihood of extreme latency when handling a page fault due to another
thread having been preempted while modifying host virtual addresses.

Signed-off-by: David Stevens 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c|  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/x86/kvm/mmu/mmu.c | 16 ++--
 arch/x86/kvm/mmu/paging_tmpl.h |  7 ---
 include/linux/kvm_host.h   | 22 +-
 virt/kvm/kvm_main.c| 22 ++
 6 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c 
b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 38ea396a23d6..8e06cd3f759c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
} else {
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, &write_ok);
+  writing, &write_ok, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..e603de7ade52 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-  writing, upgrade_p);
+  writing, upgrade_p, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6d16481aa29d..79166288ed03 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3658,8 +3658,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu 
*vcpu, gpa_t cr2_or_gpa,
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
-bool *writable)
+gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
+bool write, bool *writable)
 {
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
@@ -3672,7 +3672,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
}
 
async = false;
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async,
+   write, writable, hva);
if (!async)
return false; /* *pfn has correct page already */
 
@@ -3686,7 +3687,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
return true;
}
 
-   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
+   *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
+   write, writable, hva);
return false;
 }
 
@@ -3699,6 +3701,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq;
kvm_pfn_t pfn;
+   hva_t hva;
int r;
 
if (page_fault_handle_page_track(vcpu, error_code, gfn))
@@ -3717,7 +3720,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
 
-   if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+   if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva,
+write, &map_writable))
return RET_PF_RETRY;
 
if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
@@ -3725,7 +3729,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 error_code,
 
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
-   if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+   if (mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
if (r)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 50e268eb8e1a..3171784139a4 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -790,6 +790,7 @@ sta

[PATCH v7 0/3] Support virtio cross-device resources

2020-08-18 Thread David Stevens
This patchset implements the current proposal for virtio cross-device
resource sharing [1]. It will be used to import virtio resources into
the virtio-video driver currently under discussion [2]. The patch
under consideration to add support in the virtio-video driver is [3].
It uses the APIs from v3 of this series, but the changes to update it
are relatively minor.

This patchset adds a new flavor of dma-bufs that supports querying the
underlying virtio object UUID, as well as adding support for exporting
resources from virtgpu.
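
As a rough illustration of the importing side (sketch only; error
handling is trimmed and the function name is a placeholder), a driver
handed a dma-buf fd can recover the exported object's UUID with the
new helpers:

#include <linux/dma-buf.h>
#include <linux/err.h>
#include <linux/uuid.h>
#include <linux/virtio_dma_buf.h>

static int example_get_exported_uuid(int fd, uuid_t *uuid)
{
	struct dma_buf *buf = dma_buf_get(fd);
	int ret = -EINVAL;

	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* Only dma-bufs created via virtio_dma_buf_export() qualify. */
	if (is_virtio_dma_buf(buf))
		ret = virtio_dma_buf_get_uuid(buf, uuid);

	dma_buf_put(buf);
	return ret;
}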

[1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
[2] https://markmail.org/thread/p5d3k566srtdtute
[3] https://markmail.org/thread/j4xlqaaim266qpks

v6 -> v7 changes:
 - Fix most strict checkpatch comments

David Stevens (3):
  virtio: add dma-buf support for exported objects
  virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
  drm/virtio: Support virtgpu exported resources

 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 21 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 drivers/virtio/Makefile|  2 +-
 drivers/virtio/virtio.c|  6 ++
 drivers/virtio/virtio_dma_buf.c| 85 +++
 include/linux/virtio.h |  1 +
 include/linux/virtio_dma_buf.h | 37 ++
 include/uapi/linux/virtio_gpu.h| 19 +
 11 files changed, 325 insertions(+), 4 deletions(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

-- 
2.28.0.220.ged08abb693-goog



[PATCH v7 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature

2020-08-18 Thread David Stevens
This feature allows the guest to request a UUID from the host for a
particular virtio_gpu resource. The UUID can then be shared with other
virtio devices, to allow the other host devices to access the
virtio_gpu's corresponding host resource.
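
For illustration, a guest driver would fill the request defined below
roughly as follows (sketch only: the helper name is made up, the rest
of the ctrl header is assumed to be zeroed by the caller, and queueing
the buffer plus consuming the virtio_gpu_resp_resource_uuid reply are
up to the driver):

#include <asm/byteorder.h>
#include <linux/virtio_gpu.h>

static void example_fill_assign_uuid(struct virtio_gpu_resource_assign_uuid *cmd,
				     __u32 resource_id)
{
	cmd->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID);
	cmd->resource_id = cpu_to_le32(resource_id);
	cmd->padding = 0;
}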

Signed-off-by: David Stevens 
---
 include/uapi/linux/virtio_gpu.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 0c85914d9369..9721d58b4d58 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -50,6 +50,10 @@
  * VIRTIO_GPU_CMD_GET_EDID
  */
 #define VIRTIO_GPU_F_EDID 1
+/*
+ * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID
+ */
+#define VIRTIO_GPU_F_RESOURCE_UUID   2
 
 enum virtio_gpu_ctrl_type {
VIRTIO_GPU_UNDEFINED = 0,
@@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_CMD_GET_CAPSET_INFO,
VIRTIO_GPU_CMD_GET_CAPSET,
VIRTIO_GPU_CMD_GET_EDID,
+   VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
 
/* 3d commands */
VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
@@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_RESP_OK_CAPSET_INFO,
VIRTIO_GPU_RESP_OK_CAPSET,
VIRTIO_GPU_RESP_OK_EDID,
+   VIRTIO_GPU_RESP_OK_RESOURCE_UUID,
 
/* error responses */
VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
@@ -340,4 +346,17 @@ enum virtio_gpu_formats {
VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM  = 134,
 };
 
+/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */
+struct virtio_gpu_resource_assign_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __le32 resource_id;
+   __le32 padding;
+};
+
+/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */
+struct virtio_gpu_resp_resource_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __u8 uuid[16];
+};
+
 #endif
-- 
2.28.0.220.ged08abb693-goog



[PATCH v7 1/3] virtio: add dma-buf support for exported objects

2020-08-18 Thread David Stevens
This change adds a new flavor of dma-bufs that can be used by virtio
drivers to share exported objects. A virtio dma-buf can be queried by
virtio drivers to obtain the UUID which identifies the underlying
exported object.
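
To sketch the exporting side (illustrative only: the UUID source and
all 'example_' names are stand-ins, and the usual dma_buf_ops
callbacks are elided), a driver embeds its dma_buf_ops in a
virtio_dma_buf_ops, points attach at virtio_dma_buf_attach, and goes
through the new wrapper:

#include <linux/dma-buf.h>
#include <linux/uuid.h>
#include <linux/virtio_dma_buf.h>

static uuid_t example_uuid;	/* stands in for per-object driver state */

static int example_get_uuid(struct dma_buf *buf, uuid_t *uuid)
{
	uuid_copy(uuid, &example_uuid);
	return 0;
}

static const struct virtio_dma_buf_ops example_ops = {
	.ops = {
		.attach = virtio_dma_buf_attach,
		/* .map_dma_buf, .unmap_dma_buf, .release, ... as usual */
	},
	.get_uuid = example_get_uuid,
};

static struct dma_buf *example_export(struct dma_buf_export_info *exp_info)
{
	exp_info->ops = &example_ops.ops;	/* validated by the wrapper */
	return virtio_dma_buf_export(exp_info);
}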

Signed-off-by: David Stevens 
---
 drivers/virtio/Makefile |  2 +-
 drivers/virtio/virtio.c |  6 +++
 drivers/virtio/virtio_dma_buf.c | 85 +
 include/linux/virtio.h  |  1 +
 include/linux/virtio_dma_buf.h  | 37 ++
 5 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 29a1386ecc03..ecdae5b596de 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index a977e32a88f2..5d46f0ded92d 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(register_virtio_device);
 
+bool is_virtio_device(struct device *dev)
+{
+   return dev->bus == &virtio_bus;
+}
+EXPORT_SYMBOL_GPL(is_virtio_device);
+
 void unregister_virtio_device(struct virtio_device *dev)
 {
int index = dev->index; /* save for after device release */
diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c
new file mode 100644
index ..45d6e8647dcf
--- /dev/null
+++ b/drivers/virtio/virtio_dma_buf.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * dma-bufs for virtio exported objects
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+
+#include 
+
+/**
+ * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object
+ * @exp_info: [in] see dma_buf_export(). ops MUST refer to a dma_buf_ops
+ * struct embedded in a virtio_dma_buf_ops.
+ *
+ * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf
+ * for a virtio exported object that can be queried by other virtio drivers
+ * for the object's UUID.
+ */
+struct dma_buf *virtio_dma_buf_export
+   (const struct dma_buf_export_info *exp_info)
+{
+   const struct virtio_dma_buf_ops *virtio_ops =
+   container_of(exp_info->ops,
+const struct virtio_dma_buf_ops, ops);
+
+   if (!exp_info->ops ||
+   exp_info->ops->attach != &virtio_dma_buf_attach ||
+   !virtio_ops->get_uuid) {
+   return ERR_PTR(-EINVAL);
+   }
+
+   return dma_buf_export(exp_info);
+}
+EXPORT_SYMBOL(virtio_dma_buf_export);
+
+/**
+ * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs
+ */
+int virtio_dma_buf_attach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
+{
+   int ret;
+   const struct virtio_dma_buf_ops *ops =
+   container_of(dma_buf->ops,
+const struct virtio_dma_buf_ops, ops);
+
+   if (ops->device_attach) {
+   ret = ops->device_attach(dma_buf, attach);
+   if (ret)
+   return ret;
+   }
+   return 0;
+}
+EXPORT_SYMBOL(virtio_dma_buf_attach);
+
+/**
+ * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf
+ * @dma_buf: buffer to query
+ */
+bool is_virtio_dma_buf(struct dma_buf *dma_buf)
+{
+   return dma_buf->ops->attach == &virtio_dma_buf_attach;
+}
+EXPORT_SYMBOL(is_virtio_dma_buf);
+
+/**
+ * virtio_dma_buf_get_uuid - gets a virtio dma-buf's exported object's uuid
+ * @dma_buf: [in] buffer to query
+ * @uuid: [out] the uuid
+ *
+ * Returns: 0 on success, negative on failure.
+ */
+int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf,
+   uuid_t *uuid)
+{
+   const struct virtio_dma_buf_ops *ops =
+   container_of(dma_buf->ops,
+const struct virtio_dma_buf_ops, ops);
+
+   if (!is_virtio_dma_buf(dma_buf))
+   return -EINVAL;
+
+   return ops->get_uuid(dma_buf, uuid);
+}
+EXPORT_SYMBOL(virtio_dma_buf_get_uuid);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 15f906e4a748..9397e25616c4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct 
device *_dev)
 void virtio_add_status(struct virtio_device *dev, unsigned int status);
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
+bool is_virtio_device(struct device *dev);
 
 void virtio_break_device(struct v

[PATCH v7 3/3] drm/virtio: Support virtgpu exported resources

2020-08-18 Thread David Stevens
Add support for UUID-based resource sharing mechanism to virtgpu. This
implements the new virtgpu commands and hooks them up to dma-buf's
get_uuid callback.
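
Roughly, the wiring this enables looks like the following (a sketch
only; virtgpu_prime.c is truncated in this archive, so everything here
beyond the declarations in virtgpu_drv.h is an assumption):

#include <linux/dma-buf.h>
#include <linux/virtio_dma_buf.h>
#include "virtgpu_drv.h"

/*
 * Hypothetical forwarding callback: dma-buf's get_uuid hands the query
 * to the GEM-level helper this patch adds. For DRM prime dma-bufs, the
 * dma_buf's priv is the underlying drm_gem_object.
 */
static int example_get_uuid(struct dma_buf *buf, uuid_t *uuid)
{
	return virtgpu_gem_prime_get_uuid(buf->priv, uuid);
}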

Signed-off-by: David Stevens 
---
 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 21 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 5 files changed, 176 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c 
b/drivers/gpu/drm/virtio/virtgpu_drv.c
index ab4bed78e656..b039f493bda9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -165,6 +165,7 @@ static unsigned int features[] = {
VIRTIO_GPU_F_VIRGL,
 #endif
VIRTIO_GPU_F_EDID,
+   VIRTIO_GPU_F_RESOURCE_UUID,
 };
 static struct virtio_driver virtio_gpu_driver = {
.feature_table = features,
@@ -202,6 +203,8 @@ static struct drm_driver driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_mmap = drm_gem_prime_mmap,
+   .gem_prime_export = virtgpu_gem_prime_export,
+   .gem_prime_import = virtgpu_gem_prime_import,
.gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
 
.gem_create_object = virtio_gpu_create_object,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 49bebdee6d91..cf54b89d9ab1 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -49,6 +49,10 @@
 #define DRIVER_MINOR 1
 #define DRIVER_PATCHLEVEL 0
 
+#define UUID_INITIALIZING 0
+#define UUID_INITIALIZED 1
+#define UUID_INITIALIZATION_FAILED 2
+
 struct virtio_gpu_object_params {
uint32_t format;
uint32_t width;
@@ -71,6 +75,9 @@ struct virtio_gpu_object {
uint32_t hw_res_handle;
bool dumb;
bool created;
+
+   int uuid_state;
+   uuid_t uuid;
 };
 #define gem_to_virtio_gpu_obj(gobj) \
container_of((gobj), struct virtio_gpu_object, base.base)
@@ -200,6 +207,7 @@ struct virtio_gpu_device {
bool has_virgl_3d;
bool has_edid;
bool has_indirect;
+   bool has_resource_assign_uuid;
 
struct work_struct config_changed_work;
 
@@ -210,6 +218,9 @@ struct virtio_gpu_device {
struct virtio_gpu_drv_capset *capsets;
uint32_t num_capsets;
struct list_head cap_cache;
+
+   /* protects resource state when exporting */
+   spinlock_t resource_export_lock;
 };
 
 struct virtio_gpu_fpriv {
@@ -335,6 +346,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct 
*work);
 
 void virtio_gpu_notify(struct virtio_gpu_device *vgdev);
 
+int
+virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev,
+   struct virtio_gpu_object_array *objs);
+
 /* virtgpu_display.c */
 void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
@@ -366,6 +381,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device 
*vgdev,
 bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo);
 
 /* virtgpu_prime.c */
+struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj,
+int flags);
+struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev,
+   struct dma_buf *buf);
+int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj,
+  uuid_t *uuid);
 struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
struct drm_device *dev, struct dma_buf_attachment *attach,
struct sg_table *sgt);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c 
b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 023a030ca7b9..7bcd0c75effa 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev)
vgdev->dev = dev->dev;
 
 	spin_lock_init(&vgdev->display_info_lock);
+	spin_lock_init(&vgdev->resource_export_lock);
 	ida_init(&vgdev->ctx_id_ida);
 	ida_init(&vgdev->resource_ida);
 	init_waitqueue_head(&vgdev->resp_wq);
@@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev)
if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
vgdev->has_indirect = true;
}
+   if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) {
+   vgdev->has_resource_assign_uuid = true;
+   }
 
DRM_INFO("features: %cvirgl %cedid\n",
 vgdev->has_virgl_3d ? '+' : '-',
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c 
b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 050d24c39a8f..acd14ef73d56 100644
--- a/drivers/gpu/drm/virti

Re: [PATCH v6 0/3] Support virtio cross-device resources

2020-08-18 Thread David Stevens
> Hmm, checkpatch still complains, full log below.
>
> IIRC "dim checkpatch" runs scripts/checkpatch.pl with --strict
> so it is a bit more picky ...

Ah, I didn't know --strict was being used. I'll send an update
momentarily. Sorry for the churn.

> -:250: CHECK:PREFER_KERNEL_TYPES: Prefer kernel type 'u32' over 'uint32_t'
> #250: FILE: drivers/gpu/drm/virtio/virtgpu_vq.c:1118:
> +   uint32_t resp_type = le32_to_cpu(resp->hdr.type);
>

For consistency with the rest of the virtgpu code, I'll leave uint32_t.

Cheers,
David


Re: [virtio-dev] Re: [PATCH v5 0/3] Support virtio cross-device resources

2020-08-17 Thread David Stevens
On Mon, Aug 17, 2020 at 4:12 AM Gerd Hoffmann  wrote:
>
> On Mon, Aug 17, 2020 at 12:50:08PM +0200, Gerd Hoffmann wrote:
> > On Tue, Jun 23, 2020 at 10:31:28AM +0900, David Stevens wrote:
> > > Unless there are any remaining objections to these patches, what are
> > > the next steps towards getting these merged? Sorry, I'm not familiar
> > > with the workflow for contributing patches to Linux.
> >
> > Sorry, just have been busy and not paying as much attention to drm
> > patches as usual.  Playing catch-up now.  Queued for drm-misc-next,
> > unless something goes wrong in my testing the patches should land
> > in linux-next soon and be merged upstream in the next merge window.
>
> Oh, spoke too soon.  scripts/checkpatch.pl has a bunch of codestyle
> warnings.  Can you fix them and resend?

Sent a new version to fix the line length warning. There's still a
MAINTAINERS warning, but I think the new virtio_dma_buf file can fall
under virtio core (and the existing wildcards do match it), rather
than get its own MAINTAINERS entry. I can break it out into its own
thing if that's better, though.

-David


[PATCH v6 0/3] Support virtio cross-device resources

2020-08-17 Thread David Stevens
This patchset implements the current proposal for virtio cross-device
resource sharing [1]. It will be used to import virtio resources into
the virtio-video driver currently under discussion [2]. The patch
under consideration to add support in the virtio-video driver is [3].
It uses the APIs from v3 of this series, but the changes to update it
are relatively minor.

This patchset adds a new flavor of dma-bufs that supports querying the
underlying virtio object UUID, as well as adding support for exporting
resources from virtgpu.

[1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
[2] https://markmail.org/thread/p5d3k566srtdtute
[3] https://markmail.org/thread/j4xlqaaim266qpks

v5 -> v6 changes:
 - Fix >80 character comment

David Stevens (3):
  virtio: add dma-buf support for exported objects
  virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
  drm/virtio: Support virtgpu exported resources

 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 20 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 drivers/virtio/Makefile|  2 +-
 drivers/virtio/virtio.c|  6 ++
 drivers/virtio/virtio_dma_buf.c| 82 ++
 include/linux/virtio.h |  1 +
 include/linux/virtio_dma_buf.h | 37 ++
 include/uapi/linux/virtio_gpu.h| 19 +
 11 files changed, 321 insertions(+), 4 deletions(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

-- 
2.28.0.220.ged08abb693-goog



[PATCH v6 1/3] virtio: add dma-buf support for exported objects

2020-08-17 Thread David Stevens
This change adds a new flavor of dma-bufs that can be used by virtio
drivers to share exported objects. A virtio dma-buf can be queried by
virtio drivers to obtain the UUID which identifies the underlying
exported object.

Signed-off-by: David Stevens 
---
 drivers/virtio/Makefile |  2 +-
 drivers/virtio/virtio.c |  6 +++
 drivers/virtio/virtio_dma_buf.c | 82 +
 include/linux/virtio.h  |  1 +
 include/linux/virtio_dma_buf.h  | 37 +++
 5 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 29a1386ecc03..ecdae5b596de 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index a977e32a88f2..5d46f0ded92d 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(register_virtio_device);
 
+bool is_virtio_device(struct device *dev)
+{
+   return dev->bus == &virtio_bus;
+}
+EXPORT_SYMBOL_GPL(is_virtio_device);
+
 void unregister_virtio_device(struct virtio_device *dev)
 {
int index = dev->index; /* save for after device release */
diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c
new file mode 100644
index ..63e6b1908397
--- /dev/null
+++ b/drivers/virtio/virtio_dma_buf.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * dma-bufs for virtio exported objects
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+
+#include 
+
+/**
+ * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object
+ * @exp_info: [in] see dma_buf_export(). ops MUST refer to a dma_buf_ops
+ * struct embedded in a virtio_dma_buf_ops.
+ *
+ * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf
+ * for a virtio exported object that can be queried by other virtio drivers
+ * for the object's UUID.
+ */
+struct dma_buf *virtio_dma_buf_export(
+   const struct dma_buf_export_info *exp_info)
+{
+   const struct virtio_dma_buf_ops *virtio_ops = container_of(
+   exp_info->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (!exp_info->ops
+   || exp_info->ops->attach != &virtio_dma_buf_attach
+   || !virtio_ops->get_uuid) {
+   return ERR_PTR(-EINVAL);
+   }
+
+   return dma_buf_export(exp_info);
+}
+EXPORT_SYMBOL(virtio_dma_buf_export);
+
+/**
+ * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs
+ */
+int virtio_dma_buf_attach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
+{
+   int ret;
+   const struct virtio_dma_buf_ops *ops = container_of(
+   dma_buf->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (ops->device_attach) {
+   ret = ops->device_attach(dma_buf, attach);
+   if (ret)
+   return ret;
+   }
+   return 0;
+}
+EXPORT_SYMBOL(virtio_dma_buf_attach);
+
+/**
+ * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf
+ * @dma_buf: buffer to query
+ */
+bool is_virtio_dma_buf(struct dma_buf *dma_buf)
+{
+   return dma_buf->ops->attach == &virtio_dma_buf_attach;
+}
+EXPORT_SYMBOL(is_virtio_dma_buf);
+
+/**
+ * virtio_dma_buf_get_uuid - gets a virtio dma-buf's exported object's uuid
+ * @dma_buf: [in] buffer to query
+ * @uuid: [out] the uuid
+ *
+ * Returns: 0 on success, negative on failure.
+ */
+int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf,
+   uuid_t *uuid)
+{
+   const struct virtio_dma_buf_ops *ops = container_of(
+   dma_buf->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (!is_virtio_dma_buf(dma_buf))
+   return -EINVAL;
+
+   return ops->get_uuid(dma_buf, uuid);
+}
+EXPORT_SYMBOL(virtio_dma_buf_get_uuid);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 15f906e4a748..9397e25616c4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct 
device *_dev)
 void virtio_add_status(struct virtio_device *dev, unsigned int status);
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
+bool is_virtio_device(struct device *dev);
 
 void virtio_break_device(struct virtio_device *dev);
 
diff --git a/incl

[PATCH v6 3/3] drm/virtio: Support virtgpu exported resources

2020-08-17 Thread David Stevens
Add support for UUID-based resource sharing mechanism to virtgpu. This
implements the new virtgpu commands and hooks them up to dma-buf's
get_uuid callback.

Signed-off-by: David Stevens 
---
 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 20 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 5 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c 
b/drivers/gpu/drm/virtio/virtgpu_drv.c
index ab4bed78e656..b039f493bda9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -165,6 +165,7 @@ static unsigned int features[] = {
VIRTIO_GPU_F_VIRGL,
 #endif
VIRTIO_GPU_F_EDID,
+   VIRTIO_GPU_F_RESOURCE_UUID,
 };
 static struct virtio_driver virtio_gpu_driver = {
.feature_table = features,
@@ -202,6 +203,8 @@ static struct drm_driver driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_mmap = drm_gem_prime_mmap,
+   .gem_prime_export = virtgpu_gem_prime_export,
+   .gem_prime_import = virtgpu_gem_prime_import,
.gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
 
.gem_create_object = virtio_gpu_create_object,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 49bebdee6d91..39dc907aa805 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -49,6 +49,10 @@
 #define DRIVER_MINOR 1
 #define DRIVER_PATCHLEVEL 0
 
+#define UUID_INITIALIZING 0
+#define UUID_INITIALIZED 1
+#define UUID_INITIALIZATION_FAILED 2
+
 struct virtio_gpu_object_params {
uint32_t format;
uint32_t width;
@@ -71,6 +75,9 @@ struct virtio_gpu_object {
uint32_t hw_res_handle;
bool dumb;
bool created;
+
+   int uuid_state;
+   uuid_t uuid;
 };
 #define gem_to_virtio_gpu_obj(gobj) \
container_of((gobj), struct virtio_gpu_object, base.base)
@@ -200,6 +207,7 @@ struct virtio_gpu_device {
bool has_virgl_3d;
bool has_edid;
bool has_indirect;
+   bool has_resource_assign_uuid;
 
struct work_struct config_changed_work;
 
@@ -210,6 +218,8 @@ struct virtio_gpu_device {
struct virtio_gpu_drv_capset *capsets;
uint32_t num_capsets;
struct list_head cap_cache;
+
+   spinlock_t resource_export_lock;
 };
 
 struct virtio_gpu_fpriv {
@@ -335,6 +345,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct 
*work);
 
 void virtio_gpu_notify(struct virtio_gpu_device *vgdev);
 
+int
+virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev,
+   struct virtio_gpu_object_array *objs);
+
 /* virtgpu_display.c */
 void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
@@ -366,6 +380,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device 
*vgdev,
 bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo);
 
 /* virtgpu_prime.c */
+struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj,
+int flags);
+struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev,
+   struct dma_buf *buf);
+int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj,
+  uuid_t *uuid);
 struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
struct drm_device *dev, struct dma_buf_attachment *attach,
struct sg_table *sgt);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c 
b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 023a030ca7b9..7bcd0c75effa 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev)
vgdev->dev = dev->dev;
 
 	spin_lock_init(&vgdev->display_info_lock);
+	spin_lock_init(&vgdev->resource_export_lock);
 	ida_init(&vgdev->ctx_id_ida);
 	ida_init(&vgdev->resource_ida);
 	init_waitqueue_head(&vgdev->resp_wq);
@@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev)
if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
vgdev->has_indirect = true;
}
+   if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) {
+   vgdev->has_resource_assign_uuid = true;
+   }
 
DRM_INFO("features: %cvirgl %cedid\n",
 vgdev->has_virgl_3d ? '+' : '-',
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c 
b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 050d24c39a8f..acd14ef73d56 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@

[PATCH v6 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature

2020-08-17 Thread David Stevens
This feature allows the guest to request a UUID from the host for a
particular virtio_gpu resource. The UUID can then be shared with other
virtio devices, to allow the other host devices to access the
virtio_gpu's corresponding host resource.

Signed-off-by: David Stevens 
---
 include/uapi/linux/virtio_gpu.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 0c85914d9369..9721d58b4d58 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -50,6 +50,10 @@
  * VIRTIO_GPU_CMD_GET_EDID
  */
 #define VIRTIO_GPU_F_EDID 1
+/*
+ * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID
+ */
+#define VIRTIO_GPU_F_RESOURCE_UUID   2
 
 enum virtio_gpu_ctrl_type {
VIRTIO_GPU_UNDEFINED = 0,
@@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_CMD_GET_CAPSET_INFO,
VIRTIO_GPU_CMD_GET_CAPSET,
VIRTIO_GPU_CMD_GET_EDID,
+   VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
 
/* 3d commands */
VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
@@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_RESP_OK_CAPSET_INFO,
VIRTIO_GPU_RESP_OK_CAPSET,
VIRTIO_GPU_RESP_OK_EDID,
+   VIRTIO_GPU_RESP_OK_RESOURCE_UUID,
 
/* error responses */
VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
@@ -340,4 +346,17 @@ enum virtio_gpu_formats {
VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM  = 134,
 };
 
+/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */
+struct virtio_gpu_resource_assign_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __le32 resource_id;
+   __le32 padding;
+};
+
+/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */
+struct virtio_gpu_resp_resource_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __u8 uuid[16];
+};
+
 #endif
-- 
2.28.0.220.ged08abb693-goog



Re: [virtio-dev] Re: [PATCH v5 0/3] Support virtio cross-device resources

2020-06-22 Thread David Stevens
Unless there are any remaining objections to these patches, what are
the next steps towards getting these merged? Sorry, I'm not familiar
with the workflow for contributing patches to Linux.

Thanks,
David

On Tue, Jun 9, 2020 at 6:53 PM Michael S. Tsirkin  wrote:
>
> On Tue, Jun 09, 2020 at 10:25:15AM +0900, David Stevens wrote:
> > This patchset implements the current proposal for virtio cross-device
> > resource sharing [1]. It will be used to import virtio resources into
> > the virtio-video driver currently under discussion [2]. The patch
> > under consideration to add support in the virtio-video driver is [3].
> > It uses the APIs from v3 of this series, but the changes to update it
> > are relatively minor.
> >
> > This patchset adds a new flavor of dma-bufs that supports querying the
> > underlying virtio object UUID, as well as adding support for exporting
> > resources from virtgpu.
>
> Gerd, David, if possible, please test this in configuration with
> virtual VTD enabled but with iommu_platform=off
> to make sure we didn't break this config.
>
>
> Besides that, for virtio parts:
>
> Acked-by: Michael S. Tsirkin 
>
>
> > [1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
> > [2] https://markmail.org/thread/p5d3k566srtdtute
> > [3] https://markmail.org/thread/j4xlqaaim266qpks
> >
> > v4 -> v5 changes:
> >  - Remove virtio_dma_buf_export_info.
> >
> > David Stevens (3):
> >   virtio: add dma-buf support for exported objects
> >   virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
> >   drm/virtio: Support virtgpu exported resources
> >
> >  drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
> >  drivers/gpu/drm/virtio/virtgpu_drv.h   | 20 ++
> >  drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
> >  drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
> >  drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
> >  drivers/virtio/Makefile|  2 +-
> >  drivers/virtio/virtio.c|  6 ++
> >  drivers/virtio/virtio_dma_buf.c| 82 ++
> >  include/linux/virtio.h |  1 +
> >  include/linux/virtio_dma_buf.h | 37 ++
> >  include/uapi/linux/virtio_gpu.h| 19 +
> >  11 files changed, 321 insertions(+), 4 deletions(-)
> >  create mode 100644 drivers/virtio/virtio_dma_buf.c
> >  create mode 100644 include/linux/virtio_dma_buf.h
> >
> > --
> > 2.27.0.278.ge193c7cf3a9-goog
>
>
> -
> To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org
>


Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects

2020-06-18 Thread David Stevens
On Thu, Jun 18, 2020 at 9:29 PM Guennadi Liakhovetski
 wrote:
>
> Hi Michael,
>
> On Thu, Jun 04, 2020 at 03:05:23PM -0400, Michael S. Tsirkin wrote:
> > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote:
> > > This change adds a new flavor of dma-bufs that can be used by virtio
> > > drivers to share exported objects. A virtio dma-buf can be queried by
> > > virtio drivers to obtain the UUID which identifies the underlying
> > > exported object.
> > >
> > > Signed-off-by: David Stevens 
> >
> > Is this just for graphics? If yes I'd rather we put it in the graphics
> > driver. We can always move it later ...
>
> Wouldn't this be the API that audio virtualisation will have to use to share
> buffers between the host and any guests?

The new flavor of dma-buf isn't directly related to sharing buffers
between the host and the guest. The purpose of this API is to help
share buffers between multiple virtio devices - e.g. share buffers
created by a virtio-gpu device with a virtio-video device. In
particular, the new flavor of dma-buf provides a mechanism to attach
identifying metadata to a dma-buf object that is shared between
different virtio drivers in a single guest. This identifying metadata
can then be passed to the importing device and used to fetch some
resource from the exporting device. But the new flavor of dma-buf is
an abstraction within the guest kernel, independent of the host-guest
boundary, and it's definitely not necessary if we're only talking
about a single virtio subsystem.
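
To make that concrete, the importing driver's side might look
something like this (the command struct and all 'example_' names are
hypothetical stand-ins for whatever the importing device, e.g.
virtio-video, ends up defining; the only real API used here is
virtio_dma_buf_get_uuid()):

#include <linux/dma-buf.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/virtio_dma_buf.h>

/* Hypothetical device command carrying the identifying metadata. */
struct example_import_cmd {
	__le32 type;
	__u8 uuid[16];
};

static int example_fill_import_cmd(struct dma_buf *buf,
				   struct example_import_cmd *cmd)
{
	uuid_t uuid;
	int ret = virtio_dma_buf_get_uuid(buf, &uuid);

	if (ret)
		return ret;	/* not a virtio dma-buf */
	export_uuid(cmd->uuid, &uuid);
	return 0;
}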

-David

> Thanks
> Guennadi
>
> > > ---
> > >  drivers/virtio/Makefile |  2 +-
> > >  drivers/virtio/virtio.c |  6 +++
> > >  drivers/virtio/virtio_dma_buf.c | 89 +
> > >  include/linux/virtio.h  |  1 +
> > >  include/linux/virtio_dma_buf.h  | 58 +
> > >  5 files changed, 155 insertions(+), 1 deletion(-)
> > >  create mode 100644 drivers/virtio/virtio_dma_buf.c
> > >  create mode 100644 include/linux/virtio_dma_buf.h
> > >
> > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> > > index 29a1386ecc03..ecdae5b596de 100644
> > > --- a/drivers/virtio/Makefile
> > > +++ b/drivers/virtio/Makefile
> > > @@ -1,5 +1,5 @@
> > >  # SPDX-License-Identifier: GPL-2.0
> > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
> > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
> > >  obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> > >  obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> > >  virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > index a977e32a88f2..5d46f0ded92d 100644
> > > --- a/drivers/virtio/virtio.c
> > > +++ b/drivers/virtio/virtio.c
> > > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
> > >  }
> > >  EXPORT_SYMBOL_GPL(register_virtio_device);
> > >
> > > +bool is_virtio_device(struct device *dev)
> > > +{
> > > +   return dev->bus == &virtio_bus;
> > > +}
> > > +EXPORT_SYMBOL_GPL(is_virtio_device);
> > > +
> > >  void unregister_virtio_device(struct virtio_device *dev)
> > >  {
> > > int index = dev->index; /* save for after device release */
> > > diff --git a/drivers/virtio/virtio_dma_buf.c 
> > > b/drivers/virtio/virtio_dma_buf.c
> > > new file mode 100644
> > > index ..23e3399b11ed
> > > --- /dev/null
> > > +++ b/drivers/virtio/virtio_dma_buf.c
> > > @@ -0,0 +1,89 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * dma-bufs for virtio exported objects
> > > + *
> > > + * Copyright (C) 2020 Google, Inc.
> > > + */
> > > +
> > > +#include 
> > > +
> > > +/**
> > > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported 
> > > object
> > > + *
> > > + * This wraps dma_buf_export() to allow virtio drivers to create a 
> > > dma-buf
> > > + * for an virtio exported object that can be queried by other virtio 
> > > drivers
> > > + * for the object's UUID.
> > > + */
> > > +struct dma_buf *virtio_dma_buf_export(
> > > +   const struct virtio_dma_buf_export_info *virtio_exp_info)
> > > +{
> > > +   struct dma_buf_export_info exp_info;
> > > +
> > > +   if (!virtio_exp_info->ops
> > > +   || virt

[PATCH v5 1/3] virtio: add dma-buf support for exported objects

2020-06-08 Thread David Stevens
This change adds a new flavor of dma-bufs that can be used by virtio
drivers to share exported objects. A virtio dma-buf can be queried by
virtio drivers to obtain the UUID which identifies the underlying
exported object.

Signed-off-by: David Stevens 
---
 drivers/virtio/Makefile |  2 +-
 drivers/virtio/virtio.c |  6 +++
 drivers/virtio/virtio_dma_buf.c | 82 +
 include/linux/virtio.h  |  1 +
 include/linux/virtio_dma_buf.h  | 37 +++
 5 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 29a1386ecc03..ecdae5b596de 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index a977e32a88f2..5d46f0ded92d 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(register_virtio_device);
 
+bool is_virtio_device(struct device *dev)
+{
+   return dev->bus == &virtio_bus;
+}
+EXPORT_SYMBOL_GPL(is_virtio_device);
+
 void unregister_virtio_device(struct virtio_device *dev)
 {
int index = dev->index; /* save for after device release */
diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c
new file mode 100644
index ..fa0d3a668f53
--- /dev/null
+++ b/drivers/virtio/virtio_dma_buf.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * dma-bufs for virtio exported objects
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+
+#include 
+
+/**
+ * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object
+ * @exp_info: [in] see dma_buf_export(). ops MUST refer to a dma_buf_ops
+ * struct embedded in a virtio_dma_buf_ops.
+ *
+ * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf
+ * for a virtio exported object that can be queried by other virtio drivers
+ * for the object's UUID.
+ */
+struct dma_buf *virtio_dma_buf_export(
+   const struct dma_buf_export_info *exp_info)
+{
+   const struct virtio_dma_buf_ops *virtio_ops = container_of(
+   exp_info->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (!exp_info->ops
+   || exp_info->ops->attach != &virtio_dma_buf_attach
+   || !virtio_ops->get_uuid) {
+   return ERR_PTR(-EINVAL);
+   }
+
+   return dma_buf_export(exp_info);
+}
+EXPORT_SYMBOL(virtio_dma_buf_export);
+
+/**
+ * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs
+ */
+int virtio_dma_buf_attach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
+{
+   int ret;
+   const struct virtio_dma_buf_ops *ops = container_of(
+   dma_buf->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (ops->device_attach) {
+   ret = ops->device_attach(dma_buf, attach);
+   if (ret)
+   return ret;
+   }
+   return 0;
+}
+EXPORT_SYMBOL(virtio_dma_buf_attach);
+
+/**
+ * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf
+ * @dma_buf: buffer to query
+ */
+bool is_virtio_dma_buf(struct dma_buf *dma_buf)
+{
+   return dma_buf->ops->attach == &virtio_dma_buf_attach;
+}
+EXPORT_SYMBOL(is_virtio_dma_buf);
+
+/**
+ * virtio_dma_buf_get_uuid - gets the uuid of the virtio dma-buf's exported 
object
+ * @dma_buf: [in] buffer to query
+ * @uuid: [out] the uuid
+ *
+ * Returns: 0 on success, negative on failure.
+ */
+int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf,
+   uuid_t *uuid)
+{
+   const struct virtio_dma_buf_ops *ops = container_of(
+   dma_buf->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (!is_virtio_dma_buf(dma_buf))
+   return -EINVAL;
+
+   return ops->get_uuid(dma_buf, uuid);
+}
+EXPORT_SYMBOL(virtio_dma_buf_get_uuid);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 15f906e4a748..9397e25616c4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct 
device *_dev)
 void virtio_add_status(struct virtio_device *dev, unsigned int status);
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
+bool is_virtio_device(struct device *dev);
 
 void virtio_break_device(struct virtio_device *dev);
 
diff --git 

[PATCH v5 3/3] drm/virtio: Support virtgpu exported resources

2020-06-08 Thread David Stevens
Add support for UUID-based resource sharing mechanism to virtgpu. This
implements the new virtgpu commands and hooks them up to dma-buf's
get_uuid callback.

Signed-off-by: David Stevens 
---
 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 20 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 5 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c 
b/drivers/gpu/drm/virtio/virtgpu_drv.c
index ab4bed78e656..b039f493bda9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -165,6 +165,7 @@ static unsigned int features[] = {
VIRTIO_GPU_F_VIRGL,
 #endif
VIRTIO_GPU_F_EDID,
+   VIRTIO_GPU_F_RESOURCE_UUID,
 };
 static struct virtio_driver virtio_gpu_driver = {
.feature_table = features,
@@ -202,6 +203,8 @@ static struct drm_driver driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_mmap = drm_gem_prime_mmap,
+   .gem_prime_export = virtgpu_gem_prime_export,
+   .gem_prime_import = virtgpu_gem_prime_import,
.gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
 
.gem_create_object = virtio_gpu_create_object,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 49bebdee6d91..39dc907aa805 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -49,6 +49,10 @@
 #define DRIVER_MINOR 1
 #define DRIVER_PATCHLEVEL 0
 
+#define UUID_INITIALIZING 0
+#define UUID_INITIALIZED 1
+#define UUID_INITIALIZATION_FAILED 2
+
 struct virtio_gpu_object_params {
uint32_t format;
uint32_t width;
@@ -71,6 +75,9 @@ struct virtio_gpu_object {
uint32_t hw_res_handle;
bool dumb;
bool created;
+
+   int uuid_state;
+   uuid_t uuid;
 };
 #define gem_to_virtio_gpu_obj(gobj) \
container_of((gobj), struct virtio_gpu_object, base.base)
@@ -200,6 +207,7 @@ struct virtio_gpu_device {
bool has_virgl_3d;
bool has_edid;
bool has_indirect;
+   bool has_resource_assign_uuid;
 
struct work_struct config_changed_work;
 
@@ -210,6 +218,8 @@ struct virtio_gpu_device {
struct virtio_gpu_drv_capset *capsets;
uint32_t num_capsets;
struct list_head cap_cache;
+
+   spinlock_t resource_export_lock;
 };
 
 struct virtio_gpu_fpriv {
@@ -335,6 +345,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct 
*work);
 
 void virtio_gpu_notify(struct virtio_gpu_device *vgdev);
 
+int
+virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev,
+   struct virtio_gpu_object_array *objs);
+
 /* virtgpu_display.c */
 void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
@@ -366,6 +380,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device 
*vgdev,
 bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo);
 
 /* virtgpu_prime.c */
+struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj,
+int flags);
+struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev,
+   struct dma_buf *buf);
+int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj,
+  uuid_t *uuid);
 struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
struct drm_device *dev, struct dma_buf_attachment *attach,
struct sg_table *sgt);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c 
b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 023a030ca7b9..7bcd0c75effa 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev)
vgdev->dev = dev->dev;
 
 	spin_lock_init(&vgdev->display_info_lock);
+	spin_lock_init(&vgdev->resource_export_lock);
 	ida_init(&vgdev->ctx_id_ida);
 	ida_init(&vgdev->resource_ida);
 	init_waitqueue_head(&vgdev->resp_wq);
@@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev)
if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
vgdev->has_indirect = true;
}
+   if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) {
+   vgdev->has_resource_assign_uuid = true;
+   }
 
DRM_INFO("features: %cvirgl %cedid\n",
 vgdev->has_virgl_3d ? '+' : '-',
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c 
b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 050d24c39a8f..acd14ef73d56 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@

[PATCH v5 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature

2020-06-08 Thread David Stevens
This feature allows the guest to request a UUID from the host for a
particular virtio_gpu resource. The UUID can then be shared with other
virtio devices, to allow the other host devices to access the
virtio_gpu's corresponding host resource.

Signed-off-by: David Stevens 
---
 include/uapi/linux/virtio_gpu.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 0c85914d9369..9721d58b4d58 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -50,6 +50,10 @@
  * VIRTIO_GPU_CMD_GET_EDID
  */
 #define VIRTIO_GPU_F_EDID 1
+/*
+ * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID
+ */
+#define VIRTIO_GPU_F_RESOURCE_UUID   2
 
 enum virtio_gpu_ctrl_type {
VIRTIO_GPU_UNDEFINED = 0,
@@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_CMD_GET_CAPSET_INFO,
VIRTIO_GPU_CMD_GET_CAPSET,
VIRTIO_GPU_CMD_GET_EDID,
+   VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
 
/* 3d commands */
VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
@@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_RESP_OK_CAPSET_INFO,
VIRTIO_GPU_RESP_OK_CAPSET,
VIRTIO_GPU_RESP_OK_EDID,
+   VIRTIO_GPU_RESP_OK_RESOURCE_UUID,
 
/* error responses */
VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
@@ -340,4 +346,17 @@ enum virtio_gpu_formats {
VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM  = 134,
 };
 
+/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */
+struct virtio_gpu_resource_assign_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __le32 resource_id;
+   __le32 padding;
+};
+
+/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */
+struct virtio_gpu_resp_resource_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __u8 uuid[16];
+};
+
 #endif
-- 
2.27.0.278.ge193c7cf3a9-goog



[PATCH v5 0/3] Support virtio cross-device resources

2020-06-08 Thread David Stevens
This patchset implements the current proposal for virtio cross-device
resource sharing [1]. It will be used to import virtio resources into
the virtio-video driver currently under discussion [2]. The patch
under consideration to add support in the virtio-video driver is [3].
It uses the APIs from v3 of this series, but the changes to update it
are relatively minor.

This patchset adds a new flavor of dma-bufs that supports querying the
underlying virtio object UUID, as well as adding support for exporting
resources from virtgpu.

[1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
[2] https://markmail.org/thread/p5d3k566srtdtute
[3] https://markmail.org/thread/j4xlqaaim266qpks

v4 -> v5 changes:
 - Remove virtio_dma_buf_export_info.

David Stevens (3):
  virtio: add dma-buf support for exported objects
  virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
  drm/virtio: Support virtgpu exported resources

 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 20 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 drivers/virtio/Makefile|  2 +-
 drivers/virtio/virtio.c|  6 ++
 drivers/virtio/virtio_dma_buf.c| 82 ++
 include/linux/virtio.h |  1 +
 include/linux/virtio_dma_buf.h | 37 ++
 include/uapi/linux/virtio_gpu.h| 19 +
 11 files changed, 321 insertions(+), 4 deletions(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

-- 
2.27.0.278.ge193c7cf3a9-goog



Re: [PATCH v3 4/4] drm/virtio: Support virtgpu exported resources

2020-06-08 Thread David Stevens
On Mon, Jun 8, 2020 at 6:43 PM Michael S. Tsirkin  wrote:
>
> On Fri, May 15, 2020 at 04:26:15PM +0900, David Stevens wrote:
> > > > + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) {
> > > > + vgdev->has_resource_assign_uuid = true;
> > > > + }
> > >
> > >
> > > Just a question: this relies on DMA bufs so I assume it is
> > > not really assumed to work when DMA API is bypassed, right?
> > > Rather than worry what does it mean, how about just
> > > disabling  this feature without PLATFORM_DMA for now?
> >
> > By PLATFORM_DMA, do you mean CONFIG_DMA_SHARED_BUFFER?
>
> Sorry, no. I mean VIRTIO_F_IOMMU_PLATFORM which in the
> future will be renamed to VIRTIO_F_PLATFORM_ACCESS.

Shouldn't things work independent of whether or not that feature is
set? If a virtio driver properly uses the dma_buf APIs (which virtgpu
seems to), then that should take care of any mapping/synchronization
related to VIRTIO_F_IOMMU_PLATFORM. If anything, the case where
VIRTIO_F_IOMMU_PLATFORM isn't set is easier, since then we know that
the "the device has same access [sic] to memory addresses supplied to
it as the driver has", according to the specification.

-David


Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects

2020-06-08 Thread David Stevens
On Mon, Jun 8, 2020 at 6:05 PM Michael S. Tsirkin  wrote:
>
> On Mon, Jun 08, 2020 at 05:32:26PM +0900, David Stevens wrote:
> > On Mon, Jun 8, 2020 at 3:00 PM Michael S. Tsirkin  wrote:
> > >
> > > On Mon, Jun 08, 2020 at 10:33:09AM +0900, David Stevens wrote:
> > > > On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin  
> > > > wrote:
> > > > >
> > > > > On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote:
> > > > > > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote:
> > > > > > > > This change adds a new flavor of dma-bufs that can be used by 
> > > > > > > > virtio
> > > > > > > > drivers to share exported objects. A virtio dma-buf can be 
> > > > > > > > queried by
> > > > > > > > virtio drivers to obtain the UUID which identifies the 
> > > > > > > > underlying
> > > > > > > > exported object.
> > > > > > > >
> > > > > > > > Signed-off-by: David Stevens 
> > > > > > >
> > > > > > > Is this just for graphics? If yes I'd rather we put it in the 
> > > > > > > graphics
> > > > > > > driver. We can always move it later ...
> > > > > >
> > > > > > As stated in the cover letter, this will be used by virtio-video.
> > > > > >
> > > > > > The proposed virtio-video patches: 
> > > > > > https://markmail.org/thread/p5d3k566srtdtute
> > > > > > The patch which imports these dma-bufs (slightly out of date, uses v3
> > > > > > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks
> > > > > >
> > > > > > > > ---
> > > > > > > >  drivers/virtio/Makefile |  2 +-
> > > > > > > >  drivers/virtio/virtio.c |  6 +++
> > > > > > > >  drivers/virtio/virtio_dma_buf.c | 89 
> > > > > > > > +
> > > > > > > >  include/linux/virtio.h  |  1 +
> > > > > > > >  include/linux/virtio_dma_buf.h  | 58 +
> > > > > > > >  5 files changed, 155 insertions(+), 1 deletion(-)
> > > > > > > >  create mode 100644 drivers/virtio/virtio_dma_buf.c
> > > > > > > >  create mode 100644 include/linux/virtio_dma_buf.h
> > > > > > > >
> > > > > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> > > > > > > > index 29a1386ecc03..ecdae5b596de 100644
> > > > > > > > --- a/drivers/virtio/Makefile
> > > > > > > > +++ b/drivers/virtio/Makefile
> > > > > > > > @@ -1,5 +1,5 @@
> > > > > > > >  # SPDX-License-Identifier: GPL-2.0
> > > > > > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
> > > > > > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
> > > > > > > >  obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> > > > > > > >  obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> > > > > > > >  virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> > > > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > > > index a977e32a88f2..5d46f0ded92d 100644
> > > > > > > > --- a/drivers/virtio/virtio.c
> > > > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct 
> > > > > > > > virtio_device *dev)
> > > > > > > >  }
> > > > > > > >  EXPORT_SYMBOL_GPL(register_virtio_device);
> > > > > > > >
> > > > > > > > +bool is_virtio_device(struct device *dev)
> > > > > > > > +{
> > > > > > > > + return dev->bus == &virtio_bus;
> > > > > > > > +}
> > > > > > > > +EXPORT_SYMBOL_GPL(is_virtio_device);
> > > > > > > > +
> > > > > >

Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects

2020-06-08 Thread David Stevens
On Mon, Jun 8, 2020 at 3:00 PM Michael S. Tsirkin  wrote:
>
> On Mon, Jun 08, 2020 at 10:33:09AM +0900, David Stevens wrote:
> > On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin  wrote:
> > >
> > > On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote:
> > > > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin  
> > > > wrote:
> > > > >
> > > > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote:
> > > > > > This change adds a new flavor of dma-bufs that can be used by virtio
> > > > > > drivers to share exported objects. A virtio dma-buf can be queried 
> > > > > > by
> > > > > > virtio drivers to obtain the UUID which identifies the underlying
> > > > > > exported object.
> > > > > >
> > > > > > Signed-off-by: David Stevens 
> > > > >
> > > > > Is this just for graphics? If yes I'd rather we put it in the graphics
> > > > > driver. We can always move it later ...
> > > >
> > > > As stated in the cover letter, this will be used by virtio-video.
> > > >
> > > > The proposed virtio-video patches: 
> > > > https://markmail.org/thread/p5d3k566srtdtute
> > > > The patch which imports these dma-bufs (slightly out of date, uses v3
> > > > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks
> > > >
> > > > > > ---
> > > > > >  drivers/virtio/Makefile |  2 +-
> > > > > >  drivers/virtio/virtio.c |  6 +++
> > > > > >  drivers/virtio/virtio_dma_buf.c | 89 
> > > > > > +
> > > > > >  include/linux/virtio.h  |  1 +
> > > > > >  include/linux/virtio_dma_buf.h  | 58 +
> > > > > >  5 files changed, 155 insertions(+), 1 deletion(-)
> > > > > >  create mode 100644 drivers/virtio/virtio_dma_buf.c
> > > > > >  create mode 100644 include/linux/virtio_dma_buf.h
> > > > > >
> > > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> > > > > > index 29a1386ecc03..ecdae5b596de 100644
> > > > > > --- a/drivers/virtio/Makefile
> > > > > > +++ b/drivers/virtio/Makefile
> > > > > > @@ -1,5 +1,5 @@
> > > > > >  # SPDX-License-Identifier: GPL-2.0
> > > > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
> > > > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
> > > > > >  obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> > > > > >  obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> > > > > >  virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> > > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > > index a977e32a88f2..5d46f0ded92d 100644
> > > > > > --- a/drivers/virtio/virtio.c
> > > > > > +++ b/drivers/virtio/virtio.c
> > > > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct 
> > > > > > virtio_device *dev)
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(register_virtio_device);
> > > > > >
> > > > > > +bool is_virtio_device(struct device *dev)
> > > > > > +{
> > > > > > + return dev->bus == &virtio_bus;
> > > > > > +}
> > > > > > +EXPORT_SYMBOL_GPL(is_virtio_device);
> > > > > > +
> > > > > >  void unregister_virtio_device(struct virtio_device *dev)
> > > > > >  {
> > > > > >   int index = dev->index; /* save for after device release */
> > > > > > diff --git a/drivers/virtio/virtio_dma_buf.c 
> > > > > > b/drivers/virtio/virtio_dma_buf.c
> > > > > > new file mode 100644
> > > > > > index ..23e3399b11ed
> > > > > > --- /dev/null
> > > > > > +++ b/drivers/virtio/virtio_dma_buf.c
> > > > > > @@ -0,0 +1,89 @@
> > > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > > +/*
> > > > > > + * dma-bufs for virtio exported objects
> > > > > > + *
> > > > > > + * Copyright (C) 2020 Google, Inc.
> > > > > > + */
> > > > > > +

Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects

2020-06-07 Thread David Stevens
On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin  wrote:
>
> On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote:
> > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin  wrote:
> > >
> > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote:
> > > > This change adds a new flavor of dma-bufs that can be used by virtio
> > > > drivers to share exported objects. A virtio dma-buf can be queried by
> > > > virtio drivers to obtain the UUID which identifies the underlying
> > > > exported object.
> > > >
> > > > Signed-off-by: David Stevens 
> > >
> > > Is this just for graphics? If yes I'd rather we put it in the graphics
> > > driver. We can always move it later ...
> >
> > As stated in the cover letter, this will be used by virtio-video.
> >
> > The proposed virtio-video patches: 
> > https://markmail.org/thread/p5d3k566srtdtute
> > The patch which imports these dma-bufs (slightly out of date, uses v3
> > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks
> >
> > > > ---
> > > >  drivers/virtio/Makefile |  2 +-
> > > >  drivers/virtio/virtio.c |  6 +++
> > > >  drivers/virtio/virtio_dma_buf.c | 89 +
> > > >  include/linux/virtio.h  |  1 +
> > > >  include/linux/virtio_dma_buf.h  | 58 +
> > > >  5 files changed, 155 insertions(+), 1 deletion(-)
> > > >  create mode 100644 drivers/virtio/virtio_dma_buf.c
> > > >  create mode 100644 include/linux/virtio_dma_buf.h
> > > >
> > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> > > > index 29a1386ecc03..ecdae5b596de 100644
> > > > --- a/drivers/virtio/Makefile
> > > > +++ b/drivers/virtio/Makefile
> > > > @@ -1,5 +1,5 @@
> > > >  # SPDX-License-Identifier: GPL-2.0
> > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
> > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
> > > >  obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> > > >  obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> > > >  virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > index a977e32a88f2..5d46f0ded92d 100644
> > > > --- a/drivers/virtio/virtio.c
> > > > +++ b/drivers/virtio/virtio.c
> > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device 
> > > > *dev)
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(register_virtio_device);
> > > >
> > > > +bool is_virtio_device(struct device *dev)
> > > > +{
> > > > + return dev->bus == &virtio_bus;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(is_virtio_device);
> > > > +
> > > >  void unregister_virtio_device(struct virtio_device *dev)
> > > >  {
> > > >   int index = dev->index; /* save for after device release */
> > > > diff --git a/drivers/virtio/virtio_dma_buf.c 
> > > > b/drivers/virtio/virtio_dma_buf.c
> > > > new file mode 100644
> > > > index ..23e3399b11ed
> > > > --- /dev/null
> > > > +++ b/drivers/virtio/virtio_dma_buf.c
> > > > @@ -0,0 +1,89 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > +/*
> > > > + * dma-bufs for virtio exported objects
> > > > + *
> > > > + * Copyright (C) 2020 Google, Inc.
> > > > + */
> > > > +
> > > > +#include 
> > > > +
> > > > +/**
> > > > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported 
> > > > object
> > > > + *
> > > > + * This wraps dma_buf_export() to allow virtio drivers to create a 
> > > > dma-buf
> > > > + * for a virtio exported object that can be queried by other virtio 
> > > > drivers
> > > > + * for the object's UUID.
> > > > + */
> > > > +struct dma_buf *virtio_dma_buf_export(
> > > > + const struct virtio_dma_buf_export_info *virtio_exp_info)
> > > > +{
> > > > + struct dma_buf_export_info exp_info;
> > > > +
> > > > + if (!virtio_exp_info->ops
> > > > + || virtio_exp_info->ops->ops.attach != &virtio_dma_buf_attach
> > > > + 

Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects

2020-06-04 Thread David Stevens
On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin  wrote:
>
> On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote:
> > This change adds a new flavor of dma-bufs that can be used by virtio
> > drivers to share exported objects. A virtio dma-buf can be queried by
> > virtio drivers to obtain the UUID which identifies the underlying
> > exported object.
> >
> > Signed-off-by: David Stevens 
>
> Is this just for graphics? If yes I'd rather we put it in the graphics
> driver. We can always move it later ...

As stated in the cover letter, this will be used by virtio-video.

The proposed virtio-video patches: https://markmail.org/thread/p5d3k566srtdtute
The patch which imports these dma-bufs (slightly out of date, uses v3
of this patch set): https://markmail.org/thread/j4xlqaaim266qpks

> > ---
> >  drivers/virtio/Makefile |  2 +-
> >  drivers/virtio/virtio.c |  6 +++
> >  drivers/virtio/virtio_dma_buf.c | 89 +
> >  include/linux/virtio.h  |  1 +
> >  include/linux/virtio_dma_buf.h  | 58 +
> >  5 files changed, 155 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/virtio/virtio_dma_buf.c
> >  create mode 100644 include/linux/virtio_dma_buf.h
> >
> > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> > index 29a1386ecc03..ecdae5b596de 100644
> > --- a/drivers/virtio/Makefile
> > +++ b/drivers/virtio/Makefile
> > @@ -1,5 +1,5 @@
> >  # SPDX-License-Identifier: GPL-2.0
> > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
> > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
> >  obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> >  obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> >  virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > index a977e32a88f2..5d46f0ded92d 100644
> > --- a/drivers/virtio/virtio.c
> > +++ b/drivers/virtio/virtio.c
> > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
> >  }
> >  EXPORT_SYMBOL_GPL(register_virtio_device);
> >
> > +bool is_virtio_device(struct device *dev)
> > +{
> > + return dev->bus == &virtio_bus;
> > +}
> > +EXPORT_SYMBOL_GPL(is_virtio_device);
> > +
> >  void unregister_virtio_device(struct virtio_device *dev)
> >  {
> >   int index = dev->index; /* save for after device release */
> > diff --git a/drivers/virtio/virtio_dma_buf.c 
> > b/drivers/virtio/virtio_dma_buf.c
> > new file mode 100644
> > index ..23e3399b11ed
> > --- /dev/null
> > +++ b/drivers/virtio/virtio_dma_buf.c
> > @@ -0,0 +1,89 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * dma-bufs for virtio exported objects
> > + *
> > + * Copyright (C) 2020 Google, Inc.
> > + */
> > +
> > +#include 
> > +
> > +/**
> > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported 
> > object
> > + *
> > + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf
> > + * for a virtio exported object that can be queried by other virtio 
> > drivers
> > + * for the object's UUID.
> > + */
> > +struct dma_buf *virtio_dma_buf_export(
> > + const struct virtio_dma_buf_export_info *virtio_exp_info)
> > +{
> > + struct dma_buf_export_info exp_info;
> > +
> > + if (!virtio_exp_info->ops
> > + || virtio_exp_info->ops->ops.attach != &virtio_dma_buf_attach
> > + || !virtio_exp_info->ops->get_uuid) {
> > + return ERR_PTR(-EINVAL);
> > + }
> > +
> > + exp_info.exp_name = virtio_exp_info->exp_name;
> > + exp_info.owner = virtio_exp_info->owner;
> > + exp_info.ops = &virtio_exp_info->ops->ops;
> > + exp_info.size = virtio_exp_info->size;
> > + exp_info.flags = virtio_exp_info->flags;
> > + exp_info.resv = virtio_exp_info->resv;
> > + exp_info.priv = virtio_exp_info->priv;
> > + BUILD_BUG_ON(sizeof(struct virtio_dma_buf_export_info)
> > +  != sizeof(struct dma_buf_export_info));
>
> This is the only part that gives me pause. Why do we need this hack?
> What's wrong with just using dma_buf_export_info directly,
> and if you want the virtio ops, just using container_off?

This approach provides a more explicit type signature and a little
more type safety, I think. If others don't think it's a worthwhile
tradeoff, I can remove it.

-David
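
For reference, a minimal sketch of the container_of() alternative being
discussed, assuming callers pass a plain dma_buf_export_info whose ->ops
points at the dma_buf_ops embedded in a virtio_dma_buf_ops. This is
illustrative only, not part of the posted patch:

/* Hypothetical alternative: no mirrored export-info struct; upcast
 * from the embedded dma_buf_ops with container_of() instead. */
struct dma_buf *virtio_dma_buf_export(const struct dma_buf_export_info *exp_info)
{
	const struct virtio_dma_buf_ops *virtio_ops;

	if (!exp_info->ops ||
	    exp_info->ops->attach != &virtio_dma_buf_attach)
		return ERR_PTR(-EINVAL);

	virtio_ops = container_of(exp_info->ops,
				  const struct virtio_dma_buf_ops, ops);
	if (!virtio_ops->get_uuid)
		return ERR_PTR(-EINVAL);

	return dma_buf_export(exp_info);
}

The trade-off is the one described above: the explicit type signature and
the compile-time layout check go away, in exchange for not mirroring
dma_buf_export_info field by field.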


[PATCH v4 2/3] virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature

2020-05-26 Thread David Stevens
This feature allows the guest to request a UUID from the host for a
particular virtio_gpu resource. The UUID can then be shared with other
virtio devices, to allow the other host devices to access the
virtio_gpu's corresponding host resource.

Signed-off-by: David Stevens 
---
 include/uapi/linux/virtio_gpu.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 0c85914d9369..9721d58b4d58 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -50,6 +50,10 @@
  * VIRTIO_GPU_CMD_GET_EDID
  */
#define VIRTIO_GPU_F_EDID            1
+/*
+ * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID
+ */
+#define VIRTIO_GPU_F_RESOURCE_UUID   2
 
 enum virtio_gpu_ctrl_type {
VIRTIO_GPU_UNDEFINED = 0,
@@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_CMD_GET_CAPSET_INFO,
VIRTIO_GPU_CMD_GET_CAPSET,
VIRTIO_GPU_CMD_GET_EDID,
+   VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
 
/* 3d commands */
VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
@@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type {
VIRTIO_GPU_RESP_OK_CAPSET_INFO,
VIRTIO_GPU_RESP_OK_CAPSET,
VIRTIO_GPU_RESP_OK_EDID,
+   VIRTIO_GPU_RESP_OK_RESOURCE_UUID,
 
/* error responses */
VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
@@ -340,4 +346,17 @@ enum virtio_gpu_formats {
VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM  = 134,
 };
 
+/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */
+struct virtio_gpu_resource_assign_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __le32 resource_id;
+   __le32 padding;
+};
+
+/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */
+struct virtio_gpu_resp_resource_uuid {
+   struct virtio_gpu_ctrl_hdr hdr;
+   __u8 uuid[16];
+};
+
 #endif
-- 
2.27.0.rc0.183.gde8f92d652-goog
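
To make the new wire format concrete, a hedged sketch of how a guest
driver might fill the command; fill_assign_uuid() is a made-up helper,
and the queueing/response plumbing (the reply arrives as a
virtio_gpu_resp_resource_uuid with hdr.type
VIRTIO_GPU_RESP_OK_RESOURCE_UUID) is what patch 3/3 provides:

/* Illustrative only: build a RESOURCE_ASSIGN_UUID request for an
 * existing resource. virtio-gpu control fields are little-endian. */
static void fill_assign_uuid(struct virtio_gpu_resource_assign_uuid *cmd,
			     u32 resource_id)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID);
	cmd->resource_id = cpu_to_le32(resource_id);
}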



[PATCH v4 3/3] drm/virtio: Support virtgpu exported resources

2020-05-26 Thread David Stevens
Add support for UUID-based resource sharing mechanism to virtgpu. This
implements the new virtgpu commands and hooks them up to dma-buf's
get_uuid callback.

Signed-off-by: David Stevens 
---
 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 20 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 5 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c 
b/drivers/gpu/drm/virtio/virtgpu_drv.c
index ab4bed78e656..b039f493bda9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -165,6 +165,7 @@ static unsigned int features[] = {
VIRTIO_GPU_F_VIRGL,
 #endif
VIRTIO_GPU_F_EDID,
+   VIRTIO_GPU_F_RESOURCE_UUID,
 };
 static struct virtio_driver virtio_gpu_driver = {
.feature_table = features,
@@ -202,6 +203,8 @@ static struct drm_driver driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_mmap = drm_gem_prime_mmap,
+   .gem_prime_export = virtgpu_gem_prime_export,
+   .gem_prime_import = virtgpu_gem_prime_import,
.gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
 
.gem_create_object = virtio_gpu_create_object,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 49bebdee6d91..39dc907aa805 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -49,6 +49,10 @@
 #define DRIVER_MINOR 1
 #define DRIVER_PATCHLEVEL 0
 
+#define UUID_INITIALIZING 0
+#define UUID_INITIALIZED 1
+#define UUID_INITIALIZATION_FAILED 2
+
 struct virtio_gpu_object_params {
uint32_t format;
uint32_t width;
@@ -71,6 +75,9 @@ struct virtio_gpu_object {
uint32_t hw_res_handle;
bool dumb;
bool created;
+
+   int uuid_state;
+   uuid_t uuid;
 };
 #define gem_to_virtio_gpu_obj(gobj) \
container_of((gobj), struct virtio_gpu_object, base.base)
@@ -200,6 +207,7 @@ struct virtio_gpu_device {
bool has_virgl_3d;
bool has_edid;
bool has_indirect;
+   bool has_resource_assign_uuid;
 
struct work_struct config_changed_work;
 
@@ -210,6 +218,8 @@ struct virtio_gpu_device {
struct virtio_gpu_drv_capset *capsets;
uint32_t num_capsets;
struct list_head cap_cache;
+
+   spinlock_t resource_export_lock;
 };
 
 struct virtio_gpu_fpriv {
@@ -335,6 +345,10 @@ void virtio_gpu_dequeue_fence_func(struct work_struct 
*work);
 
 void virtio_gpu_notify(struct virtio_gpu_device *vgdev);
 
+int
+virtio_gpu_cmd_resource_assign_uuid(struct virtio_gpu_device *vgdev,
+   struct virtio_gpu_object_array *objs);
+
 /* virtgpu_display.c */
 void virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
@@ -366,6 +380,12 @@ int virtio_gpu_object_create(struct virtio_gpu_device 
*vgdev,
 bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo);
 
 /* virtgpu_prime.c */
+struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj,
+int flags);
+struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev,
+   struct dma_buf *buf);
+int virtgpu_gem_prime_get_uuid(struct drm_gem_object *obj,
+  uuid_t *uuid);
 struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
struct drm_device *dev, struct dma_buf_attachment *attach,
struct sg_table *sgt);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c 
b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 023a030ca7b9..7bcd0c75effa 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -125,6 +125,7 @@ int virtio_gpu_init(struct drm_device *dev)
vgdev->dev = dev->dev;
 
spin_lock_init(&vgdev->display_info_lock);
+   spin_lock_init(&vgdev->resource_export_lock);
ida_init(&vgdev->ctx_id_ida);
ida_init(&vgdev->resource_ida);
init_waitqueue_head(&vgdev->resp_wq);
@@ -153,6 +154,9 @@ int virtio_gpu_init(struct drm_device *dev)
if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
vgdev->has_indirect = true;
}
+   if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) {
+   vgdev->has_resource_assign_uuid = true;
+   }
 
DRM_INFO("features: %cvirgl %cedid\n",
 vgdev->has_virgl_3d ? '+' : '-',
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c 
b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 050d24c39a8f..a753bb70fcf1 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@

[PATCH v4 0/3] Support virtio cross-device resources

2020-05-26 Thread David Stevens
This patchset implements the current proposal for virtio cross-device
resource sharing [1]. It will be used to import virtio resources into
the virtio-video driver currently under discussion [2]. The patch
under consideration to add support in the virtio-video driver is [3].
It uses the APIs from v3 of this series, but the changes to update it
are relatively minor.

This patchset adds a new flavor of dma-bufs that supports querying the
underlying virtio object UUID, as well as adding support for exporting
resources from virtgpu.

[1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
[2] https://markmail.org/thread/p5d3k566srtdtute
[3] https://markmail.org/thread/j4xlqaaim266qpks

v3 -> v4 changes:
 - Replace dma-buf hooks with virtio dma-buf from v1.
 - Remove virtio_attach callback, as the work that had been done
   in that callback is now done on dma-buf export. The documented
   requirement that get_uuid only be called on attached virtio
   dma-bufs is also removed.
 - Rebase and add call to virtio_gpu_notify for ASSIGN_UUID.

David Stevens (3):
  virtio: add dma-buf support for exported objects
  virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
  drm/virtio: Support virtgpu exported resources

 drivers/gpu/drm/virtio/virtgpu_drv.c   |  3 +
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 20 ++
 drivers/gpu/drm/virtio/virtgpu_kms.c   |  4 ++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 98 +-
 drivers/gpu/drm/virtio/virtgpu_vq.c| 55 +++
 drivers/virtio/Makefile|  2 +-
 drivers/virtio/virtio.c|  6 ++
 drivers/virtio/virtio_dma_buf.c| 91 
 include/linux/virtio.h |  1 +
 include/linux/virtio_dma_buf.h | 58 +++
 include/uapi/linux/virtio_gpu.h| 19 +
 11 files changed, 353 insertions(+), 4 deletions(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

-- 
2.27.0.rc0.183.gde8f92d652-goog



[PATCH v4 1/3] virtio: add dma-buf support for exported objects

2020-05-26 Thread David Stevens
This change adds a new flavor of dma-bufs that can be used by virtio
drivers to share exported objects. A virtio dma-buf can be queried by
virtio drivers to obtain the UUID which identifies the underlying
exported object.

Signed-off-by: David Stevens 
---
 drivers/virtio/Makefile |  2 +-
 drivers/virtio/virtio.c |  6 +++
 drivers/virtio/virtio_dma_buf.c | 89 +
 include/linux/virtio.h  |  1 +
 include/linux/virtio_dma_buf.h  | 58 +
 5 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 drivers/virtio/virtio_dma_buf.c
 create mode 100644 include/linux/virtio_dma_buf.h

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 29a1386ecc03..ecdae5b596de 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index a977e32a88f2..5d46f0ded92d 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(register_virtio_device);
 
+bool is_virtio_device(struct device *dev)
+{
+   return dev->bus == &virtio_bus;
+}
+EXPORT_SYMBOL_GPL(is_virtio_device);
+
 void unregister_virtio_device(struct virtio_device *dev)
 {
int index = dev->index; /* save for after device release */
diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c
new file mode 100644
index ..23e3399b11ed
--- /dev/null
+++ b/drivers/virtio/virtio_dma_buf.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * dma-bufs for virtio exported objects
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+
+#include 
+
+/**
+ * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object
+ *
+ * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf
+ * for a virtio exported object that can be queried by other virtio drivers
+ * for the object's UUID.
+ */
+struct dma_buf *virtio_dma_buf_export(
+   const struct virtio_dma_buf_export_info *virtio_exp_info)
+{
+   struct dma_buf_export_info exp_info;
+
+   if (!virtio_exp_info->ops
+   || virtio_exp_info->ops->ops.attach != &virtio_dma_buf_attach
+   || !virtio_exp_info->ops->get_uuid) {
+   return ERR_PTR(-EINVAL);
+   }
+
+   exp_info.exp_name = virtio_exp_info->exp_name;
+   exp_info.owner = virtio_exp_info->owner;
+   exp_info.ops = &virtio_exp_info->ops->ops;
+   exp_info.size = virtio_exp_info->size;
+   exp_info.flags = virtio_exp_info->flags;
+   exp_info.resv = virtio_exp_info->resv;
+   exp_info.priv = virtio_exp_info->priv;
+   BUILD_BUG_ON(sizeof(struct virtio_dma_buf_export_info)
+!= sizeof(struct dma_buf_export_info));
+
+   return dma_buf_export(&exp_info);
+}
+EXPORT_SYMBOL(virtio_dma_buf_export);
+
+/**
+ * virtio_dma_buf_attach - mandatory attach callback for virtio dma-bufs
+ */
+int virtio_dma_buf_attach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
+{
+   int ret;
+   const struct virtio_dma_buf_ops *ops = container_of(
+   dma_buf->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (ops->device_attach) {
+   ret = ops->device_attach(dma_buf, attach);
+   if (ret)
+   return ret;
+   }
+   return 0;
+}
+EXPORT_SYMBOL(virtio_dma_buf_attach);
+
+/**
+ * is_virtio_dma_buf - returns true if the given dma-buf is a virtio dma-buf
+ * @dma_buf: buffer to query
+ */
+bool is_virtio_dma_buf(struct dma_buf *dma_buf)
+{
+   return dma_buf->ops->attach == &virtio_dma_buf_attach;
+}
+EXPORT_SYMBOL(is_virtio_dma_buf);
+
+/**
+ * virtio_dma_buf_get_uuid - gets the uuid of the virtio dma-buf's exported 
object
+ * @dma_buf: [in] buffer to query
+ * @uuid: [out] the uuid
+ *
+ * Returns: 0 on success, negative on failure.
+ */
+int virtio_dma_buf_get_uuid(struct dma_buf *dma_buf,
+   uuid_t *uuid)
+{
+   const struct virtio_dma_buf_ops *ops = container_of(
+   dma_buf->ops, const struct virtio_dma_buf_ops, ops);
+
+   if (!is_virtio_dma_buf(dma_buf))
+   return -EINVAL;
+
+   return ops->get_uuid(dma_buf, uuid);
+}
+EXPORT_SYMBOL(virtio_dma_buf_get_uuid);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 15f906e4a748..9397e25616c4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -128,6 +128,7 @@ static inline struct virtio_device *dev_to_virtio(struct 
device 
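
The archive truncates the include/linux/virtio.h hunk here. For
completeness, a sketch of how an importer such as virtio-video is meant
to consume the API above; my_import_virtio_object() is a made-up name,
and error handling is trimmed:

#include <linux/dma-buf.h>
#include <linux/uuid.h>
#include <linux/virtio_dma_buf.h>

/* Resolve a userspace-supplied dma-buf fd to the exported object's
 * UUID, refusing anything that is not a virtio dma-buf. */
static int my_import_virtio_object(int fd, uuid_t *uuid)
{
	struct dma_buf *buf = dma_buf_get(fd);
	int ret;

	if (IS_ERR(buf))
		return PTR_ERR(buf);

	if (!is_virtio_dma_buf(buf)) {
		ret = -EINVAL;
		goto out;
	}
	ret = virtio_dma_buf_get_uuid(buf, uuid);
out:
	dma_buf_put(buf);
	return ret;
}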

Re: [PATCH v3 4/4] drm/virtio: Support virtgpu exported resources

2020-05-15 Thread David Stevens
> > + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) {
> > + vgdev->has_resource_assign_uuid = true;
> > + }
>
>
> Just a question: this relies on DMA bufs so I assume it is
> not really assumed to work when DMA API is bypassed, right?
> Rather than worry what does it mean, how about just
> disabling  this feature without PLATFORM_DMA for now?

By PLATFORM_DMA, do you mean CONFIG_DMA_SHARED_BUFFER? Virtio-gpu
depends on DRM, which selects that feature. So I think DMA bufs should
always be available when virtio-gpu is present.

-David


Re: [PATCH v3 1/4] dma-buf: add support for virtio exported objects

2020-05-14 Thread David Stevens
On Thu, May 14, 2020 at 9:30 PM Daniel Vetter  wrote:
> On Thu, May 14, 2020 at 05:19:40PM +0900, David Stevens wrote:
> > Sorry for the duplicate reply, didn't notice this until now.
> >
> > > Just storing
> > > the uuid should be doable (assuming this doesn't change during the
> > > lifetime of the buffer), so no need for a callback.
> >
> > Directly storing the uuid doesn't work that well because of
> > synchronization issues. The uuid needs to be shared between multiple
> > virtio devices with independent command streams, so to prevent races
> > between importing and exporting, the exporting driver can't share the
> > uuid with other drivers until it knows that the device has finished
> > registering the uuid. That requires a round trip to and then back from
> > the device. Using a callback allows the latency from that round trip
> > registration to be hidden.
>
> Uh, that means you actually do something and there's locking involved.
> Makes stuff more complicated, invariant attributes are a lot easier
> generally. Registering that uuid just always doesn't work, and blocking
> when you're exporting?

Registering the id at creation and blocking in gem export is feasible,
but it doesn't work well for systems with a centralized buffer
allocator that doesn't support batch allocations (e.g. gralloc). In
such a system, the round trip latency would almost certainly be
included in the buffer allocation time. At least on the system I'm
working on, I suspect that would add 10s of milliseconds of startup
latency to video pipelines (although I haven't benchmarked the
difference). Doing the blocking as late as possible means most or all
of the latency can be hidden behind other pipeline setup work.

In terms of complexity, I think the synchronization would be basically
the same in either approach, just in different locations. All it would
do is alleviate the need for a callback to fetch the UUID.

-David
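
As a concrete illustration of hiding that round trip, a sketch of the
exporter side using a completion. Every name here is hypothetical except
the UUID_* state constants, which mirror the ones patch 3/3 adds:

#include <linux/completion.h>
#include <linux/dma-buf.h>
#include <linux/uuid.h>

struct my_exported_object {
	uuid_t uuid;
	int uuid_state;			/* UUID_INITIALIZING, ... */
	struct completion uuid_ready;	/* init_completion() at export */
};

/* get_uuid callback: only importers that race with the in-flight
 * registration ever block here; everyone else returns immediately. */
static int my_get_uuid(struct dma_buf *dma_buf, uuid_t *uuid)
{
	struct my_exported_object *obj = dma_buf->priv;
	int ret = wait_for_completion_interruptible(&obj->uuid_ready);

	if (ret)
		return ret;
	if (obj->uuid_state != UUID_INITIALIZED)
		return -ENODEV;
	uuid_copy(uuid, &obj->uuid);
	return 0;
}

/* Device response handler for the assign-uuid reply: record the
 * result and release any waiters. */
static void my_assign_uuid_done(struct my_exported_object *obj,
				const uuid_t *uuid, bool ok)
{
	if (ok) {
		uuid_copy(&obj->uuid, uuid);
		obj->uuid_state = UUID_INITIALIZED;
	} else {
		obj->uuid_state = UUID_INITIALIZATION_FAILED;
	}
	complete_all(&obj->uuid_ready);
}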


Re: [PATCH v3 1/4] dma-buf: add support for virtio exported objects

2020-05-14 Thread David Stevens
Sorry for the duplicate reply, didn't notice this until now.

> Just storing
> the uuid should be doable (assuming this doesn't change during the
> lifetime of the buffer), so no need for a callback.

Directly storing the uuid doesn't work that well because of
synchronization issues. The uuid needs to be shared between multiple
virtio devices with independent command streams, so to prevent races
between importing and exporting, the exporting driver can't share the
uuid with other drivers until it knows that the device has finished
registering the uuid. That requires a round trip to and then back from
the device. Using a callback allows the latency from that round trip
registration to be hidden.

-David


Re: [PATCH v3 1/4] dma-buf: add support for virtio exported objects

2020-05-13 Thread David Stevens
On Thu, May 14, 2020 at 12:45 AM Daniel Vetter  wrote:
> On Wed, Mar 11, 2020 at 12:20 PM David Stevens  wrote:
> >
> > This change adds a new dma-buf operation that allows dma-bufs to be used
> > by virtio drivers to share exported objects. The new operation allows
> > the importing driver to query the exporting driver for the UUID which
> > identifies the underlying exported object.
> >
> > Signed-off-by: David Stevens 
>
> Adding Tomasz Figa, I've discussed this with him at elce last year I
> think. Just to make sure.
>
> Bunch of things:
> - obviously we need the users of this in a few drivers, can't really
> review anything stand-alone

Here is a link to the usage of this feature by the currently under
development virtio-video driver:
https://markmail.org/thread/j4xlqaaim266qpks

> - adding very specific ops to the generic interface is rather awkward,
> eventually everyone wants that and we end up in a mess. I think the
> best solution here would be if we create a struct virtio_dma_buf which
> subclasses dma-buf, add a (hopefully safe) runtime upcasting
> functions, and then a virtio_dma_buf_get_uuid() function. Just storing
> the uuid should be doable (assuming this doesn't change during the
> lifetime of the buffer), so no need for a callback.

So you would prefer a solution similar to the original version of this
patchset? https://markmail.org/message/z7if4u56q5fmaok4


Re: [2.6 patch] unexport icmpmsg_statistics

2007-10-24 Thread David Stevens
My bad -- I see what it's doing, and it looks ok after all.

I thought I saw an INMSGS (but didn't).  These are ICMP errors that
went through icmp_rcv() and were counted correctly before getting
to the protocol error handlers. These are failures due mostly to not
having enough, or the right protocol info in the error packet being
handled. I'm not sure I'd count those as ICMP errors, since the
ICMP header itself is correct, but ok...

SCTP doesn't look so bad, though I think the references are
still questionable (but debatable) as ICMP errors.

sctp_v4_err is incrementing ICMP_MIB_INERRORS if there
isn't enough IP header to find the ports, I see. I'm not sure
that counts as an ICMP error, but it's not so terrible.

It's doing the same thing if a lookup fails to match "vtag" from
the encapsulated error packet. Again, I don't know that those
are ICMP errors (which normally are something wrong with
the ICMP header).

So, I stand corrected, and sorry about the histrionics. Since
these are arguably ICMP errors, and since errors is the only
thing being MIB-counted in DCCP and SCTP, then it now looks
ok to me as-is, and also ok to remove icmpmsg_statistics from
exporting.

+-DLS


Re: [2.6 patch] unexport icmpmsg_statistics

2007-10-24 Thread David Stevens
I took a look at the DCCP references, and I think they're just
incrementing the wrong MIB variable -- e.g., it's incrementing
ICMP_MIB_INERRORS when the skb length is less than the
header indicates. That's not an ICMP_MIB_INERRORS error,
that's an IPSTATS_MIB_INHDRERRORS error. ICMP_MIB_INERRORS
is when you receive an ICMP error packet; an IP header error
is something else entirely.

That's followed by a failed lookup incrementing ICMP_MIB_INERRORS
which should be an unknown port error in the transport MIB (assuming
it has one-- it's not an ICMP error; could be an IP error, if the address
isn't local, rather than unknown port).

In SCTP, it appears to have similar problems. SCTP errors are not
ICMP errors, though it perhaps should be calling icmp_send() to
send one to the offending host for some of the cases.

I haven't seen any ICMP-relevant stats correctly referenced in
these yet.

I don't want to patch them directly, since I can't easily test them;
if someone who works with DCCP and SCTP would like to, I'd
be happy to review. Any volunteers?

+-DLS
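
A sketch of the kind of change being argued for in the truncated-header
case; the surrounding code is paraphrased from memory, not quoted, and
the single-argument MIB macro forms of that era are assumed:

/* dccp_v4_err()-style handler: an embedded IP header that is too
 * short is an IP header accounting problem, not a received-ICMP-error
 * problem, so count it against the IP MIB. */
if (skb->len < required_len) {		/* required_len: the real bound */
	/* was: ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); */
	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
	return;
}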


Re: [2.6 patch] unexport icmpmsg_statistics

2007-10-24 Thread David Stevens
[EMAIL PROTECTED] wrote on 10/24/2007 12:14:37 PM:

> On Wed, Oct 24, 2007 at 12:07:45PM -0700, David Stevens wrote:
> > [EMAIL PROTECTED] wrote on 10/24/2007 09:24:10 AM:
> > 
> > > This patch removes the unused EXPORT_SYMBOL(icmpmsg_statistics).
> > > 
> > > Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>
> > > 
> > > ---
> > > 4ce74657ac0b1bdcb4c7bc359d05643f8cc4a08b 
> > > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> > > index 272c69e..233de06 100644
> > > --- a/net/ipv4/icmp.c
> > > +++ b/net/ipv4/icmp.c
> > > @@ -1104,5 +1104,4 @@ void __init icmp_init(struct net_proto_family 
> > *ops)
> > >  EXPORT_SYMBOL(icmp_err_convert);
> > >  EXPORT_SYMBOL(icmp_send);
> > >  EXPORT_SYMBOL(icmp_statistics);
> > > -EXPORT_SYMBOL(icmpmsg_statistics);
> > >  EXPORT_SYMBOL(xrlim_allow);
> > 
> > "icmpmsg_statistics" belongs with (and replaces some of the 
> > old...)
> > "icmp_statistics". I'm not sure that any modules use it, but I think 
you
> > should remove both or neither.
> 
> icmp_statistics is used by the dccp_ipv4 and sctp modules.

        The only items left in icmp_statistics are "InMsgs, InErrs,
OutMsgs, OutErrs", so if dccp and sctp are sending or receiving any in
or out ICMP messages, they should be using the new macros (which
reference icmpmsg_statistics, not icmp_statistics) to count them.
        I took a quick look at SCTP. I don't know if that's going
through icmp_rcv() or not; if so, I think it's double-counting; if not,
then it isn't counting the individual types (as it should), and it
should have ICMPMSG macros doing that.

        So, again, icmpmsg_statistics either should stay exported, or
neither icmpmsg_statistics nor icmp_statistics should be exported
(depending on how SCTP and DCCP code is resolved). It's incorrect in
the current code to increment ICMP_MIB_INMSGS without incrementing one
of the types too.

 +-DLS


Re: [2.6 patch] unexport icmpmsg_statistics

2007-10-24 Thread David Stevens
[EMAIL PROTECTED] wrote on 10/24/2007 09:24:10 AM:

> This patch removes the unused EXPORT_SYMBOL(icmpmsg_statistics).
> 
> Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>
> 
> ---
> 4ce74657ac0b1bdcb4c7bc359d05643f8cc4a08b 
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 272c69e..233de06 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -1104,5 +1104,4 @@ void __init icmp_init(struct net_proto_family 
*ops)
>  EXPORT_SYMBOL(icmp_err_convert);
>  EXPORT_SYMBOL(icmp_send);
>  EXPORT_SYMBOL(icmp_statistics);
> -EXPORT_SYMBOL(icmpmsg_statistics);
>  EXPORT_SYMBOL(xrlim_allow);

"icmpmsg_statistics" belongs with (and replaces some of the 
old...)
"icmp_statistics". I'm not sure that any modules use it, but I think you
should remove both or neither.

+-DLS


Re: [PATCH] Fallback to ipv4 if we try to add join IPv4 multicast group via ipv4-mapped address.

2007-10-02 Thread David Stevens
Dmitry,
Good catch; a couple comments:

> struct ipv6_pinfo *np = inet6_sk(sk);
> int err;
> +   int addr_type = ipv6_addr_type(addr);
> +
> +   if (addr_type == IPV6_ADDR_MAPPED) {
> +  __be32 v4addr = addr->s6_addr32[3];
> +  struct ip_mreqn mreq;
> +  mreq.imr_multiaddr.s_addr = v4addr;
> +  mreq.imr_address.s_addr = INADDR_ANY;
> +  mreq.imr_ifindex = ifindex;
> +
> +  return ip_mc_join_group(sk, &mreq);
> +   }

ipv6_addr_type() returns a bitmask, so you should use:

if (addr_type & IPV6_ADDR_MAPPED) {

Also, you should have a blank line after the "mreq" declaration.

Ditto for both in ipv6_mc_sock_drop().

I don't expect the multicast source filtering interface will
behave well for mapped addresses, either. The mapped multicast
address won't appear to be a multicast address (and return
error there), and all the source filters would have to be
v4mapped addresses and modify the v4 source filters for this
to do as you expect. So, there's more to it (and it may be a
bit messy) to support mapped multicast addresses fully. I'll
think about that part some more.

+-DLS
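
Putting both comments together, the hunk would read roughly as follows
(illustrative and untested):

	struct ipv6_pinfo *np = inet6_sk(sk);
	int err;
	int addr_type = ipv6_addr_type(addr);

	if (addr_type & IPV6_ADDR_MAPPED) {	/* bitmask test, not == */
		__be32 v4addr = addr->s6_addr32[3];
		struct ip_mreqn mreq;

		mreq.imr_multiaddr.s_addr = v4addr;
		mreq.imr_address.s_addr = INADDR_ANY;
		mreq.imr_ifindex = ifindex;

		return ip_mc_join_group(sk, &mreq);
	}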


Re: [PATCH RESEND] 2.6.22.6 networking [ipv4]: fix wrong destination when reply packetes

2007-09-20 Thread David Stevens
I'm not sure why it's using rt_src here, but there are relevant cases
that your description doesn't cover. For example, what happens if the
source is not set in the original packet? Does NAT affect this?

You quote RFC text for ICMP echo and the case where the receiving
machine is the final destination, but you're modifying code that is
used for all ICMP types and used for ICMP errors generated when acting
as an intermediate router.

In ordinary cases, and certainly with ICMP echo when the source is set
in the original packet and no rewriting is going on (and the address is
not spoofed), using the original source as the destination is fine. But
have you tested or considered the other cases?

+-DLS


Re: RFC: issues concerning the next NAPI interface

2007-08-24 Thread David Stevens
Stephen Hemminger <[EMAIL PROTECTED]> wrote on 08/24/2007 
08:52:03 AM:

> 
> You need hardware support for deferred interrupts. Most devices have it
> (e1000, sky2, tg3) and it interacts well with NAPI. It is not a generic
> thing you want done by the stack, you want the hardware to hold off
> interrupts until X packets or Y usecs have expired.

        For generic hardware that doesn't support it, couldn't you use
an estimator and adjust the timer dynamically in software based on
sampled values? Switch to per-packet interrupts when the receive rate
is low...
        Actually, that's how I thought NAPI worked before I found out
otherwise (ie, before I looked :-)).

        The hardware-accelerated one is essentially siloing as done by
ancient serial devices on UNIX systems. If you had a tunable for a
target count, and an estimator for the time interval, then switch to
per-packet when the estimator exceeds a tunable max threshold (and
also, I suppose, if you near overflowing the ring on the min timer
granularity), you get almost all of it, right?
        Problem is if it increases rapidly, you may drop packets before
you notice that the ring is full in the current estimated interval.

 +-DLS
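
A sketch of the software fallback described above; all names are made
up, and the actual interrupt/timer rearming is left to the caller:

#define PKT_RATE_LOW	2	/* below this: interrupt per packet */
#define PKT_RATE_HIGH	32	/* above this: stretch the silo timer */
#define DEFER_MAX_USECS	200

struct my_ring {
	unsigned int pkt_est;		/* smoothed packets per poll */
	unsigned int defer_usecs;	/* 0 == per-packet interrupts */
};

/* Called from the poll routine with the number of packets just
 * processed; keeps an EWMA (weight 1/8) and adjusts the hold-off. */
static void my_adjust_mitigation(struct my_ring *ring, unsigned int polled)
{
	ring->pkt_est += ((int)polled - (int)ring->pkt_est) >> 3;

	if (ring->pkt_est < PKT_RATE_LOW)
		ring->defer_usecs = 0;
	else if (ring->pkt_est > PKT_RATE_HIGH &&
		 ring->defer_usecs < DEFER_MAX_USECS)
		ring->defer_usecs += 10;
	/* Rapid rate rises can still overflow the ring within the
	 * current estimate, which is exactly the problem noted above. */
}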


Re: kernel module to list sockets with their multicast group joins and respective processes

2007-07-16 Thread David Stevens
> sockets that join different groups receive messages from the respective
> other group (if they are only bound to the wildcard address). Obviously
> this is handled differently in Linux for IPv4, where the socket matching
> for incoming message is done solely on the 4-tuple of addresses and
> ports, and IPv6, where the explicit socket joins are taken into account [1,2].

        These are not different in v4 vs v6. Group membership is
per-interface, delivery depends on the binding (for both v4 and v6),
and this is the same as BSD behavior since IP multicasting was invented.
        There are two levels of checks, and I think you're confusing
them. inet6_mc_check() is checking that somebody (*anybody*) has joined
the group on the receiving interface, before delivering the packet to
IP. Delivery to a particular socket depends on source filter settings,
if they are in use, and bindings (only). The socket has a list of
joined addresses to track source filters and to check for correct
join/leave (a socket can't leave a group it hasn't joined), but that
list does not determine delivery of a multicast packet to the socket
when source filtering is not in use.
 +-DLS
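
A minimal userspace demonstration of that point (group and port are
arbitrary; error checking omitted): sock_b never joins, but once
sock_a's join lets GROUP traffic onto the host, delivery to sockets is
by binding, so recvfrom() on sock_b sees the datagrams too.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

#define GROUP "239.1.2.3"
#define PORT  5000

static int make_bound_socket(void)
{
	struct sockaddr_in sin;
	int one = 1;
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);	/* wildcard bind */
	sin.sin_port = htons(PORT);
	bind(s, (struct sockaddr *)&sin, sizeof(sin));
	return s;
}

int main(void)
{
	int sock_a = make_bound_socket();
	int sock_b = make_bound_socket();	/* never joins the group */
	struct ip_mreq mreq;

	memset(&mreq, 0, sizeof(mreq));
	inet_pton(AF_INET, GROUP, &mreq.imr_multiaddr);
	mreq.imr_interface.s_addr = htonl(INADDR_ANY);
	setsockopt(sock_a, IPPROTO_IP, IP_ADD_MEMBERSHIP,
		   &mreq, sizeof(mreq));
	/* recvfrom() on either sock_a or sock_b now sees datagrams
	 * addressed to GROUP:PORT on the joined interface. */
	(void)sock_b;
	return 0;
}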



Re: Scaling Max IP address limitation

2007-06-24 Thread David Stevens
> 
> I keep having hopeful dreams that one day netfilter will grow support 
> for cross-protocol NAT (IE: NAT a TCPv4 connection over TCPv6 to the 
> IPv6-only local web server, or vice versa).  It would seem that would 
> require a merged "xtables" program.

You don't actually need it (at least for easy cases like that),
because IPv6 defines IPv4 mapped IPv6 addresses of the form
::ffff:a.b.c.d. These will generate IPv4 packets for a.b.c.d, from
a v6 socket.
Unless you're using v6only binding (a sysctl option), you can
connect to v6-only servers using a v4 network and a v4 address of the
server. The peer address on those connections will show up as a v4
mapped address, and all the traffic will be v4, but the socket layer
is all v6.

+-DLS
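
A short userspace sketch of that (assumes the socket is not set to
v6only; connect_mapped() is a made-up name):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Connect to an IPv4 server through an AF_INET6 socket via a
 * v4-mapped address; the traffic on the wire is plain IPv4. */
int connect_mapped(const char *v4addr, unsigned short port)
{
	struct sockaddr_in6 sin6;
	char mapped[INET6_ADDRSTRLEN];
	int s = socket(AF_INET6, SOCK_STREAM, 0);

	if (s < 0)
		return -1;

	snprintf(mapped, sizeof(mapped), "::ffff:%s", v4addr);
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = htons(port);
	inet_pton(AF_INET6, mapped, &sin6.sin6_addr);

	if (connect(s, (struct sockaddr *)&sin6, sizeof(sin6)) < 0) {
		close(s);
		return -1;
	}
	return s;	/* getpeername() reports ::ffff:a.b.c.d */
}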


Re: 2.6.19.2 regression introduced by "IPV4/IPV6: Fix inet{,6} device initialization order."

2007-01-14 Thread David Stevens
I expect this is the failure to join the all-nodes multicast group,
in which case the fix has already been posted to netdev. I
believe the router advertisements are sent to that, and if the
join failed, it wouldn't receive any of them.

I think it's better to add the fix than withdraw this patch, since
the original bug is a crash.

Details:
The IPv6 code passes the "dev" entry to the multicast group
incrementer and uses it to dereference to get the in6_dev.
IPv4, by contrast, passes the in_dev directly to its equivalent
functions.

IPv6 joins the required "all-nodes" multicast group in the
multicast device initialization function, which due to the fix
won't have a dev entry at that time. The patch posted by
Yoshifuji Hideaki moves the all-nodes join until after the
ip6_ptr is added to the dev.

+-DLS
