Hey Fotis,

Thank you for the patches. I have applied the 1st one from the series. I 
have some questions and suggestions regarding this one though. Please see 
them below.


On Sunday, March 8, 2020 at 10:44:52 AM UTC-4, Fotis Xenakis wrote:
>
> The latest virtio spec adds shared memory regions: 
> "Shared memory regions are an additional facility available to devices 
> that need a region of memory that’s continuously shared between the 
> device and the driver, rather than passed between them in the way 
> virtqueue elements are." 
>
> In virtio over PCI, these are enumerated as a sequence of 
> VIRTIO_PCI_CAP_SHARED_MEMORY_CFG capabilities, one per region. 
>
> This patch extends the virtio over PCI implementation to discover all 
> such regions provided by a device. 
>
> Signed-off-by: Fotis Xenakis <fo...@windowslive.com <javascript:>> 
> --- 
>  drivers/virtio-pci-device.cc | 78 +++++++++++++++++++++++++++++------- 
>  drivers/virtio-pci-device.hh | 44 +++++++++++++------- 
>  2 files changed, 93 insertions(+), 29 deletions(-) 
>
> diff --git a/drivers/virtio-pci-device.cc b/drivers/virtio-pci-device.cc 
> index c107ddd7..8e02c1c1 100644 
> --- a/drivers/virtio-pci-device.cc 
> +++ b/drivers/virtio-pci-device.cc 
> @@ -269,37 +269,87 @@ bool virtio_modern_pci_device::parse_pci_config() 
>          return false; 
>      } 
>   
> -    parse_virtio_capability(_common_cfg, VIRTIO_PCI_CAP_COMMON_CFG); 
> -    parse_virtio_capability(_isr_cfg, VIRTIO_PCI_CAP_ISR_CFG); 
> -    parse_virtio_capability(_notify_cfg, VIRTIO_PCI_CAP_NOTIFY_CFG); 
> -    parse_virtio_capability(_device_cfg, VIRTIO_PCI_CAP_DEVICE_CFG); 
> +    parse_virtio_capability(_common_cfg, VIRTIO_PCI_CAP_COMMON_CFG, 
> nullptr); 
> +    parse_virtio_capability(_isr_cfg, VIRTIO_PCI_CAP_ISR_CFG, nullptr); 
> +    parse_virtio_capability(_notify_cfg, VIRTIO_PCI_CAP_NOTIFY_CFG, 
> nullptr); 
> +    parse_virtio_capability(_device_cfg, VIRTIO_PCI_CAP_DEVICE_CFG, 
> nullptr); 
> +    parse_virtio_capabilities(_shm_cfgs, 
> VIRTIO_PCI_CAP_SHARED_MEMORY_CFG); 
>   
>      if (_notify_cfg) { 
> -        _notify_offset_multiplier 
> =_dev->pci_readl(_notify_cfg->get_cfg_offset() + 
> +        _notify_offset_multiplier = 
> _dev->pci_readl(_notify_cfg->get_cfg_offset() + 
>                  offsetof(virtio_pci_notify_cap, 
> notify_offset_multiplier)); 
>      } 
>   
>      return _common_cfg && _isr_cfg && _notify_cfg && _device_cfg; 
>  } 
>   
> -void 
> virtio_modern_pci_device::parse_virtio_capability(std::unique_ptr<virtio_capability>
>  
> &ptr, u8 type) 
> -{ 
> -    u8 cfg_offset = _dev->find_capability(pci::function::PCI_CAP_VENDOR, 
> [type] (pci::function *fun, u8 offset) { 
> -        u8 cfg_type = fun->pci_readb(offset + offsetof(struct 
> virtio_pci_cap, cfg_type)); 
> -        return type == cfg_type; 
> +// Parse all virtio PCI capabilities whose types match @type and append 
> them to 
> +// @caps. 
> +// From the spec: "The device MAY offer more than one structure of any 
> type - 
> +// this makes it possible for the device to expose multiple interfaces to 
> +// drivers. The order of the capabilities in the capability list 
> specifies the 
> +// order of preference suggested by the device. A device may specify that 
> this 
> +// ordering mechanism be overridden by the use of the id field." 
> +void virtio_modern_pci_device::parse_virtio_capabilities( 
> +    std::vector<std::unique_ptr<virtio_capability>>& caps, u8 type) 
> +{ 
> +    std::vector<u8> past_ids; 
> +    auto pred = [&past_ids] (u8 id) { 
> +        return std::none_of(past_ids.cbegin(), past_ids.cend(), [id] (u8 
> past_id) { 
> +            return past_id == id; 
> +        }); 
> +    }; 
> + 
> +    while (true) { 
> +        std::unique_ptr<virtio_capability> ptr; 
> +        // Search for a matching capability with an id not already in 
> past_ids 
> +        parse_virtio_capability(ptr, type, pred); 
> +        if (!ptr) { 
> +            return; 
> +        } 
> +        u8 id = _dev->pci_readb(ptr->get_cfg_offset() + 
> offsetof(virtio_pci_cap, id)); 
> +        past_ids.push_back(id); 
> +        caps.emplace_back(ptr.release()); 
> +    } 
> +} 
>
So the idea with the implementation of parse_virtio_capabilities() above is 
really to collect all capabilities of the given type (in our case so far 
VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)? I wonder if the code can be simplified 
if we add a method function::find_capabilities(u8 cap_id) in 
pci-function.cc that will simply return a collection of matching offsets?

The general idea - we do not have to do it in this patch. I noticed we 
re-read same data in u8 function::find_capability() over and over (and each 
read is an exit I think) and that is what cause the pci initialization 
slower especially for modern devices (I see 10ms difference in boot time 
between running the same program with '--virtio modern' and '--virtio 
legacy' (default). Maybe some easy caching - again let us not worry now.

> + 
> +// Parse a single virtio PCI capability, whose type must match @type and 
> id must 
> +// satisfy @id_pred. 
> +void 
> virtio_modern_pci_device::parse_virtio_capability(std::unique_ptr<virtio_capability>&
>  
> ptr, u8 type, 
> +    std::function<bool (u8)> id_pred) 
> +{ 
> +    u8 cfg_offset = _dev->find_capability(pci::function::PCI_CAP_VENDOR, 
> +        [type, id_pred] (pci::function *fun, u8 offset) { 
> +            u8 cfg_type = fun->pci_readb(offset + 
> offsetof(virtio_pci_cap, cfg_type)); 
> +            if (cfg_type != type) { 
> +                return false; 
> +            } 
> +            u8 id = fun->pci_readb(offset + offsetof(virtio_pci_cap, 
> id)); 
> +            if (id_pred && !id_pred(id)) { 
> +                return false; 
> +            } 
> +            return true; 
>
Easy optimization suggestion (given that I think each pci_readb is an exit 
to hypervisor) - maybe first check if id_pred not null and only then do 
pci_readb() and call id_pred. Most parse_virtio_capability() calls pass 
id_pred as null, right?

>      }); 
>   
>      if (cfg_offset != 0xFF) { 
> -        u8 bar_index = _dev->pci_readb(cfg_offset + offsetof(struct 
> virtio_pci_cap, bar)); 
> -        u32 offset = _dev->pci_readl(cfg_offset + offsetof(struct 
> virtio_pci_cap, offset)); 
> -        u32 length = _dev->pci_readl(cfg_offset + offsetof(struct 
> virtio_pci_cap, length)); 
> - 
> +        u8 bar_index = _dev->pci_readb(cfg_offset + 
> offsetof(virtio_pci_cap, bar)); 
>          auto bar_no = bar_index + 1; 
>          auto bar = _dev->get_bar(bar_no); 
>          if (bar && bar->is_mmio() && !bar->is_mapped()) { 
>              bar->map(); 
>          } 
>   
> +        u64 offset = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap, offset)); 
> +        u64 length = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap, length)); 
> +        if (type == VIRTIO_PCI_CAP_SHARED_MEMORY_CFG) { 
> +            // The shared memory region capability is defined by a struct 
> +            // virtio_pci_cap64 
> +            u32 offset_hi = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap64, offset_hi)); 
> +            u32 length_hi = _dev->pci_readl(cfg_offset + 
> offsetof(virtio_pci_cap64, length_hi)); 
> +            offset |= ((u64)offset_hi << 32); 
> +            length |= ((u64)length_hi << 32); 
> +        } 
> + 
>          ptr.reset(new 
> virtio_modern_pci_device::virtio_capability(cfg_offset, bar, bar_no, 
> offset, length)); 
>      } 
>  } 
> diff --git a/drivers/virtio-pci-device.hh b/drivers/virtio-pci-device.hh 
> index 5a891d93..9a32aa18 100644 
> --- a/drivers/virtio-pci-device.hh 
> +++ b/drivers/virtio-pci-device.hh 
> @@ -98,7 +98,7 @@ public: 
>      ~virtio_legacy_pci_device() {} 
>   
>      virtual const char *get_version() { return "legacy"; } 
> -    virtual u16 get_type_id() { return _dev->get_subsystem_id(); }; 
> +    virtual u16 get_type_id() { return _dev->get_subsystem_id(); } 
>   
>      virtual void select_queue(int queue); 
>      virtual u16 get_queue_size(); 
> @@ -115,7 +115,7 @@ public: 
>      virtual u8 read_config(u32 offset); 
>      virtual u8 read_and_ack_isr(); 
>   
> -    virtual bool is_modern() { return false; }; 
> +    virtual bool is_modern() { return false; } 
>  protected: 
>      virtual bool parse_pci_config(); 
>   
> @@ -145,6 +145,8 @@ enum VIRTIO_MODERN_PCI_CONFIG { 
>      VIRTIO_PCI_CAP_DEVICE_CFG = 4, 
>      /* PCI configuration access */ 
>      VIRTIO_PCI_CAP_PCI_CFG = 5, 
> +    /* Shared memory region */ 
> +    VIRTIO_PCI_CAP_SHARED_MEMORY_CFG = 8, 
>  }; 
>   
>  /* This is the PCI capability header: */ 
> @@ -154,11 +156,20 @@ struct virtio_pci_cap { 
>      u8 cap_len;     /* Generic PCI field: capability length */ 
>      u8 cfg_type;    /* Identifies the structure. */ 
>      u8 bar;         /* Where to find it. */ 
> -    u8 padding[3];  /* Pad to full dword. */ 
> +    u8 id;          /* Multiple capabilities of the same type */ 
> +    u8 padding[2];  /* Pad to full dword. */ 
>      u32 offset;     /* Offset within bar. */ 
>      u32 length;     /* Length of the structure, in bytes. */ 
>  }; 
>   
> +/* A variant of virtio_pci_cap, for capabilities that require offsets or 
> lengths 
> + * larger than 4GiB */ 
> +struct virtio_pci_cap64 { 
> +    struct virtio_pci_cap cap; 
> +    u32 offset_hi; 
> +    u32 length_hi; 
> +}; 
> + 
>  /* The notification location is found using the VIRTIO_PCI_CAP_NOTIFY_CFG 
> capability. 
>   * This capability is immediately followed by an additional field, like 
> so:*/ 
>  struct virtio_pci_notify_cap { 
> @@ -198,7 +209,7 @@ struct virtio_pci_common_cfg { 
>  class virtio_modern_pci_device : public virtio_pci_device { 
>  public: 
>      struct virtio_capability { 
> -        virtio_capability(u32 cfg_offset, pci::bar* bar, u32 bar_no, u32 
> bar_offset, u32 length) : 
> +        virtio_capability(u32 cfg_offset, pci::bar* bar, u32 bar_no, u64 
> bar_offset, u64 length) : 
>              _cfg_offset(cfg_offset), 
>              _bar(bar), 
>              _bar_no(bar_no), 
> @@ -207,27 +218,27 @@ public: 
>              assert(_length > 0 && _bar_offset >= 0 && _bar_offset + 
> _length <= _bar->get_size()); 
>          } 
>   
> -        u8 virtio_conf_readb(u32 offset) { 
> +        u8 virtio_conf_readb(u64 offset) { 
>              verify_offset(offset, sizeof(u8)); 
>              return _bar->readb(_bar_offset + offset); 
>          }; 
> -        u16 virtio_conf_readw(u32 offset) { 
> +        u16 virtio_conf_readw(u64 offset) { 
>              verify_offset(offset, sizeof(u16)); 
>              return _bar->readw(_bar_offset + offset); 
>          }; 
> -        u32 virtio_conf_readl(u32 offset) { 
> +        u32 virtio_conf_readl(u64 offset) { 
>              verify_offset(offset, sizeof(u32)); 
>              return _bar->readl(_bar_offset + offset); 
>          }; 
> -        void virtio_conf_writeb(u32 offset, u8 val) { 
> +        void virtio_conf_writeb(u64 offset, u8 val) { 
>              verify_offset(offset, sizeof(u8)); 
>              _bar->writeb(_bar_offset + offset, val); 
>          }; 
> -        void virtio_conf_writew(u32 offset, u16 val) { 
> +        void virtio_conf_writew(u64 offset, u16 val) { 
>              verify_offset(offset, sizeof(u16)); 
>              _bar->writew(_bar_offset + offset, val); 
>          }; 
> -        void virtio_conf_writel(u32 offset, u32 val) { 
> +        void virtio_conf_writel(u64 offset, u32 val) { 
>              verify_offset(offset, sizeof(u32)); 
>              _bar->writel(_bar_offset + offset, val); 
>          }; 
> @@ -237,15 +248,15 @@ public: 
>              virtio_d("%s bar=%d, offset=%x, size=%x", prefix, _bar_no, 
> _bar_offset, _length); 
>          } 
>      private: 
> -        inline void verify_offset(u32 offset, u32 size) { 
> +        inline void verify_offset(u64 offset, u32 size) { 
>              assert(offset >= 0 && offset + size <= _length); 
>          } 
>   
>          u32 _cfg_offset; 
>          pci::bar* _bar; 
>          u32 _bar_no; 
> -        u32 _bar_offset; 
> -        u32 _length; 
> +        u64 _bar_offset; 
> +        u64 _length; 
>      }; 
>   
>      explicit virtio_modern_pci_device(pci::device *dev); 
> @@ -276,12 +287,15 @@ public: 
>  protected: 
>      virtual bool parse_pci_config(); 
>  private: 
> -    void parse_virtio_capability(std::unique_ptr<virtio_capability> &ptr, 
> u8 type); 
> +    void 
> parse_virtio_capabilities(std::vector<std::unique_ptr<virtio_capability>>& 
> caps, u8 type); 
> +    void parse_virtio_capability(std::unique_ptr<virtio_capability> &ptr, 
> u8 type, 
> +        std::function<bool (u8)> id_pred); 
>   
>      std::unique_ptr<virtio_capability> _common_cfg; 
>      std::unique_ptr<virtio_capability> _isr_cfg; 
>      std::unique_ptr<virtio_capability> _notify_cfg; 
>      std::unique_ptr<virtio_capability> _device_cfg; 
> +    std::vector<std::unique_ptr<virtio_capability>> _shm_cfgs; 
>   
>      u32 _notify_offset_multiplier; 
>      u32 _queues_notify_offsets[64]; 
> @@ -293,4 +307,4 @@ virtio_device* create_virtio_pci_device(pci::device 
> *dev); 
>   
>  } 
>   
> -#endif //VIRTIO_PCI_DEVICE_HH 
> \ No newline at end of file 
> +#endif //VIRTIO_PCI_DEVICE_HH 
> -- 
> 2.25.1 
>
>

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/76837ad1-7e3b-433f-827f-e8c2ad81cafb%40googlegroups.com.

Reply via email to