On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote: > On 2015/02/11 13:53, Tetsuya Mukawa wrote: >> On 2015/02/11 12:27, Qiu, Michael wrote: >>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote: >>>>> -----Original Message----- >>>>> From: Qiu, Michael >>>>> Sent: Monday, February 9, 2015 1:10 PM >>>>> To: Tetsuya Mukawa; dev at dpdk.org >>>>> Cc: Iremonger, Bernard >>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison >>>>> APIs >>>>> >>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote: >>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci >>>>>> addresses by eal_compare_pci_addr(). >>>>>> >>>>>> v5: >>>>>> - Fix pci_scan_one to handle pt_driver correctly. >>>>>> v4: >>>>>> - Fix calculation method of eal_compare_pci_addr(). >>>>>> - Add parameter checking. >>>>>> >>>>>> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp> >>>>>> --- >>>>>> lib/librte_eal/bsdapp/eal/eal_pci.c | 25 ++++++++--------------- >>>>>> lib/librte_eal/common/eal_common_pci.c | 2 +- >>>>>> lib/librte_eal/common/include/rte_pci.h | 34 >>>>>> +++++++++++++++++++++++++++++++ >>>>>> lib/librte_eal/linuxapp/eal/eal_pci.c | 25 ++++++++--------------- >>>>>> lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 2 +- >>>>>> 5 files changed, 54 insertions(+), 34 deletions(-) >>>>>> >>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>> index 74ecce7..c844d58 100644 >>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c >>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev) >>>>>> return (0); >>>>>> } >>>>>> >>>>>> -/* Compare two PCI device addresses. 
*/ -static int >>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr >>>>>> *addr2) -{ >>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + >>>>>> (addr->devid << 8) + addr- >>>>>> function; >>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) >>>>>> + (addr2->devid << 8) + >>>>> addr2->function; >>>>>> - >>>>>> - if (dev_addr > dev_addr2) >>>>>> - return 1; >>>>>> - else >>>>>> - return 0; >>>>>> -} >>>>>> - >>>>>> - >>>>>> /* Scan one pci sysfs entry, and fill the devices list from it. */ >>>>>> static int pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@ >>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) >>>>>> } >>>>>> else { >>>>>> struct rte_pci_device *dev2 = NULL; >>>>>> + int ret; >>>>>> >>>>>> TAILQ_FOREACH(dev2, &pci_device_list, next) { >>>>>> - if (pci_addr_comparison(&dev->addr, >>>>>> &dev2->addr)) >>>>>> + ret = eal_compare_pci_addr(&dev->addr, >>>>>> &dev2->addr); >>>>>> + if (ret > 0) >>>>>> continue; >>>>>> - else { >>>>>> + else if (ret < 0) { >>>>>> TAILQ_INSERT_BEFORE(dev2, dev, next); >>>>>> return 0; >>>>>> + } else { /* already registered */ >>>>>> + /* update pt_driver */ >>>>>> + dev2->pt_driver = dev->pt_driver; >>>>>> + free(dev); >>>>>> + return 0; >>>>>> } >>>>>> } >>>>>> TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff >>>>>> --git >>>>>> a/lib/librte_eal/common/eal_common_pci.c >>>>>> b/lib/librte_eal/common/eal_common_pci.c >>>>>> index f3c7f71..a89f5c3 100644 >>>>>> --- a/lib/librte_eal/common/eal_common_pci.c >>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c >>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct >>>>>> rte_pci_device *dev) >>>>>> if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI && >>>>>> devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) >>>>>> continue; >>>>>> - if (!memcmp(&dev->addr, &devargs->pci.addr, >>>>>> sizeof(dev->addr))) >>>>>> + if (!eal_compare_pci_addr(&dev->addr, >>>>>> 
&devargs->pci.addr)) >>>>>> return devargs; >>>>>> } >>>>>> return NULL; >>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h >>>>>> b/lib/librte_eal/common/include/rte_pci.h >>>>>> index 7f2d699..4814cd7 100644 >>>>>> --- a/lib/librte_eal/common/include/rte_pci.h >>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h >>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct >>>>>> rte_pci_addr *dev_addr) } #undef GET_PCIADDR_FIELD >>>>>> >>>>>> +/* Compare two PCI device addresses. */ >>>>>> +/** >>>>>> + * Utility function to compare two PCI device addresses. >>>>>> + * >>>>>> + * @param addr >>>>>> + * The PCI Bus-Device-Function address to compare >>>>>> + * @param addr2 >>>>>> + * The PCI Bus-Device-Function address to compare >>>>>> + * @return >>>>>> + * 0 on equal PCI address. >>>>>> + * Positive on addr is greater than addr2. >>>>>> + * Negative on addr is less than addr2, or error. >>>>>> + */ >>>>>> +static inline int >>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr >>>>>> +*addr2) { >>>>>> + uint64_t dev_addr, dev_addr2; >>>>>> + >>>>>> + if ((addr == NULL) || (addr2 == NULL)) >>>>>> + return -1; >>>>>> + >>>>>> + dev_addr = (addr->domain << 24) | (addr->bus << 16) | >>>>>> + (addr->devid << 8) | addr->function; >>>>>> + dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) | >>>>>> + (addr2->devid << 8) | addr2->function; >>>>>> + >>>>>> + if (dev_addr > dev_addr2) >>>>>> + return 1; >>>>>> + else if (dev_addr < dev_addr2) >>>>>> + return -1; >>>>>> + else >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> /** >>>>>> * Probe the PCI bus for registered drivers. 
>>>>>> * >>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>> index c0ca5a5..d847102 100644 >>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c >>>>>> @@ -229,20 +229,6 @@ error: >>>>>> return -1; >>>>>> } >>>>>> >>>>>> -/* Compare two PCI device addresses. */ -static int >>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr >>>>>> *addr2) -{ >>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + >>>>>> (addr->devid << 8) + addr- >>>>>> function; >>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) >>>>>> + (addr2->devid << 8) + >>>>> addr2->function; >>>>>> - >>>>>> - if (dev_addr > dev_addr2) >>>>>> - return 1; >>>>>> - else >>>>>> - return 0; >>>>>> -} >>>>>> - >>>>>> - >>>>>> /* Scan one pci sysfs entry, and fill the devices list from it. */ >>>>>> static int pci_scan_one(const char *dirname, uint16_t domain, uint8_t >>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t >>>>>> domain, uint8_t bus, >>>>>> } >>>>>> else { >>>>>> struct rte_pci_device *dev2 = NULL; >>>>>> + int ret; >>>>>> >>>>>> TAILQ_FOREACH(dev2, &pci_device_list, next) { >>>>>> - if (pci_addr_comparison(&dev->addr, >>>>>> &dev2->addr)) >>>>>> + ret = eal_compare_pci_addr(&dev->addr, >>>>>> &dev2->addr); >>>>>> + if (ret > 0) >>>>>> continue; >>>>>> - else { >>>>>> + else if (ret < 0) { >>>>>> TAILQ_INSERT_BEFORE(dev2, dev, next); >>>>>> return 0; >>>>>> + } else { /* already registered */ >>>>>> + /* update pt_driver */ >>>>>> + dev2->pt_driver = dev->pt_driver; >>>> Hi Tetsuya, >>>> >>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is >>>> being lost in some scenarios. >>>> The following line should be added here: >>>> dev2->max_vfs = dev->max_vfs; >>>> >>>> numa_mode should probably be updated too (although it is not causing a >>>> problem at present). 
>>>> dev2->numa_mode = dev->numa_mode; >>> I'm very curious: why are those fields missing? I haven't seen any place that clears >>> this field. >>> >>> What is the root cause? >> Hi Michael, >> >> Here is my guess. >> The above function creates the PCI device list. > I am sorry. I forgot to add the information below. > > The "max_vfs" or "numa_node" value comes from sysfs when the above > function is processed.
Yes, but it has already been registered, so why is it missing? Thanks, Michael >> And the current DPDK implementation assumes all devices that need to be managed >> are under igb_uio or vfio when the above code is processed. >> To add the hotplug function, we also need to consider that some devices will start >> to be managed under igb_uio or vfio after the PCI device list is initialized. >> Anyway, I guess the "max_vfs" value will be changed when igb_uio or vfio >> manages the device. >> >> Hi Bernard, >> >> Could you please check the "max_vfs" and "numa_node" values, then check the >> values again after the device is managed by igb_uio or vfio? >> In my environment, it seems max_vfs is created by igb_uio. >> But my NIC doesn't have a VF, so the behavior might be different in your >> environment. >> I guess "numa_node" should not be changed theoretically. >> >> If my guess is correct, how about replacing the following values? >> - driver >> - max_vfs >> - resource >> - (numa_node) >> Except for the above values, I guess other values shouldn't be changed even >> after the device is managed by igb_uio or vfio. >> >> Thanks, >> Tetsuya >> >>> Thanks, >>> Michael >>> >>>> Regards, >>>> >>>> Bernard. 
>>>> >>>> >>>> >>>> >>>>>> + free(dev); >>>>>> + return 0; >>>>>> } >>>>>> } >>>>>> TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff >>>>>> --git >>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>> index e53f06b..1da3507 100644 >>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c >>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev) >>>>>> TAILQ_FOREACH(uio_res, pci_res_list, next) { >>>>>> >>>>>> /* skip this element if it doesn't match our PCI >>>>>> address */ >>>>>> - if (memcmp(&uio_res->pci_addr, &dev->addr, >>>>>> sizeof(dev->addr))) >>>>>> + if (eal_compare_pci_addr(&uio_res->pci_addr, >>>>>> &dev->addr)) >>>>>> continue; >>>>>> >>>>>> for (i = 0; i != uio_res->nb_maps; i++) { >>>>> Acked-by: Michael Qiu <michael.qiu at intel.com> >