Re: [PATCH 1/2] eventfd: simplify eventfd_signal()
ntfd_ctx_fileget(struct file *file); > -__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); > +__u64 eventfd_signal(struct eventfd_ctx *ctx); > __u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask); > int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, > wait_queue_entry_t *wait, > __u64 *cnt); > @@ -58,7 +58,7 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) > return ERR_PTR(-ENOSYS); > } > > -static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n) > +static inline int eventfd_signal(struct eventfd_ctx *ctx) > { > return -ENOSYS; > } > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index e8ca4bdcb03c..891550f575a1 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -4228,7 +4228,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup > *memcg, bool swap) > * only one element of the array here. > */ > for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--) > - eventfd_signal(t->entries[i].eventfd, 1); > + eventfd_signal(t->entries[i].eventfd); > > /* i = current_threshold + 1 */ > i++; > @@ -4240,7 +4240,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup > *memcg, bool swap) > * only one element of the array here. > */ > for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++) > - eventfd_signal(t->entries[i].eventfd, 1); > + eventfd_signal(t->entries[i].eventfd); > > /* Update current_threshold */ > t->current_threshold = i - 1; > @@ -4280,7 +4280,7 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup > *memcg) > spin_lock(_oom_lock); > > list_for_each_entry(ev, >oom_notify, list) > - eventfd_signal(ev->eventfd, 1); > + eventfd_signal(ev->eventfd); > > spin_unlock(_oom_lock); > return 0; > @@ -4499,7 +4499,7 @@ static int mem_cgroup_oom_register_event(struct > mem_cgroup *memcg, > > /* already in OOM ? 
*/ > if (memcg->under_oom) > - eventfd_signal(eventfd, 1); > + eventfd_signal(eventfd); > spin_unlock(_oom_lock); > > return 0; > @@ -4791,7 +4791,7 @@ static void memcg_event_remove(struct work_struct *work) > event->unregister_event(memcg, event->eventfd); > > /* Notify userspace the event is going away. */ > - eventfd_signal(event->eventfd, 1); > + eventfd_signal(event->eventfd); > > eventfd_ctx_put(event->eventfd); > kfree(event); > diff --git a/mm/vmpressure.c b/mm/vmpressure.c > index b52644771cc4..ba4cdef37e42 100644 > --- a/mm/vmpressure.c > +++ b/mm/vmpressure.c > @@ -169,7 +169,7 @@ static bool vmpressure_event(struct vmpressure *vmpr, > continue; > if (level < ev->level) > continue; > - eventfd_signal(ev->efd, 1); > + eventfd_signal(ev->efd); > ret = true; > } > mutex_unlock(>events_lock); > diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c > index a60801fb8660..5edcf8d738de 100644 > --- a/samples/vfio-mdev/mtty.c > +++ b/samples/vfio-mdev/mtty.c > @@ -1028,9 +1028,9 @@ static int mtty_trigger_interrupt(struct mdev_state > *mdev_state) > } > > if (mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) > - ret = eventfd_signal(mdev_state->msi_evtfd, 1); > + ret = eventfd_signal(mdev_state->msi_evtfd); > else > - ret = eventfd_signal(mdev_state->intx_evtfd, 1); > + ret = eventfd_signal(mdev_state->intx_evtfd); > > #if defined(DEBUG_INTR) > pr_info("Intx triggered\n"); > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 89912a17f5d5..c0e230f4c3e9 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -61,7 +61,7 @@ static void irqfd_resampler_notify(struct > kvm_kernel_irqfd_resampler *resampler) > > list_for_each_entry_srcu(irqfd, >list, resampler_link, > > srcu_read_lock_held(>kvm->irq_srcu)) > - eventfd_signal(irqfd->resamplefd, 1); > + eventfd_signal(irqfd->resamplefd); > } > > /* > @@ -786,7 +786,7 @@ ioeventfd_write(struct kvm_vcpu *vcpu, struct > kvm_io_device *this, gpa_t addr, > if (!ioeventfd_in_range(p, addr, len, 
val)) > return -EOPNOTSUPP; > > - eventfd_signal(p->eventfd, 1); > + eventfd_signal(p->eventfd); > return 0; > } > > > -- > 2.34.1 > For habanalabs (device.c): Reviewed-by: Oded Gabbay
Re: [PATCH v2 8/8] habanalabs: enable 64-bit DMA mask in POWER9
On Wed, Jun 12, 2019 at 1:53 AM Benjamin Herrenschmidt wrote: > > On Tue, 2019-06-11 at 20:22 +0300, Oded Gabbay wrote: > > > > > So, to summarize: > > > If I call pci_set_dma_mask with 48, then it fails on POWER9. However, > > > in runtime, I don't know if its POWER9 or not, so upon failure I will > > > call it again with 32, which makes our device pretty much unusable. > > > If I call pci_set_dma_mask with 64, and do the dedicated configuration > > > in Goya's PCIe controller, then it won't work on x86-64, because bit > > > 59 will be set and the host won't like it (I checked it). In addition, > > > I might get addresses above 50 bits, which my device can't generate. > > > > > > I hope this makes things more clear. Now, please explain to me how I > > > can call pci_set_dma_mask without any regard to whether I run on > > > x86-64 or POWER9, considering what I wrote above ? > > > > > > Thanks, > > > Oded > > > > Adding ppc mailing list. > > You can't. Your device is broken. Devices that don't support DMAing to > the full 64-bit deserve to be added to the trash pile. > Hmm... right know they are added to customers data-centers but what do I know ;) > As a result, getting it to work will require hacks. Some GPUs have > similar issues and require similar hacks, it's unfortunate. > > Added a couple of guys on CC who might be able to help get those hacks > right. Thanks :) > > It's still very fishy .. the idea is to detect the case where setting a > 64-bit mask will give your system memory mapped at a fixed high address > (1 << 59 in our case) and program that in your chip in the "Fixed high > bits" register that you seem to have (also make sure it doesn't affect > MSIs or it will break them). MSI-X are working. The set of bit 59 doesn't apply to MSI-X transactions (AFAICS from the PCIe controller spec we have). 
> > This will only work as long as all of the system memory can be > addressed at an offset from that fixed address that itself fits your > device addressing capabilities (50 bits in this case). It may or may > not be the case but there's no way to check since the DMA mask logic > won't really apply. Understood. In the specific system we are integrated to, that is the case - we have less than 48 bits. But, as you pointed out, it is not a generic solution but with my H/W I can't give a generic fit-all solution for POWER9. I'll settle for the best that I can do. > > You might want to consider fixing your HW in the next iteration... This > is going to bite you when x86 increases the max physical memory for > example, or on other architectures. Understood and taken care of. > > Cheers, > Ben. > > > >
Re: [PATCH v2 8/8] habanalabs: enable 64-bit DMA mask in POWER9
On Tue, Jun 11, 2019 at 8:03 PM Oded Gabbay wrote: > > On Tue, Jun 11, 2019 at 6:26 PM Greg KH wrote: > > > > On Tue, Jun 11, 2019 at 08:17:53AM -0700, Christoph Hellwig wrote: > > > On Tue, Jun 11, 2019 at 11:58:57AM +0200, Greg KH wrote: > > > > That feels like a big hack. ppc doesn't have any "what arch am I > > > > running on?" runtime call? Did you ask on the ppc64 mailing list? I'm > > > > ok to take this for now, but odds are you need a better fix for this > > > > sometime... > > > > > > That isn't the worst part of it. The whole idea of checking what I'm > > > running to set a dma mask just doesn't make any sense at all. > > > > Oded, I thought I asked if there was a dma call you should be making to > > keep this type of check from being needed. What happened to that? As > > Christoph points out, none of this should be needed, which is what I > > thought I originally said :) > > > > thanks, > > > > greg k-h > > I'm sorry, but it seems I can't explain what's my problem because you > and Christoph keep mentioning the pci_set_dma_mask() but it doesn't > help me. > I'll try again to explain. > > The main problem specifically for Goya device, is that I can't call > this function with *the same parameter* for POWER9 and x86-64, because > x86-64 supports dma mask of 48-bits while POWER9 supports only 32-bits > or 64-bits. > > The main limitation in my Goya device is that it can generate PCI > outbound transactions with addresses from 0 to (2^50 - 1). > That's why when we first integrated it in x86-64, we used a DMA mask > of 48-bits, by calling pci_set_dma_mask(pdev, 48). That way, the > kernel ensures me that all the DMA addresses are from 0 to (2^48 - 1), > and that address range is accessible by my device. > > If for some reason, the x86-64 machine doesn't support 48-bits, the > standard fallback code in ALL the drivers I have seen is to set the > DMA mask to 32-bits. And that's how my current driver's code is > written. 
> > Now, when I tried to integrate Goya into a POWER9 machine, I got a > reject from the call to pci_set_dma_mask(pdev, 48). The standard code, > as I wrote above, is to call the same function with 32-bits. That > works BUT it is not practical, as our applications require much more > memory mapped then 32-bits. In addition, once you add more cards which > are all mapped to the same range, it is simply not usable at all. > > Therefore, I consulted with POWER people and they told me I can call > to pci_set_dma_mask with the mask as 64, but I must make sure that ALL > outbound transactions from Goya will be with bit 59 set in the > address. > I can achieve that with a dedicated configuration I make in Goya's > PCIe controller. That's what I did and that works. > > So, to summarize: > If I call pci_set_dma_mask with 48, then it fails on POWER9. However, > in runtime, I don't know if its POWER9 or not, so upon failure I will > call it again with 32, which makes our device pretty much unusable. > If I call pci_set_dma_mask with 64, and do the dedicated configuration > in Goya's PCIe controller, then it won't work on x86-64, because bit > 59 will be set and the host won't like it (I checked it). In addition, > I might get addresses above 50 bits, which my device can't generate. > > I hope this makes things more clear. Now, please explain to me how I > can call pci_set_dma_mask without any regard to whether I run on > x86-64 or POWER9, considering what I wrote above ? > > Thanks, > Oded Adding ppc mailing list. Oded
Re: Question - check in runtime which architecture am I running on
On Tue, Jun 11, 2019 at 5:07 PM Christoph Hellwig wrote: > > On Tue, Jun 11, 2019 at 03:30:08PM +0300, Oded Gabbay wrote: > > Hello POWER developers, > > > > I'm trying to find out if there is an internal kernel API so that a > > PCI driver can call it to check if its PCI device is running inside a > > POWER9 machine. Alternatively, if that's not available, if it is > > running on a machine with powerpc architecture. > > Your driver has absolutely no business knowing this. > > > > > I need this information as my device (Goya AI accelerator) > > unfortunately needs a slightly different configuration of its PCIe > > controller in case of POWER9 (need to set bit 59 to be 1 in all > > outbound transactions). > > No, it doesn't. You can query the output from dma_get_required_mask > to optimize for the DMA addresses you get, and otherwise you simply > set the maximum dma mask you support. That is about the control you > get, and nothing else is a driver's business. I don't want to conduct two discussions as I saw you answered on my patch. I'll add the ppc mailing list to my patch. Oded
Question - check in runtime which architecture am I running on
Hello POWER developers, I'm trying to find out if there is an internal kernel API so that a PCI driver can call it to check if its PCI device is running inside a POWER9 machine. Alternatively, if that's not available, if it is running on a machine with powerpc architecture. I need this information as my device (Goya AI accelerator) unfortunately needs a slightly different configuration of its PCIe controller in case of POWER9 (need to set bit 59 to be 1 in all outbound transactions). Currently I'm reading the PCI vendor and device ID of the parent PCI bus device and checking if it is PHB4 but that is an ugly hack. (see this commit - https://github.com/HabanaAI/linux/commit/1efd75ad5c9779b99a9a38c899e4e25e227626bf) I dug through the code but didn't find anything that can help me so I thought of asking more experienced people. Thanks, Oded
Question about Power8/9, PHB3/4 and setting of DMA mask
Hello, I'm working in a startup called HabanaLabs, and we have an ASIC accelerator for AI called Goya. It is assembled on a PCIe Gen4 card. Driver is going to be in kernel 5.1 We are trying to plug the card into a Power8 machine and load the driver, and we get a failure during the loading of the driver in regard to the driver trying to set the DMA mask. Due to some limitation in Goya, the driver first needs to allocate a 2MB chunk in a DMA-able address under 39 bits and then we would like to move to using up to 48 bits. Therefore, the driver first tries to set the DMA mask to 39 bits, allocate the 2MB area and later on, change the DMA mask to 48 bits. On x86 this works fine. However, as I said, on Power8 we got a failure when trying to set to 39 bits. After tracking the code, I reached this function: pnv_pci_ioda_dma_set_mask() In that function, there is a check (composed of 4 conditions) about the requested dma mask, which it appears we fail, and I suspect this is due to the memory_hotplug_max() returning more than 39 bits. My questions are: 1. Does this logic apply to Power9 as well ? 2. Why is this condition mandatory ? Is there some kind of workaround available ? Thanks in advance, Oded
Re: [PATCH] MDIO: FSL_PQ_MDIO: Fix bug on incorrect offset of tbipa register
On 06/12/2013 09:31 PM, Scott Wood wrote: On 06/12/2013 10:08:29 AM, Sebastian Andrzej Siewior wrote: On 06/12/2013 02:47 PM, Oded Gabbay wrote: This patch fixes a bug in the fsl_pq_mdio.c module and in relevant device-tree files regarding the correct offset of the tbipa register in the eTSEC controller in some of Freescale's PQ3 and QorIQ SoC. The bug happens when the mdio in the device tree is configured to be compatible to fsl,gianfar-tbi. Because the mdio device in the device tree points to addresses 25520, 26520 or 27520 (depends on the controller ID), the variable priv-map at function fsl_pq_mdio_probe, points to that address. However, later in the function there is a write to register tbipa that is actually located at 25030, 26030 or 27030. Because the correct address is not io mapped, the contents are written to a different register in the controller. The fix sets the address of the mdio device to start at 25000, 26000 or 27000 and changes the mii_offset field to 0x520 in the relevant entry (fsl,gianfar-tbi) of the fsl_pq_mdio_match array. Note: This patch may break MDIO functionallity of some old Freescale's SoC until Freescale will fix their device tree files. Basically, every device tree which contains an mdio device that is compatible to fsl,gianfar-tbi should be examined. Not as is. Please add a check for the original address. If it has 0x520 at the end print a warning and fix it up. Please add to the patch description which register is modified instead if this patch is not applied. Depending on how critical this it might has to go stable. I'm not sure it's stable material if this is something that has never worked... The device tree binding will also need to be fixed to note the difference in reg between fsl,gianfar-mdio and fsl-gianfar-tbi -- and should give an example of the latter. -Scott I read the 2 comments and I'm not sure what should be the best way to move ahead. 
I would like to describe what is the impact of not accepting this patch: When you connect any eTSEC, except the first one, using SGMII, you must configure the TBIPA register because the MII management configuration uses the TBIPA address as part of the SGMII initialization sequence, as described in the P2020 Reference manual. So, if that register is not initialized, the sequence is broken and the eTSEC is not functioning (can not send/receive packets). I still think the best way to fix it is what I did: 1. Point the priv->map to the start of the whole registers range of the eTSEC 2. Set mii_offset to 0x520 in the gianfar-tbi entry of the fsl_pq_mdio_match array. 3. Fix all the usages of the gianfar-tbi in the device tree files - change the starting address and reg range I think this is the best way because it is stated in fsl_pq_mdio_probe function that: /* * Some device tree nodes represent only the MII registers, and * others represent the MAC and MII registers. The 'mii_offset' field * contains the offset of the MII registers inside the mapped register * space. */ and that's why we have priv->map and priv->regs. So my fix goes according to the current design of the driver. -Oded ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] MDIO: FSL_PQ_MDIO: Fix bug on incorrect offset of tbipa register
Oded Gabbay wrote: Note: This patch may break MDIO functionality of some old Freescale's SoC until Freescale will fix their device tree files. Basically, every device tree which contains an mdio device that is compatible to fsl,gianfar-tbi should be examined. On 06/12/2013 04:04 PM, Timur Tabi wrote: I haven't had a chance to review the patch in detail, but I can tell you that breaking compatibility with older device trees is unacceptable. You need to add some code, even if it's an ugly hack, to support those trees. I generally agree with this statement except that without this patch, almost ALL of Freescale's SoC that use fsl,gianfar-tbi are broken, including the older ones. At least this patch fixes some of the device trees. Because I'm not working at Freescale, I have very limited access to a few SoC which I could test this patch on. I think it is Freescale's responsibility to release a complementary patch to fix the rest of the SoC device trees. Oded ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] MDIO: FSL_PQ_MDIO: Fix bug on incorrect offset of tbipa register
This patch fixes a bug in the fsl_pq_mdio.c module and in relevant device-tree files regarding the correct offset of the tbipa register in the eTSEC controller in some of Freescale's PQ3 and QorIQ SoC. The bug happens when the mdio in the device tree is configured to be compatible to fsl,gianfar-tbi. Because the mdio device in the device tree points to addresses 25520, 26520 or 27520 (depends on the controller ID), the variable priv->map at function fsl_pq_mdio_probe, points to that address. However, later in the function there is a write to register tbipa that is actually located at 25030, 26030 or 27030. Because the correct address is not io mapped, the contents are written to a different register in the controller. The fix sets the address of the mdio device to start at 25000, 26000 or 27000 and changes the mii_offset field to 0x520 in the relevant entry (fsl,gianfar-tbi) of the fsl_pq_mdio_match array. Note: This patch may break MDIO functionality of some old Freescale's SoC until Freescale will fix their device tree files. Basically, every device tree which contains an mdio device that is compatible to fsl,gianfar-tbi should be examined. 
Signed-off-by: Oded Gabbay ogab...@advaoptical.com --- arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi| 4 ++-- arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi| 4 ++-- arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi| 4 ++-- arch/powerpc/boot/dts/ge_imp3a.dts | 4 ++-- arch/powerpc/boot/dts/mpc8536ds.dtsi | 4 ++-- arch/powerpc/boot/dts/mpc8544ds.dtsi | 2 +- arch/powerpc/boot/dts/mpc8548cds.dtsi | 6 +++--- arch/powerpc/boot/dts/mpc8568mds.dts | 2 +- arch/powerpc/boot/dts/mpc8572ds.dtsi | 6 +++--- arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts | 4 ++-- arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts | 2 +- arch/powerpc/boot/dts/p2020ds.dtsi | 4 ++-- arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 4 ++-- arch/powerpc/boot/dts/p2020rdb.dts | 4 ++-- arch/powerpc/boot/dts/ppa8548.dts | 6 +++--- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 2 +- 16 files changed, 31 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi index 96693b4..d38bf63 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec1-1.dtsi @@ -46,9 +46,9 @@ ethernet@25000 { interrupts = 35 2 0 0 36 2 0 0 40 2 0 0; }; -mdio@25520 { +mdio@25000 { #address-cells = 1; #size-cells = 0; compatible = fsl,gianfar-tbi; - reg = 0x25520 0x20; + reg = 0x25000 0x1000; }; diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi index 6b3fab1..6290b49 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec1-2.dtsi @@ -46,9 +46,9 @@ ethernet@26000 { interrupts = 31 2 0 0 32 2 0 0 33 2 0 0; }; -mdio@26520 { +mdio@26000 { #address-cells = 1; #size-cells = 0; compatible = fsl,gianfar-tbi; - reg = 0x26520 0x20; + reg = 0x26000 0x1000; }; diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi index 0da592d..5296811 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi +++ 
b/arch/powerpc/boot/dts/fsl/pq3-etsec1-3.dtsi @@ -46,9 +46,9 @@ ethernet@27000 { interrupts = 37 2 0 0 38 2 0 0 39 2 0 0; }; -mdio@27520 { +mdio@27000 { #address-cells = 1; #size-cells = 0; compatible = fsl,gianfar-tbi; - reg = 0x27520 0x20; + reg = 0x27000 0x1000; }; diff --git a/arch/powerpc/boot/dts/ge_imp3a.dts b/arch/powerpc/boot/dts/ge_imp3a.dts index fefae41..49d9b4e 100644 --- a/arch/powerpc/boot/dts/ge_imp3a.dts +++ b/arch/powerpc/boot/dts/ge_imp3a.dts @@ -174,14 +174,14 @@ }; }; - mdio@25520 { + mdio@25000 { tbi1: tbi-phy@11 { reg = 0x11; device_type = tbi-phy; }; }; - mdio@26520 { + mdio@26000 { status = disabled; }; diff --git a/arch/powerpc/boot/dts/mpc8536ds.dtsi b/arch/powerpc/boot/dts/mpc8536ds.dtsi index 7c3dde8..c4df5a1 100644 --- a/arch/powerpc/boot/dts/mpc8536ds.dtsi +++ b/arch/powerpc/boot/dts/mpc8536ds.dtsi @@ -227,11 +227,11 @@ phy-connection-type = rgmii-id; }; - mdio@26520 { + mdio@26000 { #address-cells = 1; #size-cells = 0; compatible = fsl,gianfar-tbi; - reg = 0x26520 0x20; + reg = 0x26000 0x1000; tbi1: tbi-phy@11 { reg = 0x11; diff --git a/arch/powerpc/boot/dts/mpc8544ds.dtsi b/arch/powerpc/boot