Re: [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment()
Hi, Marek, Yes, I know in include/linux/slab.h, there is #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN But I observed that the req/resp isn't aligned to ARCH_DMA_MINALIGN and I don't know why. Problems I experienced is: In non-coherent mode, mvsas with an expander cannot detect any disks. Huacai -- Original -- From: "Marek Szyprowski"<m.szyprow...@samsung.com>; Date: Tue, Oct 17, 2017 07:55 PM To: "Huacai Chen"<che...@lemote.com>; "Christoph Hellwig"<h...@lst.de>; Cc: "Robin Murphy"<robin.mur...@arm.com>; "Andrew Morton"<a...@linux-foundation.org>; "Fuxin Zhang"<zhan...@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>; "Ralf Baechle"<r...@linux-mips.org>; "JamesHogan"<james.ho...@imgtec.com>; "linux-mips"<linux-m...@linux-mips.org>; "James E . J .Bottomley"<j...@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.peter...@oracle.com>; "linux-scsi"<linux-s...@vger.kernel.org>; "Tejun Heo"<t...@kernel.org>; "linux-ide"<linux-...@vger.kernel.org>; "stable"<sta...@vger.kernel.org>; Subject: Re: [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment() Hi Huacai, On 2017-10-17 10:05, Huacai Chen wrote: > In non-coherent DMA mode, kernel uses cache flushing operations to > maintain I/O coherency, so libsas's SMP request/response should be > aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer and a kernel > structure share a same cache line, and if the kernel structure has > dirty data, cache_invalidate (no writeback) will cause data corruption. > > Cc: sta...@vger.kernel.org > Signed-off-by: Huacai Chen <che...@lemote.com> > --- > drivers/scsi/libsas/sas_expander.c | 93 > +++--- > 1 file changed, 57 insertions(+), 36 deletions(-) > > diff --git a/drivers/scsi/libsas/sas_expander.c > b/drivers/scsi/libsas/sas_expander.c > index 6b4fd23..124a44b 100644 > --- a/drivers/scsi/libsas/sas_expander.c > +++ b/drivers/scsi/libsas/sas_expander.c > @@ -164,17 +164,17 @@ static int smp_execute_task(struct domain_device *dev, > void *req, int req_size, > > /* -- Allocations -- */ > > -static inline void *alloc_smp_req(int size) > +static inline void *alloc_smp_req(int size, int align) > { > - u8 *p = kzalloc(size, GFP_KERNEL); > + u8 *p = kzalloc(ALIGN(size, align), GFP_KERNEL); If I remember correctly, kernel guarantees that each kmalloced buffer is always at least aligned to the CPU cache line, so CPU cache can be invalidated on the allocated buffer without corrupting anything else. Taking this into account, I wonder if the above change make sense. Have you experienced any problems without this change? > if (p) > p[0] = SMP_REQUEST; > return p; > } > > -static inline void *alloc_smp_resp(int size) > +static inline void *alloc_smp_resp(int size, int align) > { > - return kzalloc(size, GFP_KERNEL); > + return kzalloc(ALIGN(size, align), GFP_KERNEL); Save a above. > } > > static char sas_route_char(struct domain_device *dev, struct ex_phy *phy) > @@ -403,15 +403,17 @@ static int sas_ex_phy_discover_helper(struct > domain_device *dev, u8 *disc_req, > int sas_ex_phy_discover(struct domain_device *dev, int single) > { > struct expander_device *ex = >ex_dev; > - int res = 0; > + int res = 0, align; > u8 *disc_req; > u8 *disc_resp; > > - disc_req = alloc_smp_req(DISCOVER_REQ_SIZE); > + align = dma_get_cache_alignment(>phy->dev); > + > + disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align); > if (!disc_req) > return -ENOMEM; > > - disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); > + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align); > if (!disc_resp) { > kfree(disc_req); > return -ENOMEM; > @@ -480,14 +482,15 @@ static int sas_ex_general(struct domain_device *dev) > { > u8 *rg_req; > struct smp_resp *rg_resp; > - int res; > - int i; > + int i, res, align; > > - rg_req = alloc_smp_req(RG_REQ_SIZE); > + align = dma_get_cache_alignment(>phy->dev); > + > + rg_req = alloc_smp_req(RG_REQ_SIZE, align); > if (!rg_req) > return -ENOMEM; > > - rg_resp = alloc_smp_resp(RG_RESP_SIZE); > + rg_resp = alloc_smp_resp(RG_RESP_SIZE, align); > if (!rg_resp) { > kfree(rg_req); > return -ENOMEM; > @@ -552,
Re: [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment()
Hi, Marek, Yes, I know in include/linux/slab.h, there is #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN But I observed that the req/resp isn't aligned to ARCH_DMA_MINALIGN and I don't know why. Problems I experienced is: In non-coherent mode, mvsas with an expander cannot detect any disks. Huacai -- Original -- From: "Marek Szyprowski"; Date: Tue, Oct 17, 2017 07:55 PM To: "Huacai Chen"; "Christoph Hellwig"; Cc: "Robin Murphy"; "Andrew Morton"; "Fuxin Zhang"; "linux-kernel"; "Ralf Baechle"; "JamesHogan"; "linux-mips"; "James E . J .Bottomley"; "Martin K . Petersen"; "linux-scsi"; "Tejun Heo"; "linux-ide"; "stable"; Subject: Re: [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment() Hi Huacai, On 2017-10-17 10:05, Huacai Chen wrote: > In non-coherent DMA mode, kernel uses cache flushing operations to > maintain I/O coherency, so libsas's SMP request/response should be > aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer and a kernel > structure share a same cache line, and if the kernel structure has > dirty data, cache_invalidate (no writeback) will cause data corruption. > > Cc: sta...@vger.kernel.org > Signed-off-by: Huacai Chen > --- > drivers/scsi/libsas/sas_expander.c | 93 > +++--- > 1 file changed, 57 insertions(+), 36 deletions(-) > > diff --git a/drivers/scsi/libsas/sas_expander.c > b/drivers/scsi/libsas/sas_expander.c > index 6b4fd23..124a44b 100644 > --- a/drivers/scsi/libsas/sas_expander.c > +++ b/drivers/scsi/libsas/sas_expander.c > @@ -164,17 +164,17 @@ static int smp_execute_task(struct domain_device *dev, > void *req, int req_size, > > /* -- Allocations -- */ > > -static inline void *alloc_smp_req(int size) > +static inline void *alloc_smp_req(int size, int align) > { > - u8 *p = kzalloc(size, GFP_KERNEL); > + u8 *p = kzalloc(ALIGN(size, align), GFP_KERNEL); If I remember correctly, kernel guarantees that each kmalloced buffer is always at least aligned to the CPU cache line, so CPU cache can be invalidated on the allocated buffer without corrupting anything else. Taking this into account, I wonder if the above change make sense. Have you experienced any problems without this change? > if (p) > p[0] = SMP_REQUEST; > return p; > } > > -static inline void *alloc_smp_resp(int size) > +static inline void *alloc_smp_resp(int size, int align) > { > - return kzalloc(size, GFP_KERNEL); > + return kzalloc(ALIGN(size, align), GFP_KERNEL); Save a above. > } > > static char sas_route_char(struct domain_device *dev, struct ex_phy *phy) > @@ -403,15 +403,17 @@ static int sas_ex_phy_discover_helper(struct > domain_device *dev, u8 *disc_req, > int sas_ex_phy_discover(struct domain_device *dev, int single) > { > struct expander_device *ex = >ex_dev; > - int res = 0; > + int res = 0, align; > u8 *disc_req; > u8 *disc_resp; > > - disc_req = alloc_smp_req(DISCOVER_REQ_SIZE); > + align = dma_get_cache_alignment(>phy->dev); > + > + disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align); > if (!disc_req) > return -ENOMEM; > > - disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); > + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align); > if (!disc_resp) { > kfree(disc_req); > return -ENOMEM; > @@ -480,14 +482,15 @@ static int sas_ex_general(struct domain_device *dev) > { > u8 *rg_req; > struct smp_resp *rg_resp; > - int res; > - int i; > + int i, res, align; > > - rg_req = alloc_smp_req(RG_REQ_SIZE); > + align = dma_get_cache_alignment(>phy->dev); > + > + rg_req = alloc_smp_req(RG_REQ_SIZE, align); > if (!rg_req) > return -ENOMEM; > > - rg_resp = alloc_smp_resp(RG_RESP_SIZE); > + rg_resp = alloc_smp_resp(RG_RESP_SIZE, align); > if (!rg_resp) { > kfree(rg_req); > return -ENOMEM; > @@ -552,13 +555,15 @@ static int sas_ex_manuf_info(struct domain_device *dev) > { > u8 *mi_req; > u8 *mi_resp; > - int res; > + int res, align; > > - mi_req = alloc_smp_req(MI_REQ_SIZE); > + align = dma_get_cache_alignment(>phy->dev); > + > + mi_req = alloc_smp_req(MI_REQ_SIZE, align); > if (!mi_req) > return -ENOMEM; > > - mi_resp = alloc_smp_resp(MI_RESP_SIZE); > + mi_resp = a