> -----Original Message----- > From: linux-rdma-ow...@vger.kernel.org [mailto:linux-rdma- > ow...@vger.kernel.org] On Behalf Of Alexey Ishchuk > Sent: Friday, October 10, 2014 12:34 PM > To: linux-rdma@vger.kernel.org > Cc: blasc...@linux.vnet.ibm.com; schwidef...@de.ibm.com; > gmue...@de.ibm.com; utz.bac...@de.ibm.com; rol...@kernel.org; Yishai > Hadas; Alexey Ishchuk; Alexey Ishchuk > Subject: [PATCH 1/3] s390/kernel: add system calls for access PCI memory > > Add the new __NR_s390_pci_mmio_write and __NR_s390_pci_mmio_read > system calls to allow user space applications to access device PCI I/O > memory pages on s390x platform. > > Signed-off-by: Alexey Ishchuk <alexey_ishc...@ru.ibm.com> > --- > arch/s390/include/uapi/asm/unistd.h | 4 +- > arch/s390/kernel/Makefile | 1 + > arch/s390/kernel/entry.h | 4 + > arch/s390/kernel/pci_mmio.c | 207 > ++++++++++++++++++++++++++++++++++++ > arch/s390/kernel/syscalls.S | 2 + > 5 files changed, 217 insertions(+), 1 deletion(-) > create mode 100644 arch/s390/kernel/pci_mmio.c > > diff --git a/arch/s390/include/uapi/asm/unistd.h > b/arch/s390/include/uapi/asm/unistd.h > index 3802d2d..ab49d1d 100644 > --- a/arch/s390/include/uapi/asm/unistd.h > +++ b/arch/s390/include/uapi/asm/unistd.h > @@ -283,7 +283,9 @@ > #define __NR_sched_setattr 345 > #define __NR_sched_getattr 346 > #define __NR_renameat2 347 > -#define NR_syscalls 348 > +#define __NR_s390_pci_mmio_write 348 > +#define __NR_s390_pci_mmio_read 349 > +#define NR_syscalls 350 > > /* > * There are some system calls that are not present on 64 bit, some > diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile > index 33225e8..3e71b7e 100644 > --- a/arch/s390/kernel/Makefile > +++ b/arch/s390/kernel/Makefile > @@ -60,6 +60,7 @@ ifdef CONFIG_64BIT > obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o > \ > perf_cpum_cf_events.o > obj-y += runtime_instr.o cache.o > +obj-y += pci_mmio.o > endif > > # vdso > diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h > index 6ac7819..a36b6f9 100644 > --- a/arch/s390/kernel/entry.h > +++ b/arch/s390/kernel/entry.h > @@ -70,4 +70,8 @@ struct old_sigaction; > long sys_s390_personality(unsigned int personality); > long sys_s390_runtime_instr(int command, int signum); > > +long sys_s390_pci_mmio_write(const unsigned long mmio_addr, > + const void *user_buffer, const size_t length); > +long sys_s390_pci_mmio_read(const unsigned long mmio_addr, > + void *user_buffer, const size_t length); > #endif /* _ENTRY_H */ > diff --git a/arch/s390/kernel/pci_mmio.c b/arch/s390/kernel/pci_mmio.c > new file mode 100644 > index 0000000..f318207 > --- /dev/null > +++ b/arch/s390/kernel/pci_mmio.c > @@ -0,0 +1,207 @@ > +/* > + * Access to PCI I/O memory from user space programs. > + * > + * Copyright IBM Corp. 2014 > + * Author(s): Alexey Ishchuk <aishc...@linux.vnet.ibm.com> > + */ > +#include <linux/kernel.h> > +#include <linux/syscalls.h> > +#include <linux/init.h> > +#include <linux/mm.h> > +#include <linux/errno.h> > +#include <linux/pci.h> > + > +union value_buffer { > + u8 buf8; > + u16 buf16; > + u32 buf32; > + u64 buf64; > + u8 buf_large[64]; > +}; > + > +static long get_pfn(const unsigned long user_addr, > + const unsigned long access, > + unsigned long *pfn) > +{ > + struct vm_area_struct *vma = NULL; > + long ret = 0L; > + > + if (!pfn) > + return -EINVAL; > + > + down_read(¤t->mm->mmap_sem); > + vma = find_vma(current->mm, user_addr); > + if (vma) { > + if (!(vma->vm_flags & access)) > + ret = -EACCES; > + else > + ret = follow_pfn(vma, user_addr, pfn); > + } else { > + ret = -EINVAL; > + } > + up_read(¤t->mm->mmap_sem); > + > + return ret; > +} > + > +static inline int verify_page_addr(const unsigned long page_addr) > +{ > + return !(page_addr < ZPCI_IOMAP_ADDR_BASE || > + page_addr > (ZPCI_IOMAP_ADDR_BASE | ZPCI_IOMAP_ADDR_IDX_MASK)); > +} > + > +static long choose_buffer(const size_t length, > + union value_buffer *value, > + void **buf) > +{ > + long ret = 0L; > + > + if (length > sizeof(value->buf_large)) { > + *buf = kmalloc(length, GFP_KERNEL); > + if (!*buf) > + return -ENOMEM; > + ret = 1; > + } else { > + *buf = value->buf_large; > + } > + return ret; > +} > + > +SYSCALL_DEFINE3(s390_pci_mmio_write, > + const unsigned long, mmio_addr, > + const void __user *, user_buffer, > + const size_t, length) > +{ > + long ret = 0L; > + void *buf = NULL; > + long buf_allocated = 0; > + void __iomem *io_addr = NULL; > + unsigned long pfn = 0UL; > + unsigned long offset = 0UL; > + unsigned long page_addr = 0UL; > + union value_buffer value; > + > + if (!length) > + return -EINVAL; > + if (!zpci_is_enabled()) > + return -ENODEV; > + > + ret = get_pfn(mmio_addr, VM_WRITE, &pfn); > + if (ret) > + return ret; > + > + page_addr = pfn << PAGE_SHIFT; > + if (!verify_page_addr(page_addr)) > + return -EFAULT; > + > + offset = mmio_addr & ~PAGE_MASK; > + if (offset + length > PAGE_SIZE) > + return -EINVAL; > + io_addr = (void *)(page_addr | offset); > + > + buf_allocated = choose_buffer(length, &value, &buf); > + if (buf_allocated < 0L) > + return -ENOMEM; > + > + switch (length) { > + case 1: > + ret = get_user(value.buf8, ((u8 *)user_buffer));
This cast (and similar casts across the code) kills the __user annotation of the user buffer pointer. First - fix this to help various static verification tools such as sparse work on your code. Second - are you sure this switch-case block achieves any performance gain compared to always using copy_from_user? If so, why not just push it into the S390 copy from user code? > + break; > + case 2: > + ret = get_user(value.buf16, ((u16 *)user_buffer)); > + break; > + case 4: > + ret = get_user(value.buf32, ((u32 *)user_buffer)); > + break; > + case 8: > + ret = get_user(value.buf64, ((u64 *)user_buffer)); > + break; > + default: > + ret = copy_from_user(buf, user_buffer, length); > + } > + if (ret) > + goto out; > + > + switch (length) { > + case 1: > + __raw_writeb(value.buf8, io_addr); > + break; > + case 2: > + __raw_writew(value.buf16, io_addr); > + break; > + case 4: > + __raw_writel(value.buf32, io_addr); > + break; > + case 8: > + __raw_writeq(value.buf64, io_addr); > + break; > + default: > + memcpy_toio(io_addr, buf, length); > + } > +out: > + if (buf_allocated > 0L) > + kfree(buf); > + return ret; > +} > + > +SYSCALL_DEFINE3(s390_pci_mmio_read, > + const unsigned long, mmio_addr, > + void __user *, user_buffer, > + const size_t, length) > +{ > + long ret = 0L; > + void *buf = NULL; > + long buf_allocated = 0L; > + void __iomem *io_addr = NULL; > + unsigned long pfn = 0UL; > + unsigned long offset = 0UL; > + unsigned long page_addr = 0UL; > + union value_buffer value; > + > + if (!length) > + return -EINVAL; > + if (!zpci_is_enabled()) > + return -ENODEV; > + > + ret = get_pfn(mmio_addr, VM_READ, &pfn); > + if (ret) > + return ret; > + > + page_addr = pfn << PAGE_SHIFT; > + if (!verify_page_addr(page_addr)) > + return -EFAULT; > + > + offset = mmio_addr & ~PAGE_MASK; > + if (offset + length > PAGE_SIZE) > + return -EINVAL; > + io_addr = (void *)(page_addr | offset); > + > + buf_allocated = choose_buffer(length, &value, &buf); > + if (buf_allocated < 0L) > + return -ENOMEM; > + > + switch (length) { > + case 1: > + value.buf8 = __raw_readb(io_addr); > + ret = put_user(value.buf8, ((u8 *)user_buffer)); Add __user annotations in this code block as well. > + break; > + case 2: > + value.buf16 = __raw_readw(io_addr); > + ret = put_user(value.buf16, ((u16 *)user_buffer)); > + break; > + case 4: > + value.buf32 = __raw_readl(io_addr); > + ret = put_user(value.buf32, ((u32 *)user_buffer)); > + break; > + case 8: > + value.buf64 = __raw_readq(io_addr); > + ret = put_user(value.buf64, ((u64 *)user_buffer)); > + break; > + default: > + memcpy_fromio(buf, io_addr, length); > + ret = copy_to_user(user_buffer, buf, length); > + } > + if (buf_allocated > 0L) > + kfree(buf); > + return ret; > +} > diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S > index fe5cdf2..1faa942 100644 > --- a/arch/s390/kernel/syscalls.S > +++ b/arch/s390/kernel/syscalls.S > @@ -356,3 +356,5 @@ > SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) > SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) > /* 345 */ > SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) > SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) > +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,sys_ni_syscall) > +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,sys_ni_syscall) Generally speaking, looks OK once the __user annotation is added. I suspect you might need ack/review from the S390 maintainer as well for this to be pushed, as the syscall is generic to the entire S390 subsystem. --Shachar -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html