[PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-23 Thread Dan Williams
The mmap(2) syscall suffers from the ABI anti-pattern of not validating
unknown flags. However, proposals like MAP_SYNC and MAP_DIRECT need a
mechanism to define new behavior that is known to fail on older kernels
without the support. Define a new mmap3 syscall that checks for
unsupported flags at syscall entry and add a 'mmap_supported_mask' to
'struct file_operations' so generic code can validate the ->mmap()
handler knows about the specified flags. This also arranges for the
flags to be passed to the handler so it can do further local validation
if the requested behavior can be fulfilled.

Cc: Jan Kara 
Cc: Arnd Bergmann 
Cc: Andrew Morton 
Suggested-by: Andy Lutomirski 
Signed-off-by: Dan Williams 
---
 arch/x86/entry/syscalls/syscall_32.tbl |1 +
 arch/x86/entry/syscalls/syscall_64.tbl |1 +
 include/linux/fs.h |1 +
 include/linux/mm.h |2 +-
 include/linux/mman.h   |   42 
 include/linux/syscalls.h   |3 ++
 mm/mmap.c  |   32 ++--
 7 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
b/arch/x86/entry/syscalls/syscall_32.tbl
index 448ac2161112..0618b5b38b45 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -391,3 +391,4 @@
 382i386pkey_free   sys_pkey_free
 383i386statx   sys_statx
 384i386arch_prctl  sys_arch_prctl  
compat_sys_arch_prctl
+385i386mmap3   sys_mmap_pgoff_strict
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl 
b/arch/x86/entry/syscalls/syscall_64.tbl
index 5aef183e2f85..e204c736d7e9 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -339,6 +339,7 @@
 330common  pkey_alloc  sys_pkey_alloc
 331common  pkey_free   sys_pkey_free
 332common  statx   sys_statx
+333common  mmap3   sys_mmap_pgoff_strict
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 33d1ee8f51be..db42da9f98c4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1674,6 +1674,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *, unsigned long);
+   unsigned long mmap_supported_mask;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46b9ac5e8569..49eef48da4b7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2090,7 +2090,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-   struct list_head *uf);
+   struct list_head *uf, unsigned long flags);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
diff --git a/include/linux/mman.h b/include/linux/mman.h
index c8367041fafd..64b6cb3dec70 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -7,6 +7,48 @@
 #include 
 #include 
 
+/*
+ * Arrange for undefined architecture specific flags to be rejected by
+ * default.
+ */
+#ifndef MAP_32BIT
+#define MAP_32BIT 0
+#endif
+#ifndef MAP_HUGE_2MB
+#define MAP_HUGE_2MB 0
+#endif
+#ifndef MAP_HUGE_1GB
+#define MAP_HUGE_1GB 0
+#endif
+#ifndef MAP_UNINITIALIZED
+#define MAP_UNINITIALIZED 0
+#endif
+
+/*
+ * The historical set of flags that all mmap implementations implicitly
+ * support when file_operations.mmap_supported_mask is zero. With the
+ * mmap3 syscall the deprecated MAP_DENYWRITE and MAP_EXECUTABLE bit
+ * values are explicitly rejected with EOPNOTSUPP rather than being
+ * silently accepted.
+ */
+#define LEGACY_MAP_SUPPORTED_MASK (MAP_SHARED \
+   | MAP_PRIVATE \
+   | MAP_FIXED \
+   | MAP_ANONYMOUS \
+   | MAP_UNINITIALIZED \
+   | MAP_GROWSDOWN \
+   | MAP_LOCKED \
+   | MAP_NORESERVE \
+   | MAP_POPULATE \
+   | MAP_NONBLOCK \
+   | MAP_STACK \
+   | MAP_HUGETLB \
+   | MAP_32BIT \
+   | MAP_HUGE_2MB \
+   | MAP_HUGE_1GB)
+
+#defineMAP_SUPPORTED_MASK (LEGACY_MAP_SUPPORTED_MASK)
+
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 exter

Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-24 Thread Jan Kara
On Wed 23-08-17 16:48:51, Dan Williams wrote:
> The mmap(2) syscall suffers from the ABI anti-pattern of not validating
> unknown flags. However, proposals like MAP_SYNC and MAP_DIRECT need a
> mechanism to define new behavior that is known to fail on older kernels
> without the support. Define a new mmap3 syscall that checks for
> unsupported flags at syscall entry and add a 'mmap_supported_mask' to
> 'struct file_operations' so generic code can validate the ->mmap()
> handler knows about the specified flags. This also arranges for the
> flags to be passed to the handler so it can do further local validation
> if the requested behavior can be fulfilled.
> 
> Cc: Jan Kara 
> Cc: Arnd Bergmann 
> Cc: Andrew Morton 
> Suggested-by: Andy Lutomirski 
> Signed-off-by: Dan Williams 

OK, are we sold on this approach to introduction of new mmap flags? I'm
asking because working API for mmap flag is basically the only thing that's
missing from my MAP_SYNC patches so I'd like to rebase my patches onto
something that is working...

Honza


> ---
>  arch/x86/entry/syscalls/syscall_32.tbl |1 +
>  arch/x86/entry/syscalls/syscall_64.tbl |1 +
>  include/linux/fs.h |1 +
>  include/linux/mm.h |2 +-
>  include/linux/mman.h   |   42 
> 
>  include/linux/syscalls.h   |3 ++
>  mm/mmap.c  |   32 ++--
>  7 files changed, 78 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
> b/arch/x86/entry/syscalls/syscall_32.tbl
> index 448ac2161112..0618b5b38b45 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -391,3 +391,4 @@
>  382  i386pkey_free   sys_pkey_free
>  383  i386statx   sys_statx
>  384  i386arch_prctl  sys_arch_prctl  
> compat_sys_arch_prctl
> +385  i386mmap3   sys_mmap_pgoff_strict
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl 
> b/arch/x86/entry/syscalls/syscall_64.tbl
> index 5aef183e2f85..e204c736d7e9 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -339,6 +339,7 @@
>  330  common  pkey_alloc  sys_pkey_alloc
>  331  common  pkey_free   sys_pkey_free
>  332  common  statx   sys_statx
> +333  common  mmap3   sys_mmap_pgoff_strict
>  
>  #
>  # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 33d1ee8f51be..db42da9f98c4 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1674,6 +1674,7 @@ struct file_operations {
>   long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
>   long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
>   int (*mmap) (struct file *, struct vm_area_struct *, unsigned long);
> + unsigned long mmap_supported_mask;
>   int (*open) (struct inode *, struct file *);
>   int (*flush) (struct file *, fl_owner_t id);
>   int (*release) (struct inode *, struct file *);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 46b9ac5e8569..49eef48da4b7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2090,7 +2090,7 @@ extern unsigned long get_unmapped_area(struct file *, 
> unsigned long, unsigned lo
>  
>  extern unsigned long mmap_region(struct file *file, unsigned long addr,
>   unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> - struct list_head *uf);
> + struct list_head *uf, unsigned long flags);
>  extern unsigned long do_mmap(struct file *file, unsigned long addr,
>   unsigned long len, unsigned long prot, unsigned long flags,
>   vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
> diff --git a/include/linux/mman.h b/include/linux/mman.h
> index c8367041fafd..64b6cb3dec70 100644
> --- a/include/linux/mman.h
> +++ b/include/linux/mman.h
> @@ -7,6 +7,48 @@
>  #include 
>  #include 
>  
> +/*
> + * Arrange for undefined architecture specific flags to be rejected by
> + * default.
> + */
> +#ifndef MAP_32BIT
> +#define MAP_32BIT 0
> +#endif
> +#ifndef MAP_HUGE_2MB
> +#define MAP_HUGE_2MB 0
> +#endif
> +#ifndef MAP_HUGE_1GB
> +#define MAP_HUGE_1GB 0
> +#endif
> +#ifndef MAP_UNINITIALIZED
> +#define MAP_UNINITIALIZED 0
> +#endif
> +
> +/*
> + * The historical set of flags that all mmap implementations implicitly
> + * support when file_operations.mmap_supported_mask is zero. With the
> + * mmap3 syscall the deprecated MAP_DENYWRITE and MAP_EXECUTABLE bit
> + * values are explicitly rejected with EOPNOTSUPP rather than being
> + * silently accepted.
> + */
> +#define LEGACY_MAP_SUPPORTED_MASK (MAP_SHARED \
> + | MAP_PRIVATE \
> + | MAP_FIX

Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-24 Thread Christoph Hellwig
On Wed, Aug 23, 2017 at 04:48:51PM -0700, Dan Williams wrote:
> The mmap(2) syscall suffers from the ABI anti-pattern of not validating
> unknown flags. However, proposals like MAP_SYNC and MAP_DIRECT need a
> mechanism to define new behavior that is known to fail on older kernels
> without the support. Define a new mmap3 syscall that checks for
> unsupported flags at syscall entry and add a 'mmap_supported_mask' to
> 'struct file_operations' so generic code can validate the ->mmap()
> handler knows about the specified flags. This also arranges for the
> flags to be passed to the handler so it can do further local validation
> if the requested behavior can be fulfilled.

What is the reason to not go with __MAP_VALID hack?  Adding new
syscalls is extremely painful, it will take forever to trickle this
through all architectures (especially with the various 32-bit
architectures having all kinds of different granularities for the
offset) and then the various C libraries, never mind applications.
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-24 Thread Dan Williams
On Thu, Aug 24, 2017 at 9:55 AM, Christoph Hellwig  wrote:
> On Wed, Aug 23, 2017 at 04:48:51PM -0700, Dan Williams wrote:
>> The mmap(2) syscall suffers from the ABI anti-pattern of not validating
>> unknown flags. However, proposals like MAP_SYNC and MAP_DIRECT need a
>> mechanism to define new behavior that is known to fail on older kernels
>> without the support. Define a new mmap3 syscall that checks for
>> unsupported flags at syscall entry and add a 'mmap_supported_mask' to
>> 'struct file_operations' so generic code can validate the ->mmap()
>> handler knows about the specified flags. This also arranges for the
>> flags to be passed to the handler so it can do further local validation
>> if the requested behavior can be fulfilled.
>
> What is the reason to not go with __MAP_VALID hack?  Adding new
> syscalls is extremely painful, it will take forever to trickle this
> through all architectures (especially with the various 32-bit
> architectures having all kinds of different granularities for the
> offset) and then the various C libraries, never mind applications.

I'll let Andy and Kirill restate their concerns, but one of the
arguments that swayed me is that any new mmap flag with this hack must
be documented to only work with MAP_SHARED and that MAP_PRIVATE is
silently ignored. I agree with the mess and delays it causes for other
archs and libc, but at the same time this is for new applications and
libraries that know to look for the new flag, so they need to do the
extra work to check for the new syscall.

However, if the fcntl lease approach works for the DMA cases then we
only have the one mmap flag to add for now, so maybe the weird
MAP_{SHARED|PRIVATE} semantics are tolerable.
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-25 Thread Christoph Hellwig
On Thu, Aug 24, 2017 at 10:36:02AM -0700, Dan Williams wrote:
> I'll let Andy and Kirill restate their concerns, but one of the
> arguments that swayed me is that any new mmap flag with this hack must
> be documented to only work with MAP_SHARED and that MAP_PRIVATE is
> silently ignored. I agree with the mess and delays it causes for other
> archs and libc, but at the same time this is for new applications and
> libraries that know to look for the new flag, so they need to do the
> extra work to check for the new syscall.

True.  That is for the original hack, but I spent some more time
looking at the mmap code, and there is one thing I noticed:

include/uapi/asm-generic/mman-common.h:

#define MAP_SHARED  0x01/* Share changes */
#define MAP_PRIVATE 0x02/* Changes are private */
#define MAP_TYPE0x0f/* Mask for type of mapping */

mm/mmap.c:

if (file) {
struct inode *inode = file_inode(file);

switch (flags & MAP_TYPE) {
case MAP_SHARED:
...
case MAP_PRIVATE:
...
default:
return -EINVAL;
}

and very similar for the anonymous and nommu cases.

So if we pick e.g. 0x4 as the valid bit we don't even need to overload
the MAP_SHARED and MAP_PRIVATE meaning.

> 
> However, if the fcntl lease approach works for the DMA cases then we
> only have the one mmap flag to add for now, so maybe the weird
> MAP_{SHARED|PRIVATE} semantics are tolerable.
---end quoted text---
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-25 Thread Kirill A. Shutemov
On Fri, Aug 25, 2017 at 06:00:11AM -0700, Christoph Hellwig wrote:
> On Thu, Aug 24, 2017 at 10:36:02AM -0700, Dan Williams wrote:
> > I'll let Andy and Kirill restate their concerns, but one of the
> > arguments that swayed me is that any new mmap flag with this hack must
> > be documented to only work with MAP_SHARED and that MAP_PRIVATE is
> > silently ignored. I agree with the mess and delays it causes for other
> > archs and libc, but at the same time this is for new applications and
> > libraries that know to look for the new flag, so they need to do the
> > extra work to check for the new syscall.
> 
> True.  That is for the original hack, but I spent some more time
> looking at the mmap code, and there is one thing I noticed:
> 
> include/uapi/asm-generic/mman-common.h:
> 
> #define MAP_SHARED  0x01/* Share changes */
> #define MAP_PRIVATE 0x02/* Changes are private */
> #define MAP_TYPE0x0f/* Mask for type of mapping */
> 
> mm/mmap.c:
> 
>   if (file) {
>   struct inode *inode = file_inode(file);
> 
>   switch (flags & MAP_TYPE) {
> case MAP_SHARED:
>   ...
>   case MAP_PRIVATE:
>   ...
>   default:
>   return -EINVAL;
>   }
> 
> and very similar for the anonymous and nommu cases.
> 
> So if we pick e.g. 0x4 as the valid bit we don't even need to overload
> the MAP_SHARED and MAP_PRIVATE meaning.

Not all archs are ready for this:

arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03/* Mask 
for type of mapping */
arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04/* 
Interpret addr exactly */

-- 
 Kirill A. Shutemov
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-25 Thread Christoph Hellwig
On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
> Not all archs are ready for this:
> 
> arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03/* 
> Mask for type of mapping */
> arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04/* 
> Interpret addr exactly */

I'd be happy to say that we should not care about parisc for
persistent memory.  We'll just have to find a way to exclude
parisc without making life too ugly.
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-25 Thread Kirill A. Shutemov
On Fri, Aug 25, 2017 at 09:02:36AM -0700, Christoph Hellwig wrote:
> On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
> > Not all archs are ready for this:
> > 
> > arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03/* 
> > Mask for type of mapping */
> > arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04/* 
> > Interpret addr exactly */
> 
> I'd be happy to say that we should not care about parisc for
> persistent memory.  We'll just have to find a way to exclude
> parisc without making life too ugly.

I don't think creapling mmap() interface for one arch is the right way to
go. I think the interface should be universal.

I may imagine MAP_DIRECT can be useful not only for persistent memory.
For tmpfs instead of mlock()?

-- 
 Kirill A. Shutemov
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-25 Thread Helge Deller
On 25.08.2017 18:16, Kirill A. Shutemov wrote:
> On Fri, Aug 25, 2017 at 09:02:36AM -0700, Christoph Hellwig wrote:
>> On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
>>> Not all archs are ready for this:
>>>
>>> arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03/* 
>>> Mask for type of mapping */
>>> arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04/* 
>>> Interpret addr exactly */
>>
>> I'd be happy to say that we should not care about parisc for
>> persistent memory.  We'll just have to find a way to exclude
>> parisc without making life too ugly.
> 
> I don't think creapling mmap() interface for one arch is the right way to
> go. I think the interface should be universal.
> 
> I may imagine MAP_DIRECT can be useful not only for persistent memory.
> For tmpfs instead of mlock()?

On parisc we have
#define MAP_SHARED  0x01/* Share changes */
#define MAP_PRIVATE 0x02/* Changes are private */
#define MAP_TYPE0x03/* Mask for type of mapping */
#define MAP_FIXED   0x04/* Interpret addr exactly */
#define MAP_ANONYMOUS   0x10/* don't use a file */

So, if you need a MAP_DIRECT, wouldn't e.g.
#define MAP_DIRECT  0x08
be possible (for parisc, and others 0x04).
And if MAP_TYPE needs to include this flag on parisc:
#define MAP_TYPE(0x03 | 0x08)  /* Mask for type of mapping */

Helge
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-25 Thread Kirill A. Shutemov
On Fri, Aug 25, 2017 at 04:19:19PM +, Helge Deller wrote:
> On 25.08.2017 18:16, Kirill A. Shutemov wrote:
> > On Fri, Aug 25, 2017 at 09:02:36AM -0700, Christoph Hellwig wrote:
> >> On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
> >>> Not all archs are ready for this:
> >>>
> >>> arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03
> >>> /* Mask for type of mapping */
> >>> arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04
> >>> /* Interpret addr exactly */
> >>
> >> I'd be happy to say that we should not care about parisc for
> >> persistent memory.  We'll just have to find a way to exclude
> >> parisc without making life too ugly.
> > 
> > I don't think creapling mmap() interface for one arch is the right way to
> > go. I think the interface should be universal.
> > 
> > I may imagine MAP_DIRECT can be useful not only for persistent memory.
> > For tmpfs instead of mlock()?
> 
> On parisc we have
> #define MAP_SHARED  0x01/* Share changes */
> #define MAP_PRIVATE 0x02/* Changes are private */
> #define MAP_TYPE0x03/* Mask for type of mapping */
> #define MAP_FIXED   0x04/* Interpret addr exactly */
> #define MAP_ANONYMOUS   0x10/* don't use a file */
> 
> So, if you need a MAP_DIRECT, wouldn't e.g.
> #define MAP_DIRECT  0x08
> be possible (for parisc, and others 0x04).
> And if MAP_TYPE needs to include this flag on parisc:
> #define MAP_TYPE(0x03 | 0x08)  /* Mask for type of mapping */

I guess it's better to re-define MAP_TYPE as 0x3 everywhere and make
MAP_DIRECT a normal flag. It's not new type of mapping anyway.

-- 
 Kirill A. Shutemov
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-25 Thread Dan Williams
On Fri, Aug 25, 2017 at 9:19 AM, Helge Deller  wrote:
> On 25.08.2017 18:16, Kirill A. Shutemov wrote:
>> On Fri, Aug 25, 2017 at 09:02:36AM -0700, Christoph Hellwig wrote:
>>> On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
 Not all archs are ready for this:

 arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03/* 
 Mask for type of mapping */
 arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04/* 
 Interpret addr exactly */
>>>
>>> I'd be happy to say that we should not care about parisc for
>>> persistent memory.  We'll just have to find a way to exclude
>>> parisc without making life too ugly.
>>
>> I don't think creapling mmap() interface for one arch is the right way to
>> go. I think the interface should be universal.
>>
>> I may imagine MAP_DIRECT can be useful not only for persistent memory.
>> For tmpfs instead of mlock()?
>
> On parisc we have
> #define MAP_SHARED  0x01/* Share changes */
> #define MAP_PRIVATE 0x02/* Changes are private */
> #define MAP_TYPE0x03/* Mask for type of mapping */
> #define MAP_FIXED   0x04/* Interpret addr exactly */
> #define MAP_ANONYMOUS   0x10/* don't use a file */
>
> So, if you need a MAP_DIRECT, wouldn't e.g.
> #define MAP_DIRECT  0x08
> be possible (for parisc, and others 0x04).
> And if MAP_TYPE needs to include this flag on parisc:
> #define MAP_TYPE(0x03 | 0x08)  /* Mask for type of mapping */

The problem here is that to support new the mmap flags the arch needs
to find a flag that is guaranteed to fail on older kernels. Defining
MAP_DIRECT to 0x8 on parisc doesn't work because it will simply be
ignored on older parisc kernels.

However, it's already the case that several archs have their own
sys_mmap entry points. Those archs that can't follow the common scheme
(only parsic it seems) will need to add a new mmap syscall. I think
that's a reasonable tradeoff to allow every other architecture to add
this support with their existing mmap syscall paths.

That means MAP_DIRECT should be defined to MAP_TYPE on parisc until it
later defines an opt-in mechanism to a new syscall that honors
MAP_DIRECT as a valid flag.
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-26 Thread Helge Deller
* Dan Williams :
> On Fri, Aug 25, 2017 at 9:19 AM, Helge Deller  wrote:
> > On 25.08.2017 18:16, Kirill A. Shutemov wrote:
> >> On Fri, Aug 25, 2017 at 09:02:36AM -0700, Christoph Hellwig wrote:
> >>> On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
>  Not all archs are ready for this:
> 
>  arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03
>  /* Mask for type of mapping */
>  arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04
>  /* Interpret addr exactly */
> >>>
> >>> I'd be happy to say that we should not care about parisc for
> >>> persistent memory.  We'll just have to find a way to exclude
> >>> parisc without making life too ugly.
> >>
> >> I don't think creapling mmap() interface for one arch is the right way to
> >> go. I think the interface should be universal.
> >>
> >> I may imagine MAP_DIRECT can be useful not only for persistent memory.
> >> For tmpfs instead of mlock()?
> >
> > On parisc we have
> > #define MAP_SHARED  0x01/* Share changes */
> > #define MAP_PRIVATE 0x02/* Changes are private */
> > #define MAP_TYPE0x03/* Mask for type of mapping */
> > #define MAP_FIXED   0x04/* Interpret addr exactly */
> > #define MAP_ANONYMOUS   0x10/* don't use a file */
> >
> > So, if you need a MAP_DIRECT, wouldn't e.g.
> > #define MAP_DIRECT  0x08
> > be possible (for parisc, and others 0x04).
> > And if MAP_TYPE needs to include this flag on parisc:
> > #define MAP_TYPE(0x03 | 0x08)  /* Mask for type of mapping */
> 
> The problem here is that to support new the mmap flags the arch needs
> to find a flag that is guaranteed to fail on older kernels. Defining
> MAP_DIRECT to 0x8 on parisc doesn't work because it will simply be
> ignored on older parisc kernels.
> 
> However, it's already the case that several archs have their own
> sys_mmap entry points. Those archs that can't follow the common scheme
> (only parsic it seems) will need to add a new mmap syscall. I think
> that's a reasonable tradeoff to allow every other architecture to add
> this support with their existing mmap syscall paths.

I don't want other architectures to suffer just because of parisc.
But adding a new syscall just for usage on parisc won't work either,
because nobody will add code to call it then.
 
> That means MAP_DIRECT should be defined to MAP_TYPE on parisc until it
> later defines an opt-in mechanism to a new syscall that honors
> MAP_DIRECT as a valid flag.

I'd instead propose to to introduce an ABI breakage for parisc users
(which aren't many). Most parisc users update their kernel regularily
anyway, because we fixed so many bugs in the latest kernel.

With the following patch pushed down to the stable kernel series,
MAP_DIRECT will fail as expected on those kernels, while we can
keep parisc up with current developments regarding MAP_DIRECT.

diff --git a/arch/parisc/include/uapi/asm/mman.h 
b/arch/parisc/include/uapi/asm/mman.h
index 9a9c2fe..43b9a1e 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -13,6 +13,7 @@
 #define MAP_PRIVATE0x02/* Changes are private */
 #define MAP_TYPE   0x03/* Mask for type of mapping */
 #define MAP_FIXED  0x04/* Interpret addr exactly */
+#define MAP_DIRECT 0x08/* Interpret addr exactly */
 #define MAP_ANONYMOUS  0x10/* don't use a file */
 
 #define MAP_DENYWRITE  0x0800  /* ETXTBSY */
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 378a754..0499f87 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -270,6 +270,10 @@ asmlinkage unsigned long sys_mmap2(unsigned long addr, 
unsigned long len,
 {
/* Make sure the shift for mmap2 is constant (12), no matter what 
PAGE_SIZE
   we have. */
+#if !defined(CONFIG_HAVE_MAP_DIRECT_SUPPORT)
+   if (flags & MAP_DIRECT)
+   return -EINVAL;
+#endif
return sys_mmap_pgoff(addr, len, prot, flags, fd,
  pgoff >> (PAGE_SHIFT - 12));
 }
@@ -278,6 +282,10 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, 
unsigned long len,
unsigned long prot, unsigned long flags, unsigned long fd,
unsigned long offset)
 {
+#if !defined(CONFIG_HAVE_MAP_DIRECT_SUPPORT)
+   if (flags & MAP_DIRECT)
+   return -EINVAL;
+#endif
if (!(offset & ~PAGE_MASK)) {
return sys_mmap_pgoff(addr, len, prot, flags, fd,
offset >> PAGE_SHIFT);


Helge
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-26 Thread Dan Williams
On Sat, Aug 26, 2017 at 12:40 AM, Helge Deller  wrote:
> * Dan Williams :
>> On Fri, Aug 25, 2017 at 9:19 AM, Helge Deller  wrote:
>> > On 25.08.2017 18:16, Kirill A. Shutemov wrote:
>> >> On Fri, Aug 25, 2017 at 09:02:36AM -0700, Christoph Hellwig wrote:
>> >>> On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
>>  Not all archs are ready for this:
>> 
>>  arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03
>>  /* Mask for type of mapping */
>>  arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04
>>  /* Interpret addr exactly */
>> >>>
>> >>> I'd be happy to say that we should not care about parisc for
>> >>> persistent memory.  We'll just have to find a way to exclude
>> >>> parisc without making life too ugly.
>> >>
>> >> I don't think creapling mmap() interface for one arch is the right way to
>> >> go. I think the interface should be universal.
>> >>
>> >> I may imagine MAP_DIRECT can be useful not only for persistent memory.
>> >> For tmpfs instead of mlock()?
>> >
>> > On parisc we have
>> > #define MAP_SHARED  0x01/* Share changes */
>> > #define MAP_PRIVATE 0x02/* Changes are private */
>> > #define MAP_TYPE0x03/* Mask for type of mapping */
>> > #define MAP_FIXED   0x04/* Interpret addr exactly */
>> > #define MAP_ANONYMOUS   0x10/* don't use a file */
>> >
>> > So, if you need a MAP_DIRECT, wouldn't e.g.
>> > #define MAP_DIRECT  0x08
>> > be possible (for parisc, and others 0x04).
>> > And if MAP_TYPE needs to include this flag on parisc:
>> > #define MAP_TYPE(0x03 | 0x08)  /* Mask for type of mapping */
>>
>> The problem here is that to support new the mmap flags the arch needs
>> to find a flag that is guaranteed to fail on older kernels. Defining
>> MAP_DIRECT to 0x8 on parisc doesn't work because it will simply be
>> ignored on older parisc kernels.
>>
>> However, it's already the case that several archs have their own
>> sys_mmap entry points. Those archs that can't follow the common scheme
>> (only parsic it seems) will need to add a new mmap syscall. I think
>> that's a reasonable tradeoff to allow every other architecture to add
>> this support with their existing mmap syscall paths.
>
> I don't want other architectures to suffer just because of parisc.
> But adding a new syscall just for usage on parisc won't work either,
> because nobody will add code to call it then.

I don't understand this comment, if / when parisc gets around to
adding pmem and dax support why wouldn't libc grow support for the new
parisc mmap variant? Also, it's not just MAP_DIRECT you would also
need space for a MAP_SYNC flag.

>> That means MAP_DIRECT should be defined to MAP_TYPE on parisc until it
>> later defines an opt-in mechanism to a new syscall that honors
>> MAP_DIRECT as a valid flag.
>
> I'd instead propose to to introduce an ABI breakage for parisc users
> (which aren't many). Most parisc users update their kernel regularily
> anyway, because we fixed so many bugs in the latest kernel.
>
> With the following patch pushed down to the stable kernel series,
> MAP_DIRECT will fail as expected on those kernels, while we can
> keep parisc up with current developments regarding MAP_DIRECT.

The whole point is to avoid an ABI regression and the chance for false
positive results. We're immediately stuck if some application was
expecting 0x8 to be ignored, or conversely an application that
absolutely needs to rely on MAP_SYNC/MAP_DIRECT semantics assumes the
wrong result on a parisc kernel where they are ignored.

I have not seen any patches for parisc pmem+dax enabling so it seems
too early to worry about these "last mile" enabling features of
MAP_DIRECT and MAP_SYNC. In particular parisc doesn't appear to have
ARCH_ENABLE_MEMORY_HOTPLUG, so as far as I can see it can't yet
support the ZONE_DEVICE scheme that is a pre-requisite for MAP_DIRECT.
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-26 Thread Helge Deller
On 26.08.2017 17:15, Dan Williams wrote:
> On Sat, Aug 26, 2017 at 12:40 AM, Helge Deller  wrote:
>> * Dan Williams :
>>> On Fri, Aug 25, 2017 at 9:19 AM, Helge Deller  wrote:
 On 25.08.2017 18:16, Kirill A. Shutemov wrote:
> On Fri, Aug 25, 2017 at 09:02:36AM -0700, Christoph Hellwig wrote:
>> On Fri, Aug 25, 2017 at 06:58:03PM +0300, Kirill A. Shutemov wrote:
>>> Not all archs are ready for this:
>>>
>>> arch/parisc/include/uapi/asm/mman.h:#define MAP_TYPE0x03
>>> /* Mask for type of mapping */
>>> arch/parisc/include/uapi/asm/mman.h:#define MAP_FIXED   0x04
>>> /* Interpret addr exactly */
>>
>> I'd be happy to say that we should not care about parisc for
>> persistent memory.  We'll just have to find a way to exclude
>> parisc without making life too ugly.
>
> I don't think creapling mmap() interface for one arch is the right way to
> go. I think the interface should be universal.
>
> I may imagine MAP_DIRECT can be useful not only for persistent memory.
> For tmpfs instead of mlock()?

 On parisc we have
 #define MAP_SHARED  0x01/* Share changes */
 #define MAP_PRIVATE 0x02/* Changes are private */
 #define MAP_TYPE0x03/* Mask for type of mapping */
 #define MAP_FIXED   0x04/* Interpret addr exactly */
 #define MAP_ANONYMOUS   0x10/* don't use a file */

 So, if you need a MAP_DIRECT, wouldn't e.g.
 #define MAP_DIRECT  0x08
 be possible (for parisc, and others 0x04).
 And if MAP_TYPE needs to include this flag on parisc:
 #define MAP_TYPE(0x03 | 0x08)  /* Mask for type of mapping */
>>>
>>> The problem here is that to support new the mmap flags the arch needs
>>> to find a flag that is guaranteed to fail on older kernels. Defining
>>> MAP_DIRECT to 0x8 on parisc doesn't work because it will simply be
>>> ignored on older parisc kernels.
>>>
>>> However, it's already the case that several archs have their own
>>> sys_mmap entry points. Those archs that can't follow the common scheme
>>> (only parsic it seems) will need to add a new mmap syscall. I think
>>> that's a reasonable tradeoff to allow every other architecture to add
>>> this support with their existing mmap syscall paths.
>>
>> I don't want other architectures to suffer just because of parisc.
>> But adding a new syscall just for usage on parisc won't work either,
>> because nobody will add code to call it then.
> 
> I don't understand this comment, if / when parisc gets around to
> adding pmem and dax support why wouldn't libc grow support for the new
> parisc mmap variant? Also, it's not just MAP_DIRECT you would also
> need space for a MAP_SYNC flag.
> 
>>> That means MAP_DIRECT should be defined to MAP_TYPE on parisc until it
>>> later defines an opt-in mechanism to a new syscall that honors
>>> MAP_DIRECT as a valid flag.
>>
>> I'd instead propose to to introduce an ABI breakage for parisc users
>> (which aren't many). Most parisc users update their kernel regularily
>> anyway, because we fixed so many bugs in the latest kernel.
>>
>> With the following patch pushed down to the stable kernel series,
>> MAP_DIRECT will fail as expected on those kernels, while we can
>> keep parisc up with current developments regarding MAP_DIRECT.
> 
> The whole point is to avoid an ABI regression and the chance for false
> positive results. We're immediately stuck if some application was
> expecting 0x8 to be ignored, or conversely an application that
> absolutely needs to rely on MAP_SYNC/MAP_DIRECT semantics assumes the
> wrong result on a parisc kernel where they are ignored.
> 
> I have not seen any patches for parisc pmem+dax enabling so it seems
> too early to worry about these "last mile" enabling features of
> MAP_DIRECT and MAP_SYNC. In particular parisc doesn't appear to have
> ARCH_ENABLE_MEMORY_HOTPLUG, so as far as I can see it can't yet
> support the ZONE_DEVICE scheme that is a pre-requisite for MAP_DIRECT.

I see, but then it's probably best to not to define any MAP_DIRECT or 
MAP_SYNC at all in the headers of those arches which don't support
pmem+dax (parisc, m68k, alpha, and probably quite some others).
That way applications can detect at configure time if the platform
supports that, and can leave out the functionality completely.

Helge
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-26 Thread Dan Williams
On Sat, Aug 26, 2017 at 12:50 PM, Helge Deller  wrote:
> On 26.08.2017 17:15, Dan Williams wrote:
[..]
>> I have not seen any patches for parisc pmem+dax enabling so it seems
>> too early to worry about these "last mile" enabling features of
>> MAP_DIRECT and MAP_SYNC. In particular parisc doesn't appear to have
>> ARCH_ENABLE_MEMORY_HOTPLUG, so as far as I can see it can't yet
>> support the ZONE_DEVICE scheme that is a pre-requisite for MAP_DIRECT.
>
> I see, but then it's probably best to not to define any MAP_DIRECT or
> MAP_SYNC at all in the headers of those arches which don't support
> pmem+dax (parisc, m68k, alpha, and probably quite some others).
> That way applications can detect at configure time if the platform
> supports that, and can leave out the functionality completely.

Yes, that's a good idea we can handle this similar to
CONFIG_MMAP_ALLOW_UNINITIALIZED. These patches will also modify
'struct file_operations' so that do_mmap() can validate whether a flag
is supported on per architecture basis. Also the plan is to plumb the
flags passed to the syscall all the way down to the individual mmap
implementations. The ext4 and xfs ->mmap() operations will be able to
return -EOPNOTSUP based on runtime variables.
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm


Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

2017-08-26 Thread Kirill A. Shutemov
On Sat, Aug 26, 2017 at 03:46:22PM -0700, Dan Williams wrote:
> On Sat, Aug 26, 2017 at 12:50 PM, Helge Deller  wrote:
> > On 26.08.2017 17:15, Dan Williams wrote:
> [..]
> >> I have not seen any patches for parisc pmem+dax enabling so it seems
> >> too early to worry about these "last mile" enabling features of
> >> MAP_DIRECT and MAP_SYNC. In particular parisc doesn't appear to have
> >> ARCH_ENABLE_MEMORY_HOTPLUG, so as far as I can see it can't yet
> >> support the ZONE_DEVICE scheme that is a pre-requisite for MAP_DIRECT.
> >
> > I see, but then it's probably best to not to define any MAP_DIRECT or
> > MAP_SYNC at all in the headers of those arches which don't support
> > pmem+dax (parisc, m68k, alpha, and probably quite some others).
> > That way applications can detect at configure time if the platform
> > supports that, and can leave out the functionality completely.
> 
> Yes, that's a good idea we can handle this similar to
> CONFIG_MMAP_ALLOW_UNINITIALIZED. These patches will also modify
> 'struct file_operations' so that do_mmap() can validate whether a flag
> is supported on per architecture basis. Also the plan is to plumb the
> flags passed to the syscall all the way down to the individual mmap
> implementations. The ext4 and xfs ->mmap() operations will be able to
> return -EOPNOTSUP based on runtime variables.

BTW, we may be able to reuse the bit used for MAP_UNINITIALIZED -- it's
only used on !MMU machines.

-- 
 Kirill A. Shutemov
___
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm