Re: [PATCH] linux++: delete some forward declarations

2024-06-14 Thread Alexey Dobriyan
On Thu, Jun 13, 2024 at 04:10:12PM -0400, Steven Rostedt wrote:
> On Thu, 13 Jun 2024 13:04:20 -0700
> Andrew Morton  wrote:
> 
> > On Thu, 13 Jun 2024 15:34:02 -0400 Steven Rostedt  
> > wrote:
> > 
> > > On Thu, 13 Jun 2024 22:22:18 +0300
> > > Alexey Dobriyan  wrote:
> > >   
> > > > g++ doesn't like forward enum declarations:
> > > > 
> > > > error: use of enum ‘E’ without previous declaration
> > > >64 | enum E;  
> > > 
> > > But we don't care about g++. Do we?  
> > 
> > It appears that g++ is a useful enum declaration detector.
> > 
> > I'm curious to know how even the above warning was generated.  Does g++
> > work at all on Linux?

With out-of-tree patch, yes.

What happens is that "enum E;" works in C but doesn't work in C++.
The fix (in C++) is to either delete, or change to "enum E:int;".

The same applies to

const struct S s;
const struct S s = {};

First declaration is compile error in C++, sometimes it can be deleted.

This patch is some "unused" parts merged together because it doesn't
make sense to split this much -- every chunk is independent of each
other.

> > > I would make that a separate patch.  
> > 
> > What are you referring to here?
> 
> The enum change should be separate from the struct changes.
> 
> > 
> > > > 
> > > > Delete those which aren't used.
> > > > 
> > > > Delete some unused/unnecessary forward struct declarations for a 
> > > > change.  
> > > 
> > > This is a clean up, but should have a better change log. Just something
> > > simple like:
> > > 
> > >   Delete unnecessary forward struct declarations.  
> > 
> > Alexey specializes in cute changelogs.
> 
> eh

Steven is right. That's what my literature teacher said in high school.

> > I do have a concern about the patch: has it been tested with all
> > possible Kconfigs?  No.  There may be some configs in which the forward
> > declaration is required.
> > 
> > And...  I'm a bit surprised that forward declarations are allowed in C.
> > A billion years ago I used a C compiler which would use 16 bits for
> > an enum if the enumted values would fit in 16 bits.  And it would use 32
> > bits otherwise.  So the enumerated values were *required* for the
> > compiler to be able to figure out the sizeof.  But it was a billion
> > years ago.
> 
> Well, I only looked at the one change in ftrace.h which has a
> "struct seq_file;" that is not used anywhere else in the file, so that
> one definitely can go.

It was tested on arm64 allmodconfig too.

OK if this is concern, I could dust off my compile test farm.



[PATCH] linux++: delete some forward declarations

2024-06-13 Thread Alexey Dobriyan
g++ doesn't like forward enum declarations:

error: use of enum ‘E’ without previous declaration
   64 | enum E;

Delete those which aren't used.

Delete some unused/unnecessary forward struct declarations for a change.

Signed-off-by: Alexey Dobriyan 
---

 fs/ramfs/inode.c |1 -
 include/linux/console.h  |2 --
 include/linux/device.h   |3 ---
 include/linux/ftrace.h   |4 
 include/linux/security.h |6 --
 include/linux/signal.h   |2 --
 include/linux/syscalls.h |7 ---
 include/linux/sysfs.h|2 --
 mm/internal.h|4 
 mm/shmem.c   |1 -
 10 files changed, 32 deletions(-)

--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -51,7 +51,6 @@ struct ramfs_fs_info {
 
 #define RAMFS_DEFAULT_MODE 0755
 
-static const struct super_operations ramfs_ops;
 static const struct inode_operations ramfs_dir_inode_operations;
 
 struct inode *ramfs_get_inode(struct super_block *sb,
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -21,10 +21,8 @@
 #include 
 
 struct vc_data;
-struct console_font_op;
 struct console_font;
 struct module;
-struct tty_struct;
 struct notifier_block;
 
 enum con_scroll {
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -36,10 +36,7 @@
 struct device;
 struct device_private;
 struct device_driver;
-struct driver_private;
 struct module;
-struct class;
-struct subsys_private;
 struct device_node;
 struct fwnode_handle;
 struct iommu_group;
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -531,8 +531,6 @@ extern const void *ftrace_expected;
 
 void ftrace_bug(int err, struct dyn_ftrace *rec);
 
-struct seq_file;
-
 extern int ftrace_text_reserved(const void *start, const void *end);
 
 struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr);
@@ -1147,8 +1145,6 @@ static inline void unpause_graph_tracing(void) { }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 #ifdef CONFIG_TRACING
-enum ftrace_dump_mode;
-
 #define MAX_TRACER_SIZE100
 extern char ftrace_dump_on_oops[];
 extern int ftrace_dump_on_oops_enabled(void);
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -41,7 +41,6 @@ struct rlimit;
 struct kernel_siginfo;
 struct sembuf;
 struct kern_ipc_perm;
-struct audit_context;
 struct super_block;
 struct inode;
 struct dentry;
@@ -59,8 +58,6 @@ struct xfrm_sec_ctx;
 struct mm_struct;
 struct fs_context;
 struct fs_parameter;
-enum fs_value_type;
-struct watch;
 struct watch_notification;
 struct lsm_ctx;
 
@@ -183,8 +180,6 @@ struct sock;
 struct sockaddr;
 struct socket;
 struct flowi_common;
-struct dst_entry;
-struct xfrm_selector;
 struct xfrm_policy;
 struct xfrm_state;
 struct xfrm_user_sec_ctx;
@@ -219,7 +214,6 @@ extern unsigned long dac_mmap_min_addr;
 #define LSM_PRLIMIT_WRITE 2
 
 /* forward declares to avoid warnings */
-struct sched_param;
 struct request_sock;
 
 /* bprm->unsafe reasons */
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -274,8 +274,6 @@ static inline int valid_signal(unsigned long sig)
return sig <= _NSIG ? 1 : 0;
 }
 
-struct timespec;
-struct pt_regs;
 enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -11,8 +11,6 @@
 
 struct __aio_sigset;
 struct epoll_event;
-struct iattr;
-struct inode;
 struct iocb;
 struct io_event;
 struct iovec;
@@ -20,14 +18,12 @@ struct __kernel_old_itimerval;
 struct kexec_segment;
 struct linux_dirent;
 struct linux_dirent64;
-struct list_head;
 struct mmap_arg_struct;
 struct msgbuf;
 struct user_msghdr;
 struct mmsghdr;
 struct msqid_ds;
 struct new_utsname;
-struct nfsctl_arg;
 struct __old_kernel_stat;
 struct oldold_utsname;
 struct old_utsname;
@@ -38,7 +34,6 @@ struct rusage;
 struct sched_param;
 struct sched_attr;
 struct sel_arg_struct;
-struct semaphore;
 struct sembuf;
 struct shmid_ds;
 struct sockaddr;
@@ -48,14 +43,12 @@ struct statfs;
 struct statfs64;
 struct statx;
 struct sysinfo;
-struct timespec;
 struct __kernel_old_timeval;
 struct __kernel_timex;
 struct timezone;
 struct tms;
 struct utimbuf;
 struct mq_attr;
-struct compat_stat;
 struct old_timeval32;
 struct robust_list_head;
 struct futex_waitv;
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -23,9 +23,7 @@
 #include 
 
 struct kobject;
-struct module;
 struct bin_attribute;
-enum kobj_ns_type;
 
 struct attribute {
const char  *name;
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1095,10 +1095,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone 
*zone,
 /* Flags that allow allocations below the min watermark. */
 #define ALLOC_RESERVES 
(ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
 
-enum ttu_flags;
-struct tlbflush_unmap_batch;
-
-
 /*
  * only for MM internal work items which do not depend on
  * any allocations or locks which might depend on allocations
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -261,7 

Re: [PATCH] module: ban '.', '..' as module names, ban '/' in module names

2024-04-15 Thread Alexey Dobriyan
On Sun, Apr 14, 2024 at 01:58:55PM -0700, Luis Chamberlain wrote:
> On Sun, Apr 14, 2024 at 10:05:05PM +0300, Alexey Dobriyan wrote:
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -3616,4 +3616,12 @@ extern int vfs_fadvise(struct file *file, loff_t 
> > offset, loff_t len,
> >  extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
> >int advice);
> >  
> > +/*
> > + * Use this if data from userspace end up as directory/filename on
> > + * some virtual filesystem.
> > + */
> > +static inline bool string_is_vfs_ready(const char *s)
> > +{
> > +   return strcmp(s, ".") != 0 && strcmp(s, "..") != 0 && !strchr(s, '/');
> > +}
> >  #endif /* _LINUX_FS_H */
> > --- a/kernel/module/main.c
> > +++ b/kernel/module/main.c
> > @@ -2893,6 +2893,11 @@ static int load_module(struct load_info *info, const 
> > char __user *uargs,
> >  
> > audit_log_kern_module(mod->name);
> >  
> > +   if (!string_is_vfs_ready(mod->name)) {
> > +   err = -EINVAL;
> > +   goto free_module;
> > +   }
> > +
> 
> Sensible change however to put string_is_vfs_ready() in include/linux/fs.h 
> is a stretch if there really are no other users.

This is forward thinking patch :-)

Other subsystems may create files/directories in proc/sysfs, and should
check for bad names as well:

/proc/2821/net/dev_snmp6/eth0

This looks exactly like something coming from userspace and making it
into /proc, so the filter function doesn't belong to kernel/module/internal.h



[PATCH] module: ban '.', '..' as module names, ban '/' in module names

2024-04-14 Thread Alexey Dobriyan
As the title says, ban

.
..

and any name containing '/' as they show in sysfs as directory names:

/sys/module/${mod.name}

sysfs tries to mangle the name and make '/' into '!' which kind of work
but not really.

Corrupting simple module to have name '/est' and loading it works:

# insmod xxx.ko

$ cat /proc/modules
/est 12288 0 - Live 0x (P)

/proc has no problems with it as it ends in data not pathname.

sysfs mangles it to '/sys/module/!test'.

lsmod is confused:

$ lsmod
Module  Size  Used by
libkmod: ERROR ../libkmod/libkmod-module.c:1998 
kmod_module_get_holders: could not open '/sys/module//est/holders': No such 
file or directory
/est  -2  -2

Size and refcount are bogus entirely.

Apparently lsmod doesn't know about sysfs mangling scheme.

Worse, rmmod doesn't work too:

$ sudo rmmod '/est'
rmmod: ERROR: Module /est is not currently loaded

I don't even want to know what it is doing.

Practically there is no nice way for the admin to get rid of the module,
so we should just ban such names. Writing small program to just delete
module by name could possibly work maybe.

Any other subsystem should use nice helper function aptly named

string_is_vfs_ready()

and apply additional restrictions if necessary.

/proc/modules hints that newlines should be banned too,
and \x1f, and whitespace, and similar looking characters 
from different languages and emojis (except obviously).

Signed-off-by: Alexey Dobriyan 
---

 include/linux/fs.h   |8 
 kernel/module/main.c |5 +
 2 files changed, 13 insertions(+)

--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3616,4 +3616,12 @@ extern int vfs_fadvise(struct file *file, loff_t offset, 
loff_t len,
 extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
   int advice);
 
+/*
+ * Use this if data from userspace end up as directory/filename on
+ * some virtual filesystem.
+ */
+static inline bool string_is_vfs_ready(const char *s)
+{
+   return strcmp(s, ".") != 0 && strcmp(s, "..") != 0 && !strchr(s, '/');
+}
 #endif /* _LINUX_FS_H */
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2893,6 +2893,11 @@ static int load_module(struct load_info *info, const 
char __user *uargs,
 
audit_log_kern_module(mod->name);
 
+   if (!string_is_vfs_ready(mod->name)) {
+   err = -EINVAL;
+   goto free_module;
+   }
+
/* Reserve our place in the list. */
err = add_unformed_module(mod);
if (err)



[PATCH v3 2/2] uapi: fix header guard in include/uapi/linux/stddef.h

2023-09-12 Thread Alexey Dobriyan
Signed-off-by: Alexey Dobriyan 
---

 include/uapi/linux/stddef.h |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -50,8 +50,9 @@
TYPE NAME[]; \
}
 #endif
-#endif
 
 #ifndef __counted_by
 #define __counted_by(m)
 #endif
+
+#endif


[PATCH v3 1/2] uapi: fix __DECLARE_FLEX_ARRAY for C++

2023-09-12 Thread Alexey Dobriyan
__DECLARE_FLEX_ARRAY(T, member) macro expands to

struct {
struct {} __empty_member;
T member[];
};

which is subtly wrong in C++ because sizeof(struct{}) is 1 not 0,
changing UAPI structures layouts.

This can be fixed by expanding to

T member[];

Now g++ doesn't like "T member[]" either, throwing errors on
the following code:

struct S {
union {
T1 member1[];
T2 member2[];
};
};

or

struct S {
T member[];
};

Use "T member[0];" which seems to work and does the right thing wrt
structure layout.

Signed-off-by: Alexey Dobriyan 
Fixes: 3080ea5553cc ("stddef: Introduce DECLARE_FLEX_ARRAY() helper")
---

 include/uapi/linux/stddef.h |6 ++
 1 file changed, 6 insertions(+)

--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -29,6 +29,11 @@
struct TAG { MEMBERS } ATTRS NAME; \
}
 
+#ifdef __cplusplus
+/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */
+#define __DECLARE_FLEX_ARRAY(T, member)\
+   T member[0]
+#else
 /**
  * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
  *
@@ -45,6 +50,7 @@
TYPE NAME[]; \
}
 #endif
+#endif
 
 #ifndef __counted_by
 #define __counted_by(m)


Re: [PATCH v2] uapi: fix __DECLARE_FLEX_ARRAY for C++

2023-09-12 Thread Alexey Dobriyan
On Mon, Sep 11, 2023 at 08:19:20AM +, David Laight wrote:
> ...
> > Okay, can you please split the patch so they can be backported
> > separately? Then I'll get them landed, etc.
> 
> Since the header with just the extra #endif is badly broken on C++
> isn't it best to ensure they get back-ported together?
> So one patch is probably better.

Header guard misplacement is not a bug, file ends with:

#ifndef __counted_by
#define __counted_by(m)
#endif

it is just looks confusing in the diff.


Re: [PATCH v2] docs: proc.rst: meminfo: briefly describe gaps in memory accounting

2021-04-20 Thread Alexey Dobriyan
On Tue, Apr 20, 2021 at 03:57:07PM +0200, Michal Hocko wrote:
> On Tue 20-04-21 14:24:30, Matthew Wilcox wrote:
> > On Tue, Apr 20, 2021 at 03:13:54PM +0300, Mike Rapoport wrote:
> > > Add a paragraph that explains that it may happen that the counters in
> > > /proc/meminfo do not add up to the overall memory usage.
> > 
> > ... that is, the sum may be lower because memory is allocated for other
> > purposes that is not reported here, right?
> 
> yes. Many direct page allocator users are not accounted in any of the
> existing counters.

Does virtio_balloon dereserve special mention?

>From inside VM memory borrowing looks like one giant memory leak resulting
in support tickets (not that people who file them read internal kernel
documentation...)


[PATCH v2] kconfig: redo fake deps at include/config/*.h

2021-04-15 Thread Alexey Dobriyan
Make include/config/foo/bar.h fake deps files generation simpler.

* delete .h suffix
those aren't header files, shorten filenames,

* delete tolower()
Linux filesystems can deal with both upper and lowercase
filenames very well,

* put everything in 1 directory
Presumably 'mkdir -p' split is from dark times when filesystems
handled huge directories badly, disks were round adding to
seek times.

x86_64 allmodconfig lists 12364 files in include/config.

../obj/include/config/
├── 104_QUAD_8
├── 60XX_WDT
├── 64BIT
...
├── ZSWAP_DEFAULT_ON
├── ZSWAP_ZPOOL_DEFAULT
└── ZSWAP_ZPOOL_DEFAULT_ZBUD

0 directories, 12364 files

Signed-off-by: Alexey Dobriyan 
---

 include/linux/compiler-version.h |2 +-
 init/Kconfig |2 +-
 kernel/gen_kheaders.sh   |2 +-
 scripts/Makefile.build   |4 ++--
 scripts/basic/fixdep.c   |   39 ---
 scripts/kconfig/confdata.c   |   15 +--
 6 files changed, 14 insertions(+), 50 deletions(-)

--- a/include/linux/compiler-version.h
+++ b/include/linux/compiler-version.h
@@ -9,6 +9,6 @@
  * This header exists to force full rebuild when the compiler is upgraded.
  *
  * When fixdep scans this, it will find this string "CONFIG_CC_VERSION_TEXT"
- * and add dependency on include/config/cc/version/text.h, which is touched
+ * and add dependency on include/config/CC_VERSION_TEXT, which is touched
  * by Kconfig when the version string from the compiler changes.
  */
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -21,7 +21,7 @@ config CC_VERSION_TEXT
 
  - Ensure full rebuild when the compiler is updated
include/linux/compiler-version.h contains this option in the comment
-   line so fixdep adds include/config/cc/version/text.h into the
+   line so fixdep adds include/config/CC_VERSION_TEXT into the
auto-generated dependency. When the compiler is updated, syncconfig
will touch it and then every file will be rebuilt.
 
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -36,7 +36,7 @@ all_dirs="$all_dirs $dir_list"
 #
 # When Kconfig regenerates include/generated/autoconf.h, its timestamp is
 # updated, but the contents might be still the same. When any CONFIG option is
-# changed, Kconfig touches the corresponding timestamp file include/config/*.h.
+# changed, Kconfig touches the corresponding timestamp file include/config/*.
 # Hence, the md5sum detects the configuration change anyway. We do not need to
 # check include/generated/autoconf.h explicitly.
 #
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -238,8 +238,8 @@ endif # CONFIG_STACK_VALIDATION
 
 # Rebuild all objects when objtool changes, or is enabled/disabled.
 objtool_dep = $(objtool_obj)   \
- $(wildcard include/config/orc/unwinder.h  \
-include/config/stack/validation.h)
+ $(wildcard include/config/ORC_UNWINDER\
+include/config/STACK_VALIDATION)
 
 ifdef CONFIG_TRIM_UNUSED_KSYMS
 cmd_gen_ksymdeps = \
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -34,7 +34,7 @@
  * the config symbols are rebuilt.
  *
  * So if the user changes his CONFIG_HIS_DRIVER option, only the objects
- * which depend on "include/config/his/driver.h" will be rebuilt,
+ * which depend on "include/config/HIS_DRIVER" will be rebuilt,
  * so most likely only his driver ;-)
  *
  * The idea above dates, by the way, back to Michael E Chastain, AFAIK.
@@ -74,7 +74,7 @@
  *
  * and then basically copies the ..d file to stdout, in the
  * process filtering out the dependency on autoconf.h and adding
- * dependencies on include/config/my/option.h for every
+ * dependencies on include/config/MY_OPTION for every
  * CONFIG_MY_OPTION encountered in any of the prerequisites.
  *
  * We don't even try to really parse the header files, but
@@ -124,38 +124,6 @@ static void xprintf(const char *format, ...)
va_end(ap);
 }
 
-static void xputchar(int c)
-{
-   int ret;
-
-   ret = putchar(c);
-   if (ret == EOF) {
-   perror("fixdep");
-   exit(1);
-   }
-}
-
-/*
- * Print out a dependency path from a symbol name
- */
-static void print_dep(const char *m, int slen, const char *dir)
-{
-   int c, prev_c = '/', i;
-
-   xprintf("$(wildcard %s/", dir);
-   for (i = 0; i < slen; i++) {
-   c = m[i];
-   if (c == '_')
-   c = '/';
-   else
-   c = tolower(c);
-   if (c != '/' || prev_c != '/')
-   xputchar(c);
-   prev_c = c;
-   }
-   xprintf(".h) \\\n");
-}
-
 struct ite

[PATCH] kconfig: redo fake deps at include/config/*.h

2021-04-14 Thread Alexey Dobriyan
Make include/config/foo/bar.h fake deps files generation simpler.

* delete .h suffix
those aren't header files, shorten filenames,

* delete tolower()
Linux filesystems can deal with both upper and lowercase
filenames very well,

* put everything in 1 directory
Presumably 'mkdir -p' split is from dark times when filesystems
handled huge directories badly, disks were round adding to
seek times.

x86_64 allmodconfig lists 12364 files in include/config.

../obj/include/config/
├── 104_QUAD_8
├── 60XX_WDT
├── 64BIT
├── 6LOWPAN
├── 6LOWPAN_DEBUGFS
├── 6LOWPAN_GHC_EXT_HDR_DEST
├── 6LOWPAN_GHC_EXT_HDR_FRAG
├── 6LOWPAN_GHC_EXT_HDR_HOP
├── 6LOWPAN_GHC_EXT_HDR_ROUTE

...
├── ZSTD_COMPRESS
├── ZSTD_DECOMPRESS
├── ZSWAP
├── ZSWAP_COMPRESSOR_DEFAULT
├── ZSWAP_COMPRESSOR_DEFAULT_LZO
├── ZSWAP_DEFAULT_ON
├── ZSWAP_ZPOOL_DEFAULT
└── ZSWAP_ZPOOL_DEFAULT_ZBUD

0 directories, 12364 files

Signed-off-by: Alexey Dobriyan 
---

 include/linux/compiler-version.h |2 +-
 init/Kconfig |2 +-
 scripts/Makefile.build   |4 ++--
 scripts/basic/fixdep.c   |   30 +++---
 scripts/kconfig/confdata.c   |   11 +++
 5 files changed, 10 insertions(+), 39 deletions(-)

--- a/include/linux/compiler-version.h
+++ b/include/linux/compiler-version.h
@@ -9,6 +9,6 @@
  * This header exists to force full rebuild when the compiler is upgraded.
  *
  * When fixdep scans this, it will find this string "CONFIG_CC_VERSION_TEXT"
- * and add dependency on include/config/cc/version/text.h, which is touched
+ * and add dependency on include/config/CC_VERSION_TEXT, which is touched
  * by Kconfig when the version string from the compiler changes.
  */
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -21,7 +21,7 @@ config CC_VERSION_TEXT
 
  - Ensure full rebuild when the compiler is updated
include/linux/compiler-version.h contains this option in the comment
-   line so fixdep adds include/config/cc/version/text.h into the
+   line so fixdep adds include/config/CC_VERSION_TEXT into the
auto-generated dependency. When the compiler is updated, syncconfig
will touch it and then every file will be rebuilt.
 
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -238,8 +238,8 @@ endif # CONFIG_STACK_VALIDATION
 
 # Rebuild all objects when objtool changes, or is enabled/disabled.
 objtool_dep = $(objtool_obj)   \
- $(wildcard include/config/orc/unwinder.h  \
-include/config/stack/validation.h)
+ $(wildcard include/config/ORC_UNWINDER\
+include/config/STACK_VALIDATION)
 
 ifdef CONFIG_TRIM_UNUSED_KSYMS
 cmd_gen_ksymdeps = \
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -34,7 +34,7 @@
  * the config symbols are rebuilt.
  *
  * So if the user changes his CONFIG_HIS_DRIVER option, only the objects
- * which depend on "include/config/his/driver.h" will be rebuilt,
+ * which depend on "include/config/HIS_DRIVER" will be rebuilt,
  * so most likely only his driver ;-)
  *
  * The idea above dates, by the way, back to Michael E Chastain, AFAIK.
@@ -74,7 +74,7 @@
  *
  * and then basically copies the ..d file to stdout, in the
  * process filtering out the dependency on autoconf.h and adding
- * dependencies on include/config/my/option.h for every
+ * dependencies on include/config/MY_OPTION for every
  * CONFIG_MY_OPTION encountered in any of the prerequisites.
  *
  * We don't even try to really parse the header files, but
@@ -124,36 +124,12 @@ static void xprintf(const char *format, ...)
va_end(ap);
 }
 
-static void xputchar(int c)
-{
-   int ret;
-
-   ret = putchar(c);
-   if (ret == EOF) {
-   perror("fixdep");
-   exit(1);
-   }
-}
-
 /*
  * Print out a dependency path from a symbol name
  */
 static void print_dep(const char *m, int slen, const char *dir)
 {
-   int c, prev_c = '/', i;
-
-   xprintf("$(wildcard %s/", dir);
-   for (i = 0; i < slen; i++) {
-   c = m[i];
-   if (c == '_')
-   c = '/';
-   else
-   c = tolower(c);
-   if (c != '/' || prev_c != '/')
-   xputchar(c);
-   prev_c = c;
-   }
-   xprintf(".h) \\\n");
+   xprintf("$(wildcard %s/%.*s) \\\n", dir, slen, m);
 }
 
 struct item {
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -130,19 +130,14 @@ static size_t depfile_prefix_len;
 static int conf_touch_dep(const char *name)
 {
int fd, ret;
-   const 

[PATCH] IDE: support QT Creator

2021-04-12 Thread Alexey Dobriyan
Add little script to generate QT Creator project files, so that parsing
and navigating codebase mostly works.

The important part is to run 'make prepare' before opening project
so that generated headers are picked up and clone coding style
in the editor to match kernel coding style.

Then run

$ ./scripts/ide/qt-creator.sh xxx

from top-level directory

Note:
ibmvnic.c is excluded from project files to workaround libCPlusPlus crash.

Signed-off-by: Alexey Dobriyan 
---

 scripts/ide/qt-creator.sh |   49 ++
 1 file changed, 49 insertions(+)

new file mode 100755
--- /dev/null
+++ b/scripts/ide/qt-creator.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+# Copyright (c) 2021 Alexey Dobriyan 
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# Generate Qt Creator project files.
+set -eo pipefail
+
+test -n "$1" || { echo >&2 "usage: $0 project-name"; exit 1; }
+p="$1"
+
+filename="$p.cflags"
+echo "$filename"
+echo '-std=gnu89' >"$filename"
+
+filename="$p.config"
+echo $filename
+cat <$filename
+#define __KERNEL__
+#include 
+EOF
+
+filename="$p.creator"
+echo $filename
+echo '[General]' >$filename
+
+filename="$p.cxxflags"
+echo $filename
+echo '-std=gnu++17' >$filename
+
+filename="$p.files"
+echo $filename
+# ibmvnic.c: workaround https://bugzilla.redhat.com/show_bug.cgi?id=1886548
+git ls-tree -r HEAD --name-only | LC_ALL=C sort | grep -v -e 'ibmvnic.c' 
>$filename
+
+filename="$p.includes"
+echo $filename
+echo 'include' >$filename
+for i in arch/*/include; do echo $i; done | LC_ALL=C sort >>$filename


[RFC] Tainting tasks after poking at them

2021-04-10 Thread Alexey Dobriyan
I'm not a security guy, but

The idea is to irrevocably mark task as tainted after its registers
and/or memory have been changed by another task.

The list definitely includes
* ptrace PTRACE_POKEUSER, PTRACE_POKETEXT, PTRACE_POKEDATA,
  PTRACE_SETREGS, PTRACE_SETFPREGS.
* process_vm_writev(2)

If an attacker gets an arbitrary code execution in context of task T,
then every task which can be attached to from T is compromised as well
via registers/memory manipulating system calls.

Tainted flag can be examined in kernel coredumps and maybe even help
with post mortem analysis (no idea if it is really true).

Note:
struct mm_struct should be tainted as well (i've noticed right before
sending this email).

---

 arch/x86/kernel/process_64.c |2 ++
 arch/x86/kernel/ptrace.c |7 +++
 arch/x86/kernel/tls.c|2 ++
 fs/proc/base.c   |   10 ++
 include/linux/sched.h|   14 ++
 kernel/ptrace.c  |3 +++
 mm/process_vm_access.c   |4 
 7 files changed, 42 insertions(+)

--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -468,6 +468,7 @@ void x86_fsbase_write_task(struct task_struct *task, 
unsigned long fsbase)
WARN_ON_ONCE(task == current);
 
task->thread.fsbase = fsbase;
+   task_set_tainted(task);
 }
 
 void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
@@ -475,6 +476,7 @@ void x86_gsbase_write_task(struct task_struct *task, 
unsigned long gsbase)
WARN_ON_ONCE(task == current);
 
task->thread.gsbase = gsbase;
+   task_set_tainted(task);
 }
 
 static void
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -214,6 +214,8 @@ static int set_segment_reg(struct task_struct *task,
task_user_gs(task) = value;
}
 
+   task_set_tainted(task);
+
return 0;
 }
 
@@ -315,6 +317,8 @@ static int set_segment_reg(struct task_struct *task,
break;
}
 
+   task_set_tainted(task);
+
return 0;
 }
 
@@ -349,6 +353,8 @@ static int set_flags(struct task_struct *task, unsigned 
long value)
 
regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK);
 
+   task_set_tainted(task);
+
return 0;
 }
 
@@ -382,6 +388,7 @@ static int putreg(struct task_struct *child,
}
 
*pt_regs_access(task_pt_regs(child), offset) = value;
+   task_set_tainted(child);
return 0;
 }
 
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -106,6 +106,8 @@ static void set_tls_desc(struct task_struct *p, int idx,
load_TLS(t, cpu);
 
put_cpu();
+
+   task_set_tainted(p);
 }
 
 /*
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3149,6 +3149,14 @@ static int proc_stack_depth(struct seq_file *m, struct 
pid_namespace *ns,
 }
 #endif /* CONFIG_STACKLEAK_METRICS */
 
+static int proc_pid_tainted(struct seq_file *m, struct pid_namespace *_,
+   struct pid *__, struct task_struct *tsk)
+{
+   seq_putc(m, '0' + task_is_tainted(tsk));
+   seq_putc(m, '\n');
+   return 0;
+}
+
 /*
  * Thread groups
  */
@@ -3265,6 +3273,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
 #endif
+   ONE("tainted", S_IRUGO, proc_pid_tainted),
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3598,6 +3607,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
 #endif
+   ONE("tainted", S_IRUGO, proc_pid_tainted),
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -668,6 +668,7 @@ struct task_struct {
/* Per task flags (PF_*), defined further below: */
unsigned intflags;
unsigned intptrace;
+   booltainted;
 
 #ifdef CONFIG_SMP
int on_cpu;
@@ -2026,6 +2027,19 @@ extern long sched_getaffinity(pid_t pid, struct cpumask 
*mask);
 unsigned long sched_cpu_util(int cpu, unsigned long max);
 #endif /* CONFIG_SMP */
 
+static inline bool task_is_tainted(const struct task_struct *tsk)
+{
+   return READ_ONCE(tsk->tainted);
+}
+
+static inline void task_set_tainted(struct task_struct *tsk)
+{
+   /* Self-flagellation is OK. */
+   if (tsk != current) {
+   WRITE_ONCE(tsk->tainted, true);
+   }
+}
+
 #ifdef CONFIG_RSEQ
 
 /*
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1297,6 +1297,9 @@ int generic_ptrace_pokedata(struct task_struct *tsk, 
unsigned long addr,
 
copied = ptrace_access_vm(tsk, addr, , sizeof(data),
FOLL_FORCE | FOLL_WRITE);
+   if (copied > 0) {
+   task_set_tainted(tsk);
+   }
 

Re: [PATCH] proc: smoke test lseek()

2021-04-07 Thread Alexey Dobriyan
On Wed, Apr 07, 2021 at 08:58:09PM +0100, Matthew Wilcox wrote:
> On Wed, Apr 07, 2021 at 10:55:14PM +0300, Alexey Dobriyan wrote:
> > Now that ->proc_lseek has been made mandatory it would be nice to test
> > that nothing has been forgotten.
> 
> > @@ -45,6 +45,8 @@ static void f_reg(DIR *d, const char *filename)
> > fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
> > if (fd == -1)
> > return;
> > +   /* struct proc_ops::proc_lseek is mandatory if file is seekable. */
> > +   (void)lseek(fd, 0, SEEK_SET);
> > rv = read(fd, buf, sizeof(buf));
> > assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
> > close(fd);
> 
> why throw away the return value?  if it returns an error seeking to
> offset 0, something is terribly wrong.

Some files may use nonseekable_open().
This smoke test doesn't verify that seeking is done correctly anyway.


[PATCH] proc: smoke test lseek()

2021-04-07 Thread Alexey Dobriyan
Now that ->proc_lseek has been made mandatory it would be nice to test
that nothing has been forgotten.

Signed-off-by: Alexey Dobriyan 
---

May want to fold into
proc-mandate-proc_lseek-in-struct-proc_ops.patch

 tools/testing/selftests/proc/read.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/tools/testing/selftests/proc/read.c
+++ b/tools/testing/selftests/proc/read.c
@@ -14,7 +14,7 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 // Test
-// 1) read of every file in /proc
+// 1) read and lseek on every file in /proc
 // 2) readlink of every symlink in /proc
 // 3) recursively (1) + (2) for every directory in /proc
 // 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
@@ -45,6 +45,8 @@ static void f_reg(DIR *d, const char *filename)
fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
if (fd == -1)
return;
+   /* struct proc_ops::proc_lseek is mandatory if file is seekable. */
+   (void)lseek(fd, 0, SEEK_SET);
rv = read(fd, buf, sizeof(buf));
assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
close(fd);


[PATCH] Dixup sysname wrt recent cultural developments

2021-04-01 Thread Alexey Dobriyan
It is apparent that emoji outbreak can not be contained.
In that case might as well embrace it.

Change main kernel banner, /proc/version and uname(2) output.

Signed-off-by: Alexey Dobriyan 
---

 include/linux/uts.h |2 +-
 init/version.c  |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

--- a/include/linux/uts.h
+++ b/include/linux/uts.h
@@ -6,7 +6,7 @@
  * Defines for what uname() should return 
  */
 #ifndef UTS_SYSNAME
-#define UTS_SYSNAME "Linux"
+#define UTS_SYSNAME ""
 #endif
 
 #ifndef UTS_NODENAME
--- a/init/version.c
+++ b/init/version.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(init_uts_ns);
 
 /* FIXED STRINGS! Don't touch! */
 const char linux_banner[] =
-   "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+   " version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
 
 const char linux_proc_banner[] =


Re: [PATCH] Document that PF_KTHREAD _is_ ABI

2021-03-31 Thread Alexey Dobriyan
On Mon, Mar 22, 2021 at 07:53:10AM +, Christoph Hellwig wrote:
> On Sat, Mar 20, 2021 at 10:23:12AM -0700, Andy Lutomirski wrote:
> > > https://github.com/systemd/systemd/blob/main/src/basic/process-util.c#L354
> > > src/basic/process-util.c:is_kernel_thread()
> > 
> > Eww.
> > 
> > Could we fix it differently and more permanently by modifying the proc
> > code to display the values systemd expects?
> 
> Yes, do_task_stat needs a mapping from kernel flags to UABI flags.  And
> we should already discard everything we think we can from the UABI
> now, and only add the ones back that are required to not break
> userspace.

Sure we do. Who is going to find all the flags? I found PF_KTHREAD. :^)

More seriously,

/proc/$pid/stat was expanded to include tsk->flags in 0.99.1 (!!!)

Developers kept adding and shuffling flags probably not even realising
what's going on. The last incident happened at 5.10 when PF_IO_WORKER
was exchanged with PF_VCPU for smaller codegen.

Can I get PF_KTHREAD wasrning because it is _definitely_ used?

I'll post CSV file from 2.6.11 days:

version,#x0001,#x0002,#x0004,#x0008,#x0010,#x0020,#x0040,#x0080,#x0100,#x0200,#x0400,#x0800,#x1000,#x2000,#x4000,#x8000,#x0001,#x0002,#x0004,#x0008,#x0010,#x0020,#x0040,#x0080,#x0100,#x0200,#x0400,#x0800,#x1000,#x2000,#x4000,#x8000
v2.6.11,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_SYNCWRITE,PF_BORROWED_MM,
v2.6.12,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_SYNCWRITE,PF_BORROWED_MM,PF_RANDOMIZE
v2.6.13,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_SYNCWRITE,PF_BORROWED_MM,PF_RANDOMIZE
v2.6.14,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_SYNCWRITE,PF_BORROWED_MM,PF_RANDOMIZE
v2.6.15,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_SYNCWRITE,PF_BORROWED_MM,PF_RANDOMIZE
v2.6.16,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_SYNCWRITE,PF_BORROWED_MM,PF_RANDOMIZE,PF_SWAPWRITE,,,
v2.6.17,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_SYNCWRITE,PF_BORROWED_MM,PF_RANDOMIZE,PF_SWAPWRITE,,PF_SPREAD_PAGE,PF_SPREAD_SLAB,PF_MEMPOLICY,,,
v2.6.18,PF_ALIGNWARN,PF_STARTING,PF_EXITING,PF_DEAD,,,PF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_BORROWED_MM,PF_RANDOMIZE,PF_SWAPWRITE,PF_SPREAD_PAGE,PF_SPREAD_SLAB,,,PF_MEMPOLICY,PF_MUTEX_TESTER,,
v2.6.19,PF_ALIGNWARN,PF_STARTING,PF_EXITINGPF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,PF_FREEZE,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_BORROWED_MM,PF_RANDOMIZE,PF_SWAPWRITE,PF_SPREAD_PAGE,PF_SPREAD_SLAB,,,PF_MEMPOLICY,PF_MUTEX_TESTER,,
v2.6.20,PF_ALIGNWARN,PF_STARTING,PF_EXITINGPF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_BORROWED_MM,PF_RANDOMIZE,PF_SWAPWRITE,PF_SPREAD_PAGE,PF_SPREAD_SLAB,,,PF_MEMPOLICY,PF_MUTEX_TESTER,,
v2.6.21,PF_ALIGNWARN,PF_STARTING,PF_EXITINGPF_FORKNOEXEC,,PF_SUPERPRIV,PF_DUMPCORE,PF_SIGNALED,PF_MEMALLOC,PF_FLUSHER,PF_USED_MATH,,PF_NOFREEZE,PF_FROZEN,PF_FSTRANS,PF_KSWAPD,PF_SWAPOFF,PF_LESS_THROTTLE,PF_BORROWED_MM,PF_RANDOMIZE,PF_SWAPWRITE,PF_SPREAD_PAGE,PF_SPREAD_SLAB,,,PF_MEMPOLICY,PF_MUTEX_TESTER,,

Re: [PATCH] Document that PF_KTHREAD _is_ ABI

2021-03-20 Thread Alexey Dobriyan
On Sat, Mar 20, 2021 at 10:23:12AM -0700, Andy Lutomirski wrote:
> > On Mar 20, 2021, at 9:31 AM, Alexey Dobriyan  wrote:
> >
> > PF_KTHREAD value is visible via field number 9 of /proc/*/stat
> >
> >$ sudo cat /proc/2/stat
> >2 (kthreadd) S 0 0 0 0 -1 2129984 0 ...
> >  ^^^
> >
> > It is used by at least systemd to check for kernel-threadness:
> > https://github.com/systemd/systemd/blob/main/src/basic/process-util.c#L354
> > src/basic/process-util.c:is_kernel_thread()
> 
> Eww.
> 
> Could we fix it differently and more permanently by modifying the proc
> code to display the values systemd expects?

Right now there is no need to fix anything because 4 bits are available.
I put a comment so that PF_KTHREAD won't be moved accidently definitely
breaking systemd.

> > It means that the value can't be changed despite perceived notion that
> > task_struct flags are internal to kernel and can be shuffled at whim.
> >
> > Formally, _all_ struct task_struct::flags PF_* values are kernel ABI
> > which is a disaster.
> >
> > I hope we can mask everything but few flags and hope for the best :^)

> > +/*
> > + * PF_KTHREAD is part of kernel ABI, visible via value #9 in 
> > /proc/$pid/stat
> > + */
> > #define PF_KTHREAD0x0020/* I am a kernel thread */

I think everything should be masked except PF_KTHREAD and maybe few
flags for which known users exist and then don't touch them.

Some flags are clearly internal like PF_MEMALLOC and PF_IDLE.

Some aren't -- PF_FORKNOEXEC. However it is silly for userspace to query it
because programs knows if it forked but didn't exec without external help.


[PATCH] Document that PF_KTHREAD _is_ ABI

2021-03-20 Thread Alexey Dobriyan
PF_KTHREAD value is visible via field number 9 of /proc/*/stat

$ sudo cat /proc/2/stat
2 (kthreadd) S 0 0 0 0 -1 2129984 0 ...
  ^^^

It is used by at least systemd to check for kernel-threadness:
https://github.com/systemd/systemd/blob/main/src/basic/process-util.c#L354
src/basic/process-util.c:is_kernel_thread()

It means that the value can't be changed despite perceived notion that
task_struct flags are internal to kernel and can be shuffled at whim.

Formally, _all_ struct task_struct::flags PF_* values are kernel ABI
which is a disaster.

I hope we can mask everything but few flags and hope for the best :^)

Note for beginner Linux programmers:
every other way you find on the interwebs and Stack Overflow
for checking if pid belongs to a kernel thread is broken one way or
another.

Signed-off-by: Alexey Dobriyan 
---

 include/linux/sched.h |3 +++
 1 file changed, 3 insertions(+)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1566,6 +1566,9 @@ extern struct pid *cad_pid;
 #define PF_MEMALLOC_NOIO   0x0008  /* All allocation requests will 
inherit GFP_NOIO */
 #define PF_LOCAL_THROTTLE  0x0010  /* Throttle writes only against 
the bdi I write to,
 * I am cleaning dirty pages 
from some other bdi. */
+/*
+ * PF_KTHREAD is part of kernel ABI, visible via value #9 in /proc/$pid/stat
+ */
 #define PF_KTHREAD 0x0020  /* I am a kernel thread */
 #define PF_RANDOMIZE   0x0040  /* Randomize virtual address 
space */
 #define PF_SWAPWRITE   0x0080  /* Allowed to write to swap */


[PATCH] proc: test subset=pid

2021-03-20 Thread Alexey Dobriyan
Test that /proc instance mounted with

mount -t proc -o subset=pid

contains only ".", "..", "self", "thread-self" and pid directories.

Note:
Currently "subset=pid" doesn't return "." and ".." via readdir.
This must be a bug.

Signed-off-by: Alexey Dobriyan 
---

 tools/testing/selftests/proc/Makefile  |1 
 tools/testing/selftests/proc/proc-subset-pid.c |  121 +
 2 files changed, 122 insertions(+)

--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_PROGS += proc-self-map-files-001
 TEST_GEN_PROGS += proc-self-map-files-002
 TEST_GEN_PROGS += proc-self-syscall
 TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-subset-pid
 TEST_GEN_PROGS += proc-uptime-001
 TEST_GEN_PROGS += proc-uptime-002
 TEST_GEN_PROGS += read
new file mode 100644
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-subset-pid.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2021 Alexey Dobriyan 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that "mount -t proc -o subset=pid" hides everything but pids,
+ * /proc/self and /proc/thread-self.
+ */
+#undef NDEBUG
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static inline bool streq(const char *a, const char *b)
+{
+   return strcmp(a, b) == 0;
+}
+
+static void make_private_proc(void)
+{
+   if (unshare(CLONE_NEWNS) == -1) {
+   if (errno == ENOSYS || errno == EPERM) {
+   exit(4);
+   }
+   exit(1);
+   }
+   if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
+   exit(1);
+   }
+   if (mount(NULL, "/proc", "proc", 0, "subset=pid") == -1) {
+   exit(1);
+   }
+}
+
+static bool string_is_pid(const char *s)
+{
+   while (1) {
+   switch (*s++) {
+   case '0':case '1':case '2':case '3':case '4':
+   case '5':case '6':case '7':case '8':case '9':
+   continue;
+
+   case '\0':
+   return true;
+
+   default:
+   return false;
+   }
+   }
+}
+
+int main(void)
+{
+   make_private_proc();
+
+   DIR *d = opendir("/proc");
+   assert(d);
+
+   struct dirent *de;
+
+   bool dot = false;
+   bool dot_dot = false;
+   bool self = false;
+   bool thread_self = false;
+
+   while ((de = readdir(d))) {
+   if (streq(de->d_name, ".")) {
+   assert(!dot);
+   dot = true;
+   assert(de->d_type == DT_DIR);
+   } else if (streq(de->d_name, "..")) {
+   assert(!dot_dot);
+   dot_dot = true;
+   assert(de->d_type == DT_DIR);
+   } else if (streq(de->d_name, "self")) {
+   assert(!self);
+   self = true;
+   assert(de->d_type == DT_LNK);
+   } else if (streq(de->d_name, "thread-self")) {
+   assert(!thread_self);
+   thread_self = true;
+   assert(de->d_type == DT_LNK);
+   } else {
+   if (!string_is_pid(de->d_name)) {
+   fprintf(stderr, "d_name '%s'\n", de->d_name);
+   assert(0);
+   }
+   assert(de->d_type == DT_DIR);
+   }
+   }
+
+   char c;
+   int rv = readlink("/proc/cpuinfo", , 1);
+   assert(rv == -1 && errno == ENOENT);
+
+   int fd = open("/proc/cpuinfo", O_RDONLY);
+   assert(fd == -1 && errno == ENOENT);
+
+   return 0;
+}


[PATCH] proc: delete redundant subset=pid check

2021-03-20 Thread Alexey Dobriyan
Two checks in lookup and readdir code should be enough to not have
third check in open code.

Can't open what can't be looked up?

Signed-off-by: Alexey Dobriyan 
---

 fs/proc/inode.c |4 
 1 file changed, 4 deletions(-)

--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -483,7 +483,6 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long 
orig_addr,
 
 static int proc_reg_open(struct inode *inode, struct file *file)
 {
-   struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
typeof_member(struct proc_ops, proc_open) open;
@@ -497,9 +496,6 @@ static int proc_reg_open(struct inode *inode, struct file 
*file)
return rv;
}
 
-   if (fs_info->pidonly == PROC_PIDONLY_ON)
-   return -ENOENT;
-
/*
 * Ensure that
 * 1) PDE's ->release hook will be called no matter what


[PATCH] proc: mandate ->proc_lseek in "struct proc_ops"

2021-03-20 Thread Alexey Dobriyan
Now that proc_ops are separate from file_operations and other
operations it easy to check all instances to have ->proc_lseek
hook and remove check in main code.

Note:
nonseekable_open() files naturally don't require ->proc_lseek.

Garbage collect pde_lseek() function.

Signed-off-by: Alexey Dobriyan 
---

 drivers/isdn/capi/kcapi_proc.c |1 +
 drivers/net/wireless/intersil/hostap/hostap_proc.c |1 +
 drivers/scsi/esas2r/esas2r_main.c  |1 +
 fs/proc/inode.c|   14 ++
 include/linux/proc_fs.h|1 +
 5 files changed, 6 insertions(+), 12 deletions(-)

--- a/drivers/isdn/capi/kcapi_proc.c
+++ b/drivers/isdn/capi/kcapi_proc.c
@@ -201,6 +201,7 @@ static ssize_t empty_read(struct file *file, char __user 
*buf,
 
 static const struct proc_ops empty_proc_ops = {
.proc_read  = empty_read,
+   .proc_lseek = default_llseek,
 };
 
 // ---
--- a/drivers/net/wireless/intersil/hostap/hostap_proc.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c
@@ -227,6 +227,7 @@ static ssize_t prism2_aux_dump_proc_no_read(struct file 
*file, char __user *buf,
 
 static const struct proc_ops prism2_aux_dump_proc_ops = {
.proc_read  = prism2_aux_dump_proc_no_read,
+   .proc_lseek = default_llseek,
 };
 
 
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -617,6 +617,7 @@ static const struct file_operations esas2r_proc_fops = {
 };
 
 static const struct proc_ops esas2r_proc_ops = {
+   .proc_lseek = default_llseek,
.proc_ioctl = esas2r_proc_ioctl,
 #ifdef CONFIG_COMPAT
.proc_compat_ioctl  = compat_ptr_ioctl,
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -273,25 +273,15 @@ void proc_entry_rundown(struct proc_dir_entry *de)
spin_unlock(>pde_unload_lock);
 }
 
-static loff_t pde_lseek(struct proc_dir_entry *pde, struct file *file, loff_t 
offset, int whence)
-{
-   typeof_member(struct proc_ops, proc_lseek) lseek;
-
-   lseek = pde->proc_ops->proc_lseek;
-   if (!lseek)
-   lseek = default_llseek;
-   return lseek(file, offset, whence);
-}
-
 static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 {
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
 
if (pde_is_permanent(pde)) {
-   return pde_lseek(pde, file, offset, whence);
+   return pde->proc_ops->proc_lseek(file, offset, whence);
} else if (use_pde(pde)) {
-   rv = pde_lseek(pde, file, offset, whence);
+   rv = pde->proc_ops->proc_lseek(file, offset, whence);
unuse_pde(pde);
}
return rv;
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -32,6 +32,7 @@ struct proc_ops {
ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *);
ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *);
ssize_t (*proc_write)(struct file *, const char __user *, size_t, 
loff_t *);
+   /* mandatory unless nonseekable_open() or equivalent is used */
loff_t  (*proc_lseek)(struct file *, loff_t, int);
int (*proc_release)(struct inode *, struct file *);
__poll_t (*proc_poll)(struct file *, struct poll_table_struct *);


[PATCH] proc: save LOC in __xlate_proc_name()

2021-03-20 Thread Alexey Dobriyan
Can't look at this verbosity anymore.

Signed-off-by: Alexey Dobriyan 
---

 fs/proc/generic.c |   11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -166,15 +166,8 @@ static int __xlate_proc_name(const char *name, struct 
proc_dir_entry **ret,
const char  *cp = name, *next;
struct proc_dir_entry   *de;
 
-   de = *ret;
-   if (!de)
-   de = _root;
-
-   while (1) {
-   next = strchr(cp, '/');
-   if (!next)
-   break;
-
+   de = *ret ?: _root;
+   while ((next = strchr(cp, '/'))) {
de = pde_subdir_find(de, cp, next - cp);
if (!de) {
WARN(1, "name '%s'\n", name);


Re: auxv stuff (Re: [PATCH] prctl: fix PR_SET_MM_AUXV kernel stack leak)

2021-03-16 Thread Alexey Dobriyan
On Mon, Mar 15, 2021 at 09:42:47AM +0300, Cyrill Gorcunov wrote:
> On Mon, Mar 15, 2021 at 09:00:00AM +0300, Alexey Dobriyan wrote:
> > On Sun, Mar 14, 2021 at 02:40:05PM -0700, Linus Torvalds wrote:
> > > [mm->saved_auxv]
> > > 
> > > That's a separate issue, and I can't find it in myself to care (and
> > > nobody has ever complained), but I thought I'd mention it.
> > 
> > There is another (non-security) one. Compat 32-bit process will report
> > 2 longs too many:
> 
> Good catch! Alexey, should I address it? Or you have fixed it already?

I didn't and I don't know how frankly.
Something I've noticed during more important auxv rewrite.


Re: [PATCH] prctl: fix PR_SET_MM_AUXV kernel stack leak

2021-03-15 Thread Alexey Dobriyan
On Mon, Mar 15, 2021 at 01:29:02PM +0300, Dan Carpenter wrote:
> On Sun, Mar 14, 2021 at 11:51:14PM +0300, Alexey Dobriyan wrote:
> > prctl(PR_SET_MM, PR_SET_MM_AUXV, addr, 1);
> > 
> > will copy 1 byte from userspace to (quite big) on-stack array
> > and then stash everything to mm->saved_auxv.
> 
> What?  It won't save everything, only the 1 byte.  What am I not seeing?

It does copy 1 byte. How embarassing of me.

I was confused by another way of setting auxv data:

if (prctl_map.auxv_size)
memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));

This does full array copy but the array is fully initialised so there is
no problem.

Stop the presses!

> kernel/sys.c
>   2073  static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
>   2074unsigned long len)
>   2075  {
>   2076  /*
>   2077   * This doesn't move the auxiliary vector itself since it's 
> pinned to
>   2078   * mm_struct, but it permits filling the vector with new 
> values.  It's
>   2079   * up to the caller to provide sane values here, otherwise 
> userspace
>   2080   * tools which use this vector might be unhappy.
>   2081   */
>   2082  unsigned long user_auxv[AT_VECTOR_SIZE] = {};
>   2083  
>   2084  if (len > sizeof(user_auxv))
>   2085  return -EINVAL;
>   2086  
>   2087  if (copy_from_user(user_auxv, (const void __user *)addr, len))
>^ ^^^
> Copies one byte from user space.
> 
>   2088  return -EFAULT;
>   2089  
>   2090  /* Make sure the last entry is always AT_NULL */
>   2091  user_auxv[AT_VECTOR_SIZE - 2] = 0;
>   2092  user_auxv[AT_VECTOR_SIZE - 1] = 0;
>   2093  
>   2094  BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
>   2095  
>   2096  task_lock(current);
>   2097  memcpy(mm->saved_auxv, user_auxv, len);
> ^^
> Saves one byte to mm->saved_auxv.
> 
>   2098  task_unlock(current);
>   2099  
>   2100  return 0;
>   2101  }
> 
> regards,
> dan carpenter
> 


auxv stuff (Re: [PATCH] prctl: fix PR_SET_MM_AUXV kernel stack leak)

2021-03-15 Thread Alexey Dobriyan
On Sun, Mar 14, 2021 at 02:40:05PM -0700, Linus Torvalds wrote:
> [mm->saved_auxv]
> 
> That's a separate issue, and I can't find it in myself to care (and
> nobody has ever complained), but I thought I'd mention it.

There is another (non-security) one. Compat 32-bit process will report
2 longs too many:

  20 00 00 00 40 85 f5 f7  21 00 00 00 00 80 f5 f7  | ...@...!...|
0010  10 00 00 00 ff fb 8b 0f  06 00 00 00 00 10 00 00  ||
0020  11 00 00 00 64 00 00 00  03 00 00 00 34 80 04 08  |d...4...|
0030  04 00 00 00 20 00 00 00  05 00 00 00 08 00 00 00  | ...|
0040  07 00 00 00 00 90 f5 f7  08 00 00 00 00 00 00 00  ||
0050  09 00 00 00 25 83 04 08  0b 00 00 00 00 00 00 00  |%...|
0060  0c 00 00 00 00 00 00 00  0d 00 00 00 00 00 00 00  ||
0070  0e 00 00 00 00 00 00 00  17 00 00 00 00 00 00 00  ||
0080  19 00 00 00 8b 27 99 ff  1a 00 00 00 02 00 00 00  |.'..|
0090  1f 00 00 00 f0 2f 99 ff  0f 00 00 00 9b 27 99 ff  |./...'..|
00a0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  ||
  AT_NULL AT_NULL  ^^^
00b0


[PATCH] prctl: fix PR_SET_MM_AUXV kernel stack leak

2021-03-14 Thread Alexey Dobriyan
prctl(PR_SET_MM, PR_SET_MM_AUXV, addr, 1);

will copy 1 byte from userspace to (quite big) on-stack array
and then stash everything to mm->saved_auxv.
AT_NULL terminator will be inserted at the very end.

/proc/*/auxv handler will find that AT_NULL terminator
and copy original stack contents to userspace.

This devious scheme requires CAP_SYS_RESOURCE.

Signed-off-by: Alexey Dobriyan 
---

apply to >=3.5

 kernel/sys.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2079,7 +2079,7 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned 
long addr,
 * up to the caller to provide sane values here, otherwise userspace
 * tools which use this vector might be unhappy.
 */
-   unsigned long user_auxv[AT_VECTOR_SIZE];
+   unsigned long user_auxv[AT_VECTOR_SIZE] = {};
 
if (len > sizeof(user_auxv))
return -EINVAL;


[PATCH] cred: make init_groups static

2021-03-14 Thread Alexey Dobriyan
Nothing really uses init_groups. It can be accessed via init_cred
if necessary.

Signed-off-by: Alexey Dobriyan 
---

 include/linux/cred.h  |1 -
 include/linux/init_task.h |1 -
 kernel/cred.c |2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -53,7 +53,6 @@ do {  \
groups_free(group_info);\
 } while (0)
 
-extern struct group_info init_groups;
 #ifdef CONFIG_MULTIUSER
 extern struct group_info *groups_alloc(int);
 extern void groups_free(struct group_info *);
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -25,7 +25,6 @@
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 extern struct nsproxy init_nsproxy;
-extern struct group_info init_groups;
 extern struct cred init_cred;
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -33,7 +33,7 @@ do {  
\
 static struct kmem_cache *cred_jar;
 
 /* init to 2 - one for init_task, one to ensure it is never freed */
-struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
+static struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
 
 /*
  * The initial credentials for the initial task


Re: [PATCH] fs: proc: fix error return code of proc_map_files_readdir()

2021-03-09 Thread Alexey Dobriyan
On Tue, Mar 09, 2021 at 10:30:23AM -0800, Eric Biggers wrote:
> On Tue, Mar 09, 2021 at 01:55:27AM -0800, Jia-Ju Bai wrote:
> > When get_task_mm() returns NULL to mm, no error return code of
> > proc_map_files_readdir() is assigned.
> > To fix this bug, ret is assigned with -ENOENT in this case.

> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -2332,8 +2332,10 @@ proc_map_files_readdir(struct file *file, struct 
> > dir_context *ctx)
> > goto out_put_task;
> >  
> > mm = get_task_mm(task);
> > -   if (!mm)
> > +   if (!mm) {
> > +   ret = -ENOENT;
> > goto out_put_task;
> > +   }
> >  
> > ret = mmap_read_lock_killable(mm);
> 
> Is there something in particular that makes you think that returning ENOENT is
> the correct behavior in this case?  Try 'ls /proc/$pid/map_files' where pid 
> is a
> kernel thread; it's an empty directory, which is probably intentional.  Your
> patch would change reading the directory to fail with ENOENT.

Yes. 0 from readdir means "no more stuff", not an error.


Re: [PATCH 02/11] pragma once: convert arch/arm/tools/gen-mach-types

2021-03-02 Thread Alexey Dobriyan
On Mon, Mar 01, 2021 at 10:19:50AM +, Russell King - ARM Linux admin wrote:
> On Sun, Feb 28, 2021 at 07:59:16PM +0300, Alexey Dobriyan wrote:
> > From 72842f89ae91a4d02ea29604f87c373052bd3f64 Mon Sep 17 00:00:00 2001
> > From: Alexey Dobriyan 
> > Date: Tue, 9 Feb 2021 14:37:40 +0300
> > Subject: [PATCH 02/11] pragma once: convert arch/arm/tools/gen-mach-types
> > 
> > Generate arch/arm/include/generated/asm/mach-types.h without include
> > guard.
> 
> The fundamental question of "why" is missing from this commit message.
> Are we making this change to all kernel headers?

Apparently, no. Linus doesn't like it.


Re: [PATCH 00/11] pragma once: treewide conversion

2021-02-28 Thread Alexey Dobriyan
On Sun, Feb 28, 2021 at 09:46:17AM -0800, Linus Torvalds wrote:
> On Sun, Feb 28, 2021 at 8:57 AM Alexey Dobriyan  wrote:
> >
> > This is bulk deletion of preprocessor include guards and conversion
> > to #pragma once directive.
> 
> So as mentioned earlier, I'm not 100% convinced about the advantage of
> #pragma once.
> 
> But I decided to actually test it, and it turns out that it causes
> problems for at least sparse.

Oh no.

> Sparse *does* support pragma once, but it does it purely based on
> pathname equality.

Doing what gcc or clang does seems like a smart thing to do.

> So a simple test-program like this:
> 
>  File 'pragma.h':
> 
> #pragma once
> #include "header.h"
> 
> works fine. But this doesn't work at all:
> 
> #pragma once
> #include "./header.h"
> 
> because it causes the filename to be different every time, and you
> eventually end up with trying to open   "././../pragma.h" and it
> causes ENAMETOOLONG.
> 
> So at least sparse isn't ready for this.
> 
> I guess sparse could always simplify the name, but that's non-trivial.
> 
> And honestly, using st_dev/st_ino is problematic too, since
> 
>  (a) they can easily be re-used for generated files
> 
>  (b) you'd have to actually open/fstat the filename to use it, which
> obviates one of the optimizations

fstat is more or less necessary anyway to allocate just enough memory
for 1 read. fstat is not a problem, read is (and subsequent parsing).

> Trying the same on gcc, you don't get that endless "add "./" behavior"
> that sparse did, but a quick test shows that it actually opens the
> file and reads it three times: once for "pramga.h", once for
> "./pragma.h" and a third time for "pragma.h". It only seems to
> _expand_ it once, though.
> 
> I have no idea what gcc does. Maybe it does some "different name, so
> let's open and read it, and then does st_dev/st_ino again". But if so,
> why the _third_ time? Is it some guard against "st_ino might have been
> re-used, so I'll open the original name and re-verify"?
> 
> End result: #pragma is fundamentally less reliable than the
> traditional #ifdef guard. The #ifdef guard works fine even if you
> re-read the file for whatever reason, while #pragma relies on some
> kind of magical behavior.
> 
> I'm adding Luc in case he has any ideas of what the magical behavior might be.

gcc does

open "/" + "whatever between quotes"
fstat

so that "1.h" and "./1.h" differ

https://github.com/gcc-mirror/gcc/blob/master/libcpp/files.c#L377

clang does better:

"./" + "whatever between quotes"
open
fstat
normalise pathname via readlink /proc/*/fd

I think it is quite hard to break something with double inclusion
without trying to actually break stuff. Macros has to be token
for token identical or compiler warn. Types definition too.
Function prototypes and so on.

This is how I found half of the exception list.

The "no leading ./ in includes is trivially enforced with checkpatch.pl
or even grep! And it will optimise the build now that gcc behaviour has
been uncovered.

Include guards aren't without problems.

We have at least 1 identical include guard in the tree for two
completely unrelated headers (allmodconfig of some fringe arch found it)
Nobody complains because only defconfigs are run apparently.

Developer can typo it disabling double inclusion.

#ifndef FOO_H
#define FOO_h
#endif

I've seen a reference to a static checker checking for such stuff
so this problem exists.

Invisibly broken inlcude guards (see qla2xxx patch in the series).


Re: [PATCH 09/11] pragma once: convert scripts/selinux/genheaders/genheaders.c

2021-02-28 Thread Alexey Dobriyan
On Sun, Feb 28, 2021 at 01:37:41PM -0500, Paul Moore wrote:
> On Sun, Feb 28, 2021 at 12:04 PM Alexey Dobriyan  wrote:
> >
> > From 097f2c8b2af7d9e88cff59376ea0ad51b95341cb Mon Sep 17 00:00:00 2001
> > From: Alexey Dobriyan 
> > Date: Tue, 9 Feb 2021 00:39:23 +0300
> > Subject: [PATCH 09/11] pragma once: convert 
> > scripts/selinux/genheaders/genheaders.c
> >
> > Generate security/selinux/flask.h and security/selinux/av_permissions.h
> > without include guards.
> >
> > Signed-off-by: Alexey Dobriyan 
> > ---
> >  scripts/selinux/genheaders/genheaders.c | 6 ++
> >  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> My LKML subscription must have died at some point due to mail bounces,
> or maybe I dopped it (?), because I'm not seeing the rest of this
> patchset for context.
> 
> However, unless the rest of the kernel transitions to this, or there
> is some other big win that I'm missing, I don't see much of a reason
> for this; can you provide some compelling reason for why we should
> make this change?  A quick search on "#pragma once" seems to indicate
> it is non-standard, so why replace the simple #ifdef/#define solution
> for this?

See 
https://lore.kernel.org/lkml/CAHk-=wjFWZMVWTbvUMVxQqGKvGMC_BNrahCtTkpEjxoC0k-T=a...@mail.gmail.com/T/#t


[PATCH 12/11] pragma once: scripted treewide conversion

2021-02-28 Thread Alexey Dobriyan
[  Bcc a lot of lists so that people understand what's this is all ]
[  about without creating uber-cc-thread.  ]
[  Apologies if I missed your subsystem]
[  Please see [PATCH 11/11: pragma once: conversion script (in Python 2)]  ]

Hi, Linus.

Please run the script below from top-level directory, it will convert
most kernel headers to #pragma once directive advancing them into
21-st century.

The advantages are:

* less LOC

18087 files changed, 18878 insertions(+), 99804 deletions(-)
= -81 kLOC (give or take)

* less mental tax on developers forced to name things which aren't even
  real code

* less junk in preprocessor hashtables and editors/IDEs autocompletion
  lists

There are two bit exceptions: UAPI headers and ACPICA.
Given ubiquity of #pragma once, I personally think even these subsystems
should be converted in the future.

Compile tested on alpha, arc, arm, arm64, h8300, ia64, m68k, microblaze,
mips, nios2, parisc, powerpc, riscv, s390, sh, sparc, um-i386, um-x86_64,
i386, x86_64, xtensa (allnoconfig, all defconfigs, allmodconfig with or
without SMP/DEBUG_KERNEL + misc stuff).

Not compile tested on csky, hexagon, nds32, openrisc. 

Love,
Alexey

Signed-off-by: Alexey Dobriyan 



#!/bin/sh -x
find . -type f -name '*.h' -print   |\
LC_ALL=C sort   |\
sed -e 's#^./##g'   |\
xargs ./scripts/pragma-once.py

find . -type d -name 'uapi' | xargs git checkout -f
git checkout -f arch/alpha/include/asm/cmpxchg.h
git checkout -f arch/arm/mach-imx/hardware.h
git checkout -f arch/arm/mach-ixp4xx/include/mach/hardware.h
git checkout -f arch/arm/mach-sa1100/include/mach/hardware.h
git checkout -f arch/mips/include/asm/mips-cps.h
git checkout -f arch/x86/boot/boot.h
git checkout -f arch/x86/boot/ctype.h
git checkout -f arch/x86/include/asm/cpufeatures.h
git checkout -f arch/x86/include/asm/disabled-features.h
git checkout -f arch/x86/include/asm/required-features.h
git checkout -f arch/x86/include/asm/vmxfeatures.h
git checkout -f arch/x86/include/asm/vvar.h
git checkout -f drivers/acpi/acpica/
git checkout -f drivers/gpu/drm/amd/pm/inc/vega10_ppsmc.h
git checkout -f drivers/gpu/drm/amd/pm/powerplay/ppsmc.h
git checkout -f drivers/input/misc/yealink.h
git checkout -f drivers/media/usb/dvb-usb-v2/mxl111sf-demod.h
git checkout -f drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.h
git checkout -f drivers/pcmcia/yenta_socket.h
git checkout -f drivers/staging/rtl8723bs/include/hal_com_h2c.h
git checkout -f include/linux/acpi.h
git checkout -f include/linux/bitops.h
git checkout -f include/linux/compiler_types.h
git checkout -f include/linux/device.h
git checkout -f include/linux/kbuild.h
git checkout -f include/linux/libfdt_env.h
git checkout -f include/linux/local_lock.h
git checkout -f include/linux/spinlock.h
git checkout -f include/linux/spinlock_api_smp.h
git checkout -f include/linux/spinlock_types.h
git checkout -f include/linux/tracepoint.h
git checkout -f mm/gup_test.h
git checkout -f net/batman-adv/main.h
git checkout -f scripts/dtc/
git checkout -f tools/include/linux/bitops.h
git checkout -f tools/include/linux/compiler.h
git checkout -f tools/testing/selftests/clone3/clone3_selftests.h
git checkout -f tools/testing/selftests/futex/include/atomic.h
git checkout -f tools/testing/selftests/futex/include/futextest.h
git checkout -f tools/testing/selftests/futex/include/logging.h
git checkout -f tools/testing/selftests/kselftest.h
git checkout -f tools/testing/selftests/kselftest_harness.h
git checkout -f tools/testing/selftests/pidfd/pidfd.h
git checkout -f tools/testing/selftests/x86/helpers.h


[PATCH 11/11] pragma once: conversion script (in Python 2)

2021-02-28 Thread Alexey Dobriyan
>From 2bffcdfec69a8d28e9cb2c535724fbba8e12b820 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 14:47:34 +0300
Subject: [PATCH 11/11] pragma once: conversion script (in Python 2)

Script accepts list of files to be converted from the command line,
strips include guard if any and inserts "#pragma once" directive in
the beginning.

The following patterns are recognised:

#ifndef FOO_H   #ifndef FOO_H
#define FOO_H   #ifndef FOO_H 1

#endif
#endif // comment
#endif /* one line comment */

This is how almost all include guards look like.

Scripts doesn't pretend to be a compiler. For example, comments inside
preprocessor directive aren't recognised because people don't write code
like this:

# /*
   * legal C
   */def\
ine FOO /*
 * no, we don't care
 */

Trailing multiline comments aren't recognised as well.

Script can cut through SPDX comments:

/* SPDX-License-Identifier: xxx
 *  <=== pragma once will be inserted here
 * Copyright ...
 */

In other words, the script is simple but not too simple.

It cowardly exits and doesn't do anything as a safety measure in case of
an existing pragma once directive, missing/broken include guard or a bug.
Running it second time shouldn't do anything.

Signed-off-by: Alexey Dobriyan 
---
 scripts/pragma-once.py | 159 +
 1 file changed, 159 insertions(+)
 create mode 100755 scripts/pragma-once.py

diff --git a/scripts/pragma-once.py b/scripts/pragma-once.py
new file mode 100755
index ..7c8a274aad28
--- /dev/null
+++ b/scripts/pragma-once.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python2
+# Copyright (c) 2021 Alexey Dobriyan 
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# Change include guard to "#pragma once" directive in place.
+import os
+import re
+import sys
+
+def read_file(filename):
+with open(filename) as f:
+return f.read()
+
+def write_file(filename, buf):
+tmp = '%s.pragma-once' % filename
+with open(tmp, 'w') as f:
+f.write(buf)
+os.rename(tmp, filename)
+
+def ws(c):
+return c == ' ' or c == '\t' or c == '\n'
+
+re_ifndef = re.compile('#[ \t]*ifndef[ \t]+([A-Za-z_][A-Za-z0-9_]*)\n')
+re_define = re.compile('#[ \t]*define[ \t]+([A-Za-z_][A-Za-z0-9_]*)([ 
\t]+1)?\n')
+re_endif1 = re.compile('#[ \t]*endif[ \t]*//')
+re_endif2 = re.compile('#[ \t]*endif[ \t]*/\*')
+
+def pragma_once(c):
+i = 0
+
+# skip leading whitespace and comments
+while i < len(c):
+if ws(c[i]):
+i += 1
+elif c[i] == '/' and c[i + 1] == '*':
+i = c.index('*/', i + 2) + 2
+elif c[i] == '/' and c[i + 1] == '/':
+i = c.index('\n', i + 2) + 1
+else:
+break;
+
+# find #ifndef
+ifndef_start = i
+match = re_ifndef.match(c, i)
+guard = match.group(1)
+i = match.end()
+
+# find #define
+match = re_define.match(c, i)
+if match.group(1) != guard:
+raise
+i = match.end()
+
+while ws(c[i]):
+i += 1
+
+define_end = i
+
+# trim whitespace before #ifndef
+i = ifndef_start
+while i > 0 and ws(c[i - 1]):
+i -= 1
+if c[i] == '\n':
+i += 1
+ifndef_start = i
+
+#print repr(c[ifndef_start:define_end])
+
+# find #endif
+i = c.rindex('\n#endif', i) + 1
+endif_start = i
+
+match = None
+if match is None:
+match = re_endif1.match(c, i)
+if match:
+try:
+i = c.index('\n', match.end()) + 1
+except ValueError:
+i = len(c)
+
+if match is None:
+match = re_endif2.match(c, i)
+if match:
+try:
+i = c.index('*/', match.end()) + 2
+except ValueError:
+i = len(c)
+
+if match is None:
+i = endif_start + len('#endif')
+
+while i < len(c) and ws(c[i]):
+i += 1
+if i != len(c):
+raise
+
+endif_end = i
+
+# trim whitespace before #endif
+i = endif_start
+while i > 0 and ws(c[i - 1]):
+i -= 1
+if c[i] == '\n':
+

[PATCH 10/11] pragma once: delete few backslashes

2021-02-28 Thread Alexey Dobriyan
>From 251ca5673886b5bb0a42004944290b9d2b267a4a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Fri, 19 Feb 2021 13:37:24 +0300
Subject: [PATCH 10/11] pragma once: delete few backslashes

Some macros contain one backslash too many and end up being the last
macro in a header file. When #pragma once conversion script truncates
the last #endif and whitespace before it, such backslash triggers
a warning about "OMG file ends up in a backslash-newline".

Needless to say I don't want to handle another case in my script,
so delete useless backslashes instead.

Signed-off-by: Alexey Dobriyan 
---
 arch/arc/include/asm/cacheflush.h  | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/item.h | 2 +-
 include/linux/once.h   | 2 +-
 include/media/drv-intf/exynos-fimc.h   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arc/include/asm/cacheflush.h 
b/arch/arc/include/asm/cacheflush.h
index e201b4b1655a..46704c341b17 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -112,6 +112,6 @@ do {
\
 } while (0)
 
 #define copy_from_user_page(vma, page, vaddr, dst, src, len)   \
-   memcpy(dst, src, len);  \
+   memcpy(dst, src, len)
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h 
b/drivers/net/ethernet/mellanox/mlxsw/item.h
index e92cadc98128..cc0133401dd1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/item.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/item.h
@@ -504,6 +504,6 @@ mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 
index, u8 val) \
return __mlxsw_item_bit_array_set(buf,  
\
  &__ITEM_NAME(_type, _cname, _iname),  
\
  index, val);  
\
-}  
\
+}
 
 #endif
diff --git a/include/linux/once.h b/include/linux/once.h
index 9225ee6d96c7..0af450ff94a5 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -55,6 +55,6 @@ void __do_once_done(bool *done, struct static_key_true 
*once_key,
 #define get_random_once(buf, nbytes)\
DO_ONCE(get_random_bytes, (buf), (nbytes))
 #define get_random_once_wait(buf, nbytes)\
-   DO_ONCE(get_random_bytes_wait, (buf), (nbytes))  \
+   DO_ONCE(get_random_bytes_wait, (buf), (nbytes))
 
 #endif /* _LINUX_ONCE_H */
diff --git a/include/media/drv-intf/exynos-fimc.h 
b/include/media/drv-intf/exynos-fimc.h
index 6b9ef631d6bb..6c5fbdacf4b5 100644
--- a/include/media/drv-intf/exynos-fimc.h
+++ b/include/media/drv-intf/exynos-fimc.h
@@ -152,6 +152,6 @@ static inline struct exynos_video_entity 
*vdev_to_exynos_video_entity(
 #define fimc_pipeline_call(ent, op, args...) \
((!(ent) || !(ent)->pipe) ? -ENOENT : \
(((ent)->pipe->ops && (ent)->pipe->ops->op) ? \
-   (ent)->pipe->ops->op(((ent)->pipe), ##args) : -ENOIOCTLCMD))  \
+   (ent)->pipe->ops->op(((ent)->pipe), ##args) : -ENOIOCTLCMD))
 
 #endif /* S5P_FIMC_H_ */
-- 
2.29.2



[PATCH 09/11] pragma once: convert scripts/selinux/genheaders/genheaders.c

2021-02-28 Thread Alexey Dobriyan
>From 097f2c8b2af7d9e88cff59376ea0ad51b95341cb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 00:39:23 +0300
Subject: [PATCH 09/11] pragma once: convert 
scripts/selinux/genheaders/genheaders.c

Generate security/selinux/flask.h and security/selinux/av_permissions.h
without include guards.

Signed-off-by: Alexey Dobriyan 
---
 scripts/selinux/genheaders/genheaders.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/scripts/selinux/genheaders/genheaders.c 
b/scripts/selinux/genheaders/genheaders.c
index f355b3e0e968..e13ee4221993 100644
--- a/scripts/selinux/genheaders/genheaders.c
+++ b/scripts/selinux/genheaders/genheaders.c
@@ -74,8 +74,8 @@ int main(int argc, char *argv[])
initial_sid_to_string[i] = stoupperx(s);
}
 
+   fprintf(fout, "#pragma once\n");
fprintf(fout, "/* This file is automatically generated.  Do not edit. 
*/\n");
-   fprintf(fout, "#ifndef _SELINUX_FLASK_H_\n#define 
_SELINUX_FLASK_H_\n\n");
 
for (i = 0; secclass_map[i].name; i++) {
struct security_class_mapping *map = _map[i];
@@ -109,7 +109,6 @@ int main(int argc, char *argv[])
fprintf(fout, "\treturn sock;\n");
fprintf(fout, "}\n");
 
-   fprintf(fout, "\n#endif\n");
fclose(fout);
 
fout = fopen(argv[2], "w");
@@ -119,8 +118,8 @@ int main(int argc, char *argv[])
exit(4);
}
 
+   fprintf(fout, "#pragma once\n");
fprintf(fout, "/* This file is automatically generated.  Do not edit. 
*/\n");
-   fprintf(fout, "#ifndef _SELINUX_AV_PERMISSIONS_H_\n#define 
_SELINUX_AV_PERMISSIONS_H_\n\n");
 
for (i = 0; secclass_map[i].name; i++) {
struct security_class_mapping *map = _map[i];
@@ -136,7 +135,6 @@ int main(int argc, char *argv[])
}
}
 
-   fprintf(fout, "\n#endif\n");
fclose(fout);
exit(0);
 }
-- 
2.29.2



[PATCH 07/11] pragma once: convert kernel/time/timeconst.bc

2021-02-28 Thread Alexey Dobriyan
>From e428633ff0df5fe8501aaf785c6961fc766344b2 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 00:31:23 +0300
Subject: [PATCH 07/11] pragma once: convert kernel/time/timeconst.bc

Generate include/generated/timeconst.h without include guard.

Signed-off-by: Alexey Dobriyan 
---
 kernel/time/timeconst.bc | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index 7ed0e0fb5831..42b3a542e5f5 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -41,12 +41,9 @@ define fmuls(b,n,d) {
 }
 
 define timeconst(hz) {
+   print "#pragma once\n"
print "/* Automatically generated by kernel/time/timeconst.bc */\n"
print "/* Time conversion constants for HZ == ", hz, " */\n"
-   print "\n"
-
-   print "#ifndef KERNEL_TIMECONST_H\n"
-   print "#define KERNEL_TIMECONST_H\n\n"
 
print "#include \n"
print "#include \n\n"
@@ -106,9 +103,6 @@ define timeconst(hz) {
print "#define HZ_TO_NSEC_DEN\t\t", hz/cd, "\n"
print "#define NSEC_TO_HZ_NUM\t\t", hz/cd, "\n"
print "#define NSEC_TO_HZ_DEN\t\t", 10/cd, "\n"
-   print "\n"
-
-   print "#endif /* KERNEL_TIMECONST_H */\n"
}
halt
 }
-- 
2.29.2



[PATCH 08/11] pragma once: convert scripts/atomic/

2021-02-28 Thread Alexey Dobriyan
>From f10fe79897fa9600f144c76bc5df52dba28b7a66 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 01:37:55 +0300
Subject: [PATCH 08/11] pragma once: convert scripts/atomic/

Generate atomic headers without include guards.

Signed-off-by: Alexey Dobriyan 
---
 include/asm-generic/atomic-instrumented.h |  9 ++---
 include/asm-generic/atomic-long.h |  9 ++---
 include/linux/atomic-arch-fallback.h  |  9 ++---
 include/linux/atomic-fallback.h   |  9 ++---
 scripts/atomic/gen-atomic-fallback.sh | 10 +-
 scripts/atomic/gen-atomic-instrumented.sh | 10 +-
 scripts/atomic/gen-atomic-long.sh |  7 +--
 7 files changed, 11 insertions(+), 52 deletions(-)

diff --git a/include/asm-generic/atomic-instrumented.h 
b/include/asm-generic/atomic-instrumented.h
index 888b6cfeed91..7c50dc944da4 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-
+#pragma once
 // Generated by scripts/atomic/gen-atomic-instrumented.sh
 // DO NOT MODIFY THIS FILE DIRECTLY
 
@@ -14,9 +14,6 @@
  * arch_ variants (i.e. arch_atomic_read()/arch_atomic_cmpxchg()) to avoid
  * double instrumentation.
  */
-#ifndef _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
-#define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
-
 #include 
 #include 
 #include 
@@ -1828,6 +1825,4 @@ atomic64_dec_if_positive(atomic64_t *v)
instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \
arch_cmpxchg_double_local(__ai_ptr, __VA_ARGS__); \
 })
-
-#endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-// 4bec382e44520f4d8267e42620054db26a659ea3
+// d4532f98463d7403bde1d3199c19ef660be362a4
diff --git a/include/asm-generic/atomic-long.h 
b/include/asm-generic/atomic-long.h
index 073cf40f431b..99627cd42f32 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -1,11 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-
+#pragma once
 // Generated by scripts/atomic/gen-atomic-long.sh
 // DO NOT MODIFY THIS FILE DIRECTLY
-
-#ifndef _ASM_GENERIC_ATOMIC_LONG_H
-#define _ASM_GENERIC_ATOMIC_LONG_H
-
 #include 
 #include 
 
@@ -1010,5 +1006,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 }
 
 #endif /* CONFIG_64BIT */
-#endif /* _ASM_GENERIC_ATOMIC_LONG_H */
-// a624200981f552b2c6be4f32fe44da8289f30d87
+// d6f8dde6d86814728f0671cfc505c9a3361a70a0
diff --git a/include/linux/atomic-arch-fallback.h 
b/include/linux/atomic-arch-fallback.h
index a3dba31df01e..477c53f3a4d6 100644
--- a/include/linux/atomic-arch-fallback.h
+++ b/include/linux/atomic-arch-fallback.h
@@ -1,11 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-
+#pragma once
 // Generated by scripts/atomic/gen-atomic-fallback.sh
 // DO NOT MODIFY THIS FILE DIRECTLY
-
-#ifndef _LINUX_ATOMIC_FALLBACK_H
-#define _LINUX_ATOMIC_FALLBACK_H
-
 #include 
 
 #ifndef arch_xchg_relaxed
@@ -2357,5 +2353,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
 #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 #endif
 
-#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// cca554917d7ea73d5e3e7397dd70c484cad9b2c4
+// 97eae5341271dde782071fb73ff76f4b7bfa4808
diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h
index 2a3f55d98be9..eecc9ee88af6 100644
--- a/include/linux/atomic-fallback.h
+++ b/include/linux/atomic-fallback.h
@@ -1,11 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
-
+#pragma once
 // Generated by scripts/atomic/gen-atomic-fallback.sh
 // DO NOT MODIFY THIS FILE DIRECTLY
-
-#ifndef _LINUX_ATOMIC_FALLBACK_H
-#define _LINUX_ATOMIC_FALLBACK_H
-
 #include 
 
 #ifndef xchg_relaxed
@@ -2591,5 +2587,4 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define atomic64_dec_if_positive atomic64_dec_if_positive
 #endif
 
-#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// d78e6c293c661c15188f0ec05bce45188c8d5892
+// a697a2a982652cdb954bc317199caba6ae5c3ed9
diff --git a/scripts/atomic/gen-atomic-fallback.sh 
b/scripts/atomic/gen-atomic-fallback.sh
index 317a6cec76e1..27a63ae3a458 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -223,13 +223,9 @@ gen_try_cmpxchg_fallbacks()
 
 cat << EOF
 // SPDX-License-Identifier: GPL-2.0
-
+#pragma once
 // Generated by $0
 // DO NOT MODIFY THIS FILE DIRECTLY
-
-#ifndef _LINUX_ATOMIC_FALLBACK_H
-#define _LINUX_ATOMIC_FALLBACK_H
-
 #include 
 
 EOF
@@ -254,7 +250,3 @@ EOF
 grep '^[a-z]' "$1" | while read name meta args; do
gen_proto "${meta}" "${name}" "${ARCH}" "atomic64" "s64" ${args}
 done
-
-cat <
 #include 
 #include 
@@ -202,8 +199,3 @@ gen_xchg "cmpxchg_double" "2 * "
 printf "\n\n"
 
 gen_xchg "cmpxchg_double_local" "2 * "
-
-cat <
 #include 
 
@@ -98,5 +94,4 @@ done
 
 cat <

[PATCH 06/11] pragma once: convert include/linux/cb710.h

2021-02-28 Thread Alexey Dobriyan
>From 1c4107e55b322dada46879837d4d64841bc5f150 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 16:56:54 +0300
Subject: [PATCH 06/11] pragma once: convert include/linux/cb710.h

This file is concatenation of two files with two include guards.
Convert it manually.

Signed-off-by: Alexey Dobriyan 
---
 include/linux/cb710.h | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/linux/cb710.h b/include/linux/cb710.h
index 405657a9a0d5..f3055e9442db 100644
--- a/include/linux/cb710.h
+++ b/include/linux/cb710.h
@@ -1,12 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
+#pragma once
 /*
  *  cb710/cb710.h
  *
  *  Copyright by Michał Mirosław, 2008-2009
  */
-#ifndef LINUX_CB710_DRIVER_H
-#define LINUX_CB710_DRIVER_H
-
 #include 
 #include 
 #include 
@@ -121,15 +119,11 @@ void cb710_dump_regs(struct cb710_chip *chip, unsigned 
dump);
 #define CB710_DUMP_ACCESS_ALL  0x700
 #define CB710_DUMP_ACCESS_MASK 0x700
 
-#endif /* LINUX_CB710_DRIVER_H */
 /*
  *  cb710/sgbuf2.h
  *
  *  Copyright by Michał Mirosław, 2008-2009
  */
-#ifndef LINUX_CB710_SG_H
-#define LINUX_CB710_SG_H
-
 #include 
 #include 
 
@@ -197,5 +191,3 @@ static inline void cb710_sg_dwiter_read_to_io(struct 
sg_mapping_iter *miter,
while (count-- > 0)
iowrite32(cb710_sg_dwiter_read_next_block(miter), port);
 }
-
-#endif /* LINUX_CB710_SG_H */
-- 
2.29.2



[PATCH 04/11] pragma once: convert drivers/gpu/drm/pl111/pl111_nomadik.h

2021-02-28 Thread Alexey Dobriyan
>From fe8504a1a0b5352cbc676b933c3dbb79ae9f59c9 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 16:50:24 +0300
Subject: [PATCH 04/11] pragma once: convert 
drivers/gpu/drm/pl111/pl111_nomadik.h

This file has broken include guard, convert it manually.

Signed-off-by: Alexey Dobriyan 
---
 drivers/gpu/drm/pl111/pl111_nomadik.h | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/pl111/pl111_nomadik.h 
b/drivers/gpu/drm/pl111/pl111_nomadik.h
index 47ccf5c839fc..00592a38c7d8 100644
--- a/drivers/gpu/drm/pl111/pl111_nomadik.h
+++ b/drivers/gpu/drm/pl111/pl111_nomadik.h
@@ -1,9 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0+
-
-#ifndef PL111_NOMADIK_H
-#define PL111_NOMADIK_H
-#endif
-
+#pragma once
 struct device;
 
 #ifdef CONFIG_ARCH_NOMADIK
-- 
2.29.2



[PATCH 05/11] pragma once: convert drivers/scsi/qla2xxx/qla_target.h

2021-02-28 Thread Alexey Dobriyan
>From 1f58b4923ca9bfb8b1e73554d3793ee98ab58a77 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 17:14:25 +0300
Subject: [PATCH 05/11] pragma once: convert drivers/scsi/qla2xxx/qla_target.h

This file has broken include guard which is not obvious just by looking
at the code. Convert it manually. I think I got #endif right.

Signed-off-by: Alexey Dobriyan 
---
 drivers/scsi/qla2xxx/qla_target.h | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_target.h 
b/drivers/scsi/qla2xxx/qla_target.h
index 10e5e6c8087d..923910dd1809 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -15,10 +15,7 @@
  * This is the global def file that is useful for including from the
  * target portion.
  */
-
-#ifndef __QLA_TARGET_H
-#define __QLA_TARGET_H
-
+#pragma once
 #include "qla_def.h"
 #include "qla_dsd.h"
 
@@ -116,7 +113,6 @@
(min(1270, ((ql) > 0) ? (QLA_TGT_DATASEGS_PER_CMD_24XX + \
QLA_TGT_DATASEGS_PER_CONT_24XX*((ql) - 1)) : 0))
 #endif
-#endif
 
 #define GET_TARGET_ID(ha, iocb) ((HAS_EXTENDED_IDS(ha))
\
 ? le16_to_cpu((iocb)->u.isp2x.target.extended) \
@@ -244,6 +240,7 @@ struct ctio_to_2xxx {
 #ifndef CTIO_RET_TYPE
 #define CTIO_RET_TYPE  0x17/* CTIO return entry */
 #define ATIO_TYPE7 0x06 /* Accept target I/O entry for 24xx */
+#endif
 
 struct fcp_hdr {
uint8_t  r_ctl;
@@ -1082,5 +1079,3 @@ extern void qlt_do_generation_tick(struct scsi_qla_host 
*, int *);
 
 void qlt_send_resp_ctio(struct qla_qpair *, struct qla_tgt_cmd *, uint8_t,
 uint8_t, uint8_t, uint8_t);
-
-#endif /* __QLA_TARGET_H */
-- 
2.29.2



[PATCH 03/11] pragma once: convert arch/s390/tools/gen_facilities.c

2021-02-28 Thread Alexey Dobriyan
>From 45622ce1e4db512ad603dd90f959e61285b7541a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 14:43:52 +0300
Subject: [PATCH 03/11] pragma once: convert arch/s390/tools/gen_facilities.c

Generate arch/s390/include/generated/asm/facility-defs.h without include
guard.

Signed-off-by: Alexey Dobriyan 
---
 arch/s390/tools/gen_facilities.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 61ce5b59b828..cd5055994206 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -157,8 +157,7 @@ static void print_facility_lists(void)
 
 int main(int argc, char **argv)
 {
-   printf("#ifndef __ASM_S390_FACILITY_DEFS__\n");
-   printf("#define __ASM_S390_FACILITY_DEFS__\n");
+   printf("#pragma once\n");
printf("/*\n");
printf(" * DO NOT MODIFY.\n");
printf(" *\n");
@@ -166,6 +165,6 @@ int main(int argc, char **argv)
printf(" */\n\n");
printf("#include \n\n");
print_facility_lists();
-   printf("\n#endif\n");
+   printf("\n");
return 0;
 }
-- 
2.29.2



[PATCH 01/11] pragma once: delete include/linux/atm_suni.h

2021-02-28 Thread Alexey Dobriyan
>From c17ac63e1334c742686cd411736699c1d34d45a7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Wed, 10 Feb 2021 21:07:45 +0300
Subject: [PATCH 01/11] pragma once: delete include/linux/atm_suni.h

This file has been empty since 2.3.99-pre3!
Delete it instead of converting to #pragma once.

Signed-off-by: Alexey Dobriyan 
---
 drivers/atm/fore200e.c   |  1 -
 drivers/atm/suni.c   |  1 -
 include/linux/atm_suni.h | 12 
 3 files changed, 14 deletions(-)
 delete mode 100644 include/linux/atm_suni.h

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 9a70bee84125..0b9c99c3d218 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -21,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c
index c920a8c52925..21e5acc766b8 100644
--- a/drivers/atm/suni.c
+++ b/drivers/atm/suni.c
@@ -21,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
diff --git a/include/linux/atm_suni.h b/include/linux/atm_suni.h
deleted file mode 100644
index 84f3aab54468..
--- a/include/linux/atm_suni.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* atm_suni.h - Driver-specific declarations of the SUNI driver (for use by
-   driver-specific utilities) */
-
-/* Written 1998,2000 by Werner Almesberger, EPFL ICA */
-
-
-#ifndef LINUX_ATM_SUNI_H
-#define LINUX_ATM_SUNI_H
-
-/* everything obsoleted */
-
-#endif
-- 
2.29.2



[PATCH 02/11] pragma once: convert arch/arm/tools/gen-mach-types

2021-02-28 Thread Alexey Dobriyan
>From 72842f89ae91a4d02ea29604f87c373052bd3f64 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan 
Date: Tue, 9 Feb 2021 14:37:40 +0300
Subject: [PATCH 02/11] pragma once: convert arch/arm/tools/gen-mach-types

Generate arch/arm/include/generated/asm/mach-types.h without include
guard.

Signed-off-by: Alexey Dobriyan 
---
 arch/arm/tools/gen-mach-types | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm/tools/gen-mach-types b/arch/arm/tools/gen-mach-types
index cbe1c33bb871..c28cd4b50f76 100644
--- a/arch/arm/tools/gen-mach-types
+++ b/arch/arm/tools/gen-mach-types
@@ -23,12 +23,11 @@ NF == 3 {
 
 
 END{
+ printf("#pragma once\n");
  printf("/*\n");
  printf(" * This was automagically generated from %s!\n", FILENAME);
  printf(" * Do NOT edit\n");
- printf(" */\n\n");
- printf("#ifndef __ASM_ARM_MACH_TYPE_H\n");
- printf("#define __ASM_ARM_MACH_TYPE_H\n\n");
+ printf(" */\n");
  printf("#ifndef __ASSEMBLY__\n");
  printf("/* The type of machine we're running on */\n");
  printf("extern unsigned int __machine_arch_type;\n");
@@ -68,6 +67,5 @@ END   {
 
  printf("\n#ifndef machine_arch_type\n");
  printf("#define machine_arch_type\t__machine_arch_type\n");
- printf("#endif\n\n");
  printf("#endif\n");
}
-- 
2.29.2



[PATCH 00/11] pragma once: treewide conversion

2021-02-28 Thread Alexey Dobriyan
This is bulk deletion of preprocessor include guards and conversion
to #pragma once directive.

See [PATCH 11/11] and [PATCH 12/11] for the script and rationale.
Everything else is trivial stuff.


Re: #pragma once (was Re: incoming)

2021-02-26 Thread Alexey Dobriyan
On Fri, Feb 26, 2021 at 01:53:48PM -0800, Linus Torvalds wrote:
> On Fri, Feb 26, 2021 at 12:17 PM Alexey Dobriyan  wrote:
> >
> > I want to sent treewide "#pragma once" conversion:
> 
> Are there *any* advantages to it?
> 
> It's non-standard,

It is effectively standard:
https://en.wikipedia.org/wiki/Pragma_once#Portability

and I'll spare UAPI headers from conversion.

> and the historical argument for it ("it can reduce
> compile times because the preprocessor doesn't open the file twice" is
> pure and utter hogwash. Any preprocessor worth its salt does the same
> thing for the standard and traditional #ifndef/#define guard sequence.
> 
> Honestly, "#pragma once" was always a hack for bad preprocessors that
> weren't smart enough to just figure it out from the regular guarding
> macros.
> 
> I can't imagine that any preprocessor that incompetent exists any
> more, and if i does, we sure shouldn't be using it.
> 
> So #pragma once seems to have no actual advantages.

The advantage is removing busywork that is include guards.

There are rules and schemes about how to create guard macro.

Should it be prefixed by underscore?
Should it be prefixed by two underscores?
Should it be full path uppercased or just last path component?
Should the guard macro be lowercased?
Should it be changed when header is moved?
Should trailing #endif contain comment?
Should #define be just #define or "#define FOO 1"?

I've even seen advice (or an IDE doing that) that is should contain
timestamp of a header creation time to minimise collisions (implying
collisions could happen as could typos as could broken guards)

All this zoo of styles and made up mental work is completely avoided
by using #pragma once:

1) put #pragma once on the first line

or

2) put #pragma once on the second line after SPDX banner

and that's it.

No fuss, no filled up preprocessor hashtables, no implicit arguing
about styles. And way less LOC:

18092 files changed, 18883 insertions(+), 99841 deletions(-)

Now if old school header guard is necessary it can be used like in
good old times.

Nobody would miss include guards.


#pragma once (was Re: incoming)

2021-02-26 Thread Alexey Dobriyan
Linus wrote:

> I'm hoping to just do -rc1 this weekend after all - despite my late
> start due to loss of power for several days.
> 
> I'll allow late stragglers with good reason through, but the fewer of
> those there are, the better, of course.

I want to sent treewide "#pragma once" conversion:

18092 files changed, 18883 insertions(+), 99841 deletions(-)

ideally it is done as 1 commit avoiding my death by thousand cuts of
going through all of maintainers.

There may be a compile failure or two but the list of exceptions is
stable.

Opinions? :-)


Re: [PATCH] proc: Convert S_ permission uses to octal

2021-02-13 Thread Alexey Dobriyan
On Fri, Feb 12, 2021 at 04:01:48PM -0600, Eric W. Biederman wrote:
> Joe Perches  writes:
> 
> > Convert S_ permissions to the more readable octal.

> Something like that should be able to address the readability while
> still using symbolic constants.

Macros are easy. I've sent a patch long time ago which does essentially

#define rwxrwxrwx 0777
...

But then kernel will start using something nobode else does.

This whole issue is like sizeof(*ptr) vs sizeof(sizeof(struct S)).
No preferred way overall with ever vigilant checkpatch.pl guarding
kernel 24/7. :-)


Re: [PATCH v5 2/2] procfs/dmabuf: Add inode number to /proc/*/fdinfo

2021-02-08 Thread Alexey Dobriyan
On Mon, Feb 08, 2021 at 03:22:44PM +, Matthew Wilcox wrote:
> On Mon, Feb 08, 2021 at 03:14:28PM +, Kalesh Singh wrote:
> > -   seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
> > +   seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\ninode_no:\t%lu\n",
> 
> You changed it everywhere but here ...

call it "st_ino", because that's how fstat calls it?


Re: [PATCH 1/5] sched: make struct task_struct::state 32-bit

2021-02-08 Thread Alexey Dobriyan
On Mon, Feb 08, 2021 at 04:25:35PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 08, 2021 at 05:52:45PM +0300, Alexey Dobriyan wrote:
> > On Mon, Feb 08, 2021 at 05:30:25PM +0300, Alexey Dobriyan wrote:
> > > On Mon, Feb 08, 2021 at 11:34:18AM +0100, Peter Zijlstra wrote:
> > > > On Sat, Feb 06, 2021 at 06:18:32PM +0300, Alexey Dobriyan wrote:
> > > > 
> > > > > Silently delete "extern" from prototypes.
> > > > 
> > > > NAK, extern is right.
> > > 
> > > Extern is only necessary for variables.
> > 
> > Specifically C17, 6.2.2 p5 (linkage of identifiers):
> > 
> > if the declaration of an identifier for a function has no
> > storage-class specifier, its linkage is determined exactly as if
> > it were declared with the storage-class specifier "extern".
> > 
> > This is why nothing happens if "extern" is deleted.
> 
> I know, but I still very much like extern on the function declarations
> too. It tells me the definition isn't to be found in this TU.

What can I say. The absense of function body should tell that.


Re: [PATCH 1/5] sched: make struct task_struct::state 32-bit

2021-02-08 Thread Alexey Dobriyan
On Mon, Feb 08, 2021 at 05:30:25PM +0300, Alexey Dobriyan wrote:
> On Mon, Feb 08, 2021 at 11:34:18AM +0100, Peter Zijlstra wrote:
> > On Sat, Feb 06, 2021 at 06:18:32PM +0300, Alexey Dobriyan wrote:
> > 
> > > Silently delete "extern" from prototypes.
> > 
> > NAK, extern is right.
> 
> Extern is only necessary for variables.

Specifically C17, 6.2.2 p5 (linkage of identifiers):

if the declaration of an identifier for a function has no
storage-class specifier, its linkage is determined exactly as if
it were declared with the storage-class specifier "extern".

This is why nothing happens if "extern" is deleted.


Re: [PATCH 1/5] sched: make struct task_struct::state 32-bit

2021-02-08 Thread Alexey Dobriyan
On Mon, Feb 08, 2021 at 11:39:25AM +0100, Peter Zijlstra wrote:
> On Sat, Feb 06, 2021 at 06:18:32PM +0300, Alexey Dobriyan wrote:
> > 32-bit accesses are shorter than 64-bit accesses on x86_64.
> > Nothing uses 64-bitness of struct task_struct::state.
> > 
> > Propagate 32-bitness to other variables and functions.
> 
> You're saving a handful of bytes, why?

Eeh? To save handful of bytes.

> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -655,7 +655,7 @@ struct task_struct {
> > struct thread_info  thread_info;
> >  #endif
> > /* -1 unrunnable, 0 runnable, >0 stopped: */
> > -   volatile long   state;
> > +   volatile intstate;
> 
> A much larger, but probably more useful cleanup would be to get rid of
> that volatile.

volatile is separate patch. It is independent of ->state type.
I didn't think about this specific volatile at all.


Re: [PATCH 1/5] sched: make struct task_struct::state 32-bit

2021-02-08 Thread Alexey Dobriyan
On Mon, Feb 08, 2021 at 11:34:18AM +0100, Peter Zijlstra wrote:
> On Sat, Feb 06, 2021 at 06:18:32PM +0300, Alexey Dobriyan wrote:
> 
> > Silently delete "extern" from prototypes.
> 
> NAK, extern is right.

Extern is only necessary for variables.


[PATCH 5/5] sched: make multiple runqueue task counters 32-bit

2021-02-06 Thread Alexey Dobriyan
Make

struct dl_rq::dl_nr_migratory
struct dl_rq::dl_nr_running

struct rt_rq::rt_nr_boosted
struct rt_rq::rt_nr_migratory
struct rt_rq::rt_nr_total

struct rq::nr_uninterruptible

32-bit.

If total number of tasks can't exceed 2**32 (and less due to futex pid
limits), then per-runqueue counters can't as well.

This patchset has been sponsored by REX Prefix Eradication Society.

Signed-off-by: Alexey Dobriyan 
---

 kernel/sched/loadavg.c |2 +-
 kernel/sched/sched.h   |   12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -81,7 +81,7 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
long nr_active, delta = 0;
 
nr_active = this_rq->nr_running - adjust;
-   nr_active += (long)this_rq->nr_uninterruptible;
+   nr_active += (int)this_rq->nr_uninterruptible;
 
if (nr_active != this_rq->calc_load_active) {
delta = nr_active - this_rq->calc_load_active;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -622,8 +622,8 @@ struct rt_rq {
} highest_prio;
 #endif
 #ifdef CONFIG_SMP
-   unsigned long   rt_nr_migratory;
-   unsigned long   rt_nr_total;
+   unsigned intrt_nr_migratory;
+   unsigned intrt_nr_total;
int overloaded;
struct plist_head   pushable_tasks;
 
@@ -637,7 +637,7 @@ struct rt_rq {
raw_spinlock_t  rt_runtime_lock;
 
 #ifdef CONFIG_RT_GROUP_SCHED
-   unsigned long   rt_nr_boosted;
+   unsigned intrt_nr_boosted;
 
struct rq   *rq;
struct task_group   *tg;
@@ -654,7 +654,7 @@ struct dl_rq {
/* runqueue is an rbtree, ordered by deadline */
struct rb_root_cached   root;
 
-   unsigned long   dl_nr_running;
+   unsigned intdl_nr_running;
 
 #ifdef CONFIG_SMP
/*
@@ -668,7 +668,7 @@ struct dl_rq {
u64 next;
} earliest_dl;
 
-   unsigned long   dl_nr_migratory;
+   unsigned intdl_nr_migratory;
int overloaded;
 
/*
@@ -946,7 +946,7 @@ struct rq {
 * one CPU and if it got migrated afterwards it may decrease
 * it on another CPU. Always updated under the runqueue lock:
 */
-   unsigned long   nr_uninterruptible;
+   unsigned intnr_uninterruptible;
 
struct task_struct __rcu*curr;
struct task_struct  *idle;


[PATCH 4/5] sched: make nr_iowait_cpu() return 32-bit

2021-02-06 Thread Alexey Dobriyan
Runqueue ->nr_iowait counters are 32-bit anyway.

Propagate 32-bitness into other code, but don't try too hard.

Signed-off-by: Alexey Dobriyan 
---

 drivers/cpuidle/governors/menu.c |6 +++---
 include/linux/sched/stat.h   |2 +-
 kernel/sched/core.c  |2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -117,7 +117,7 @@ struct menu_device {
int interval_ptr;
 };
 
-static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
+static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
 {
int bucket = 0;
 
@@ -150,7 +150,7 @@ static inline int which_bucket(u64 duration_ns, unsigned 
long nr_iowaiters)
  * to be, the higher this multiplier, and thus the higher
  * the barrier to go to an expensive C state.
  */
-static inline int performance_multiplier(unsigned long nr_iowaiters)
+static inline int performance_multiplier(unsigned int nr_iowaiters)
 {
/* for IO wait tasks (per cpu!) we add 10x each */
return 1 + 10 * nr_iowaiters;
@@ -270,7 +270,7 @@ static int menu_select(struct cpuidle_driver *drv, struct 
cpuidle_device *dev,
unsigned int predicted_us;
u64 predicted_ns;
u64 interactivity_req;
-   unsigned long nr_iowaiters;
+   unsigned int nr_iowaiters;
ktime_t delta_next;
int i, idx;
 
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -19,7 +19,7 @@ extern int nr_processes(void);
 unsigned int nr_running(void);
 extern bool single_task_running(void);
 unsigned int nr_iowait(void);
-extern unsigned long nr_iowait_cpu(int cpu);
+unsigned int nr_iowait_cpu(int cpu);
 
 static inline int sched_info_on(void)
 {
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4383,7 +4383,7 @@ unsigned long long nr_context_switches(void)
  * it does become runnable.
  */
 
-unsigned long nr_iowait_cpu(int cpu)
+unsigned int nr_iowait_cpu(int cpu)
 {
return atomic_read(_rq(cpu)->nr_iowait);
 }


[PATCH 3/5] sched: make nr_iowait() return 32-bit value

2021-02-06 Thread Alexey Dobriyan
Creating 2**32 tasks to wait in D-state is impossible and wasteful.

Return "unsigned int" and save on REX prefixes.

Signed-off-by: Alexey Dobriyan 
---

 fs/proc/stat.c |2 +-
 include/linux/sched/stat.h |2 +-
 kernel/sched/core.c|4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -201,7 +201,7 @@ static int show_stat(struct seq_file *p, void *v)
"btime %llu\n"
"processes %lu\n"
"procs_running %u\n"
-   "procs_blocked %lu\n",
+   "procs_blocked %u\n",
nr_context_switches(),
(unsigned long long)boottime.tv_sec,
total_forks,
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 unsigned int nr_running(void);
 extern bool single_task_running(void);
-extern unsigned long nr_iowait(void);
+unsigned int nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 
 static inline int sched_info_on(void)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4418,9 +4418,9 @@ unsigned long nr_iowait_cpu(int cpu)
  * Task CPU affinities can make all that even more 'interesting'.
  */
 
-unsigned long nr_iowait(void)
+unsigned int nr_iowait(void)
 {
-   unsigned long i, sum = 0;
+   unsigned int i, sum = 0;
 
for_each_possible_cpu(i)
sum += nr_iowait_cpu(i);


[PATCH 2/5] sched: make nr_running() return 32-bit

2021-02-06 Thread Alexey Dobriyan
Creating 2**32 tasks to run is impossible due to futex pid limits
and wasteful anyway. Nobody have done it.

Bring nr_running() into 32-bit world to save on REX prefixes.

Signed-off-by: Alexey Dobriyan 
---

 fs/proc/loadavg.c  |2 +-
 fs/proc/stat.c |2 +-
 include/linux/sched/stat.h |2 +-
 kernel/sched/core.c|4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -16,7 +16,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
 
get_avenrun(avnrun, FIXED_1/200, 0);
 
-   seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+   seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %d/%d %d\n",
LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -200,7 +200,7 @@ static int show_stat(struct seq_file *p, void *v)
"\nctxt %llu\n"
"btime %llu\n"
"processes %lu\n"
-   "procs_running %lu\n"
+   "procs_running %u\n"
"procs_blocked %lu\n",
nr_context_switches(),
(unsigned long long)boottime.tv_sec,
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -16,7 +16,7 @@ extern unsigned long total_forks;
 extern int nr_threads;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
-extern unsigned long nr_running(void);
+unsigned int nr_running(void);
 extern bool single_task_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4336,9 +4336,9 @@ context_switch(struct rq *rq, struct task_struct *prev,
  * externally visible scheduler statistics: current number of runnable
  * threads, total number of context switches performed since bootup.
  */
-unsigned long nr_running(void)
+unsigned int nr_running(void)
 {
-   unsigned long i, sum = 0;
+   unsigned int i, sum = 0;
 
for_each_online_cpu(i)
sum += cpu_rq(i)->nr_running;


[PATCH 1/5] sched: make struct task_struct::state 32-bit

2021-02-06 Thread Alexey Dobriyan
32-bit accesses are shorter than 64-bit accesses on x86_64.
Nothing uses 64-bitness of struct task_struct::state.

Propagate 32-bitness to other variables and functions.

Silently delete "extern" from prototypes.

Signed-off-by: Alexey Dobriyan 
---

 block/blk-mq.c   |2 +-
 drivers/md/dm.c  |6 +++---
 fs/userfaultfd.c |4 ++--
 include/linux/sched.h|6 +++---
 include/linux/sched/debug.h  |2 +-
 include/linux/sched/signal.h |2 +-
 kernel/freezer.c |2 +-
 kernel/kthread.c |4 ++--
 kernel/locking/mutex.c   |6 +++---
 kernel/locking/semaphore.c   |2 +-
 kernel/rcu/rcutorture.c  |4 ++--
 kernel/rcu/tree_stall.h  |6 +++---
 kernel/sched/core.c  |   10 +-
 lib/syscall.c|2 +-
 14 files changed, 29 insertions(+), 29 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3841,7 +3841,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
 int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 {
struct blk_mq_hw_ctx *hctx;
-   long state;
+   int state;
 
if (!blk_qc_t_valid(cookie) ||
!test_bit(QUEUE_FLAG_POLL, >queue_flags))
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2253,7 +2253,7 @@ static bool md_in_flight_bios(struct mapped_device *md)
return sum != 0;
 }
 
-static int dm_wait_for_bios_completion(struct mapped_device *md, long 
task_state)
+static int dm_wait_for_bios_completion(struct mapped_device *md, int 
task_state)
 {
int r = 0;
DEFINE_WAIT(wait);
@@ -2276,7 +2276,7 @@ static int dm_wait_for_bios_completion(struct 
mapped_device *md, long task_state
return r;
 }
 
-static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+static int dm_wait_for_completion(struct mapped_device *md, int task_state)
 {
int r = 0;
 
@@ -2403,7 +2403,7 @@ static void unlock_fs(struct mapped_device *md)
  * are being added to md->deferred list.
  */
 static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
-   unsigned suspend_flags, long task_state,
+   unsigned suspend_flags, int task_state,
int dmf_suspended_flag)
 {
bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -339,7 +339,7 @@ static inline bool userfaultfd_must_wait(struct 
userfaultfd_ctx *ctx,
return ret;
 }
 
-static inline long userfaultfd_get_blocking_state(unsigned int flags)
+static inline int userfaultfd_get_blocking_state(unsigned int flags)
 {
if (flags & FAULT_FLAG_INTERRUPTIBLE)
return TASK_INTERRUPTIBLE;
@@ -372,7 +372,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned 
long reason)
struct userfaultfd_wait_queue uwq;
vm_fault_t ret = VM_FAULT_SIGBUS;
bool must_wait;
-   long blocking_state;
+   int blocking_state;
 
/*
 * We don't do userfault handling for the final child pid update.
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -655,7 +655,7 @@ struct task_struct {
struct thread_info  thread_info;
 #endif
/* -1 unrunnable, 0 runnable, >0 stopped: */
-   volatile long   state;
+   volatile intstate;
 
/*
 * This begins the randomizable portion of task_struct. Only
@@ -1806,10 +1806,10 @@ static __always_inline void scheduler_ipi(void)
 */
preempt_fold_need_resched();
 }
-extern unsigned long wait_task_inactive(struct task_struct *, long 
match_state);
+unsigned long wait_task_inactive(struct task_struct *, int match_state);
 #else
 static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, long 
match_state)
+static inline unsigned long wait_task_inactive(struct task_struct *p, int 
match_state)
 {
return 1;
 }
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -14,7 +14,7 @@ extern void dump_cpu_task(int cpu);
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
-extern void show_state_filter(unsigned long state_filter);
+void show_state_filter(unsigned int state_filter);
 
 static inline void show_state(void)
 {
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -381,7 +381,7 @@ static inline int fatal_signal_pending(struct task_struct 
*p)
return task_sigpending(p) && __fatal_signal_pending(p);
 }
 
-static inline int signal_pending_state(long state, struct task_struct *p)
+static inline int signal_pending_state(int state, struct task_struct *p)
 {
if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
return 0;
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -58,7 +58,7 @@ bool __refrigerator(bool check_kthr_stop)
/* Hmm, should we be all

[tip: timers/core] timens: Delete no-op time_ns_init()

2021-02-05 Thread tip-bot2 for Alexey Dobriyan
The following commit has been merged into the timers/core branch of tip:

Commit-ID: 174bcc691f44fdd05046c694fc650933819f72c7
Gitweb:
https://git.kernel.org/tip/174bcc691f44fdd05046c694fc650933819f72c7
Author:Alexey Dobriyan 
AuthorDate:Tue, 29 Dec 2020 00:54:02 +03:00
Committer: Thomas Gleixner 
CommitterDate: Fri, 05 Feb 2021 19:32:09 +01:00

timens: Delete no-op time_ns_init()

Signed-off-by: Alexey Dobriyan 
Signed-off-by: Thomas Gleixner 
Acked-by: Andrei Vagin 
Link: https://lore.kernel.org/r/20201228215402.GA572900@localhost.localdomain
---
 kernel/time/namespace.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 6ca625f..12eab0d 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -465,9 +465,3 @@ struct time_namespace init_time_ns = {
.ns.ops = _operations,
.frozen_offsets = true,
 };
-
-static int __init time_ns_init(void)
-{
-   return 0;
-}
-subsys_initcall(time_ns_init);


[tip: x86/cleanups] x86/asm: Fixup TASK_SIZE_MAX comment

2021-02-05 Thread tip-bot2 for Alexey Dobriyan
The following commit has been merged into the x86/cleanups branch of tip:

Commit-ID: 4f63b320afdd9af406f4426b0ff1a2cdb23e5b8d
Gitweb:
https://git.kernel.org/tip/4f63b320afdd9af406f4426b0ff1a2cdb23e5b8d
Author:Alexey Dobriyan 
AuthorDate:Thu, 05 Mar 2020 21:17:19 +03:00
Committer: Borislav Petkov 
CommitterDate: Fri, 05 Feb 2021 10:37:39 +01:00

x86/asm: Fixup TASK_SIZE_MAX comment

Comment says "by preventing anything executable" which is not true. Even
PROT_NONE mapping can't be installed at (1<<47 - 4096).

  mmap(0x7000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, 
-1, 0) = -1 ENOMEM

 [ bp: Fixup to the moved location in page_64_types.h. ]

Signed-off-by: Alexey Dobriyan 
Signed-off-by: Borislav Petkov 
Reviewed-by: Andy Lutomirski 
Link: https://lkml.kernel.org/r/20200305181719.GA5490@avx2
---
 arch/x86/include/asm/page_64_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page_64_types.h 
b/arch/x86/include/asm/page_64_types.h
index 645bd1d..64297ea 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -66,7 +66,7 @@
  * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
  * address, then that syscall will enter the kernel with a
  * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
+ * We avoid this particular problem by preventing anything
  * from being mapped at the maximum canonical address.
  *
  * On AMD CPUs in the Ryzen family, there's a nasty bug in which the


Re: linux-next: build failure after merge of the akpm-current tree

2021-01-27 Thread Alexey Dobriyan
On Wed, Jan 27, 2021 at 11:21:18PM +1100, Stephen Rothwell wrote:
> Caused by commit
> 
>   5567a1a4b1c3 ("ramfs: support O_TMPFILE")

Can this be merged or sent to Al, please? It's ancient patch.


Re: [PATCH] fs/proc: Expose RSEQ configuration

2021-01-13 Thread Alexey Dobriyan
On Wed, Jan 13, 2021 at 06:41:27PM +0100, Piotr Figiel wrote:
> +static int proc_pid_rseq(struct seq_file *m, struct pid_namespace *ns,
> + struct pid *pid, struct task_struct *task)
> +{
> + int res = lock_trace(task);
> +
> + if (res)
> + return res;
> + seq_printf(m, "0x%llx 0x%x\n", (uint64_t)task->rseq, task->rseq_sig);

may I suggest

"%tx", (uintptr_t)  // or %lx

Mandatory 64-bit is too much on 32-bit.

Or even "%tx %08x" ?


Re: [PATCH] fs/proc: Expose RSEQ configuration

2021-01-13 Thread Alexey Dobriyan
On Wed, Jan 13, 2021 at 06:41:27PM +0100, Piotr Figiel wrote:
> For userspace checkpoint and restore (C/R) some way of getting process
> state containing RSEQ configuration is needed.

> + seq_printf(m, "0x%llx 0x%x\n", (uint64_t)task->rseq, task->rseq_sig);

%llx is too much on 32-bit. "%tx %x" is better (or even %08x)


[PATCH] timens: delete no-op time_ns_init()

2020-12-28 Thread Alexey Dobriyan
Signed-off-by: Alexey Dobriyan 
---

 kernel/time/namespace.c |6 --
 1 file changed, 6 deletions(-)

--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -465,9 +465,3 @@ struct time_namespace init_time_ns = {
.ns.ops = _operations,
.frozen_offsets = true,
 };
-
-static int __init time_ns_init(void)
-{
-   return 0;
-}
-subsys_initcall(time_ns_init);


Re: + proc-wchan-use-printk-format-instead-of-lookup_symbol_name-fix.patch added to -mm tree

2020-12-28 Thread Alexey Dobriyan
On Tue, Dec 22, 2020 at 06:18:34PM -0800, a...@linux-foundation.org wrote:
>
>  proc-wchan-use-printk-format-instead-of-lookup_symbol_name-fix.patch

> --- 
> a/fs/proc/base.c~proc-wchan-use-printk-format-instead-of-lookup_symbol_name-fix
> +++ a/fs/proc/base.c
> @@ -384,15 +384,8 @@ static const struct file_operations proc
>  static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
> struct pid *pid, struct task_struct *task)
>  {
> - unsigned long wchan;
> -
>   if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
> - wchan = get_wchan(task);
> - else
> - wchan = 0;
> -
> - if (wchan)
> - seq_printf(m, "%ps", (void *) wchan);
> + seq_printf(m, "%ps", (void *)get_wchan(task));
>   else
>   seq_putc(m, '0');

These patches change '0' to '0x0'.

Also /proc/*/wchan shows '0' for everything which is strange.

I'm not sure if we should care as wchan is obsoleted by /proc/*/stack .


[PATCH] proc: fix lookup in /proc/net subdirectories after setns(2)

2020-12-05 Thread Alexey Dobriyan
commit 1fde6f21d90f8ba5da3cb9c54ca991ed72696c43
proc: fix /proc/net/* after setns(2)

only forced revalidation of regular files under /proc/net/

However, /proc/net/ is unusual in the sense of /proc/net/foo handlers
take netns pointer from parent directory which is old netns.

Steps to reproduce:

(void)open("/proc/net/sctp/snmp", O_RDONLY);
unshare(CLONE_NEWNET);

int fd = open("/proc/net/sctp/snmp", O_RDONLY);
read(fd, , 1);

Read will read wrong data from original netns.

Patch forces lookup on every directory under /proc/net .

Fixes: 1da4d377f943 ("proc: revalidate misc dentries")
Reported-by: "Rantala, Tommi T. (Nokia - FI/Espoo)" 
Signed-off-by: Alexey Dobriyan 
---

 fs/proc/generic.c   |   24 ++--
 fs/proc/internal.h  |7 +++
 fs/proc/proc_net.c  |   16 
 include/linux/proc_fs.h |8 +++-
 4 files changed, 36 insertions(+), 19 deletions(-)

--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -349,6 +349,16 @@ static const struct file_operations proc_dir_operations = {
.iterate_shared = proc_readdir,
 };
 
+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+   return 0;
+}
+
+const struct dentry_operations proc_net_dentry_ops = {
+   .d_revalidate   = proc_net_d_revalidate,
+   .d_delete   = always_delete_dentry,
+};
+
 /*
  * proc directories can do almost nothing..
  */
@@ -471,8 +481,8 @@ struct proc_dir_entry *proc_symlink(const char *name,
 }
 EXPORT_SYMBOL(proc_symlink);
 
-struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
-   struct proc_dir_entry *parent, void *data)
+struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode,
+   struct proc_dir_entry *parent, void *data, bool force_lookup)
 {
struct proc_dir_entry *ent;
 
@@ -484,10 +494,20 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, 
umode_t mode,
ent->data = data;
ent->proc_dir_ops = _dir_operations;
ent->proc_iops = _dir_inode_operations;
+   if (force_lookup) {
+   pde_force_lookup(ent);
+   }
ent = proc_register(parent, ent);
}
return ent;
 }
+EXPORT_SYMBOL_GPL(_proc_mkdir);
+
+struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
+   struct proc_dir_entry *parent, void *data)
+{
+   return _proc_mkdir(name, mode, parent, data, false);
+}
 EXPORT_SYMBOL_GPL(proc_mkdir_data);
 
 struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -310,3 +310,10 @@ extern unsigned long task_statm(struct mm_struct *,
unsigned long *, unsigned long *,
unsigned long *, unsigned long *);
 extern void task_mem(struct seq_file *, struct mm_struct *);
+
+extern const struct dentry_operations proc_net_dentry_ops;
+static inline void pde_force_lookup(struct proc_dir_entry *pde)
+{
+   /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+   pde->proc_dops = _net_dentry_ops;
+}
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -39,22 +39,6 @@ static struct net *get_proc_net(const struct inode *inode)
return maybe_get_net(PDE_NET(PDE(inode)));
 }
 
-static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
-{
-   return 0;
-}
-
-static const struct dentry_operations proc_net_dentry_ops = {
-   .d_revalidate   = proc_net_d_revalidate,
-   .d_delete   = always_delete_dentry,
-};
-
-static void pde_force_lookup(struct proc_dir_entry *pde)
-{
-   /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
-   pde->proc_dops = _net_dentry_ops;
-}
-
 static int seq_open_net(struct inode *inode, struct file *file)
 {
unsigned int state_size = PDE(inode)->state_size;
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -80,6 +80,7 @@ extern void proc_flush_pid(struct pid *);
 
 extern struct proc_dir_entry *proc_symlink(const char *,
struct proc_dir_entry *, const char *);
+struct proc_dir_entry *_proc_mkdir(const char *, umode_t, struct 
proc_dir_entry *, void *, bool);
 extern struct proc_dir_entry *proc_mkdir(const char *, struct proc_dir_entry 
*);
 extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t,
  struct proc_dir_entry *, void *);
@@ -162,6 +163,11 @@ static inline struct proc_dir_entry *proc_symlink(const 
char *name,
 static inline struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent) {return NULL;}
 static inline struct proc_dir_entry *proc_create_mount_point(const char *name) 
{ return NULL; }
+static inline struct proc_dir_entry *_proc_mkd

[PATCH] proc: fix lookup in /proc/net subdirectories after setns(2)

2020-12-05 Thread Alexey Dobriyan
commit 1fde6f21d90f8ba5da3cb9c54ca991ed72696c43
proc: fix /proc/net/* after setns(2)

only forced revalidation of regular files under /proc/net/

However, /proc/net/ is unusual in the sense of /proc/net/foo handlers
take netns pointer from parent directory which is old netns.

Steps to reproduce:

(void)open("/proc/net/sctp/snmp", O_RDONLY);
unshare(CLONE_NEWNET);

int fd = open("/proc/net/sctp/snmp", O_RDONLY);
read(fd, , 1);

Read will read wrong data from original netns.

Patch forces lookup on every directory under /proc/net .

Fixes: 1da4d377f943 ("proc: revalidate misc dentries")
Reported-by: "Rantala, Tommi T. (Nokia - FI/Espoo)" 
Signed-off-by: Alexey Dobriyan 
---

 fs/proc/generic.c   |   24 ++--
 fs/proc/internal.h  |7 +++
 fs/proc/proc_net.c  |   16 
 include/linux/proc_fs.h |8 +++-
 4 files changed, 36 insertions(+), 19 deletions(-)

--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -349,6 +349,16 @@ static const struct file_operations proc_dir_operations = {
.iterate_shared = proc_readdir,
 };
 
+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+   return 0;
+}
+
+const struct dentry_operations proc_net_dentry_ops = {
+   .d_revalidate   = proc_net_d_revalidate,
+   .d_delete   = always_delete_dentry,
+};
+
 /*
  * proc directories can do almost nothing..
  */
@@ -471,8 +481,8 @@ struct proc_dir_entry *proc_symlink(const char *name,
 }
 EXPORT_SYMBOL(proc_symlink);
 
-struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
-   struct proc_dir_entry *parent, void *data)
+struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode,
+   struct proc_dir_entry *parent, void *data, bool force_lookup)
 {
struct proc_dir_entry *ent;
 
@@ -484,10 +494,20 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, 
umode_t mode,
ent->data = data;
ent->proc_dir_ops = _dir_operations;
ent->proc_iops = _dir_inode_operations;
+   if (force_lookup) {
+   pde_force_lookup(ent);
+   }
ent = proc_register(parent, ent);
}
return ent;
 }
+EXPORT_SYMBOL_GPL(_proc_mkdir);
+
+struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
+   struct proc_dir_entry *parent, void *data)
+{
+   return _proc_mkdir(name, mode, parent, data, false);
+}
 EXPORT_SYMBOL_GPL(proc_mkdir_data);
 
 struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -310,3 +310,10 @@ extern unsigned long task_statm(struct mm_struct *,
unsigned long *, unsigned long *,
unsigned long *, unsigned long *);
 extern void task_mem(struct seq_file *, struct mm_struct *);
+
+extern const struct dentry_operations proc_net_dentry_ops;
+static inline void pde_force_lookup(struct proc_dir_entry *pde)
+{
+   /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+   pde->proc_dops = _net_dentry_ops;
+}
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -39,22 +39,6 @@ static struct net *get_proc_net(const struct inode *inode)
return maybe_get_net(PDE_NET(PDE(inode)));
 }
 
-static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
-{
-   return 0;
-}
-
-static const struct dentry_operations proc_net_dentry_ops = {
-   .d_revalidate   = proc_net_d_revalidate,
-   .d_delete   = always_delete_dentry,
-};
-
-static void pde_force_lookup(struct proc_dir_entry *pde)
-{
-   /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
-   pde->proc_dops = _net_dentry_ops;
-}
-
 static int seq_open_net(struct inode *inode, struct file *file)
 {
unsigned int state_size = PDE(inode)->state_size;
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -80,6 +80,7 @@ extern void proc_flush_pid(struct pid *);
 
 extern struct proc_dir_entry *proc_symlink(const char *,
struct proc_dir_entry *, const char *);
+struct proc_dir_entry *_proc_mkdir(const char *, umode_t, struct 
proc_dir_entry *, void *, bool);
 extern struct proc_dir_entry *proc_mkdir(const char *, struct proc_dir_entry 
*);
 extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t,
  struct proc_dir_entry *, void *);
@@ -162,6 +163,11 @@ static inline struct proc_dir_entry *proc_symlink(const 
char *name,
 static inline struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent) {return NULL;}
 static inline struct proc_dir_entry *proc_create_mount_point(const char *name) 
{ return NULL; }
+static inline struct proc_dir_entry *_proc_mkd

Re: [PATCH 05/10] proc: use %u for pid printing and slightly less stack

2020-12-03 Thread Alexey Dobriyan
On Fri, Dec 04, 2020 at 02:31:59AM +0800, Wen Yang wrote:
> From: Alexey Dobriyan 
> 
> [ Upstream commit e3912ac37e07a13c70675cd75020694de4841c74 ]
> 
> PROC_NUMBUF is 13 which is enough for "negative int + \n + \0".
> 
> However PIDs and TGIDs are never negative and newline is not a concern,
> so use just 10 per integer.
> 
> Link: http://lkml.kernel.org/r/20171120203005.GA27743@avx2
> Signed-off-by: Alexey Dobriyan 
> Cc: Alexander Viro 
> Signed-off-by: Andrew Morton 
> Signed-off-by: Linus Torvalds 
> Cc:  # 4.9.x

A what? How does this belong to stable?


[PATCH] mm: cleanup kstrto*() usage

2020-11-22 Thread Alexey Dobriyan
Range checks can folded into proper conversion function.
kstrto*() exist for all arithmetic types.

Signed-off-by: Alexey Dobriyan 
---

 mm/khugepaged.c |   18 +-
 mm/ksm.c|   18 +-
 2 files changed, 18 insertions(+), 18 deletions(-)

--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -131,11 +131,11 @@ static ssize_t scan_sleep_millisecs_store(struct kobject 
*kobj,
  struct kobj_attribute *attr,
  const char *buf, size_t count)
 {
-   unsigned long msecs;
+   unsigned int msecs;
int err;
 
-   err = kstrtoul(buf, 10, );
-   if (err || msecs > UINT_MAX)
+   err = kstrtouint(buf, 10, );
+   if (err)
return -EINVAL;
 
khugepaged_scan_sleep_millisecs = msecs;
@@ -159,11 +159,11 @@ static ssize_t alloc_sleep_millisecs_store(struct kobject 
*kobj,
   struct kobj_attribute *attr,
   const char *buf, size_t count)
 {
-   unsigned long msecs;
+   unsigned int msecs;
int err;
 
-   err = kstrtoul(buf, 10, );
-   if (err || msecs > UINT_MAX)
+   err = kstrtouint(buf, 10, );
+   if (err)
return -EINVAL;
 
khugepaged_alloc_sleep_millisecs = msecs;
@@ -186,11 +186,11 @@ static ssize_t pages_to_scan_store(struct kobject *kobj,
   struct kobj_attribute *attr,
   const char *buf, size_t count)
 {
+   unsigned int pages;
int err;
-   unsigned long pages;
 
-   err = kstrtoul(buf, 10, );
-   if (err || !pages || pages > UINT_MAX)
+   err = kstrtouint(buf, 10, );
+   if (err || !pages)
return -EINVAL;
 
khugepaged_pages_to_scan = pages;
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2840,11 +2840,11 @@ static ssize_t sleep_millisecs_store(struct kobject 
*kobj,
 struct kobj_attribute *attr,
 const char *buf, size_t count)
 {
-   unsigned long msecs;
+   unsigned int msecs;
int err;
 
-   err = kstrtoul(buf, 10, );
-   if (err || msecs > UINT_MAX)
+   err = kstrtouint(buf, 10, );
+   if (err)
return -EINVAL;
 
ksm_thread_sleep_millisecs = msecs;
@@ -2864,11 +2864,11 @@ static ssize_t pages_to_scan_store(struct kobject *kobj,
   struct kobj_attribute *attr,
   const char *buf, size_t count)
 {
+   unsigned int nr_pages;
int err;
-   unsigned long nr_pages;
 
-   err = kstrtoul(buf, 10, _pages);
-   if (err || nr_pages > UINT_MAX)
+   err = kstrtouint(buf, 10, _pages);
+   if (err)
return -EINVAL;
 
ksm_thread_pages_to_scan = nr_pages;
@@ -2886,11 +2886,11 @@ static ssize_t run_show(struct kobject *kobj, struct 
kobj_attribute *attr,
 static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 const char *buf, size_t count)
 {
+   unsigned int flags;
int err;
-   unsigned long flags;
 
-   err = kstrtoul(buf, 10, );
-   if (err || flags > UINT_MAX)
+   err = kstrtouint(buf, 10, );
+   if (err)
return -EINVAL;
if (flags > KSM_RUN_UNMERGE)
return -EINVAL;


[PATCH] lib: cleanup kstrto*() usage

2020-11-22 Thread Alexey Dobriyan
Use proper conversion functions.
kstrto*() variants exist for all standard types.

Signed-off-by: Alexey Dobriyan 
---

 lib/test_firmware.c |9 +++--
 lib/test_kmod.c |   26 ++
 2 files changed, 13 insertions(+), 22 deletions(-)

--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -364,18 +364,15 @@ static ssize_t test_dev_config_show_int(char *buf, int 
val)
 
 static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
 {
+   u8 val;
int ret;
-   long new;
 
-   ret = kstrtol(buf, 10, );
+   ret = kstrtou8(buf, 10, );
if (ret)
return ret;
 
-   if (new > U8_MAX)
-   return -EINVAL;
-
mutex_lock(_fw_mutex);
-   *(u8 *)cfg = new;
+   *(u8 *)cfg = val;
mutex_unlock(_fw_mutex);
 
/* Always return full write size even if we didn't consume all */
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -877,20 +877,17 @@ static int test_dev_config_update_uint_sync(struct 
kmod_test_device *test_dev,
int (*test_sync)(struct 
kmod_test_device *test_dev))
 {
int ret;
-   unsigned long new;
+   unsigned int val;
unsigned int old_val;
 
-   ret = kstrtoul(buf, 10, );
+   ret = kstrtouint(buf, 10, );
if (ret)
return ret;
 
-   if (new > UINT_MAX)
-   return -EINVAL;
-
mutex_lock(_dev->config_mutex);
 
old_val = *config;
-   *(unsigned int *)config = new;
+   *(unsigned int *)config = val;
 
ret = test_sync(test_dev);
if (ret) {
@@ -914,18 +911,18 @@ static int test_dev_config_update_uint_range(struct 
kmod_test_device *test_dev,
 unsigned int min,
 unsigned int max)
 {
+   unsigned int val;
int ret;
-   unsigned long new;
 
-   ret = kstrtoul(buf, 10, );
+   ret = kstrtouint(buf, 10, );
if (ret)
return ret;
 
-   if (new < min || new > max)
+   if (val < min || val > max)
return -EINVAL;
 
mutex_lock(_dev->config_mutex);
-   *config = new;
+   *config = val;
mutex_unlock(_dev->config_mutex);
 
/* Always return full write size even if we didn't consume all */
@@ -936,18 +933,15 @@ static int test_dev_config_update_int(struct 
kmod_test_device *test_dev,
  const char *buf, size_t size,
  int *config)
 {
+   int val;
int ret;
-   long new;
 
-   ret = kstrtol(buf, 10, );
+   ret = kstrtoint(buf, 10, );
if (ret)
return ret;
 
-   if (new < INT_MIN || new > INT_MAX)
-   return -EINVAL;
-
mutex_lock(_dev->config_mutex);
-   *config = new;
+   *config = val;
mutex_unlock(_dev->config_mutex);
/* Always return full write size even if we didn't consume all */
return size;


[PATCH] driver core: cleanup kstrto*() usage

2020-11-22 Thread Alexey Dobriyan
kstrto*() functions can write result directly to target memory
if no additional checks needs to be done.

Signed-off-by: Alexey Dobriyan 
---

 drivers/base/core.c |   12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1701,12 +1701,10 @@ ssize_t device_store_ulong(struct device *dev,
 {
struct dev_ext_attribute *ea = to_ext_attr(attr);
int ret;
-   unsigned long new;
 
-   ret = kstrtoul(buf, 0, );
+   ret = kstrtoul(buf, 0, (unsigned long *)ea->var);
if (ret)
return ret;
-   *(unsigned long *)(ea->var) = new;
/* Always return full write size even if we didn't consume all */
return size;
 }
@@ -1726,16 +1724,12 @@ ssize_t device_store_int(struct device *dev,
 const char *buf, size_t size)
 {
struct dev_ext_attribute *ea = to_ext_attr(attr);
+   int val;
int ret;
-   long new;
 
-   ret = kstrtol(buf, 0, );
+   ret = kstrtoint(buf, 0, (int *)ea->var);
if (ret)
return ret;
-
-   if (new > INT_MAX || new < INT_MIN)
-   return -EINVAL;
-   *(int *)(ea->var) = new;
/* Always return full write size even if we didn't consume all */
return size;
 }


Re: [PATCH] kbuild: Run syncconfig with -s

2020-09-14 Thread Alexey Dobriyan
> BTW., there's another, rather spurious bug I recently triggered in kbuild.
> 
> Occasionally when I Ctrl-C a kernel build on a system with a lot of CPUs, 
> the .o.cmd file gets corrupted:

Those are temporary files, truncated at page boundary.

$ stat -c %s XXX.pata_sil680.mod.o.cmd
12288

I tried to fix this by inserting shell 'trap' directive but it failed
somewhere else.

cmd_and_fixdep = \
$(cmd);  \
scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).cmd;\
rm -f $(depfile)


Re: [RESEND PATCH 2/2] /proc/stat: Simplify iowait and idle calculations when cpu is offline

2020-09-10 Thread Alexey Dobriyan
On Wed, Sep 09, 2020 at 08:41:22AM -0600, Tom Hromatka wrote:
>  static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
>  {
> - u64 idle, idle_usecs = -1ULL;
> + u64 idle, idle_usecs;
>  
> - if (cpu_online(cpu))
> - idle_usecs = get_cpu_idle_time_us(cpu, NULL);
> -
> - if (idle_usecs == -1ULL)
> - /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
> - idle = kcs->cpustat[CPUTIME_IDLE];
> - else
> - idle = idle_usecs * NSEC_PER_USEC;
> + idle_usecs = get_cpu_idle_time_us(cpu, NULL);
> + idle = idle_usecs * NSEC_PER_USEC;
>  
>   return idle;
>  }
>  
>  static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
>  {
> - u64 iowait, iowait_usecs = -1ULL;
> -
> - if (cpu_online(cpu))
> - iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
> + u64 iowait, iowait_usecs;
>  
> - if (iowait_usecs == -1ULL)
> - /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
> - iowait = kcs->cpustat[CPUTIME_IOWAIT];
> - else
> - iowait = iowait_usecs * NSEC_PER_USEC;
> + iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
> + iowait = iowait_usecs * NSEC_PER_USEC;

You can gc variables in both cases:

return get_cpu_iowait_time_us() * NSEC_PER_USEC;


Re: remove the last set_fs() in common code, and remove it for x86 and powerpc v3

2020-09-04 Thread Alexey Dobriyan
On Fri, Sep 04, 2020 at 08:00:24AM +0200, Ingo Molnar wrote:
> * Christoph Hellwig  wrote:
> > this series removes the last set_fs() used to force a kernel address
> > space for the uaccess code in the kernel read/write/splice code, and then
> > stops implementing the address space overrides entirely for x86 and
> > powerpc.
> 
> Cool! For the x86 bits:
> 
>   Acked-by: Ingo Molnar 

set_fs() is older than some kernel hackers!

$ cd linux-0.11/
$ find . -type f -name '*.h' | xargs grep -e set_fs -w -n -A3
./include/asm/segment.h:61:extern inline void set_fs(unsigned long val)
./include/asm/segment.h-62-{
./include/asm/segment.h-63- __asm__("mov %0,%%fs"::"a" ((unsigned 
short) val));
./include/asm/segment.h-64-}


make defconfig (Re: + x86-defconfigs-explicitly-unset-config_64bit-in-i386_defconfig.patch added to -mm tree)

2020-08-21 Thread Alexey Dobriyan
On Thu, Aug 20, 2020 at 02:29:40PM -0700, a...@linux-foundation.org wrote:
> Subject: x86/defconfigs: Explicitly unset CONFIG_64BIT in i386_defconfig
> 
> A recent refresh of the defconfigs got rid of the following (unset)
> config:
> 
>   # CONFIG_64BIT is not set
> 
> Innocuous as it seems, when the config file is saved again the
> behavior is changed so that CONFIG_64BIT=y.
> 
> Currently,
> 
>   $ make i386_defconfig
>   $ grep CONFIG_64BIT .config
>   CONFIG_64BIT=y
> 
> whereas previously (and with this patch):
> 
>   $ make i386_defconfig
>   $ grep CONFIG_64BIT .config
>   # CONFIG_64BIT is not set

It is highly, highly, highly advisable to always pass ARCH when dealing
with 32/64-bit archs:

+---+
|   make ARCH=x86_64 defconfig  |
|   make ARCH=i386 defconfig|
+---+

The reason is that long ago ARCH was deduced from bitness of the system
make was run on, so that

make allnoconfig

gave 32-bit config on 32-but system and 64-bit on 64-bit system which is
natural thing to do.

During i386/x86_64 merge CONFIG_64BIT became user visible option!
" make allnoconfig" started giving 32-bit config even on x86_64 and 64-bit
defconfig and allmodconfig which it does to this day.

Always passing ARCH is the only way to maintain sanity. I have shell
alias to always pass ARCH=x86_64 so that bitness is both deterministic
and can be overridden.


Re: [PATCH 1/2] mm: add GFP mask param to strndup_user

2020-08-16 Thread Alexey Dobriyan
On Sat, Aug 15, 2020 at 11:42:10PM +0100, Al Viro wrote:
> On Sat, Aug 15, 2020 at 11:23:43AM -0700, Pascal Bouchareine wrote:
> > Let caller specify allocation.
> > Preserve existing calls with GFP_USER.
> 
> Bloody bad idea, unless you slap a BUG_ON(flags & GFP_ATOMIC) on it,
> to make sure nobody tries _that_.  Note that copying from userland
> is an inherently blocking operation, and this interface invites
> just that.
> 
> What do you need that flag for, anyway?

You need it for kmem accounting.


Re: [PATCH 00/23] proc: Introduce /proc/namespaces/ directory to expose namespaces lineary

2020-08-03 Thread Alexey Dobriyan
On Mon, Aug 03, 2020 at 01:03:17PM +0300, Kirill Tkhai wrote:
> On 31.07.2020 01:13, Eric W. Biederman wrote:
> > Kirill Tkhai  writes:
> > 
> >> On 30.07.2020 17:34, Eric W. Biederman wrote:
> >>> Kirill Tkhai  writes:
> >>>
>  Currently, there is no a way to list or iterate all or subset of 
>  namespaces
>  in the system. Some namespaces are exposed in /proc/[pid]/ns/ 
>  directories,
>  but some also may be as open files, which are not attached to a process.
>  When a namespace open fd is sent over unix socket and then closed, it is
>  impossible to know whether the namespace exists or not.
> 
>  Also, even if namespace is exposed as attached to a process or as open 
>  file,
>  iteration over /proc/*/ns/* or /proc/*/fd/* namespaces is not fast, 
>  because
>  this multiplies at tasks and fds number.
> >>>
> >>> I am very dubious about this.
> >>>
> >>> I have been avoiding exactly this kind of interface because it can
> >>> create rather fundamental problems with checkpoint restart.
> >>
> >> restart/restore :)
> >>
> >>> You do have some filtering and the filtering is not based on current.
> >>> Which is good.
> >>>
> >>> A view that is relative to a user namespace might be ok.It almost
> >>> certainly does better as it's own little filesystem than as an extension
> >>> to proc though.
> >>>
> >>> The big thing we want to ensure is that if you migrate you can restore
> >>> everything.  I don't see how you will be able to restore these files
> >>> after migration.  Anything like this without having a complete
> >>> checkpoint/restore story is a non-starter.
> >>
> >> There is no difference between files in /proc/namespaces/ directory and 
> >> /proc/[pid]/ns/.
> >>
> >> CRIU can restore open files in /proc/[pid]/ns, the same will be with 
> >> /proc/namespaces/ files.
> >> As a person who worked deeply for pid_ns and user_ns support in CRIU, I 
> >> don't see any
> >> problem here.
> > 
> > An obvious diffference is that you are adding the inode to the inode to
> > the file name.  Which means that now you really do have to preserve the
> > inode numbers during process migration.
> >
> > Which means now we have to do all of the work to make inode number
> > restoration possible.  Which means now we need to have multiple
> > instances of nsfs so that we can restore inode numbers.
> > 
> > I think this is still possible but we have been delaying figuring out
> > how to restore inode numbers long enough that may be actual technical
> > problems making it happen.
> 
> Yeah, this matters. But it looks like here is not a dead end. We just need
> change the names the namespaces are exported to particular fs and to support
> rename().
> 
> Before introduction a principally new filesystem type for this, can't
> this be solved in current /proc?
> 
> Alexey, does rename() is prohibited for /proc fs?

Techically it is allowed: add ->rename to /proc/ns inode.
But nobody does it.


Re: [PATCH 11/23] fs: Add /proc/namespaces/ directory

2020-07-30 Thread Alexey Dobriyan
On Thu, Jul 30, 2020 at 03:00:19PM +0300, Kirill Tkhai wrote:

> # ls /proc/namespaces/ -l
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 
> 'cgroup:[4026531835]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 
> 'time:[4026531834]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 
> 'user:[4026531837]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]'

I'd say make it '%s-%llu'. The brackets don't carry any information.
And ':' forces quoting with recent coreutils.

> +static int parse_namespace_dentry_name(const struct dentry *dentry,
> + const char **type, unsigned int *type_len, unsigned int *inum)
> +{
> + const char *p, *name;
> + int count;
> +
> + *type = name = dentry->d_name.name;
> + p = strchr(name, ':');
> + *type_len = p - name;
> + if (!p || p == name)
> + return -ENOENT;
> +
> + p += 1;
> + if (sscanf(p, "[%u]%n", inum, ) != 1 || *(p + count) != '\0' ||
> + *inum < PROC_NS_MIN_INO)
> + return -ENOENT;

sscanf is banned from lookup code due to lax whitespace rules.
See

commit ac7f1061c2c11bb8936b1b6a94cdb48de732f7a4
proc: fix /proc/*/map_files lookup

Of course someone sneaked in 1 instance, yikes.

$ grep -e scanf -n -r fs/proc/
fs/proc/base.c:1596:err = sscanf(pos, "%9s %lld %lu", clock,

> +static int proc_namespaces_readdir(struct file *file, struct dir_context 
> *ctx)

> + len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, 
> inum);

[] -- no need.


make oldconfig (Re: mmotm 2020-07-27-18-18 uploaded (mm/page_alloc.c))

2020-07-29 Thread Alexey Dobriyan
On Tue, Jul 28, 2020 at 06:44:19PM -0700, Andrew Morton wrote:
> On Tue, 28 Jul 2020 15:39:21 -0700 Randy Dunlap  wrote:
> 
> > On 7/28/20 2:55 PM, Andrew Morton wrote:
> > > On Tue, 28 Jul 2020 05:33:58 -0700 Randy Dunlap  
> > > wrote:
> > > 
> > >> On 7/27/20 6:19 PM, Andrew Morton wrote:
> > >>> The mm-of-the-moment snapshot 2020-07-27-18-18 has been uploaded to
> > >>>
> > >>>http://www.ozlabs.org/~akpm/mmotm/
> > 
> > 
> > >> on x86_64:
> > >>
> > >> ../mm/page_alloc.c:8355:48: warning: ‘struct compact_control’ declared 
> > >> inside parameter list will not be visible outside of this definition or 
> > >> declaration
> > >>  static int __alloc_contig_migrate_range(struct compact_control *cc,
> > >> ^~~
> > > 
> > > As is usually the case with your reports, I can't figure out how to
> > > reproduce it.  I copy then .config, run `make oldconfig' (need to hit
> > > enter a zillion times because the .config is whacky)

If it helps with Enter:

yes '' | make oldconfig

Works 99.99% of the time except when there is numeric/string option
without default value.


Re: [PATCH v3] proc,fcntl: introduce F_SET_DESCRIPTION

2020-07-27 Thread Alexey Dobriyan
On Fri, Jul 24, 2020 at 10:22:36PM -0700, Pascal Bouchareine wrote:
> This command attaches a description to a file descriptor for
> troubleshooting purposes. The free string is displayed in the
> process fdinfo file for that fd /proc/pid/fdinfo/fd.
> 
> One intended usage is to allow processes to self-document sockets
> for netstat and friends to report

> +static long fcntl_set_description(struct file *file, char __user *desc)
> +{
> + char *d;
> +
> + d = strndup_user(desc, MAX_FILE_DESC_SIZE);

This should be kmem accounted because allocation is persistent.
To make things more entertaining, strndup_user() doesn't have gfp_t argument.

> + if (IS_ERR(d))
> + return PTR_ERR(d);
> +
> + spin_lock(>f_lock);
> + kfree(file->f_description);
> + file->f_description = d;
> + spin_unlock(>f_lock);

Generally kfree under spinlock is not good idea.
You can replace the pointer and free without spinlock.

> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -980,6 +980,9 @@ struct file {
>   struct address_space*f_mapping;
>   errseq_tf_wb_err;
>   errseq_tf_sb_err; /* for syncfs */
> +
> +#define MAX_FILE_DESC_SIZE 256
> + char*f_description;

struct file is nicely aligned to 256 bytes on distro configs.
Will this break everything?

$ cat /sys/kernel/slab/filp/object_size


[tip: locking/core] rwsem: fix commas in initialisation

2020-07-17 Thread tip-bot2 for Alexey Dobriyan
The following commit has been merged into the locking/core branch of tip:

Commit-ID: a9232dc5607dbada801f2fe83ea307cda762969a
Gitweb:
https://git.kernel.org/tip/a9232dc5607dbada801f2fe83ea307cda762969a
Author:Alexey Dobriyan 
AuthorDate:Sat, 11 Jul 2020 17:59:54 +03:00
Committer: Peter Zijlstra 
CommitterDate: Thu, 16 Jul 2020 23:19:51 +02:00

rwsem: fix commas in initialisation

Leading comma prevents arbitrary reordering of initialisation clauses.
The whole point of C99 initialisation is to allow any such reordering.

Signed-off-by: Alexey Dobriyan 
Signed-off-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20200711145954.GA1178171@localhost.localdomain
---
 include/linux/rwsem.h | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 7e5b2a4..25e3fde 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 }
 
 #define RWSEM_UNLOCKED_VALUE   0L
-#define __RWSEM_INIT_COUNT(name)   .count = 
ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
+#define __RWSEM_COUNT_INIT(name)   .count = 
ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
 
 /* Common initializer macros and functions */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define __RWSEM_DEP_MAP_INIT(lockname)\
-   , .dep_map = {  \
+   .dep_map = {\
.name = #lockname,  \
.wait_type_inner = LD_WAIT_SLEEP,   \
-   }
+   },
 #else
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
 #ifdef CONFIG_DEBUG_RWSEMS
-# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = 
+# define __RWSEM_DEBUG_INIT(lockname) .magic = ,
 #else
-# define __DEBUG_RWSEM_INITIALIZER(lockname)
+# define __RWSEM_DEBUG_INIT(lockname)
 #endif
 
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
+#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED,
 #else
 #define __RWSEM_OPT_INIT(lockname)
 #endif
 
 #define __RWSEM_INITIALIZER(name)  \
-   { __RWSEM_INIT_COUNT(name), \
+   { __RWSEM_COUNT_INIT(name), \
  .owner = ATOMIC_LONG_INIT(0), \
- .wait_list = LIST_HEAD_INIT((name).wait_list),\
- .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
  __RWSEM_OPT_INIT(name)\
- __DEBUG_RWSEM_INITIALIZER(name)   \
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
+ .wait_list = LIST_HEAD_INIT((name).wait_list),\
+ __RWSEM_DEBUG_INIT(name)  \
  __RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \


Re: [PATCH] rwsem: fix commas in initialisation

2020-07-13 Thread Alexey Dobriyan
On Mon, Jul 13, 2020 at 01:51:41PM +0200, Peter Zijlstra wrote:
> On Sat, Jul 11, 2020 at 05:59:54PM +0300, Alexey Dobriyan wrote:
> > Leading comma prevents arbitrary reordering of initialisation clauses.
> > The whole point of C99 initialisation is to allow any such reordering.
> 
> I'm conflicted on this argument, the only reason I'd be inclined to take
> this patch is that it allows fixing the initialization order to not be
> random.

Yes, this is how the issue was noticed.

> That is, I'd fold in the below.
> 
> --- a/include/linux/rwsem.h
> +++ b/include/linux/rwsem.h
> @@ -89,9 +89,9 @@ static inline int rwsem_is_locked(struct
>  #define __RWSEM_INITIALIZER(name)\
>   { __RWSEM_INIT_COUNT(name), \
> .owner = ATOMIC_LONG_INIT(0), \
> -   .wait_list = LIST_HEAD_INIT((name).wait_list),\
> -   .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
> __RWSEM_OPT_INIT(name)\
> +   .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
> +   .wait_list = LIST_HEAD_INIT((name).wait_list),\

One less chunk to compile with g++, a billion to go :^)


[PATCH] rwsem: fix commas in initialisation

2020-07-11 Thread Alexey Dobriyan
Leading comma prevents arbitrary reordering of initialisation clauses.
The whole point of C99 initialisation is to allow any such reordering.

Signed-off-by: Alexey Dobriyan 
---

 include/linux/rwsem.h |   10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -66,22 +66,22 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define __RWSEM_DEP_MAP_INIT(lockname)\
-   , .dep_map = {  \
+   .dep_map = {\
.name = #lockname,  \
.wait_type_inner = LD_WAIT_SLEEP,   \
-   }
+   },
 #else
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
 #ifdef CONFIG_DEBUG_RWSEMS
-# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = 
+# define __DEBUG_RWSEM_INITIALIZER(lockname) .magic = ,
 #else
 # define __DEBUG_RWSEM_INITIALIZER(lockname)
 #endif
 
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
+#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED,
 #else
 #define __RWSEM_OPT_INIT(lockname)
 #endif
@@ -90,7 +90,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
{ __RWSEM_INIT_COUNT(name), \
  .owner = ATOMIC_LONG_INIT(0), \
  .wait_list = LIST_HEAD_INIT((name).wait_list),\
- .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\
  __RWSEM_OPT_INIT(name)\
  __DEBUG_RWSEM_INITIALIZER(name)   \
  __RWSEM_DEP_MAP_INIT(name) }


[PATCH] ipc: uninline functions

2020-07-10 Thread Alexey Dobriyan
Two functions are only called via function pointers, don't bother
inlining them.

Signed-off-by: Alexey Dobriyan 
---

 ipc/sem.c |3 +--
 ipc/shm.c |3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -585,8 +585,7 @@ static int newary(struct ipc_namespace *ns, struct 
ipc_params *params)
 /*
  * Called with sem_ids.rwsem and ipcp locked.
  */
-static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
-   struct ipc_params *params)
+static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params 
*params)
 {
struct sem_array *sma;
 
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -711,8 +711,7 @@ static int newseg(struct ipc_namespace *ns, struct 
ipc_params *params)
 /*
  * Called with shm_ids.rwsem and ipcp locked.
  */
-static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
-   struct ipc_params *params)
+static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params 
*params)
 {
struct shmid_kernel *shp;
 


Re: [PATCH 1/3] readfile: implement readfile syscall

2020-07-04 Thread Alexey Dobriyan
Al wrote:

> > On Sat, Jul 04, 2020 at 09:41:09PM +0200, Miklos Szeredi wrote:
> >  1) just leave the first explanation (it's an open + read + close
> > equivalent) and leave out the rest
> >
> >  2) add a loop around the vfs_read() in the code.
> 
> 3) don't bother with the entire thing, until somebody manages to demonstrate
> a setup where it does make a real difference (compared to than the obvious
> sequence of syscalls, that is).

Ehh? System call overead is trivially measurable.
https://lwn.net/Articles/814175/

> At which point we'll need to figure out
> what's going on and deal with the underlying problem of that setup.

Run top?

Teach userspace to read 1 page minimum?

192803 read(4, "cpu  3718263 4417 342808 7127674"..., 1024) = 1024
192803 read(4, " 0 21217 21617 21954 10201 15425"..., 1024) = 1024
192803 read(4, " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"..., 1024) = 1024
192803 read(4, "", 1024)

Teach userspace to pread?

192803 openat(AT_FDCWD, "/proc/uptime", O_RDONLY) = 5
192803 lseek(5, 0, SEEK_SET)= 0
192803 read(5, "47198.56 713699.82\n", 8191) = 19

Rhetorical question: what is harder: ditch the main source of overhead
(VFS, seq_file, text) or teach userspace how to read files?

Here is open+read /proc/cpuinfo in python2 and python3.
Python2 case is terrifying.

BTW is there is something broken with seqfiles and record keeping?
Why does it return only 2 records per read?

Python 3:

openat(AT_FDCWD, "/proc/cpuinfo", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
ioctl(3, TCGETS, 0x7ffe6f1f0850) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)= 0
ioctl(3, TCGETS, 0x7ffe6f1f0710) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)= 0
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "processor\t: 0\nvendor_id\t: Genuin"..., 8192) = 3038
read(3, "processor\t: 2\nvendor_id\t: Genuin"..., 5154) = 3038
read(3, "processor\t: 4\nvendor_id\t: Genuin"..., 2116) = 2116
read(3, "clmulqdq dtes64 monitor ds_cpl v"..., 8448) = 3966
read(3, "processor\t: 8\nvendor_id\t: Genuin"..., 4482) = 3038
read(3, "processor\t: 10\nvendor_id\t: Genui"..., 1444) = 1444
read(3, "t\t: 64\naddress sizes\t: 46 bits p"..., 16896) = 3116
read(3, "processor\t: 13\nvendor_id\t: Genui"..., 13780) = 3044
read(3, "processor\t: 15\nvendor_id\t: Genui"..., 10736) = 1522
read(3, "", 9214)= 0


Python 2

openat(AT_FDCWD, "/proc/cpuinfo", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 0
lseek(3, 0, SEEK_CUR)= 0
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "processor\t: 0\nvendor_id\t: Genuin"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)= 1024
lseek(3, 0, SEEK_CUR)= 1024
fstat(3, 

Re: [PATCH] fs/proc: add short desc for /proc/softirqs

2020-07-01 Thread Alexey Dobriyan
On Wed, Jul 01, 2020 at 10:35:03PM +0800, zhenwei pi wrote:
> Only softirq name is not friendly to end-users, typically 'HI' is
> difficult to understand. During developing irqtop/lsirq utilities
> for util-linux, Karel Zak considered that we should give more
> information to end-users. Discuss about this:
> https://github.com/karelzak/util-linux/pull/1079
> 
> Add short desc for /proc/softirqs in this patch, then /proc/softirqs
> gets more human-readable.

> --- a/fs/proc/softirqs.c
> +++ b/fs/proc/softirqs.c
> @@ -20,7 +20,7 @@ static int show_softirqs(struct seq_file *p, void *v)
>   seq_printf(p, "%12s:", softirq_to_name[i]);
>   for_each_possible_cpu(j)
>   seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
> - seq_putc(p, '\n');
> + seq_printf(p, "  %s\n", softirq_to_desc[i]);

This could break parsers. I'd rather leave it as is and update proc(5).

Of course this file doesn't need more characters in it. :-\


Re: process '/usr/bin/rsync' started with executable stack

2020-06-23 Thread Alexey Dobriyan
On Tue, Jun 23, 2020 at 02:33:50PM -0700, Christian Kujau wrote:
> On Tue, 23 Jun 2020, Kees Cook wrote:
> > > $ checksec --format=json --extended --file=`which rsync` | jq
> > > {
> > >   "/usr/bin/rsync": {
> > > "relro": "full",
> > > "canary": "yes",
> > > "nx": "no",
> > ^^
> > 
> > It is, indeed, marked executable, it seems. What distro is this?
> 
> Arch Linux (x86-64) with 5.6.5.a-1-hardened[0], running in a Xen DomU.

BTW this bug was exactly the one described in the changelog:
compiling assembly brings executable stack by default:

$ git-show 73faaab26d7db19ae6e04396a6e9d6372ed8e4ad
commit 73faaab26d7db19ae6e04396a6e9d6372ed8e4ad

Pass --noexecstack to assembler.

--- a/Makefile.in
+++ b/Makefile.in
@@ -135,7 +135,7 @@ simd-checksum-x86_64.o: simd-checksum-x86_64.cpp
$(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ 
$(srcdir)/simd-checksum-x86_64.cpp

 lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S config.h lib/md-defines.h
-   $(CC) -I. -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S
+   $(CC) -I. -Wa,--noexecstack -c -o $@ 
$(srcdir)/lib/md5-asm-x86_64.S


Re: process '/usr/bin/rsync' started with executable stack

2020-06-23 Thread Alexey Dobriyan
On Tue, Jun 23, 2020 at 02:33:50PM -0700, Christian Kujau wrote:
> On Tue, 23 Jun 2020, Kees Cook wrote:
> > > $ checksec --format=json --extended --file=`which rsync` | jq
> > > {
> > >   "/usr/bin/rsync": {
> > > "relro": "full",
> > > "canary": "yes",
> > > "nx": "no",
> > ^^
> > 
> > It is, indeed, marked executable, it seems. What distro is this?
> 
> Arch Linux (x86-64) with 5.6.5.a-1-hardened[0], running in a Xen DomU.
> 
> Christian.
> 
> [0] 
> https://git.archlinux.org/svntogit/packages.git/tree/trunk?h=packages/linux-hardened

Fixed in testing (rsync-3.2.1-1-x86_64.pkg)

  GNU_STACK  0x 0x 0x
 0x 0x  RW 0x10


Re: process '/usr/bin/rsync' started with executable stack

2020-06-23 Thread Alexey Dobriyan
On Tue, Jun 23, 2020 at 02:33:50PM -0700, Christian Kujau wrote:
> On Tue, 23 Jun 2020, Kees Cook wrote:
> > > $ checksec --format=json --extended --file=`which rsync` | jq
> > > {
> > >   "/usr/bin/rsync": {
> > > "relro": "full",
> > > "canary": "yes",
> > > "nx": "no",
> > ^^
> > 
> > It is, indeed, marked executable, it seems. What distro is this?
> 
> Arch Linux (x86-64) with 5.6.5.a-1-hardened[0], running in a Xen DomU.

Congratulations!

https://www.archlinux.org/packages/extra/x86_64/rsync/

  GNU_STACK  0x 0x 0x
 0x 0x  RWE0x10


Re: process '/usr/bin/rsync' started with executable stack

2020-06-23 Thread Alexey Dobriyan
On Wed, Jun 24, 2020 at 12:12:18AM +0300, Alexey Dobriyan wrote:
> On Tue, Jun 23, 2020 at 10:39:26AM -0700, Christian Kujau wrote:
> > Hi,
> > 
> > exactly this[0] happened today, on a 5.6.5 kernel:
> > 
> >   process '/usr/bin/rsync' started with executable stack
> > 
> > But I can't reproduce this message,

This message is once-per-reboot.

If you run something with exec stack after the message
you shouldn't get it second time.

If you rebooted and rsync starts clean, then it is separate story.

> > and rsync (v3.2.0, not exactly 
> > abandonware) runs several times a day, so to repeat Andrew's questions[0] 
> > from last year:
> > 
> >   > What are poor users supposed to do if this message comes out? 
> >   > Hopefully google the message and end up at this thread.  What do you
> >   > want to tell them?
> > 
> > Also, the PID is missing from that message.
> 
> That's intentional. I for one hate pids.
> 
> > I had some long running rsync 
> > processes running earlier, maybe the RWE status would have been visible in 
> > /proc/$PID/map, or somewhere else maybe?
> 
> If you think process is still running, /proc/*/maps should have 'rwxp'
> indeed. You can do quick
> 
>   $ grep -e '\[stack\]' /proc/*/maps'
> 
> to find it.

Run as root obviously, or you won't get full picture.


Re: process '/usr/bin/rsync' started with executable stack

2020-06-23 Thread Alexey Dobriyan
On Tue, Jun 23, 2020 at 10:39:26AM -0700, Christian Kujau wrote:
> Hi,
> 
> exactly this[0] happened today, on a 5.6.5 kernel:
> 
>   process '/usr/bin/rsync' started with executable stack
> 
> But I can't reproduce this message, and rsync (v3.2.0, not exactly 
> abandonware) runs several times a day, so to repeat Andrew's questions[0] 
> from last year:
> 
>   > What are poor users supposed to do if this message comes out? 
>   > Hopefully google the message and end up at this thread.  What do you
>   > want to tell them?
> 
> Also, the PID is missing from that message.

That's intentional. I for one hate pids.

> I had some long running rsync 
> processes running earlier, maybe the RWE status would have been visible in 
> /proc/$PID/map, or somewhere else maybe?

If you think process is still running, /proc/*/maps should have 'rwxp'
indeed. You can do quick

$ grep -e '\[stack\]' /proc/*/maps'

to find it.

> $ checksec --format=json --extended --file=`which rsync` | jq
> {
>   "/usr/bin/rsync": {
> "relro": "full",
> "canary": "yes",
> "nx": "no",
> "pie": "yes",
> "clangcfi": "no",
> "safestack": "no",
> "rpath": "no",
> "runpath": "no",
> "symbols": "no",
> "fortify_source": "yes",
> "fortified": "10",
> "fortify-able": "19"
>   }
> }

$ readelf -l /usr/bin/rsync | grep GNU_STACK -A1
  GNU_STACK  0x 0x 
0x
 0x 0x  RW 0x10


Re: [PATCH] linux++, this: rename "struct notifier_block *this"

2020-06-20 Thread Alexey Dobriyan
On Fri, Jun 19, 2020 at 11:37:47AM -0700, Linus Torvalds wrote:
> On Thu, Jun 18, 2020 at 2:06 PM Alexey Dobriyan  wrote:
> >
> > Rename
> > struct notifier_block *this
> > to
> > struct notifier_block *nb
> >
> > "nb" is arguably a better name for notifier block.
> 
> Maybe it's a better name. But it doesn't seem worth it.
> 
> Because C++ reserved words are entirely irrelevant.
> 
> We did this same dance almost three decades ago, and the fact is, C++
> has other reserved words that make it all pointless.

The real problems are "class" and "new" indeed.

> There is no way I will accept the renaming of various "new" variables.

I'm not sending "new".

> We did it, it was bad, we undid it, and we now have a _lot_ more uses
> of 'new' and 'old', and no, we're not changing it for a braindead
> language that isn't relevant to the kernel.
> 
> The fact is, C++ chose bad identifiers to make reserved words.
> 
> If you want to build the kernel with C++, you'd be a lot better off just doing
> 
>/* C++ braindamage */
>#define this __this
>#define new __new
> 
> and deal with that instead.

Can't do this because of placement new.

> Because no, the 'new' renaming will never happen, and while 'this'
> isn't nearly as common or relevant a name, once you have the same
> issue with 'new', what's the point of trying to deal with 'this'?

I'm not sending "new".

There is stuff which can be merge without breaking source compatibility
and readability of C version:

private => priv
virtual => virt
this=> self (in some contexts)

and those which can not. I'm not sending the latter.


Re: [PATCH 3/3] io_uring: add support for zone-append

2020-06-19 Thread Alexey Dobriyan
>   uint64_t val = cqe->res; // assuming non-error here
> 
>   if (cqe->flags & IORING_CQE_F_ZONE_FOO)
>   val |= (cqe->flags >> 16) << 32ULL;

Jens, ULL in shift doesn't do anything for widening the result.
You need

val |= (uint64_t)(cqe->flags >> 16) << 32;


[PATCH] linux++, this: rename "struct notifier_block *this"

2020-06-18 Thread Alexey Dobriyan
Rename
struct notifier_block *this
to
struct notifier_block *nb

"nb" is arguably a better name for notifier block.

Someone used "this" back in the days and everyone else copied.
In nearly 100% of cases it is unused with notable exception of
net/x25/af_ax25.c

Both gcc and g++ accept new name. It would make my adventure of carrying
linux++ patchset slightly less miserable.

Signed-off-by: Alexey Dobriyan 
---

 arch/alpha/kernel/setup.c|2 +-
 arch/mips/kernel/pm-cps.c|2 +-
 arch/mips/sgi-ip22/ip22-reset.c  |2 +-
 arch/mips/sgi-ip32/ip32-reset.c  |2 +-
 arch/mips/txx9/generic/setup_tx4939.c|2 +-
 arch/parisc/kernel/pdc_chassis.c |4 ++--
 arch/powerpc/kernel/setup-common.c   |2 +-
 arch/powerpc/platforms/85xx/mpc85xx_cds.c|2 +-
 arch/powerpc/sysdev/fsl_soc.c|2 +-
 arch/um/drivers/net_kern.c   |2 +-
 arch/um/drivers/vector_kern.c|2 +-
 arch/x86/xen/enlighten.c |2 +-
 arch/xtensa/platforms/iss/setup.c|2 +-
 crypto/algboss.c |2 +-
 drivers/acpi/apei/ghes.c |2 +-
 drivers/acpi/sleep.c |2 +-
 drivers/auxdisplay/charlcd.c |2 +-
 drivers/char/ipmi/ipmi_msghandler.c  |2 +-
 drivers/char/ipmi/ipmi_watchdog.c|2 +-
 drivers/clk/clk-nomadik.c|2 +-
 drivers/clk/rockchip/clk.c   |2 +-
 drivers/clk/samsung/clk-s3c2412.c|2 +-
 drivers/clk/samsung/clk-s3c2443.c|2 +-
 drivers/cpufreq/s3c2416-cpufreq.c|2 +-
 drivers/cpufreq/s5pv210-cpufreq.c|2 +-
 drivers/crypto/chelsio/chtls/chtls_main.c|2 +-
 drivers/edac/altera_edac.c   |4 ++--
 drivers/edac/octeon_edac-pc.c|4 ++--
 drivers/edac/sifive_edac.c   |4 ++--
 drivers/gpu/drm/i915/display/intel_dp.c  |4 ++--
 drivers/hwmon/w83793.c   |2 +-
 drivers/infiniband/core/roce_gid_mgmt.c  |   12 ++--
 drivers/infiniband/hw/mlx4/main.c|4 ++--
 drivers/infiniband/hw/mlx5/main.c|4 ++--
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c   |2 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c|2 +-
 drivers/macintosh/adbhid.c   |2 +-
 drivers/md/md.c  |2 +-
 drivers/mfd/rn5t618.c|2 +-
 drivers/misc/mic/cosm_client/cosm_scif_client.c  |2 +-
 drivers/mmc/core/pwrseq_emmc.c   |4 ++--
 drivers/net/bonding/bond_main.c  |2 +-
 drivers/net/ethernet/broadcom/cnic.c |2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c  |2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |4 ++--
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h |2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c  |4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/lag.c|4 ++--
 drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c |4 ++--
 drivers/net/ethernet/qlogic/qede/qede_main.c |2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |4 ++--
 drivers/net/ethernet/sfc/efx.c   |2 +-
 drivers/net/ethernet/sfc/falcon/efx.c|2 +-
 drivers/net/hamradio/bpqether.c  |2 +-
 drivers/net/hyperv/netvsc_drv.c  |2 +-
 drivers/net/macsec.c |2 +-
 drivers/net/netconsole.c |2 +-
 drivers/net/ppp/pppoe.c  |2 +-
 drivers/net/wan/hdlc.c   |2 +-
 drivers/net/wan/lapbether.c  |2 +-
 drivers/net/wireless/virt_wifi.c |2 +-
 drivers/parisc/power.c   |2 +-
 drivers/platform/x86/intel_telemetry_debugfs.c   |2 +-
 drivers/power/reset/arm-versatile-reboot.c   |2 +-
 drivers/power/reset/at91-reset.c |4 ++--
 drivers/power/reset/axxia-reset.c|2 +-
 drivers/power/reset/brcm-kona-reset.c|2 +-
 drivers/power/reset/brcmstb-reboot.c |2 +-
 drivers/power/reset/gpio-restart.c   |4 ++--
 drivers/power/reset/hisi-reboot.c|2 +-
 drivers/power/re

Re: [PATCH] x86/asm/64: Align start of __clear_user() loop to 16-bytes

2020-06-18 Thread Alexey Dobriyan
On Thu, Jun 18, 2020 at 04:39:35PM +, David Laight wrote:
> From: Alexey Dobriyan 
> > Sent: 18 June 2020 14:17
> ...
> > > > diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
> > > > index fff28c6f73a2..b0dfac3d3df7 100644
> > > > --- a/arch/x86/lib/usercopy_64.c
> > > > +++ b/arch/x86/lib/usercopy_64.c
> > > > @@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, 
> > > > unsigned long size)
> > > > asm volatile(
> > > > "   testq  %[size8],%[size8]\n"
> > > > "   jz 4f\n"
> > > > +   "   .align 16\n"
> > > > "0: movq $0,(%[dst])\n"
> > > > "   addq   $8,%[dst]\n"
> > > > "   decl %%ecx ; jnz   0b\n"
> > >
> > > You can do better that that loop.
> > > Change 'dst' to point to the end of the buffer, negate the count
> > > and divide by 8 and you get:
> > >   "0: movq $0,($[dst],%%ecx,8)\n"
> > >   "   add $1,%%ecx"
> > >   "   jnz 0b\n"
> > > which might run at one iteration per clock especially on cpu that pair
> > > the add and jnz into a single uop.
> > > (You need to use add not inc.)
> > 
> > /dev/zero should probably use REP STOSB etc just like everything else.
> 
> Almost certainly it shouldn't, and neither should anything else.
> Potentially it could use whatever memset() is patched to.
> That MIGHT be 'rep stos' on some cpu variants, but in general
> it is slow.

Yes, that's what I meant: alternatives choosing REP variant.
memset loops are so 21-st century.


Re: [PATCH] x86/asm/64: Align start of __clear_user() loop to 16-bytes

2020-06-18 Thread Alexey Dobriyan
On Thu, Jun 18, 2020 at 10:48:05AM +, David Laight wrote:
> From: Matt Fleming
> > Sent: 18 June 2020 11:20
> > x86 CPUs can suffer severe performance drops if a tight loop, such as
> > the ones in __clear_user(), straddles a 16-byte instruction fetch
> > window, or worse, a 64-byte cacheline. This issues was discovered in the
> > SUSE kernel with the following commit,
> > 
> >   1153933703d9 ("x86/asm/64: Micro-optimize __clear_user() - Use immediate 
> > constants")
> > 
> > which increased the code object size from 10 bytes to 15 bytes and
> > caused the 8-byte copy loop in __clear_user() to be split across a
> > 64-byte cacheline.
> > 
> > Aligning the start of the loop to 16-bytes makes this fit neatly inside
> > a single instruction fetch window again and restores the performance of
> > __clear_user() which is used heavily when reading from /dev/zero.
> > 
> > Here are some numbers from running libmicro's read_z* and pread_z*
> > microbenchmarks which read from /dev/zero:
> > 
> >   Zen 1 (Naples)
> > 
> >   libmicro-file
> > 5.7.0-rc6  5.7.0-rc6
> >   5.7.0-rc6
> > revert-1153933703d9+
> >align16+
> >   Time mean95-pread_z100k   9.9195 (   0.00%)  5.9856 (  39.66%)
> >   5.9938 (  39.58%)
> >   Time mean95-pread_z10k1.1378 (   0.00%)  0.7450 (  34.52%)
> >   0.7467 (  34.38%)
> >   Time mean95-pread_z1k 0.2623 (   0.00%)  0.2251 (  14.18%)
> >   0.2252 (  14.15%)
> >   Time mean95-pread_zw100k  9.9974 (   0.00%)  6.0648 (  39.34%)
> >   6.0756 (  39.23%)
> >   Time mean95-read_z100k9.8940 (   0.00%)  5.9885 (  39.47%)
> >   5.9994 (  39.36%)
> >   Time mean95-read_z10k 1.1394 (   0.00%)  0.7483 (  34.33%)
> >   0.7482 (  34.33%)
> > 
> > Note that this doesn't affect Haswell or Broadwell microarchitectures
> > which seem to avoid the alignment issue by executing the loop straight
> > out of the Loop Stream Detector (verified using perf events).
> 
> Which cpu was affected?
> At least one source (www.agner.org/optimize) implies that both ivy
> bridge and sandy bridge have uop caches that mean (If I've read it
> correctly) the loop shouldn't be affected by the alignment).
> 
> > diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
> > index fff28c6f73a2..b0dfac3d3df7 100644
> > --- a/arch/x86/lib/usercopy_64.c
> > +++ b/arch/x86/lib/usercopy_64.c
> > @@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, unsigned 
> > long size)
> > asm volatile(
> > "   testq  %[size8],%[size8]\n"
> > "   jz 4f\n"
> > +   "   .align 16\n"
> > "0: movq $0,(%[dst])\n"
> > "   addq   $8,%[dst]\n"
> > "   decl %%ecx ; jnz   0b\n"
> 
> You can do better that that loop.
> Change 'dst' to point to the end of the buffer, negate the count
> and divide by 8 and you get:
>   "0: movq $0,($[dst],%%ecx,8)\n"
>   "   add $1,%%ecx"
>   "   jnz 0b\n"
> which might run at one iteration per clock especially on cpu that pair
> the add and jnz into a single uop.
> (You need to use add not inc.)

/dev/zero should probably use REP STOSB etc just like everything else.


Re: [PATCH] mm, slab: Use kmem_cache_zalloc() instead of kmem_cache_alloc() with flag GFP_ZERO.

2020-06-17 Thread Alexey Dobriyan
>  static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
>  {
> - return kmem_cache_alloc(k, flags | __GFP_ZERO);
> + return kmem_cache_zalloc(k, flags);
>  }

That's a good one.


Re: [PATCH] proc/base: Skip assignment to len when there is no error on d_path in do_proc_readlink.

2020-05-27 Thread Alexey Dobriyan
On Wed, May 27, 2020 at 09:41:53AM -0500, Eric W. Biederman wrote:
> Kaitao Cheng  writes:
> 
> > we don't need {len = PTR_ERR(pathname)} when IS_ERR(pathname) is false,
> > it's better to move it into if(IS_ERR(pathname)){}.
> 
> Please look at the generated code.
> 
> I believe you will find that your change will generate worse assembly.

I think patch is good.

Super duper CPUs which speculate thousands instructions forward won't
care but more embedded ones do. Or in other words 1 unnecessary instruction
on common path is more important for slow CPUs than for fast CPUs.

This style separates common path from error path more cleanly.

And finally "len" here is local, so the issue barely deserves mention
but if target is in memory code like this happens:

static struct socket *sockfd_lookup_light(int fd, int *err, int 
*fput_needed)
{
struct fd f = fdget(fd);
struct socket *sock;

*err = -EBADF;
if (f.file) {

// unconditionally write to *err as well.

sock = sock_from_file(f.file, err);
if (likely(sock)) {
*fput_needed = f.flags;
return sock;
}
fdput(f);
}
return NULL;
}

so current style promotes more memory dirtying than necessary.

There is another place like this in sk_buff.c (search for ENOBUFS).

> > pathname = d_path(path, tmp, PAGE_SIZE);
> > -   len = PTR_ERR(pathname);
> > -   if (IS_ERR(pathname))
> > +   if (IS_ERR(pathname)) {
> > +   len = PTR_ERR(pathname);
> > goto out;
> > +   }


Re: [PATCH] proc: fix inode uid/gid writeback race

2019-10-21 Thread Alexey Dobriyan
On Mon, Oct 21, 2019 at 11:24:27AM +0200, Marco Elver wrote:
> On Sun, 20 Oct 2019 at 19:30, Alexey Dobriyan  wrote:
> >
> > (euid, egid) pair is snapshotted correctly from task under RCU,
> > but writeback to inode can be done in any order.
> >
> > Fix by doing writeback under inode->i_lock where necessary
> > (/proc/* , /proc/*/fd/* , /proc/*/map_files/* revalidate).
> >
> > Reported-by: syzbot+e392f8008a294fdf8...@syzkaller.appspotmail.com
> > Signed-off-by: Alexey Dobriyan 
> > ---
> 
> Thanks!
> 
> This certainly fixes the problem of inconsistent uid/gid pair due to
> racing writebacks, as well as the data-race. If that is the only
> purpose of this patch, then from what I see this is fine:
> 
> Acked-by: Marco Elver 
> 
> However, there is probably still a more fundamental problem as outlined below.
> 
> >  fs/proc/base.c |   25 +++--
> >  fs/proc/fd.c   |2 +-
> >  fs/proc/internal.h |2 ++
> >  3 files changed, 26 insertions(+), 3 deletions(-)
> >
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -1743,6 +1743,25 @@ void task_dump_owner(struct task_struct *task, 
> > umode_t mode,
> > *rgid = gid;
> >  }
> >
> > +/* use if inode is live */
> > +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> > + struct inode *inode)
> > +{
> > +   kuid_t uid;
> > +   kgid_t gid;
> > +
> > +   task_dump_owner(task, mode, , );
> > +   /*
> > +* There is no atomic "change all credentials at once" system call,
> > +* guaranteeing more than _some_ snapshot from "struct cred" ends up
> > +* in inode is not possible.
> > +*/
> > +   spin_lock(>i_lock);
> > +   inode->i_uid = uid;
> > +   inode->i_gid = gid;
> > +   spin_unlock(>i_lock);
> 
> 2 tasks can still race here, and the inconsistent scenario I outlined in
> https://lore.kernel.org/linux-fsdevel/328b2905951a7...@google.com/
> could still happen I think (although extremely unlikely). Mainly,
> causality may still be violated -- but I may be wrong as I don't know
> the rest of the code (so please be critical of my suggestion).
> 
> The problem is that if 2 threads race here, one has snapshotted old
> uid/gid, and the other the new uid/gid. Then it is still possible for
> the old uid/gid to be written back after new uid/gid, which would
> result in this bad scenario:
> 
> === TASK 1 ===
> | seteuid(1000);
> | seteuid(0);
> | stat("/proc/", );
> | assert(fstat.st_uid == 0);  // fails
> === TASK 2 ===
> | stat("/proc/", ...);
> 
> AFAIK it's not something that can easily be fixed without some
> timestamp on the uid/gid pair (timestamp updated after setuid/seteuid
> etc) obtained in the RCU reader critical section. Then in this
> critical section, uid/gid should only be written if the current pair
> in inode is older according to snapshot timestamp.

This probably requires bloating "struct cred" with generation number.
I'm not sure what to do other than cc our credential overlords.

> > +}
> > +
> >  struct inode *proc_pid_make_inode(struct super_block * sb,
> >   struct task_struct *task, umode_t mode)
> >  {
> > @@ -1769,6 +1788,7 @@ struct inode *proc_pid_make_inode(struct super_block 
> > * sb,
> > if (!ei->pid)
> > goto out_unlock;
> >
> > +   /* fresh inode -- no races */
> > task_dump_owner(task, 0, >i_uid, >i_gid);
> > security_task_to_inode(task, inode);
> >
> > @@ -1802,6 +1822,7 @@ int pid_getattr(const struct path *path, struct kstat 
> > *stat,
> >  */
> > return -ENOENT;
> > }
> > +   /* "struct kstat" is thread local, atomic snapshot is 
> > enough */
> > task_dump_owner(task, inode->i_mode, >uid, 
> > >gid);
> > }
> > rcu_read_unlock();
> > @@ -1815,7 +1836,7 @@ int pid_getattr(const struct path *path, struct kstat 
> > *stat,
> >   */
> >  void pid_update_inode(struct task_struct *task, struct inode *inode)
> >  {
> > -   task_dump_owner(task, inode->i_mode, >i_uid, >i_gid);
> > +   task_dump_owner_to_inode(task, inode->i_mode, inode);
> >
> > inode->i_mode &= ~(S_ISUID | S_ISGID);
> >   

Re: [BUGFIX PATCH v2 1/5] selftests: proc: Make va_max 1GB on 32bit arch

2019-10-21 Thread Alexey Dobriyan
On Mon, Oct 21, 2019 at 05:28:09PM +0900, Masami Hiramatsu wrote:
> Currently proc-self-map-files-002.c sets va_max (max test address
> of user virtual address) to 4GB, but it is too big for 32bit
> arch and 1UL << 32 is overflow on 32bit long.
> 
> Make va_max 1GB on 32bit arch like i386 and arm.

> +#if __BITS_PER_LONG == 32
> +# define VA_MAX (1UL << 30)
> +#elif __BITS_PER_LONG == 64
> +# define VA_MAX (1UL << 32)
> +#else
> +# define VA_MAX 0
> +#endif
> +
>  int main(void)
>  {
>   const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
> - const unsigned long va_max = 1UL << 32;
> + const unsigned long va_max = VA_MAX;

No, just make it like 1MB unconditionally.
This is not intended to cover all address space, just large enough part
(larger than reasonable vm.mmap_min_addr)


Re: [PATCH] fs: proc: Clarify warnings for invalid proc dir names

2019-10-21 Thread Alexey Dobriyan
On Sun, Oct 20, 2019 at 06:17:42PM -0400, Joel Savitz wrote:
> When one attempts to create a directory in /proc with an invalid name,
> such as one in a subdirectory that doesn't exist, one with a name beyond
> 256 characters, or a reserved name such as '.' or '..', the kernel
> throws a warning message that looks like this:
> 
>   [ 7913.252558] name 'invalid_name'

Yes, the important part is filename:line which uniquely identifies
the issue.

> This warning message is nearly the same for all invalid cases, including
> the removal of a nonexistent directory. This patch clarifies the warning
> message and differentiates the invalid creation/removal cases so as to
> allow the user to more quickly understand their mistake.

> --- a/fs/proc/generic.c
> +++ b/fs/proc/generic.c
> @@ -173,7 +173,7 @@ static int __xlate_proc_name(const char *name, struct 
> proc_dir_entry **ret,
>   len = next - cp;
>   de = pde_subdir_find(de, cp, len);
>   if (!de) {
> - WARN(1, "name '%s'\n", name);
> + WARN(1, "invalid proc dir name '%s'\n", name);

Wrong string anyway, this is nonexistent name, directory or not.

> - WARN(1, "name len %u\n", qstr.len);
> + WARN(1, "invalid proc dir name len %u\n", qstr.len);
>   return NULL;
>   }
>   if (qstr.len == 1 && fn[0] == '.') {
> - WARN(1, "name '.'\n");
> + WARN(1, "invalid proc dir name '.'\n");
>   return NULL;
>   }
>   if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') {
> - WARN(1, "name '..'\n");
> + WARN(1, "invalid proc dir name '..'\n");
>   return NULL;
>   }
>   if (*parent == _root && name_to_int() != ~0U) {
> @@ -402,7 +402,7 @@ static struct proc_dir_entry *__proc_create(struct 
> proc_dir_entry **parent,
>   return NULL;
>   }
>   if (is_empty_pde(*parent)) {
> - WARN(1, "attempt to add to permanently empty directory");
> + WARN(1, "attempt to add to permanently empty directory in 
> proc");

"proc" will be spilled over all backtrace.

> @@ -670,7 +670,7 @@ void remove_proc_entry(const char *name, struct 
> proc_dir_entry *parent)
>   rb_erase(>subdir_node, >subdir);
>   write_unlock(_subdir_lock);
>   if (!de) {
> - WARN(1, "name '%s'\n", name);
> + WARN(1, "unable to remove nonexistent proc dir '%s'\n", name);

I'm not sure if such chatty strings are necessary.


  1   2   3   4   5   6   7   8   9   10   >